From d4094c84e70012550ec86d7fd3a0b497b289eb9a Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 28 May 2021 10:08:46 +0200 Subject: [PATCH 01/95] Fix for detecting left recursion in rules like: indexing_list = indexing_element | indexing_list ',' indexing_element . --- src/Tab.cpp | 12 ++++++------ src/Tab.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 731ec12..9b6b152 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -886,17 +886,17 @@ bool Tab::GrammarOk() { //--------------- check for circular productions ---------------------- -void Tab::GetSingles(Node *p, ArrayList *singles) { +void Tab::GetSingles(Node *p, ArrayList *singles, Node *rule) { if (p == NULL) return; // end of graph if (p->typ == Node::nt) { - if (p->up || DelGraph(p->next)) singles->Add(p->sym); + if (p->up || DelGraph(p->next) || p->sym->graph == rule) singles->Add(p->sym); } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { if (p->up || DelGraph(p->next)) { - GetSingles(p->sub, singles); - if (p->typ == Node::alt) GetSingles(p->down, singles); + GetSingles(p->sub, singles, rule); + if (p->typ == Node::alt) GetSingles(p->down, singles, rule); } } - if (!p->up && DelNode(p)) GetSingles(p->next, singles); + if (!p->up && DelNode(p)) GetSingles(p->next, singles, rule); } bool Tab::NoCircularProductions() { @@ -909,7 +909,7 @@ bool Tab::NoCircularProductions() { for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); ArrayList *singles = new ArrayList(); - GetSingles(sym->graph, singles); // get nonterminals s such that sym-->s + GetSingles(sym->graph, singles, sym->graph); // get nonterminals s such that sym-->s Symbol *s; for (int j=0; jCount; j++) { s = (Symbol*)((*singles)[j]); diff --git a/src/Tab.h b/src/Tab.h index ae788aa..bf7e49d 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -200,7 +200,7 @@ class Tab { } }; - void GetSingles(Node *p, ArrayList *singles); + void GetSingles(Node *p, ArrayList *singles, Node 
*rule); bool NoCircularProductions(); //--------------- check for LL(1) errors ---------------------- From d890d7838874f361bfe9349a7773b856d1754a3c Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 1 Jun 2021 10:30:55 +0200 Subject: [PATCH 02/95] Fix segfault due to bad format string parameters --- src/Tab.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 9b6b152..b76c6c3 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -100,7 +100,7 @@ int Tab::Num(Node *p) { void Tab::PrintSym(Symbol *sym) { wchar_t *paddedName = Name(sym->name); - fwprintf(trace, L"%3d %14s %ls", sym->n, paddedName, nTyp[sym->typ]); + fwprintf(trace, L"%3d %14s %s", sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); if (sym->attrPos==NULL) fwprintf(trace, L" false "); else fwprintf(trace, L" true "); @@ -110,7 +110,7 @@ void Tab::PrintSym(Symbol *sym) { } else fwprintf(trace, L" "); - fwprintf(trace, L"%5d %ls\n", sym->line, tKind[sym->tokenKind]); + fwprintf(trace, L"%5d %s\n", sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { @@ -343,7 +343,7 @@ void Tab::PrintNodes() { Node *p; for (int i=0; iCount; i++) { p = (Node*)((*nodes)[i]); - fwprintf(trace, L"%4d %ls ", p->n, (nTyp[p->typ])); + fwprintf(trace, L"%4d %s ", p->n, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); fwprintf(trace, L"%12s ", paddedName); From 93fc7f9387ac06f402bd491d8a52066b7e8567ae Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 09:28:00 +0200 Subject: [PATCH 03/95] Replace instantiations using 'new' with RAII stack instances --- src/Coco.cpp | 56 +++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Coco.cpp b/src/Coco.cpp index f860e9f..468e818 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -90,57 +90,55 @@ int main(int argc, char *argv_[]) { wchar_t* file = coco_string_create(srcName); wchar_t* srcDir = 
coco_string_create(srcName, 0, pos+1); - Coco::Scanner *scanner = new Coco::Scanner(file); - Coco::Parser *parser = new Coco::Parser(scanner); + Coco::Scanner scanner(file); + Coco::Parser parser(&scanner); traceFileName = coco_string_create_append(srcDir, L"trace.txt"); chTrFileName = coco_string_create_char(traceFileName); - if ((parser->trace = fopen(chTrFileName, "w")) == NULL) { + if ((parser.trace = fopen(chTrFileName, "w")) == NULL) { wprintf(L"-- could not open %hs\n", chTrFileName); exit(1); } - parser->tab = new Coco::Tab(parser); - parser->dfa = new Coco::DFA(parser); - parser->pgen = new Coco::ParserGen(parser); + Coco::Tab tab(&parser); + tab.srcName = coco_string_create(srcName); + tab.srcDir = coco_string_create(srcDir); + tab.nsName = nsName ? coco_string_create(nsName) : NULL; + tab.frameDir = coco_string_create(frameDir); + tab.outDir = coco_string_create(outDir != NULL ? outDir : srcDir); + tab.emitLines = emitLines; + if (ddtString != NULL) tab.SetDDT(ddtString); + parser.tab = &tab; - parser->tab->srcName = coco_string_create(srcName); - parser->tab->srcDir = coco_string_create(srcDir); - parser->tab->nsName = nsName ? coco_string_create(nsName) : NULL; - parser->tab->frameDir = coco_string_create(frameDir); - parser->tab->outDir = coco_string_create(outDir != NULL ? 
outDir : srcDir); - parser->tab->emitLines = emitLines; + Coco::DFA dfa(&parser); + parser.dfa = &dfa; + Coco::ParserGen pgen(&parser); + parser.pgen = &pgen; - if (ddtString != NULL) parser->tab->SetDDT(ddtString); + parser.Parse(); - parser->Parse(); - - fclose(parser->trace); + fclose(parser.trace); // obtain the FileSize - parser->trace = fopen(chTrFileName, "r"); - fseek(parser->trace, 0, SEEK_END); - long fileSize = ftell(parser->trace); - fclose(parser->trace); + parser.trace = fopen(chTrFileName, "r"); + fseek(parser.trace, 0, SEEK_END); + long fileSize = ftell(parser.trace); + fclose(parser.trace); if (fileSize == 0) { remove(chTrFileName); } else { wprintf(L"trace output is in %hs\n", chTrFileName); } - wprintf(L"%d errors detected\n", parser->errors->count); - if (parser->errors->count != 0) { + coco_string_delete(file); + coco_string_delete(srcDir); + + wprintf(L"%d errors detected\n", parser.errors->count); + if (parser.errors->count != 0) { exit(1); } - delete parser->pgen; - delete parser->dfa; - delete parser->tab; - delete parser; - delete scanner; - coco_string_delete(file); - coco_string_delete(srcDir); } else { wprintf(L"Usage: Coco Grammar.ATG {Option}\n"); wprintf(L"Options:\n"); From 375e702d1e09a5164f4d8fb0021519e021b4429d Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 09:33:08 +0200 Subject: [PATCH 04/95] Avoid unnecessary string copy/leak in 'Comment' creation --- src/Comment.cpp | 8 +++++--- src/Comment.h | 2 +- src/DFA.cpp | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Comment.cpp b/src/Comment.cpp index 7c91746..88a5865 100644 --- a/src/Comment.cpp +++ b/src/Comment.cpp @@ -31,15 +31,17 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { -Comment::Comment(wchar_t* start, wchar_t* stop, bool nested) { - this->start = coco_string_create(start); - this->stop = coco_string_create(stop); +Comment::Comment(wchar_t* start, wchar_t* stop, bool nested, bool needCopy) { + this->start = needCopy ? coco_string_create(start) : start; + this->stop = needCopy ? coco_string_create(stop) : stop; this->nested = nested; + this->next = NULL; } Comment::~Comment() { coco_string_delete(start); coco_string_delete(stop); + delete next; } }; // namespace diff --git a/src/Comment.h b/src/Comment.h index ffc1c82..d3ce323 100644 --- a/src/Comment.h +++ b/src/Comment.h @@ -41,7 +41,7 @@ class Comment // info about comment syntax bool nested; Comment *next; - Comment(wchar_t* start, wchar_t* stop, bool nested); + Comment(wchar_t* start, wchar_t* stop, bool nested, bool needCopy=true); virtual ~Comment(); }; diff --git a/src/DFA.cpp b/src/DFA.cpp index cf4414f..d16c798 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -497,7 +497,7 @@ wchar_t* DFA::CommentStr(Node *p) { void DFA::NewComment(Node *from, Node *to, bool nested) { - Comment *c = new Comment(CommentStr(from), CommentStr(to), nested); + Comment *c = new Comment(CommentStr(from), CommentStr(to), nested, false); c->next = firstComment; firstComment = c; } From 8bcf91868590dc51adb52c33ec8aeba2570e7a9c Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 09:48:44 +0200 Subject: [PATCH 05/95] Replace instatiation with 'new' by RAII, also remove unnecessary string copies for static strings --- src/Coco.atg | 2 +- src/Coco.cpp | 4 +- src/DFA.cpp | 2 +- src/Parser.cpp | 124 +++++++++++++++++++++++----------------------- src/Parser.frame | 20 ++++---- src/Parser.h | 2 +- src/ParserGen.cpp | 10 ++-- src/Tab.cpp | 2 +- 8 files changed, 84 insertions(+), 82 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index b63b9a2..d9279e9 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -178,7 +178,7 @@ Coco (. 
Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); - if (errors->count == 0) { + if (errors.count == 0) { wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); diff --git a/src/Coco.cpp b/src/Coco.cpp index 468e818..2d2851b 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -134,8 +134,8 @@ int main(int argc, char *argv_[]) { coco_string_delete(file); coco_string_delete(srcDir); - wprintf(L"%d errors detected\n", parser.errors->count); - if (parser.errors->count != 0) { + wprintf(L"%d errors detected\n", parser.errors.count); + if (parser.errors.count != 0) { exit(1); } diff --git a/src/DFA.cpp b/src/DFA.cpp index d16c798..c241fcd 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -852,7 +852,7 @@ void DFA::WriteScanner() { DFA::DFA(Parser *parser) { this->parser = parser; tab = parser->tab; - errors = parser->errors; + errors = &parser->errors; trace = parser->trace; firstState = NULL; lastState = NULL; lastStateNr = -1; firstState = NewState(); diff --git a/src/Parser.cpp b/src/Parser.cpp index ce0f16e..081156d 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -36,12 +36,12 @@ namespace Coco { void Parser::SynErr(int n) { - if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { - if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + if (errDist >= minErrDist) errors.Error(t->line, t->col, msg); errDist = 0; } @@ -209,7 +209,7 @@ void Parser::Coco() { tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); - if (errors->count == 0) { + if (errors.count == 0) { wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); @@ -788,7 +788,6 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; errDist = minErrDist; this->scanner = scanner; - errors = new Errors(); } bool Parser::StartOf(int s) { @@ 
-826,8 +825,9 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); - delete errors; delete dummyToken; + coco_string_delete(noString); + coco_string_delete(tokenString); } Errors::Errors() { @@ -835,71 +835,71 @@ Errors::Errors() { } void Errors::SynErr(int line, int col, int n) { - wchar_t* s; + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; switch (n) { - case 0: s = coco_string_create(L"EOF expected"); break; - case 1: s = coco_string_create(L"ident expected"); break; - case 2: s = coco_string_create(L"number expected"); break; - case 3: s = coco_string_create(L"string expected"); break; - case 4: s = coco_string_create(L"badString expected"); break; - case 5: s = coco_string_create(L"char expected"); break; - case 6: s = coco_string_create(L"\"COMPILER\" expected"); break; - case 7: s = coco_string_create(L"\"IGNORECASE\" expected"); break; - case 8: s = coco_string_create(L"\"CHARACTERS\" expected"); break; - case 9: s = coco_string_create(L"\"TOKENS\" expected"); break; - case 10: s = coco_string_create(L"\"PRAGMAS\" expected"); break; - case 11: s = coco_string_create(L"\"COMMENTS\" expected"); break; - case 12: s = coco_string_create(L"\"FROM\" expected"); break; - case 13: s = coco_string_create(L"\"TO\" expected"); break; - case 14: s = coco_string_create(L"\"NESTED\" expected"); break; - case 15: s = coco_string_create(L"\"IGNORE\" expected"); break; - case 16: s = coco_string_create(L"\"PRODUCTIONS\" expected"); break; - case 17: s = coco_string_create(L"\"=\" expected"); break; - case 18: s = coco_string_create(L"\".\" expected"); break; - case 19: s = coco_string_create(L"\"END\" expected"); break; - case 20: s = coco_string_create(L"\"+\" expected"); break; - case 21: s = coco_string_create(L"\"-\" expected"); break; - case 22: s = coco_string_create(L"\"..\" expected"); break; - case 23: s = coco_string_create(L"\"ANY\" expected"); break; - case 24: s = 
coco_string_create(L"\"<\" expected"); break; - case 25: s = coco_string_create(L"\">\" expected"); break; - case 26: s = coco_string_create(L"\"<.\" expected"); break; - case 27: s = coco_string_create(L"\".>\" expected"); break; - case 28: s = coco_string_create(L"\"|\" expected"); break; - case 29: s = coco_string_create(L"\"WEAK\" expected"); break; - case 30: s = coco_string_create(L"\"(\" expected"); break; - case 31: s = coco_string_create(L"\")\" expected"); break; - case 32: s = coco_string_create(L"\"[\" expected"); break; - case 33: s = coco_string_create(L"\"]\" expected"); break; - case 34: s = coco_string_create(L"\"{\" expected"); break; - case 35: s = coco_string_create(L"\"}\" expected"); break; - case 36: s = coco_string_create(L"\"SYNC\" expected"); break; - case 37: s = coco_string_create(L"\"IF\" expected"); break; - case 38: s = coco_string_create(L"\"CONTEXT\" expected"); break; - case 39: s = coco_string_create(L"\"(.\" expected"); break; - case 40: s = coco_string_create(L"\".)\" expected"); break; - case 41: s = coco_string_create(L"??? 
expected"); break; - case 42: s = coco_string_create(L"this symbol not expected in Coco"); break; - case 43: s = coco_string_create(L"this symbol not expected in TokenDecl"); break; - case 44: s = coco_string_create(L"invalid TokenDecl"); break; - case 45: s = coco_string_create(L"invalid AttrDecl"); break; - case 46: s = coco_string_create(L"invalid SimSet"); break; - case 47: s = coco_string_create(L"invalid Sym"); break; - case 48: s = coco_string_create(L"invalid Term"); break; - case 49: s = coco_string_create(L"invalid Factor"); break; - case 50: s = coco_string_create(L"invalid Attribs"); break; - case 51: s = coco_string_create(L"invalid TokenFactor"); break; + case 0: s = L"EOF expected"; break; + case 1: s = L"ident expected"; break; + case 2: s = L"number expected"; break; + case 3: s = L"string expected"; break; + case 4: s = L"badString expected"; break; + case 5: s = L"char expected"; break; + case 6: s = L"\"COMPILER\" expected"; break; + case 7: s = L"\"IGNORECASE\" expected"; break; + case 8: s = L"\"CHARACTERS\" expected"; break; + case 9: s = L"\"TOKENS\" expected"; break; + case 10: s = L"\"PRAGMAS\" expected"; break; + case 11: s = L"\"COMMENTS\" expected"; break; + case 12: s = L"\"FROM\" expected"; break; + case 13: s = L"\"TO\" expected"; break; + case 14: s = L"\"NESTED\" expected"; break; + case 15: s = L"\"IGNORE\" expected"; break; + case 16: s = L"\"PRODUCTIONS\" expected"; break; + case 17: s = L"\"=\" expected"; break; + case 18: s = L"\".\" expected"; break; + case 19: s = L"\"END\" expected"; break; + case 20: s = L"\"+\" expected"; break; + case 21: s = L"\"-\" expected"; break; + case 22: s = L"\"..\" expected"; break; + case 23: s = L"\"ANY\" expected"; break; + case 24: s = L"\"<\" expected"; break; + case 25: s = L"\">\" expected"; break; + case 26: s = L"\"<.\" expected"; break; + case 27: s = L"\".>\" expected"; break; + case 28: s = L"\"|\" expected"; break; + case 29: s = L"\"WEAK\" expected"; break; + case 30: s = L"\"(\" 
expected"; break; + case 31: s = L"\")\" expected"; break; + case 32: s = L"\"[\" expected"; break; + case 33: s = L"\"]\" expected"; break; + case 34: s = L"\"{\" expected"; break; + case 35: s = L"\"}\" expected"; break; + case 36: s = L"\"SYNC\" expected"; break; + case 37: s = L"\"IF\" expected"; break; + case 38: s = L"\"CONTEXT\" expected"; break; + case 39: s = L"\"(.\" expected"; break; + case 40: s = L"\".)\" expected"; break; + case 41: s = L"??? expected"; break; + case 42: s = L"this symbol not expected in Coco"; break; + case 43: s = L"this symbol not expected in TokenDecl"; break; + case 44: s = L"invalid TokenDecl"; break; + case 45: s = L"invalid AttrDecl"; break; + case 46: s = L"invalid SimSet"; break; + case 47: s = L"invalid Sym"; break; + case 48: s = L"invalid Term"; break; + case 49: s = L"invalid Factor"; break; + case 50: s = L"invalid Attribs"; break; + case 51: s = L"invalid TokenFactor"; break; default: { - wchar_t format[20]; - coco_swprintf(format, 20, L"error %d", n); - s = coco_string_create(format); + coco_swprintf(format, format_size, L"error %d", n); + s = format; } break; } wprintf(L"-- line %d col %d: %ls\n", line, col, s); - coco_string_delete(s); count++; } diff --git a/src/Parser.frame b/src/Parser.frame index 85bd8b5..a61dd7b 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -70,7 +70,7 @@ private: public: Scanner *scanner; - Errors *errors; + Errors errors; Token *t; // last recognized token Token *la; // lookahead token @@ -106,12 +106,12 @@ Parser.cpp Specification -->namespace_open void Parser::SynErr(int n) { - if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { - if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + if (errDist >= minErrDist) errors.Error(t->line, t->col, msg); errDist = 0; } @@ -266,7 +266,6 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; 
errDist = minErrDist; this->scanner = scanner; - errors = new Errors(); } bool Parser::StartOf(int s) { @@ -280,8 +279,9 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); - delete errors; delete dummyToken; + coco_string_delete(noString); + coco_string_delete(tokenString); } Errors::Errors() { @@ -289,19 +289,19 @@ Errors::Errors() { } void Errors::SynErr(int line, int col, int n) { - wchar_t* s; + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; switch (n) { -->errors default: { - wchar_t format[20]; - coco_swprintf(format, 20, L"error %d", n); - s = coco_string_create(format); + coco_swprintf(format, format_size, L"error %d", n); + s = format; } break; } wprintf(L"-- line %d col %d: %ls\n", line, col, s); - coco_string_delete(s); count++; } diff --git a/src/Parser.h b/src/Parser.h index c02102d..3087562 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -80,7 +80,7 @@ class Parser { public: Scanner *scanner; - Errors *errors; + Errors errors; Token *t; // last recognized token Token *la; // lookahead token diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index ee8e938..f0d7e5e 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -120,12 +120,14 @@ void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { errorNr++; const int formatLen = 1000; wchar_t format[formatLen]; - coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = coco_string_create(L\"", errorNr); + coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = L\"", errorNr); coco_string_merge(err, format); if (errTyp == tErr) { if (sym->name[0] == L'"') { - coco_swprintf(format, formatLen, L"%ls expected", tab->Escape(sym->name)); + wchar_t *se = tab->Escape(sym->name); + coco_swprintf(format, formatLen, L"%ls expected", se); coco_string_merge(err, format); + coco_string_delete(se); } else { coco_swprintf(format, formatLen, L"%ls expected", sym->name); coco_string_merge(err, format); @@ -137,7 +139,7 @@ void ParserGen::GenErrorMsg 
(int errTyp, Symbol *sym) { coco_swprintf(format, formatLen, L"this symbol not expected in %ls", sym->name); coco_string_merge(err, format); } - coco_swprintf(format, formatLen, L"\"); break;\n"); + coco_swprintf(format, formatLen, L"\"; break;\n"); coco_string_merge(err, format); } @@ -473,7 +475,7 @@ ParserGen::ParserGen (Parser *parser) { altErr = 1; syncErr = 2; tab = parser->tab; - errors = parser->errors; + errors = &parser->errors; trace = parser->trace; buffer = parser->scanner->buffer; errorNr = -1; diff --git a/src/Tab.cpp b/src/Tab.cpp index b76c6c3..68da52e 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -54,7 +54,7 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; - errors = parser->errors; + errors = &parser->errors; eofSy = NewSym(Node::t, L"EOF", 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); literals = new HashTable(); From 35a18bf7214bdb88b4939a4ee6275b9648d95049 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:08:27 +0200 Subject: [PATCH 06/95] Replace instatiation with 'new' by RAII --- src/ArrayList.cpp | 4 + src/ArrayList.h | 1 + src/Coco.atg | 6 +- src/DFA.cpp | 18 ++--- src/Parser.cpp | 6 +- src/ParserGen.cpp | 77 +++++++++--------- src/ParserGen.h | 2 +- src/Tab.cpp | 200 ++++++++++++++++++++++------------------------ src/Tab.h | 12 +-- 9 files changed, 162 insertions(+), 164 deletions(-) diff --git a/src/ArrayList.cpp b/src/ArrayList.cpp index 0f50ddf..9e94c05 100644 --- a/src/ArrayList.cpp +++ b/src/ArrayList.cpp @@ -41,6 +41,10 @@ ArrayList::~ArrayList() { delete [] Data; } +void ArrayList::Clear() { + Count = 0; +} + void ArrayList::Add(void *value) { if (Count < Capacity) { Data[Count] = value; diff --git a/src/ArrayList.h b/src/ArrayList.h index c91c032..b94401d 100644 --- a/src/ArrayList.h +++ b/src/ArrayList.h @@ -39,6 +39,7 @@ class ArrayList void Add(void *value); void Remove(void *value); + void Clear(); void* operator[](int index); int Count; diff --git a/src/Coco.atg b/src/Coco.atg 
index d9279e9..5c03625 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -287,9 +287,9 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string - if ((*(tab->literals))[tokenString] != NULL) + if (tab->literals[tokenString] != NULL) SemErr(L"token string declared twice"); - tab->literals->Set(tokenString, sym); + tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } .) @@ -358,7 +358,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; ] Sym (. Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) - sym = (Symbol*)((*(tab->literals))[name]); + sym = (Symbol*)tab->literals[name]; bool undef = (sym == NULL); if (undef) { if (kind == id) diff --git a/src/DFA.cpp b/src/DFA.cpp index c241fcd..d0d773e 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -168,7 +168,7 @@ void DFA::Step(State *from, Node *p, BitArray *stepped) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); if (p->state != from) { - BitArray *newStepped = new BitArray(tab->nodes->Count); + BitArray *newStepped = new BitArray(tab->nodes.Count); Step(p->state, p, newStepped); delete newStepped; } @@ -211,7 +211,7 @@ void DFA::FindTrans (Node *p, bool start, BitArray *marked) { if (p == NULL || (*marked)[p->n]) return; marked->Set(p->n, true); if (start) { - BitArray *stepped = new BitArray(tab->nodes->Count); + BitArray *stepped = new BitArray(tab->nodes.Count); Step(p->state, p, stepped); // start of group of equally numbered nodes delete stepped; } @@ -234,9 +234,9 @@ void DFA::ConvertToStates(Node *p, Symbol *sym) { return; } NumberNodes(curGraph, firstState, true); - FindTrans(curGraph, true, new BitArray(tab->nodes->Count)); + FindTrans(curGraph, true, new BitArray(tab->nodes.Count)); if (p->typ == Node::iter) { - BitArray *stepped = new BitArray(tab->nodes->Count); 
+ BitArray *stepped = new BitArray(tab->nodes.Count); Step(firstState, p, stepped); delete stepped; } @@ -388,7 +388,7 @@ void DFA::PrintStates() { for (Action *action = state->firstAction; action != NULL; action = action->next) { if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); - if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)(*tab->classes)[action->sym])->name); + if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)tab->classes[action->sym])->name); else fwprintf(trace, L"%3s", Ch((wchar_t)action->sym)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, L"%3d", targ->state->nr); @@ -583,7 +583,7 @@ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literal if (('a'<=sym->name[0] && sym->name[0]<='z') || ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0]) - Iterator *iter = tab->literals->GetIterator(); + Iterator *iter = tab->literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); if (e->val == sym) { return e->key; } @@ -596,8 +596,8 @@ void DFA::GenLiterals () { Symbol *sym; ArrayList *ts[2]; - ts[0] = tab->terminals; - ts[1] = tab->pragmas; + ts[0] = &tab->terminals; + ts[1] = &tab->pragmas; for (int i = 0; i < 2; ++i) { for (int j = 0; j < ts[i]->Count; j++) { @@ -791,7 +791,7 @@ void DFA::WriteScanner() { nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(L"-->declarations"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count - 1); + fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals.Count - 1); fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n); WriteStartTab(); GenLiterals(); diff --git a/src/Parser.cpp b/src/Parser.cpp index 081156d..f6dc900 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -266,9 +266,9 @@ void Parser::TokenDecl(int typ) { if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single 
string - if ((*(tab->literals))[tokenString] != NULL) + if (tab->literals[tokenString] != NULL) SemErr(L"token string declared twice"); - tab->literals->Set(tokenString, sym); + tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } @@ -498,7 +498,7 @@ void Parser::Factor(Graph* &g) { Sym(name, kind); Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) - sym = (Symbol*)((*(tab->literals))[name]); + sym = (Symbol*)tab->literals[name]; bool undef = (sym == NULL); if (undef) { if (kind == id) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index f0d7e5e..6fa65bc 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -46,7 +46,7 @@ bool ParserGen::UseSwitch (Node *p) { BitArray *s1, *s2; if (p->typ != Node::alt) return false; int nAlts = 0; - s1 = new BitArray(tab->terminals->Count); + s1 = new BitArray(tab->terminals.Count); while (p != NULL) { s2 = tab->Expected0(p->sub, curSy); // must not optimize with switch statement, if there are ll1 warnings @@ -59,7 +59,7 @@ bool ParserGen::UseSwitch (Node *p) { } return nAlts > 5; } - + int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { if (nsName == NULL || coco_string_length(nsName) == 0) { return 0; @@ -144,10 +144,10 @@ void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { } int ParserGen::NewCondSet (BitArray *s) { - for (int i = 1; i < symSet->Count; i++) // skip symSet[0] (reserved for union of SYNC sets) - if (Sets::Equals(s, (BitArray*)(*symSet)[i])) return i; - symSet->Add(s->Clone()); - return symSet->Count - 1; + for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) + if (Sets::Equals(s, (BitArray*)symSet[i])) return i; + symSet.Add(s->Clone()); + return symSet.Count - 1; } void ParserGen::GenCond (BitArray *s, Node *p) { @@ -157,8 +157,8 @@ void ParserGen::GenCond (BitArray *s, Node *p) { if (n == 0) fwprintf(gen, L"false"); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; - for (int i=0; 
iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (int i=0; iterminals.Count; i++) { + sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { fwprintf(gen, L"la->kind == "); WriteSymbolOrCode(gen, sym); @@ -173,8 +173,8 @@ void ParserGen::GenCond (BitArray *s, Node *p) { void ParserGen::PutCaseLabels (BitArray *s) { Symbol *sym; - for (int i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (int i=0; iterminals.Count; i++) { + sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { fwprintf(gen, L"case "); WriteSymbolOrCode(gen, sym); @@ -211,7 +211,7 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { } if (p->typ == Node::any) { Indent(indent); int acc = Sets::Elements(p->set); - if (tab->terminals->Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { + if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here fwprintf(gen, L"Get();\n"); } else { @@ -276,7 +276,7 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { fwprintf(gen, L"WeakSeparator("); WriteSymbolOrCode(gen, p2->sym); fwprintf(gen, L",%d,%d) ", NewCondSet(s1), NewCondSet(s2)); - s1 = new BitArray(tab->terminals->Count); // for inner structure + s1 = new BitArray(tab->terminals.Count); // for inner structure if (p2->up || p2->next == NULL) p2 = NULL; else p2 = p2->next; } else { s1 = tab->First(p2); @@ -308,8 +308,8 @@ void ParserGen::GenTokensHeader() { fwprintf(gen, L"\tenum {\n"); // tokens - for (i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (i=0; iterminals.Count; i++) { + sym = (Symbol*)tab->terminals[i]; if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } @@ -319,11 +319,11 @@ void ParserGen::GenTokensHeader() { } // pragmas - for (i=0; ipragmas->Count; i++) { + for (i=0; ipragmas.Count; i++) { if (isFirst) { 
isFirst = false; } else { fwprintf(gen , L",\n"); } - sym = (Symbol*)((*(tab->pragmas))[i]); + sym = (Symbol*)tab->pragmas[i]; fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); } @@ -332,8 +332,8 @@ void ParserGen::GenTokensHeader() { void ParserGen::GenCodePragmas() { Symbol *sym; - for (int i=0; ipragmas->Count; i++) { - sym = (Symbol*)((*(tab->pragmas))[i]); + for (int i=0; ipragmas.Count; i++) { + sym = (Symbol*)tab->pragmas[i]; fwprintf(gen, L"\t\tif (la->kind == "); WriteSymbolOrCode(gen, sym); fwprintf(gen, L") {\n"); @@ -352,8 +352,8 @@ void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { void ParserGen::GenProductionsHeader() { Symbol *sym; - for (int i=0; inonterminals->Count; i++) { - sym = (Symbol*)((*(tab->nonterminals))[i]); + for (int i=0; inonterminals.Count; i++) { + sym = (Symbol*)tab->nonterminals[i]; curSy = sym; fwprintf(gen, L"\tvoid %ls(", sym->name); CopySourcePart(sym->attrPos, 0); @@ -363,33 +363,33 @@ void ParserGen::GenProductionsHeader() { void ParserGen::GenProductions() { Symbol *sym; - for (int i=0; inonterminals->Count; i++) { - sym = (Symbol*)((*(tab->nonterminals))[i]); + for (int i=0; inonterminals.Count; i++) { + sym = (Symbol*)tab->nonterminals[i]; curSy = sym; fwprintf(gen, L"void Parser::%ls(", sym->name); CopySourcePart(sym->attrPos, 0); fwprintf(gen, L") {\n"); CopySourcePart(sym->semPos, 2); - GenCode(sym->graph, 2, new BitArray(tab->terminals->Count)); + GenCode(sym->graph, 2, new BitArray(tab->terminals.Count)); fwprintf(gen, L"}\n"); fwprintf(gen, L"\n"); } } void ParserGen::InitSets() { - fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet->Count, tab->terminals->Count+1); + fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet.Count, tab->terminals.Count+1); - for (int i = 0; i < symSet->Count; i++) { - BitArray *s = (BitArray*)(*symSet)[i]; + for (int i = 0; i < symSet.Count; i++) { + BitArray *s = (BitArray*)symSet[i]; fwprintf(gen, L"\t\t{"); int j = 0; Symbol *sym; - for (int k=0; 
kterminals->Count; k++) { - sym = (Symbol*)((*(tab->terminals))[k]); + for (int k=0; kterminals.Count; k++) { + sym = (Symbol*)tab->terminals[k]; if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,"); ++j; if (j%4 == 0) fwprintf(gen, L" "); } - if (i == symSet->Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n"); + if (i == symSet.Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n"); } fwprintf(gen, L"\t};\n\n"); } @@ -397,14 +397,14 @@ void ParserGen::InitSets() { void ParserGen::WriteParser () { Generator g = Generator(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart - symSet->Add(tab->allSyncSets); + symSet.Add(tab->allSyncSets); fram = g.OpenFrame(L"Parser.frame"); gen = g.OpenGen(L"Parser.h"); Symbol *sym; - for (int i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (int i=0; iterminals.Count; i++) { + sym = (Symbol*)tab->terminals[i]; GenErrorMsg(tErr, sym); } @@ -446,7 +446,7 @@ void ParserGen::WriteParser () { g.CopyFramePart(L"-->productions"); GenProductions(); g.CopyFramePart(L"-->parseRoot"); fwprintf(gen, L"\t%ls();\n", tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, L"\tExpect(0);"); g.CopyFramePart(L"-->constants"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count-1); + fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals.Count-1); g.CopyFramePart(L"-->initialization"); InitSets(); g.CopyFramePart(L"-->errors"); fwprintf(gen, L"%ls", err); g.CopyFramePart(L"-->namespace_close"); @@ -459,11 +459,11 @@ void ParserGen::WriteParser () { void ParserGen::WriteStatistics () { fwprintf(trace, L"\n"); - fwprintf(trace, L"%d terminals\n", tab->terminals->Count); - fwprintf(trace, L"%d symbols\n", tab->terminals->Count + tab->pragmas->Count + - tab->nonterminals->Count); - fwprintf(trace, L"%d nodes\n", tab->nodes->Count); - fwprintf(trace, L"%d sets\n", symSet->Count); + fwprintf(trace, L"%d terminals\n", tab->terminals.Count); + fwprintf(trace, L"%d 
symbols\n", tab->terminals.Count + tab->pragmas.Count + + tab->nonterminals.Count); + fwprintf(trace, L"%d nodes\n", tab->nodes.Count); + fwprintf(trace, L"%d sets\n", symSet.Count); } @@ -481,7 +481,6 @@ ParserGen::ParserGen (Parser *parser) { errorNr = -1; usingPos = NULL; - symSet = new ArrayList(); err = NULL; } diff --git a/src/ParserGen.h b/src/ParserGen.h index ac6dd05..1af514c 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -60,7 +60,7 @@ class ParserGen FILE* fram; // parser frame file FILE* gen; // generated parser source file wchar_t* err; // generated parser error messages - ArrayList *symSet; + ArrayList symSet; Tab *tab; // other Coco objects FILE* trace; diff --git a/src/Tab.cpp b/src/Tab.cpp index 68da52e..0baeb63 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -44,12 +44,7 @@ const char* Tab::tKind[] = {"fixedToken", "classToken", "litToken", "classLitTok Tab::Tab(Parser *parser) { for (int i=0; i<10; i++) ddt[i] = false; - terminals = new ArrayList(); - pragmas = new ArrayList(); - nonterminals = new ArrayList(); - nodes = new ArrayList(); dummyNode = NULL; - classes= new ArrayList(); dummyName = 'A'; this->parser = parser; @@ -57,7 +52,6 @@ Tab::Tab(Parser *parser) { errors = &parser->errors; eofSy = NewSym(Node::t, L"EOF", 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); - literals = new HashTable(); checkEOF = true; } @@ -70,11 +64,11 @@ Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { Symbol *sym = new Symbol(typ, name, line); if (typ == Node::t) { - sym->n = terminals->Count; terminals->Add(sym); + sym->n = terminals.Count; terminals.Add(sym); } else if (typ == Node::pr) { - pragmas->Add(sym); + pragmas.Add(sym); } else if (typ == Node::nt) { - sym->n = nonterminals->Count; nonterminals->Add(sym); + sym->n = nonterminals.Count; nonterminals.Add(sym); } return sym; @@ -83,12 +77,12 @@ Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { Symbol* Tab::FindSym(const wchar_t* name) { Symbol *s; int i; - for (i=0; 
iCount; i++) { - s = (Symbol*)((*terminals)[i]); + for (i=0; iname, name)) return s; } - for (i=0; iCount; i++) { - s = (Symbol*)((*nonterminals)[i]); + for (i=0; iname, name)) return s; } return NULL; @@ -120,16 +114,16 @@ void Tab::PrintSymbolTable() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (i=0; iCount; i++) { - sym = (Symbol*)((*pragmas)[i]); + for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; iGetIterator(); + Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); fwprintf(trace, L"_%ls = %ls.\n", ((Symbol*) (e->val))->name, e->key); @@ -149,8 +143,8 @@ void Tab::PrintSet(BitArray *s, int indent) { int col, len; col = indent; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (int i=0; in]) { len = coco_string_length(sym->name); if (col + len >= 80) { @@ -171,8 +165,8 @@ void Tab::PrintSet(BitArray *s, int indent) { Node* Tab::NewNode(int typ, Symbol *sym, int line) { Node* node = new Node(typ, sym, line); - node->n = nodes->Count; - nodes->Add(node); + node->n = nodes.Count; + nodes.Add(node); return node; } @@ -248,7 +242,7 @@ void Tab::Finish(Graph *g) { } void Tab::DeleteNodes() { - nodes = new ArrayList(); + nodes.Clear(); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); } @@ -341,15 +335,15 @@ void Tab::PrintNodes() { fwprintf(trace, L"----------------------------------------------------\n"); Node *p; - for (int i=0; iCount; i++) { - p = (Node*)((*nodes)[i]); + for (int i=0; in, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { - CharClass *c = (CharClass*)(*classes)[p->val]; + CharClass *c = (CharClass*)classes[p->val]; wchar_t *paddedName = Name(c->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); @@ -388,15 +382,15 @@ CharClass* 
Tab::NewCharClass(const wchar_t* name, CharSet *s) { } else { c = new CharClass(name, s); } - c->n = classes->Count; - classes->Add(c); + c->n = classes.Count; + classes.Add(c); return c; } CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); + for (int i=0; iname, name)) return c; } return NULL; @@ -404,15 +398,15 @@ CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass* Tab::FindCharClass(CharSet *s) { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); + for (int i=0; iEquals(c->set)) return c; } return NULL; } CharSet* Tab::CharClassSet(int i) { - return ((CharClass*)((*classes)[i]))->set; + return ((CharClass*)classes[i])->set; } //----------- character class printing @@ -447,8 +441,8 @@ void Tab::WriteCharSet(CharSet *s) { void Tab::WriteCharClasses () { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); + for (int i=0; iname, L" "); wchar_t* format = coco_string_create(format2, 0, 10); @@ -469,7 +463,7 @@ void Tab::WriteCharClasses () { /* Computes the first set for the given Node. 
*/ BitArray* Tab::First0(Node *p, BitArray *mark) { - BitArray *fs = new BitArray(terminals->Count); + BitArray *fs = new BitArray(terminals.Count); while (p != NULL && !((*mark)[p->n])) { mark->Set(p->n, true); if (p->typ == Node::nt) { @@ -508,7 +502,7 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { } BitArray* Tab::First(Node *p) { - BitArray *mark = new BitArray(nodes->Count); + BitArray *mark = new BitArray(nodes.Count); BitArray *fs = First0(p, mark); delete mark; if (ddt[3]) { @@ -524,13 +518,13 @@ BitArray* Tab::First(Node *p) { void Tab::CompFirstSets() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - sym->first = new BitArray(terminals->Count); + for (i=0; ifirst = new BitArray(terminals.Count); sym->firstReady = false; } - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; ifirst = First(sym->graph); sym->firstReady = true; } @@ -557,8 +551,8 @@ void Tab::Complete(Symbol *sym) { if (!((*visited)[sym->n])) { visited->Set(sym->n, true); Symbol *s; - for (int i=0; iCount; i++) { - s = (Symbol*)((*nonterminals)[i]); + for (int i=0; ints))[s->n]) { Complete(s); sym->follow->Or(s->follow); @@ -571,22 +565,22 @@ void Tab::Complete(Symbol *sym) { void Tab::CompFollowSets() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - sym->follow = new BitArray(terminals->Count); - sym->nts = new BitArray(nonterminals->Count); + for (i=0; ifollow = new BitArray(terminals.Count); + sym->nts = new BitArray(nonterminals.Count); } gramSy->follow->Set(eofSy->n, true); - visited = new BitArray(nodes->Count); - for (i=0; iCount; i++) { // get direct successors of nonterminals - sym = (Symbol*)((*nonterminals)[i]); + visited = new BitArray(nodes.Count); + for (i=0; igraph); } - for (i=0; iCount; i++) { // add indirect successors to followers - sym = (Symbol*)((*nonterminals)[i]); - visited = new BitArray(nonterminals->Count); + for (i=0; isub); if (a != NULL) 
Sets::Subtract(a->set, First(p->next)); } else if (p->typ == Node::alt) { - BitArray *s1 = new BitArray(terminals->Count); + BitArray *s1 = new BitArray(terminals.Count); Node *q = p; while (q != NULL) { FindAS(q->sub); @@ -650,8 +644,8 @@ void Tab::FindAS(Node *p) { // find ANY sets void Tab::CompAnySets() { Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } } @@ -665,7 +659,7 @@ BitArray* Tab::Expected(Node *p, Symbol *curSy) { // does not look behind resolvers; only called during LL(1) test and in CheckRes BitArray* Tab::Expected0(Node *p, Symbol *curSy) { - if (p->typ == Node::rslv) return new BitArray(terminals->Count); + if (p->typ == Node::rslv) return new BitArray(terminals.Count); else return Expected(p, curSy); } @@ -686,13 +680,13 @@ void Tab::CompSync(Node *p) { } void Tab::CompSyncSets() { - allSyncSets = new BitArray(terminals->Count); + allSyncSets = new BitArray(terminals.Count); allSyncSets->Set(eofSy->n, true); - visited = new BitArray(nodes->Count); + visited = new BitArray(nodes.Count); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } @@ -700,10 +694,10 @@ void Tab::CompSyncSets() { void Tab::SetupAnys() { Node *p; - for (int i=0; iCount; i++) { - p = (Node*)((*nodes)[i]); + for (int i=0; ityp == Node::any) { - p->set = new BitArray(terminals->Count, true); + p->set = new BitArray(terminals.Count, true); p->set->Set(eofSy->n, false); } } @@ -715,26 +709,26 @@ void Tab::CompDeletableSymbols() { int i; do { changed = false; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; ideletable && sym->graph != NULL && DelGraph(sym->graph)) { sym->deletable = true; changed = true; } } } while (changed); - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; ideletable) wprintf(L" %ls deletable\n", sym->name); } } void Tab::RenumberPragmas() { - int n = terminals->Count; + int n = 
terminals.Count; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*pragmas)[i]); + for (int i=0; in = n++; } } @@ -751,8 +745,8 @@ void Tab::CompSymbolSets() { fwprintf(trace, L"----------------------\n\n"); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; iname); fwprintf(trace, L"first: "); PrintSet(sym->first, 10); fwprintf(trace, L"follow: "); PrintSet(sym->follow, 10); @@ -765,8 +759,8 @@ void Tab::CompSymbolSets() { fwprintf(trace, L"-----------------\n"); Node *p; - for (int i=0; iCount; i++) { - p = (Node*)((*nodes)[i]); + for (int i=0; ityp == Node::any || p->typ == Node::sync) { fwprintf(trace, L"%4d %4s ", p->n, nTyp[p->typ]); PrintSet(p->set, 11); @@ -906,8 +900,8 @@ bool Tab::NoCircularProductions() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; igraph, singles, sym->graph); // get nonterminals s such that sym-->s Symbol *s; @@ -961,8 +955,8 @@ void Tab::LL1Error(int cond, Symbol *sym) { void Tab::CheckOverlap(BitArray *s1, BitArray *s2, int cond) { Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (int i=0; in] && (*s2)[sym->n]) { LL1Error(cond, sym); } @@ -974,7 +968,7 @@ void Tab::CheckAlts(Node *p) { while (p != NULL) { if (p->typ == Node::alt) { Node *q = p; - s1 = new BitArray(terminals->Count); + s1 = new BitArray(terminals.Count); while (q != NULL) { // for all alternatives s2 = Expected0(q->sub, curSy); CheckOverlap(s1, s2, 1); @@ -1001,8 +995,8 @@ void Tab::CheckAlts(Node *p) { void Tab::CheckLL1() { Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } @@ -1019,10 +1013,10 @@ void Tab::CheckRes(Node *p, bool rslvAllowed) { Node *q; if (p->typ == Node::alt) { - BitArray *expected = new BitArray(terminals->Count); + BitArray *expected = new BitArray(terminals.Count); for (q = p; q != NULL; q = q->down) expected->Or(Expected0(q->sub, 
curSy)); - BitArray *soFar = new BitArray(terminals->Count); + BitArray *soFar = new BitArray(terminals.Count); for (q = p; q != NULL; q = q->down) { if (q->sub->typ == Node::rslv) { BitArray *fs = Expected(q->sub->next, curSy); @@ -1053,8 +1047,8 @@ void Tab::CheckRes(Node *p, bool rslvAllowed) { } void Tab::CheckResolvers() { - for (int i=0; iCount; i++) { - curSy = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph, false); } } @@ -1065,8 +1059,8 @@ void Tab::CheckResolvers() { bool Tab::NtsComplete() { bool complete = true; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph == NULL) { complete = false; errors->count++; wprintf(L" No production for %ls\n", sym->name); @@ -1093,12 +1087,12 @@ void Tab::MarkReachedNts(Node *p) { bool Tab::AllNtReached() { bool ok = true; - visited = new BitArray(nonterminals->Count); + visited = new BitArray(nonterminals.Count); visited->Set(gramSy->n, true); MarkReachedNts(gramSy->graph); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; in])) { ok = false; errors->count++; wprintf(L" %ls cannot be reached\n", sym->name); @@ -1123,22 +1117,22 @@ bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to t bool Tab::AllNtToTerm() { bool changed, ok = true; - BitArray *mark = new BitArray(nonterminals->Count); + BitArray *mark = new BitArray(nonterminals.Count); // a nonterminal is marked if it can be derived to terminal symbols Symbol *sym; int i; do { changed = false; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; in]) && IsTerm(sym->graph, mark)) { mark->Set(sym->n, true); changed = true; } } } while (changed); - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; in])) { ok = false; errors->count++; wprintf(L" %ls cannot be derived to terminals\n", sym->name); @@ -1156,8 +1150,8 @@ void Tab::XRef() { // collect lines where symbols have been defined 
Symbol *sym; int i, j; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; iGet(sym)); if (list == NULL) {list = new ArrayList(); xref->Set(sym, list);} int *intg = new int(- sym->line); @@ -1165,8 +1159,8 @@ void Tab::XRef() { } // collect lines where symbols have been referenced Node *n; - for (i=0; iCount; i++) { - n = (Node*)((*nodes)[i]); + for (i=0; ityp == Node::t || n->typ == Node::wt || n->typ == Node::nt) { ArrayList *list = (ArrayList*)(xref->Get(n->sym)); if (list == NULL) {list = new ArrayList(); xref->Set(n->sym, list);} diff --git a/src/Tab.h b/src/Tab.h index bf7e49d..3a0a74b 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -57,7 +57,7 @@ class Tab { Symbol *eofSy; // end of file symbol Symbol *noSym; // used in case of an error BitArray *allSyncSets; // union of all synchronisation sets - HashTable *literals; // symbols that are used as literals + HashTable literals; // symbols that are used as literals wchar_t* srcName; // name of the atg file (including path) wchar_t* srcDir; // directory path of the atg file @@ -76,16 +76,16 @@ class Tab { Errors *errors; - ArrayList *terminals; - ArrayList *pragmas; - ArrayList *nonterminals; + ArrayList terminals; + ArrayList pragmas; + ArrayList nonterminals; - ArrayList *nodes; + ArrayList nodes; static const char* nTyp[]; Node *dummyNode; - ArrayList *classes; + ArrayList classes; int dummyName; From 0712997a67ff19a83fb5acdd915d38e522c5dff1 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:13:29 +0200 Subject: [PATCH 07/95] Replace multiple copies of bitwise expression by a macro --- src/BitArray.cpp | 55 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/src/BitArray.cpp b/src/BitArray.cpp index f578e46..ede7270 100644 --- a/src/BitArray.cpp +++ b/src/BitArray.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -32,20 +32,24 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { +#define CALC_BIT_BYTES(b) ((b+7)>>3) + BitArray::BitArray(const int length, const bool defaultValue) { Count = length; - Data = new unsigned char[ (length+7)>>3 ]; + unsigned int size = CALC_BIT_BYTES(length); + Data = new unsigned char[ size ]; if (defaultValue) - memset(Data, 0xFF, (length+7)>>3); + memset(Data, 0xFF, size); else - memset(Data, 0x00, (length+7)>>3); + memset(Data, 0x00, size); } BitArray::BitArray(const BitArray ©) { Count = copy.Count; - Data = new unsigned char[ (copy.Count+7)>>3 ]; - memcpy(Data, copy.Data, (copy.Count+7)>>3); + unsigned int size = CALC_BIT_BYTES(copy.Count); + Data = new unsigned char[ size ]; + memcpy(Data, copy.Data, size); } BitArray::~BitArray() @@ -76,37 +80,41 @@ void BitArray::Set(const int index, const bool value) void BitArray::SetAll(const bool value) { + unsigned int size = CALC_BIT_BYTES(Count); if (value) - memset(Data, 0xFF, (Count+7)>>3); + memset(Data, 0xFF, size); else - memset(Data, 0x00, (Count+7)>>3); + memset(Data, 0x00, size); } void BitArray::Not() { - for (int i=0; i<(Count+7)>>3; i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count); i>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } void BitArray::Or(const BitArray *value) { - for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } void BitArray::Xor(const BitArray *value) { - for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } @@ -115,7 +123,7 @@ BitArray* BitArray::Clone() const { BitArray *newBitArray = new BitArray(Count); newBitArray->Count = Count; - memcpy(newBitArray->Data, Data, (Count+7)>>3); + memcpy(newBitArray->Data, Data, CALC_BIT_BYTES(Count)); return newBitArray; } @@ -147,8 +155,9 @@ const BitArray 
&BitArray::operator=(const BitArray &right) if ( &right != this ) { // avoid self assignment delete [] Data; // prevents memory leak Count = right.Count; - Data = new unsigned char[ (Count+7)>>3 ]; - memcpy(Data, right.Data, (Count+7)>>3); + unsigned int size = CALC_BIT_BYTES(Count); + Data = new unsigned char[ size ]; + memcpy(Data, right.Data, size); } return *this; // enables cascaded assignments } From 94860db0053d16b0abfb5993c61469d160b283d5 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:23:42 +0200 Subject: [PATCH 08/95] Remove unecessary string copy/delete from Scanner --- src/DFA.cpp | 4 ++-- src/Scanner.cpp | 23 ++++++++++++++++++++++- src/Scanner.frame | 36 +++++++++++++++++++++++++++++++++--- src/Scanner.h | 15 ++++++++++++--- 4 files changed, 69 insertions(+), 9 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index d0d773e..beeb991 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -716,9 +716,9 @@ void DFA::WriteState(State *state) { fwprintf(gen, L"t->kind = %d; ", endOf->n); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, L"wchar_t *literal = coco_string_create_lower(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n"); + fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, true); break;}\n"); } else { - fwprintf(gen, L"wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n"); + fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, false); break;}\n"); } } else { fwprintf(gen, L"break;}\n"); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index d58d8f7..520468d 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -175,6 +175,18 @@ bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { return wcscmp( data1, data2 ) == 0; } +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool 
coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } @@ -189,6 +201,15 @@ unsigned int coco_string_hash(const wchar_t *data) { return h; } +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + // string handling, ascii character wchar_t* coco_string_create(const char* value) { @@ -674,7 +695,7 @@ Token* Scanner::NextToken() { case_1: recEnd = pos; recKind = 1; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} - else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + else {t->kind = 1; t->kind = keywords.get(tval, tlen, t->kind, false); break;} case 2: case_2: recEnd = pos; recKind = 2; diff --git a/src/Scanner.frame b/src/Scanner.frame index 0c36f7b..5f9ecb4 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -82,8 +82,12 @@ int coco_string_indexof(const wchar_t* data, const wchar_t value); int coco_string_lastindexof(const wchar_t* data, const wchar_t value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); unsigned int 
coco_string_hash(const wchar_t* data); +unsigned int coco_string_hash(const wchar_t* data, size_t size); // string handling, ascii character wchar_t* coco_string_create(const char *value); @@ -224,9 +228,14 @@ public: e->next = tab[k]; tab[k] = e; } - int get(const wchar_t *key, int defaultVal) { - Elem *e = tab[coco_string_hash(key) % 128]; - while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; + int get(const wchar_t *key, size_t size, int defaultVal, bool ignoreCase) { + Elem *e = tab[coco_string_hash(key, size) % 128]; + if(ignoreCase) { + while (e != NULL && !coco_string_equal_nocase_n(e->key, key, size)) e = e->next; + } + else { + while (e != NULL && !coco_string_equal_n(e->key, key, size)) e = e->next; + } return e == NULL ? defaultVal : e->val; } }; @@ -445,6 +454,18 @@ bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { return wcscmp( data1, data2 ) == 0; } +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } @@ -459,6 +480,15 @@ unsigned int coco_string_hash(const wchar_t *data) { return h; } +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + // string handling, ascii character wchar_t* coco_string_create(const char* value) { diff --git a/src/Scanner.h b/src/Scanner.h index b183771..09498db 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -78,8 +78,12 @@ int coco_string_indexof(const wchar_t* data, const wchar_t value); int 
coco_string_lastindexof(const wchar_t* data, const wchar_t value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); +unsigned int coco_string_hash(const wchar_t* data, size_t size); // string handling, ascii character wchar_t* coco_string_create(const char *value); @@ -220,9 +224,14 @@ class KeywordMap { e->next = tab[k]; tab[k] = e; } - int get(const wchar_t *key, int defaultVal) { - Elem *e = tab[coco_string_hash(key) % 128]; - while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; + int get(const wchar_t *key, size_t size, int defaultVal, bool ignoreCase) { + Elem *e = tab[coco_string_hash(key, size) % 128]; + if(ignoreCase) { + while (e != NULL && !coco_string_equal_nocase_n(e->key, key, size)) e = e->next; + } + else { + while (e != NULL && !coco_string_equal_n(e->key, key, size)) e = e->next; + } return e == NULL ? 
defaultVal : e->val; } }; From 0df8d32bf96ad880c1be3b9513d406c30c84f1c9 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:27:47 +0200 Subject: [PATCH 09/95] Hash table now makes a copy of the key to avoid dangling pointers, also delete iterators after usage --- src/DFA.cpp | 1 + src/HashTable.cpp | 3 ++- src/HashTable.h | 1 + src/Tab.cpp | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index beeb991..dbd74e5 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -588,6 +588,7 @@ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literal DictionaryEntry *e = iter->Next(); if (e->val == sym) { return e->key; } } + delete iter; } return sym->name; } diff --git a/src/HashTable.cpp b/src/HashTable.cpp index da0a92a..08b97d1 100644 --- a/src/HashTable.cpp +++ b/src/HashTable.cpp @@ -45,6 +45,7 @@ HashTable::~HashTable() { while (o != NULL) { Obj *del = o; o = o->next; + coco_string_delete(del->key); delete del; } } @@ -67,7 +68,7 @@ void HashTable::Set(wchar_t *key, void *val) { // new entry int k = coco_string_hash(key) % size; o = new Obj(); - o->key = key; + o->key = coco_string_create(key); o->val = val; o->next = data[k]; data[k] = o; diff --git a/src/HashTable.h b/src/HashTable.h index 487f1b9..c9ba572 100644 --- a/src/HashTable.h +++ b/src/HashTable.h @@ -41,6 +41,7 @@ class DictionaryEntry { class Iterator { public: + virtual ~Iterator() {}; virtual bool HasNext() = 0; virtual DictionaryEntry* Next() = 0; }; diff --git a/src/Tab.cpp b/src/Tab.cpp index 0baeb63..1604ca3 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -136,6 +136,7 @@ void Tab::PrintSymbolTable() { DictionaryEntry *e = iter->Next(); fwprintf(trace, L"_%ls = %ls.\n", ((Symbol*) (e->val))->name, e->key); } + delete iter; fwprintf(trace, L"\n"); } From 8009b1c84aa31666b9e625ef743e0c4fd7f7ade0 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:30:59 +0200 Subject: [PATCH 10/95] add missing cleanup --- src/CharClass.cpp 
| 1 + 1 file changed, 1 insertion(+) diff --git a/src/CharClass.cpp b/src/CharClass.cpp index deca847..eaf11f0 100644 --- a/src/CharClass.cpp +++ b/src/CharClass.cpp @@ -37,6 +37,7 @@ CharClass::CharClass(const wchar_t* name, CharSet *s) { CharClass::~CharClass() { coco_string_delete(name); + delete this->set; } }; // namespace From 2fe0330a74439248540f4e65dc979d49a009e5d2 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:31:38 +0200 Subject: [PATCH 11/95] Replace instantiation with 'new' by RAII --- src/CharSet.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/CharSet.cpp b/src/CharSet.cpp index 04267eb..fa7d2d8 100644 --- a/src/CharSet.cpp +++ b/src/CharSet.cpp @@ -102,33 +102,31 @@ void CharSet::Or(CharSet *s) { } void CharSet::And(CharSet *s) { - CharSet *x = new CharSet(); + CharSet x; Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) - if (s->Get(i)) x->Set(i); + if (s->Get(i)) x.Set(i); Range *del = p; p = p->next; delete del; } - head = x->head; - x->head = NULL; - delete x; + head = x.head; + x.head = NULL; } void CharSet::Subtract(CharSet *s) { - CharSet *x = new CharSet(); + CharSet x; Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) - if (!s->Get(i)) x->Set(i); + if (!s->Get(i)) x.Set(i); Range *del = p; p = p->next; delete del; } - head = x->head; - x->head = NULL; - delete x; + head = x.head; + x.head = NULL; } bool CharSet::Includes(CharSet *s) const { From ed2ecca1a708131c81e20834a8fc2ee3ac173139 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:34:33 +0200 Subject: [PATCH 12/95] Add destructor for cleanup --- src/Generator.cpp | 7 ++++++- src/Generator.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Generator.cpp b/src/Generator.cpp index f742b03..bb6ca63 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -39,6 +39,11 @@ namespace Coco { frameFile = NULL; } + Generator::~Generator() { + 
coco_string_delete(frameFile); + if(fram) fclose(fram); + } + FILE* Generator::OpenFrame(const wchar_t* frame) { if (coco_string_length(tab->frameDir) != 0) { frameFile = coco_string_create_append(tab->frameDir, L"/"); @@ -179,4 +184,4 @@ namespace Coco { } } -} \ No newline at end of file +} diff --git a/src/Generator.h b/src/Generator.h index cb7e1d1..edab4a6 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -38,6 +38,7 @@ namespace Coco { class Generator { public: Generator(Tab *tab, Errors *errors); + ~Generator(); FILE* OpenFrame(const wchar_t* frame); FILE* OpenGen(const wchar_t *genName); void GenCopyright(); From 13107ccb9b6c8b6215fa24af9141227a5548e4ee Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:36:17 +0200 Subject: [PATCH 13/95] Added cleanup --- src/SortedList.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SortedList.cpp b/src/SortedList.cpp index 656100e..b1673fb 100644 --- a/src/SortedList.cpp +++ b/src/SortedList.cpp @@ -43,6 +43,7 @@ SortedEntry::SortedEntry(Symbol* Key, void* Value) { } SortedEntry::~SortedEntry() { + delete next; }; SortedList::SortedList() { @@ -51,6 +52,7 @@ SortedList::SortedList() { } SortedList::~SortedList() { + delete Data; } bool SortedList::Find(Symbol* key) { From a2cfa190f6951c9cb2c1852fe3c38528a28f3f19 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:37:12 +0200 Subject: [PATCH 14/95] Added cleanup --- src/Symbol.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Symbol.cpp b/src/Symbol.cpp index a340d99..37deeda 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -28,6 +28,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include "Symbol.h" #include "Scanner.h" +#include "BitArray.h" namespace Coco { @@ -56,6 +57,10 @@ Symbol::Symbol(int typ, const wchar_t* name, int line) { Symbol::~Symbol() { coco_string_delete(name); + delete this->first; + delete this->follow; + delete this->nts; + delete this->semPos; } }; // namespace From 7ee5eda649037596f2a0558f0ce957a365982410 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:40:10 +0200 Subject: [PATCH 15/95] Add cleanup --- src/Target.cpp | 4 ++++ src/Target.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/Target.cpp b/src/Target.cpp index dcbeefe..363dacf 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -38,4 +38,8 @@ Target::Target(State *s) { state = s; } +Target::~Target() { + delete next; +} + }; // namespace diff --git a/src/Target.h b/src/Target.h index c54d4ca..4943e97 100644 --- a/src/Target.h +++ b/src/Target.h @@ -37,6 +37,7 @@ class Target // set of states that are reached by an action { public: Target (State *s); + ~Target(); State *state; // target state Target *next; From d608f8d5c5655c01d670cd5e2b88ebc82d940573 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 10:44:12 +0200 Subject: [PATCH 16/95] Fix several memory leaks --- src/ParserGen.cpp | 29 +++++++++++++++++++++++------ src/ParserGen.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 6fa65bc..0f3e739 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -43,15 +43,16 @@ void ParserGen::Indent (int n) { // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning bool ParserGen::UseSwitch (Node *p) { - BitArray *s1, *s2; + BitArray *s2; if (p->typ != Node::alt) return false; int nAlts = 0; - s1 = new BitArray(tab->terminals.Count); + BitArray s1(tab->terminals.Count); while (p != NULL) { s2 = tab->Expected0(p->sub, curSy); // must not optimize with switch statement, if there are ll1 warnings - if (s1->Overlaps(s2)) { return 
false; } - s1->Or(s2); + if (s1.Overlaps(s2)) {delete s2; return false; } + s1.Or(s2); + delete s2; ++nAlts; // must not optimize with switch-statement, if alt uses a resolver expression if (p->sub->typ == Node::rslv) return false; @@ -208,6 +209,7 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { fwprintf(gen, L"ExpectWeak("); WriteSymbolOrCode(gen, p->sym); fwprintf(gen, L", %d);\n", NewCondSet(s1)); + delete s1; } if (p->typ == Node::any) { Indent(indent); int acc = Sets::Elements(p->set); @@ -230,9 +232,11 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { s1 = p->set->Clone(); fwprintf(gen, L"while (!("); GenCond(s1, p); fwprintf(gen, L")) {"); fwprintf(gen, L"SynErr(%d); Get();", errorNr); fwprintf(gen, L"}\n"); + delete s1; } if (p->typ == Node::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); + delete s1; bool useSwitch = UseSwitch(p); if (useSwitch) { Indent(indent); fwprintf(gen, L"switch (la->kind) {\n"); } p2 = p; @@ -253,6 +257,7 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { Indent(indent); fwprintf(gen, L"}\n"); } p2 = p2->down; + delete s1; } Indent(indent); if (equal) { @@ -276,6 +281,8 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { fwprintf(gen, L"WeakSeparator("); WriteSymbolOrCode(gen, p2->sym); fwprintf(gen, L",%d,%d) ", NewCondSet(s1), NewCondSet(s2)); + delete s1; + delete s2; s1 = new BitArray(tab->terminals.Count); // for inner structure if (p2->up || p2->next == NULL) p2 = NULL; else p2 = p2->next; } else { @@ -285,12 +292,14 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { fwprintf(gen, L") {\n"); GenCode(p2, indent + 1, s1); Indent(indent); fwprintf(gen, L"}\n"); + delete s1; } if (p->typ == Node::opt) { s1 = tab->First(p->sub); Indent(indent); fwprintf(gen, L"if ("); GenCond(s1, p->sub); fwprintf(gen, L") {\n"); GenCode(p->sub, indent + 1, s1); Indent(indent); fwprintf(gen, L"}\n"); + delete s1; } if 
(p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) isChecked->SetAll(false); // = new BitArray(Symbol.terminals.Count); @@ -363,6 +372,7 @@ void ParserGen::GenProductionsHeader() { void ParserGen::GenProductions() { Symbol *sym; + BitArray ba(tab->terminals.Count); for (int i=0; inonterminals.Count; i++) { sym = (Symbol*)tab->nonterminals[i]; curSy = sym; @@ -370,7 +380,8 @@ void ParserGen::GenProductions() { CopySourcePart(sym->attrPos, 0); fwprintf(gen, L") {\n"); CopySourcePart(sym->semPos, 2); - GenCode(sym->graph, 2, new BitArray(tab->terminals.Count)); + ba.SetAll(false); + GenCode(sym->graph, 2, &ba); fwprintf(gen, L"}\n"); fwprintf(gen, L"\n"); } } @@ -395,7 +406,7 @@ void ParserGen::InitSets() { } void ParserGen::WriteParser () { - Generator g = Generator(tab, errors); + Generator g(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart symSet.Add(tab->allSyncSets); @@ -484,4 +495,10 @@ ParserGen::ParserGen (Parser *parser) { err = NULL; } +ParserGen::~ParserGen () { + for(int i=0; i Date: Thu, 3 Jun 2021 10:57:23 +0200 Subject: [PATCH 17/95] Fix several memory leaks --- src/Tab.cpp | 199 +++++++++++++++++++++++++++++++++------------------- src/Tab.h | 6 +- 2 files changed, 128 insertions(+), 77 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 1604ca3..8874f17 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -53,6 +53,27 @@ Tab::Tab(Parser *parser) { eofSy = NewSym(Node::t, L"EOF", 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); checkEOF = true; + visited = allSyncSets = NULL; + srcName = srcDir = nsName = frameDir = outDir = NULL; +} + +Tab::~Tab() { + for(int i=0; in; } -wchar_t* Tab::Pos(Position *pos) { - wchar_t* format = new wchar_t[10]; +typedef wchar_t wchar_t_10[10]; + +static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { coco_swprintf(format, 10, L" "); } else { @@ -336,6 +359,7 @@ void Tab::PrintNodes() { fwprintf(trace, 
L"----------------------------------------------------\n"); Node *p; + wchar_t_10 format; for (int i=0; in, (nTyp[p->typ])); @@ -352,7 +376,7 @@ void Tab::PrintNodes() { fwprintf(trace, L"%5d ", Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { - fwprintf(trace, L" %5s", Pos(p->pos)); + fwprintf(trace, L" %5s", TabPos(p->pos, format)); } if (p->typ == Node::chr) { fwprintf(trace, L"%5d %5d ", p->val, p->code); } if (p->typ == Node::clas) { @@ -360,7 +384,7 @@ void Tab::PrintNodes() { } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { fwprintf(trace, L"%5d %5d ", Ptr(p->down, false), Ptr(p->sub, false)); } if (p->typ == Node::sem) { - fwprintf(trace, L" %5s", Pos(p->pos)); + fwprintf(trace, L" %5s", TabPos(p->pos, format)); } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { fwprintf(trace, L" "); } @@ -412,8 +436,7 @@ CharSet* Tab::CharClassSet(int i) { //----------- character class printing -wchar_t* Tab::Ch(const wchar_t ch) { - wchar_t* format = new wchar_t[10]; +wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') { coco_swprintf(format, 10, L"%d", ch); return format; @@ -424,18 +447,16 @@ wchar_t* Tab::Ch(const wchar_t ch) { } void Tab::WriteCharSet(CharSet *s) { + wchar_t_10 fmt1, fmt2; for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from < r->to) { - wchar_t *from = Ch(r->from); - wchar_t *to = Ch(r->to); + wchar_t *from = TabCh(r->from, fmt1); + wchar_t *to = TabCh(r->to, fmt2); fwprintf(trace, L"%ls .. 
%ls ", from, to); - delete [] from; - delete [] to; } else { - wchar_t *from = Ch(r->from); + wchar_t *from = TabCh(r->from, fmt1); fwprintf(trace, L"%ls ", from); - delete [] from; } } } @@ -503,9 +524,8 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { } BitArray* Tab::First(Node *p) { - BitArray *mark = new BitArray(nodes.Count); - BitArray *fs = First0(p, mark); - delete mark; + BitArray mark(nodes.Count); + BitArray *fs = First0(p, &mark); if (ddt[3]) { fwprintf(trace, L"\n"); if (p != NULL) fwprintf(trace, L"First: node = %d\n", p->n ); @@ -521,12 +541,15 @@ void Tab::CompFirstSets() { int i; for (i=0; ifirst; sym->first = new BitArray(terminals.Count); sym->firstReady = false; } for (i=0; ifirst; sym->first = First(sym->graph); + delete saved; sym->firstReady = true; } } @@ -537,6 +560,7 @@ void Tab::CompFollow(Node *p) { if (p->typ == Node::nt) { BitArray *s = First(p->next); p->sym->follow->Or(s); + delete s; if (DelGraph(p->next)) p->sym->nts->Set(curSy->n, true); } else if (p->typ == Node::opt || p->typ == Node::iter) { @@ -572,6 +596,7 @@ void Tab::CompFollowSets() { sym->nts = new BitArray(nonterminals.Count); } gramSy->follow->Set(eofSy->n, true); + delete visited; visited = new BitArray(nodes.Count); for (i=0; ityp == Node::opt || p->typ == Node::iter) { FindAS(p->sub); a = LeadingAny(p->sub); - if (a != NULL) Sets::Subtract(a->set, First(p->next)); + BitArray *ba = First(p->next); + if (a != NULL) Sets::Subtract(a->set, ba); + delete ba; } else if (p->typ == Node::alt) { - BitArray *s1 = new BitArray(terminals.Count); + BitArray s1(terminals.Count); Node *q = p; while (q != NULL) { FindAS(q->sub); a = LeadingAny(q->sub); if (a != NULL) { BitArray *tmp = First(q->down); - tmp->Or(s1); + tmp->Or(&s1); Sets::Subtract(a->set, tmp); + delete tmp; } else { BitArray *f = First(q->sub); - s1->Or(f); + s1.Or(f); delete f; } q = q->down; @@ -634,7 +663,9 @@ void Tab::FindAS(Node *p) { // find ANY sets a = LeadingAny(p->next); if (a != NULL) { Node *q = 
(p->typ == Node::nt) ? p->sym->graph : p->sub; - Sets::Subtract(a->set, First(q)); + BitArray *ba = First(q); + Sets::Subtract(a->set, ba); + delete ba; } } @@ -683,6 +714,7 @@ void Tab::CompSync(Node *p) { void Tab::CompSyncSets() { allSyncSets = new BitArray(terminals.Count); allSyncSets->Set(eofSy->n, true); + delete visited; visited = new BitArray(nodes.Count); Symbol *sym; @@ -774,9 +806,8 @@ void Tab::CompSymbolSets() { // String handling //--------------------------------------------------------------------- -wchar_t Tab::Hex2Char(const wchar_t* s) { +wchar_t Tab::Hex2Char(const wchar_t* s, int len) { int val = 0; - int len = coco_string_length(s); for (int i = 0; i < len; i++) { wchar_t ch = s[i]; if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0'); @@ -790,15 +821,14 @@ wchar_t Tab::Hex2Char(const wchar_t* s) { return (wchar_t) val; } -wchar_t* Tab::Char2Hex(const wchar_t ch) { - wchar_t* format = new wchar_t[10]; +static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { coco_swprintf(format, 10, L"\\0x%04x", ch); return format; } wchar_t* Tab::Unescape (const wchar_t* s) { /* replaces escape sequences in s by their Unicode values. 
*/ - StringBuilder buf = StringBuilder(); + StringBuilder buf; int i = 0; int len = coco_string_length(s); while (i < len) { @@ -817,9 +847,7 @@ wchar_t* Tab::Unescape (const wchar_t* s) { case L'v': buf.Append(L'\v'); i += 2; break; case L'u': case L'x': if (i + 6 <= coco_string_length(s)) { - wchar_t *subS = coco_string_create(s, i+2, 4); - buf.Append(Hex2Char(subS)); i += 6; break; - coco_string_delete(subS); + buf.Append(Hex2Char(s +i+2, 4)); i += 6; break; } else { parser->SemErr(L"bad escape sequence in string or character"); i = coco_string_length(s); break; @@ -839,9 +867,10 @@ wchar_t* Tab::Unescape (const wchar_t* s) { wchar_t* Tab::Escape (const wchar_t* s) { - StringBuilder buf = StringBuilder(); + StringBuilder buf; wchar_t ch; int len = coco_string_length(s); + wchar_t_10 fmt; for (int i=0; i < len; i++) { ch = s[i]; switch(ch) { @@ -853,9 +882,8 @@ wchar_t* Tab::Escape (const wchar_t* s) { case L'\n': buf.Append(L"\\n"); break; default: if ((ch < L' ') || (ch > 0x7f)) { - wchar_t* res = Char2Hex(ch); + wchar_t* res = TabChar2Hex(ch, fmt); buf.Append(res); - delete [] res; } else buf.Append(ch); break; @@ -896,46 +924,48 @@ void Tab::GetSingles(Node *p, ArrayList *singles, Node *rule) { bool Tab::NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; - ArrayList *list = new ArrayList(); + ArrayList list; Symbol *sym; int i; for (i=0; igraph, singles, sym->graph); // get nonterminals s such that sym-->s + ArrayList singles; + GetSingles(sym->graph, &singles, sym->graph); // get nonterminals s such that sym-->s Symbol *s; - for (int j=0; jCount; j++) { - s = (Symbol*)((*singles)[j]); - list->Add(new CNode(sym, s)); + for (int j=0; jCount; i++) { - n = (CNode*)(*list)[i]; + for (i = 0; i < list.Count; i++) { + n = (CNode*)list[i]; onLeftSide = false; onRightSide = false; CNode *m; - for (int j=0; jCount; j++) { - m = (CNode*)((*list)[j]); + for (int j=0; jleft == m->right) onRightSide = true; if (n->right == m->left) onLeftSide = true; } if 
(!onLeftSide || !onRightSide) { - list->Remove(n); i--; changed = true; + delete n; + list.Remove(n); i--; changed = true; } } } while(changed); ok = true; - for (i=0; iCount; i++) { - n = (CNode*)((*list)[i]); + for (i=0; icount++; wprintf(L" %ls --> %ls", n->left->name, n->right->name); } + for(int i=0; ityp == Node::alt) { Node *q = p; - s1 = new BitArray(terminals.Count); + s0.SetAll(false); while (q != NULL) { // for all alternatives s2 = Expected0(q->sub, curSy); - CheckOverlap(s1, s2, 1); - s1->Or(s2); + CheckOverlap(&s0, s2, 1); + s0.Or(s2); + delete s2; CheckAlts(q->sub); q = q->down; } @@ -983,6 +1014,7 @@ void Tab::CheckAlts(Node *p) { s1 = Expected0(p->sub, curSy); s2 = Expected(p->next, curSy); CheckOverlap(s1, s2, 2); + delete s1; delete s2; } CheckAlts(p->sub); } else if (p->typ == Node::any) { @@ -1010,29 +1042,40 @@ void Tab::ResErr(Node *p, const wchar_t* msg) { } void Tab::CheckRes(Node *p, bool rslvAllowed) { + BitArray expected(terminals.Count), soFar(terminals.Count); while (p != NULL) { Node *q; if (p->typ == Node::alt) { - BitArray *expected = new BitArray(terminals.Count); - for (q = p; q != NULL; q = q->down) - expected->Or(Expected0(q->sub, curSy)); - BitArray *soFar = new BitArray(terminals.Count); + expected.SetAll(false); + for (q = p; q != NULL; q = q->down) { + BitArray *ba = Expected0(q->sub, curSy); + expected.Or(ba); + delete ba; + } + soFar.SetAll(false); for (q = p; q != NULL; q = q->down) { if (q->sub->typ == Node::rslv) { BitArray *fs = Expected(q->sub->next, curSy); - if (Sets::Intersect(fs, soFar)) + if (Sets::Intersect(fs, &soFar)) ResErr(q->sub, L"Warning: Resolver will never be evaluated. 
Place it at previous conflicting alternative."); - if (!Sets::Intersect(fs, expected)) + if (!Sets::Intersect(fs, &expected)) ResErr(q->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); - } else soFar->Or(Expected(q->sub, curSy)); + delete fs; + } else { + BitArray *ba = Expected(q->sub, curSy); + soFar.Or(ba); + delete ba; + } CheckRes(q->sub, true); } } else if (p->typ == Node::iter || p->typ == Node::opt) { if (p->sub->typ == Node::rslv) { BitArray *fs = First(p->sub->next); BitArray *fsNext = Expected(p->next, curSy); - if (!Sets::Intersect(fs, fsNext)) + bool bsi = Sets::Intersect(fs, fsNext); + delete fs; delete fsNext; + if (!bsi) ResErr(p->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); } CheckRes(p->sub, true); @@ -1088,6 +1131,7 @@ void Tab::MarkReachedNts(Node *p) { bool Tab::AllNtReached() { bool ok = true; + delete visited; visited = new BitArray(nonterminals.Count); visited->Set(gramSy->n, true); MarkReachedNts(gramSy->graph); @@ -1118,7 +1162,7 @@ bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to t bool Tab::AllNtToTerm() { bool changed, ok = true; - BitArray *mark = new BitArray(nonterminals.Count); + BitArray mark(nonterminals.Count); // a nonterminal is marked if it can be derived to terminal symbols Symbol *sym; int i; @@ -1127,14 +1171,14 @@ bool Tab::AllNtToTerm() { for (i=0; in]) && IsTerm(sym->graph, mark)) { - mark->Set(sym->n, true); changed = true; + if (!mark[sym->n] && IsTerm(sym->graph, &mark)) { + mark.Set(sym->n, true); changed = true; } } } while (changed); for (i=0; in])) { + if (!mark[sym->n]) { ok = false; errors->count++; wprintf(L" %ls cannot be derived to terminals\n", sym->name); } @@ -1147,26 +1191,24 @@ bool Tab::AllNtToTerm() { //--------------------------------------------------------------------- void Tab::XRef() { - SortedList *xref = new SortedList(); + SortedList xref; // collect lines where symbols have been defined Symbol *sym; int i, j; for (i=0; iGet(sym)); - if (list 
== NULL) {list = new ArrayList(); xref->Set(sym, list);} - int *intg = new int(- sym->line); - list->Add(intg); + ArrayList *list = (ArrayList*)(xref.Get(sym)); + if (list == NULL) {list = new ArrayList(); xref.Set(sym, list);} + list->Add((void*)(ssize_t)(-sym->line)); } // collect lines where symbols have been referenced Node *n; for (i=0; ityp == Node::t || n->typ == Node::wt || n->typ == Node::nt) { - ArrayList *list = (ArrayList*)(xref->Get(n->sym)); - if (list == NULL) {list = new ArrayList(); xref->Set(n->sym, list);} - int *intg = new int(n->line); - list->Add(intg); + ArrayList *list = (ArrayList*)(xref.Get(n->sym)); + if (list == NULL) {list = new ArrayList(); xref.Set(n->sym, list);} + list->Add((void*)(ssize_t)n->line); } } // print cross reference list @@ -1174,16 +1216,16 @@ void Tab::XRef() { fwprintf(trace, L"Cross reference list:\n"); fwprintf(trace, L"--------------------\n\n"); - for (i=0; iCount; i++) { - sym = (Symbol*)(xref->GetKey(i)); + for (i=0; iname); fwprintf(trace, L" %12ls", paddedName); coco_string_delete(paddedName); - ArrayList *list = (ArrayList*)(xref->Get(sym)); + ArrayList *list = (ArrayList*)(xref.Get(sym)); int col = 14; int line; for (j=0; jCount; j++) { - line = *(int*)((*list)[j]); + line = (int)(ssize_t)((*list)[j]); if (col + 5 > 80) { fwprintf(trace, L"\n"); for (col = 1; col <= 14; col++) fwprintf(trace, L" "); @@ -1193,6 +1235,17 @@ void Tab::XRef() { fwprintf(trace, L"\n"); } fwprintf(trace, L"\n\n"); + for(int i=0; i < xref.Count; ++i) { + SortedEntry *se = xref[i]; + /* + while(se->next) { + SortedEntry *tmp = se->next; + delete (ArrayList*)tmp->Value; + se->next = tmp; + } + */ + delete (ArrayList*)se->Value; + } } void Tab::SetDDT(const wchar_t* s) { diff --git a/src/Tab.h b/src/Tab.h index 3a0a74b..4ee6816 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -90,6 +90,7 @@ class Tab { Tab(Parser *parser); + ~Tab(); //--------------------------------------------------------------------- // Symbol list management @@ -131,7 
+132,6 @@ class Tab { //----------------- graph printing ---------------------- int Ptr(Node *p, bool up); - wchar_t* Pos(Position *pos); wchar_t* Name(const wchar_t* name); void PrintNodes(); @@ -146,7 +146,6 @@ class Tab { //----------- character class printing - wchar_t* Ch(const wchar_t ch); void WriteCharSet(CharSet *s); void WriteCharClasses (); @@ -178,8 +177,7 @@ class Tab { // String handling //--------------------------------------------------------------------- - wchar_t Hex2Char(const wchar_t* s); - wchar_t* Char2Hex(const wchar_t ch); + wchar_t Hex2Char(const wchar_t* s, int len); wchar_t* Unescape(const wchar_t* s); wchar_t* Escape(const wchar_t* s); From fa03b867399cb66f825950823c5217df4ff5e14f Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:00:54 +0200 Subject: [PATCH 18/95] Fix memory leaks, and change function 'DetachAction' to return an indication that it has detached one or more actions --- src/State.cpp | 9 ++++++++- src/State.h | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/State.cpp b/src/State.cpp index 1f7eeb6..d80b610 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -38,6 +38,11 @@ State::State() { this->next = NULL; } +State::~State() { + delete firstAction; + delete next; +} + void State::AddAction(Action *act) { Action *lasta = NULL, *a = firstAction; while (a != NULL && act->typ >= a->typ) {lasta = a; a = a->next;} @@ -51,7 +56,7 @@ void State::AddAction(Action *act) { } } -void State::DetachAction(Action *act) { +bool State::DetachAction(Action *act) { Action *lasta = NULL, *a = firstAction; while (a != NULL && a != act) {lasta = a; a = a->next;} if (a != NULL) { @@ -61,7 +66,9 @@ void State::DetachAction(Action *act) { else { lasta->next = a->next; } + return true; } + return false; } diff --git a/src/State.h b/src/State.h index 318d77a..23e994d 100644 --- a/src/State.h +++ b/src/State.h @@ -45,8 +45,9 @@ class State // state of finite automaton State *next; State(); + ~State(); void 
AddAction(Action *act); - void DetachAction(Action *act); + bool DetachAction(Action *act); void MeltWith(State *s); }; From 8f68e61d06e30e676dac3e7dd635443a8adf2360 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:02:55 +0200 Subject: [PATCH 19/95] Fix memory leaks --- src/Node.cpp | 7 ++++++- src/Node.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Node.cpp b/src/Node.cpp index 4fd8adb..591576a 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -27,6 +27,7 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Node.h" +#include "BitArray.h" namespace Coco { @@ -61,9 +62,13 @@ Node::Node(int typ, Symbol *sym, int line) { this->set = NULL; this->pos = NULL; this->state = NULL; - this->state = 0; this->typ = typ; this->sym = sym; this->line = line; } +Node::~Node() { + delete pos; + delete set; +} + }; // namespace diff --git a/src/Node.h b/src/Node.h index 882da38..f97f4d3 100644 --- a/src/Node.h +++ b/src/Node.h @@ -79,6 +79,7 @@ class Node { // (only used in DFA.ConvertToStates) Node(int typ, Symbol *sym, int line); + ~Node(); }; }; // namespace From 2b935283d3b61f29331b6e3bd257da2f0e88e57d Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:05:44 +0200 Subject: [PATCH 20/95] Fix several memory leaks --- src/Coco.atg | 42 ++++++++++++++++++++++++++---------------- src/Parser.cpp | 42 ++++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 5c03625..ca24a38 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -131,9 +131,9 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) - ] (. dfa->NewComment(g1->l, g2->l, nested); .) + ] (. dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; .) } - { "IGNORE" Set (. tab->ignored->Or(s); .) + { "IGNORE" Set (. 
tab->ignored->Or(s); delete s; .) } SYNC @@ -160,6 +160,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra '=' Expression (. sym->graph = g->l; tab->Finish(g); + delete g; .) WEAK '.' @@ -167,6 +168,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra "END" ident (. if (!coco_string_equal(gramName, t->val)) SemErr(L"name does not match grammar name"); tab->gramSy = tab->FindSym(gramName); + coco_string_delete(gramName); if (tab->gramSy == NULL) SemErr(L"missing production for grammar name"); else { @@ -209,6 +211,7 @@ SetDecl (. CharSet *s; .) .) '=' Set (. if (s->Elements() == 0) SemErr(L"character set must not be empty"); tab->NewCharClass(name, s); + coco_string_delete(name); .) '.' . @@ -218,8 +221,8 @@ SetDecl (. CharSet *s; .) Set (. CharSet *s2; .) = SimSet - { '+' SimSet (. s->Or(s2); .) - | '-' SimSet (. s->Subtract(s2); .) + { '+' SimSet (. s->Or(s2); delete s2; .) + | '-' SimSet (. s->Subtract(s2); delete s2; .) } . @@ -248,7 +251,7 @@ SimSet (. int n1, n2; .) | Char (. s->Set(n1); .) [ ".." Char (. for (int i = n1; i <= n2; i++) s->Set(i); .) ] -| "ANY" (. s = new CharSet(); s->Fill(); .) +| "ANY" (. delete s; s = new CharSet(); s->Fill(); .) ) . @@ -279,7 +282,8 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; sym = tab->NewSym(typ, name, t->line); sym->tokenKind = Symbol::fixedToken; } - tokenString = NULL; + coco_string_delete(name); + coco_string_delete(tokenString); .) SYNC ( '=' TokenExpr '.' (. if (kind == str) SemErr(L"a literal must not be declared with a structure"); @@ -292,6 +296,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } + delete g; .) | (. if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); @@ -327,7 +332,7 @@ Expression (. Graph *g2; .) { WEAK '|' Term (. if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; .) } . 
@@ -339,9 +344,9 @@ Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) ( [ (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); .) Resolver<.rslv->pos.> (. g = new Graph(rslv); .) ] - Factor (. if (rslv != NULL) tab->MakeSequence(g, g2); + Factor (. if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; .) - { Factor (. tab->MakeSequence(g, g2); .) + { Factor (. tab->MakeSequence(g, g2); delete g2; .) } | (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) ) (. if (g == NULL) // invalid start of Term @@ -371,6 +376,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; sym = tab->eofSy; // dummy } } + coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) SemErr(L"this symbol kind is not allowed in a production"); @@ -425,7 +431,7 @@ TokenExpr (. Graph *g2; .) { WEAK '|' TokenTerm (. if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; .) } . @@ -435,11 +441,11 @@ TokenExpr (. Graph *g2; .) TokenTerm (. Graph *g2; .) = TokenFactor - { TokenFactor (. tab->MakeSequence(g, g2); .) + { TokenFactor (. tab->MakeSequence(g, g2); delete g2; .) } [ "CONTEXT" '(' TokenExpr (. tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; - tab->MakeSequence(g, g2); .) + tab->MakeSequence(g, g2); delete g2; .) ')' ] . @@ -457,16 +463,20 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; g = new Graph(p); - tokenString = coco_string_create(noString); + coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str g = tab->StrToGraph(name); if (tokenString == NULL) tokenString = coco_string_create(name); - else tokenString = coco_string_create(noString); + else { + coco_string_delete(tokenString); + tokenString = coco_string_create(noString); + } } + coco_string_delete(name); .) | '(' TokenExpr ')' -| '[' TokenExpr ']' (. 
tab->MakeOption(g); tokenString = coco_string_create(noString); .) -| '{' TokenExpr '}' (. tab->MakeIteration(g); tokenString = coco_string_create(noString); .) +| '[' TokenExpr ']' (. tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) +| '{' TokenExpr '}' (. tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) ) (. if (g == NULL) // invalid start of TokenFactor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) . diff --git a/src/Parser.cpp b/src/Parser.cpp index f6dc900..a45602b 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -150,12 +150,12 @@ void Parser::Coco() { Get(); nested = true; } - dfa->NewComment(g1->l, g2->l, nested); + dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; } while (la->kind == 15 /* "IGNORE" */) { Get(); Set(s); - tab->ignored->Or(s); + tab->ignored->Or(s); delete s; } while (!(la->kind == _EOF || la->kind == 16 /* "PRODUCTIONS" */)) {SynErr(42); Get();} Expect(16 /* "PRODUCTIONS" */); @@ -190,6 +190,7 @@ void Parser::Coco() { Expression(g); sym->graph = g->l; tab->Finish(g); + delete g; ExpectWeak(18 /* "." */, 4); } @@ -198,6 +199,7 @@ void Parser::Coco() { if (!coco_string_equal(gramName, t->val)) SemErr(L"name does not match grammar name"); tab->gramSy = tab->FindSym(gramName); + coco_string_delete(gramName); if (tab->gramSy == NULL) SemErr(L"missing production for grammar name"); else { @@ -241,6 +243,7 @@ void Parser::SetDecl() { Set(s); if (s->Elements() == 0) SemErr(L"character set must not be empty"); tab->NewCharClass(name, s); + coco_string_delete(name); Expect(18 /* "." 
*/); } @@ -254,7 +257,8 @@ void Parser::TokenDecl(int typ) { sym = tab->NewSym(typ, name, t->line); sym->tokenKind = Symbol::fixedToken; } - tokenString = NULL; + coco_string_delete(name); + coco_string_delete(tokenString); while (!(StartOf(5))) {SynErr(43); Get();} if (la->kind == 17 /* "=" */) { @@ -271,6 +275,7 @@ void Parser::TokenDecl(int typ) { tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } + delete g; } else if (StartOf(6)) { if (kind == id) genScanner = false; @@ -290,7 +295,7 @@ void Parser::TokenExpr(Graph* &g) { while (WeakSeparator(28 /* "|" */,8,7) ) { TokenTerm(g2); if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; } } @@ -302,11 +307,11 @@ void Parser::Set(CharSet* &s) { if (la->kind == 20 /* "+" */) { Get(); SimSet(s2); - s->Or(s2); + s->Or(s2); delete s2; } else { Get(); SimSet(s2); - s->Subtract(s2); + s->Subtract(s2); delete s2; } } } @@ -368,7 +373,7 @@ void Parser::Expression(Graph* &g) { while (WeakSeparator(28 /* "|" */,16,15) ) { Term(g2); if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; } } @@ -407,7 +412,7 @@ void Parser::SimSet(CharSet* &s) { } } else if (la->kind == 23 /* "ANY" */) { Get(); - s = new CharSet(); s->Fill(); + delete s; s = new CharSet(); s->Fill(); } else SynErr(46); } @@ -464,11 +469,11 @@ void Parser::Term(Graph* &g) { g = new Graph(rslv); } Factor(g2); - if (rslv != NULL) tab->MakeSequence(g, g2); + if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; while (StartOf(18)) { Factor(g2); - tab->MakeSequence(g, g2); + tab->MakeSequence(g, g2); delete g2; } } else if (StartOf(19)) { g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); @@ -511,6 +516,7 @@ void Parser::Factor(Graph* &g) { sym = tab->eofSy; // dummy } } + coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) SemErr(L"this symbol 
kind is not allowed in a production"); @@ -628,14 +634,14 @@ void Parser::TokenTerm(Graph* &g) { TokenFactor(g); while (StartOf(8)) { TokenFactor(g2); - tab->MakeSequence(g, g2); + tab->MakeSequence(g, g2); delete g2; } if (la->kind == 38 /* "CONTEXT" */) { Get(); Expect(30 /* "(" */); TokenExpr(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; - tab->MakeSequence(g, g2); + tab->MakeSequence(g, g2); delete g2; Expect(31 /* ")" */); } } @@ -653,12 +659,16 @@ void Parser::TokenFactor(Graph* &g) { } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; g = new Graph(p); - tokenString = coco_string_create(noString); + coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str g = tab->StrToGraph(name); if (tokenString == NULL) tokenString = coco_string_create(name); - else tokenString = coco_string_create(noString); + else { + coco_string_delete(tokenString); + tokenString = coco_string_create(noString); + } } + coco_string_delete(name); } else if (la->kind == 30 /* "(" */) { Get(); @@ -668,12 +678,12 @@ void Parser::TokenFactor(Graph* &g) { Get(); TokenExpr(g); Expect(33 /* "]" */); - tab->MakeOption(g); tokenString = coco_string_create(noString); + tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else if (la->kind == 34 /* "{" */) { Get(); TokenExpr(g); Expect(35 /* "}" */); - tab->MakeIteration(g); tokenString = coco_string_create(noString); + tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else SynErr(51); if (g == NULL) // invalid start of TokenFactor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); From 839e7deba056acff053f396e726dae4793d2c53d Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:07:33 +0200 Subject: [PATCH 21/95] Fix memory leaks --- src/Melted.cpp | 8 +++++++- src/Melted.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Melted.cpp 
b/src/Melted.cpp index f63c9f0..7e71f93 100644 --- a/src/Melted.cpp +++ b/src/Melted.cpp @@ -27,13 +27,19 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Melted.h" +#include "BitArray.h" namespace Coco { class BitArray; Melted::Melted(BitArray *set, State *state) { - this->set = set; this->state = state; + this->set = set; this->state = state; this->next = NULL; +} + +Melted::~Melted() { + delete set; + delete next; } }; // namespace diff --git a/src/Melted.h b/src/Melted.h index 960faa2..a5bbfe0 100644 --- a/src/Melted.h +++ b/src/Melted.h @@ -44,6 +44,7 @@ class Melted // info about melted states Melted *next; Melted(BitArray *set, State *state); + ~Melted(); }; }; // namespace From 170138fa30d24d2d444da8e21d1841948e4b939e Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:13:41 +0200 Subject: [PATCH 22/95] Fix several memory leaks --- src/Action.cpp | 14 ++++++++++++-- src/Action.h | 3 ++- src/DFA.cpp | 15 +++++++++------ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/Action.cpp b/src/Action.cpp index d6857b3..9c44edd 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -39,6 +39,11 @@ Action::Action(int typ, int sym, int tc) { this->typ = typ; this->sym = sym; this->tc = tc; } +Action::~Action() { + delete this->target; + delete this->next; +} + void Action::AddTarget(Target *t) { // add t to the action.targets Target *last = NULL; Target *p = target; @@ -68,14 +73,19 @@ CharSet* Action::Symbols(Tab *tab) { return s; } -void Action::ShiftWith(CharSet *s, Tab *tab) { +bool Action::ShiftWith(CharSet *s, Tab *tab) { //return true if it used the CharSet *s + bool rc = false; if (s->Elements() == 1) { typ = Node::chr; sym = s->First(); } else { CharClass *c = tab->FindCharClass(s); - if (c == NULL) c = tab->NewCharClass(L"#", s); // class with dummy name + if (c == NULL) { + c = tab->NewCharClass(L"#", s); // class with dummy 
name + rc = true; + } typ = Node::clas; sym = c->n; } + return rc; } }; // namespace diff --git a/src/Action.h b/src/Action.h index 4148b63..85ec4ca 100644 --- a/src/Action.h +++ b/src/Action.h @@ -47,10 +47,11 @@ class Action // action of finite automaton Action *next; Action(int typ, int sym, int tc); + ~Action(); void AddTarget(Target *t); // add t to the action.targets void AddTargets(Action *a); // add copy of a.targets to action.targets CharSet* Symbols(Tab *tab); - void ShiftWith(CharSet *s, Tab *tab); + bool ShiftWith(CharSet *s, Tab *tab); //return true if it used the CharSet *s }; }; // namespace diff --git a/src/DFA.cpp b/src/DFA.cpp index dbd74e5..a8cb802 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -104,8 +104,9 @@ void DFA::CombineShifts() { if (a->target->state == b->target->state && a->tc == b->tc) { seta = a->Symbols(tab); setb = b->Symbols(tab); seta->Or(setb); - a->ShiftWith(seta, tab); + if(!a->ShiftWith(seta, tab)) delete seta; c = b; b = b->next; state->DetachAction(c); + delete setb; } else b = b->next; } } @@ -289,23 +290,25 @@ void DFA::SplitActions(State *state, Action *a, Action *b) { } else if (seta->Includes(setb)) { setc = seta->Clone(); setc->Subtract(setb); b->AddTargets(a); - a->ShiftWith(setc, tab); + if(!a->ShiftWith(setc, tab)) delete setc; } else if (setb->Includes(seta)) { setc = setb->Clone(); setc->Subtract(seta); a->AddTargets(b); - b->ShiftWith(setc, tab); + if(!b->ShiftWith(setc, tab)) delete setc; } else { setc = seta->Clone(); setc->And(setb); seta->Subtract(setc); setb->Subtract(setc); - a->ShiftWith(seta, tab); - b->ShiftWith(setb, tab); + if(!a->ShiftWith(seta, tab)) delete seta; + if(!b->ShiftWith(setb, tab)) delete setb; c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith c->AddTargets(a); c->AddTargets(b); - c->ShiftWith(setc, tab); + if(!c->ShiftWith(setc, tab)) delete setc; state->AddAction(c); + return; //don't need to delete anything } + delete seta; delete setb; } bool 
DFA::Overlap(Action *a, Action *b) { From dd477f2278369092dd25ba157f9586241001223e Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 11:21:44 +0200 Subject: [PATCH 23/95] Cleanup and fix several memory leaks --- src/DFA.cpp | 101 ++++++++++++++++++++++++++-------------------------- src/DFA.h | 3 +- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index a8cb802..1e2ca34 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -38,9 +38,11 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { +typedef wchar_t wchar_t_10[10]; +typedef wchar_t wchar_t_20[20]; + //---------- Output primitives -wchar_t* DFA::Ch(wchar_t ch) { - wchar_t* format = new wchar_t[10]; +static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') coco_swprintf(format, 10, L"%d\0", (int) ch); else @@ -48,29 +50,26 @@ wchar_t* DFA::Ch(wchar_t ch) { return format; } -wchar_t* DFA::ChCond(wchar_t ch) { - wchar_t* format = new wchar_t[20]; - wchar_t* res = Ch(ch); +static wchar_t* DFAChCond(wchar_t ch, wchar_t_20 &format) { + wchar_t_10 fmt; + wchar_t* res = DFACh(ch, fmt); coco_swprintf(format, 20, L"ch == %ls\0", res); - delete [] res; return format; } void DFA::PutRange(CharSet *s) { + wchar_t_10 fmt1, fmt2; for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { - wchar_t *from = Ch((wchar_t) r->from); + wchar_t *from = DFACh((wchar_t) r->from, fmt1); fwprintf(gen, L"ch == %ls", from); - delete [] from; } else if (r->from == 0) { - wchar_t *to = Ch((wchar_t) r->to); + wchar_t *to = DFACh((wchar_t) r->to, fmt1); fwprintf(gen, L"ch <= %ls", to); - delete [] to; } else { - wchar_t *from = Ch((wchar_t) r->from); - wchar_t *to = Ch((wchar_t) r->to); + wchar_t *from = DFACh((wchar_t) r->from, fmt1); + wchar_t *to = DFACh((wchar_t) r->to, fmt2); fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to); - delete [] from; delete [] to; } if (r->next != 
NULL) fwprintf(gen, L" || "); } @@ -122,29 +121,28 @@ void DFA::FindUsedStates(State *state, BitArray *used) { void DFA::DeleteRedundantStates() { //State *newState = new State[State::lastNr + 1]; State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1)); - BitArray *used = new BitArray(lastStateNr + 1); - FindUsedStates(firstState, used); + BitArray used(lastStateNr + 1); + FindUsedStates(firstState, &used); // combine equal final states for (State *s1 = firstState->next; s1 != NULL; s1 = s1->next) // firstState cannot be final - if ((*used)[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx)) + if (used[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx)) for (State *s2 = s1->next; s2 != NULL; s2 = s2->next) - if ((*used)[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) { - used->Set(s2->nr, false); newState[s2->nr] = s1; + if (used[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) { + used.Set(s2->nr, false); newState[s2->nr] = s1; } State *state; for (state = firstState; state != NULL; state = state->next) - if ((*used)[state->nr]) + if (used[state->nr]) for (Action *a = state->firstAction; a != NULL; a = a->next) - if (!((*used)[a->target->state->nr])) + if (!(used[a->target->state->nr])) a->target->state = newState[a->target->state->nr]; // delete unused states lastState = firstState; lastStateNr = 0; // firstState has number 0 for (state = firstState->next; state != NULL; state = state->next) - if ((*used)[state->nr]) {state->nr = ++lastStateNr; lastState = state;} + if (used[state->nr]) {state->nr = ++lastStateNr; lastState = state;} else lastState->next = state->next; free (newState); - delete used; } State* DFA::TheState(Node *p) { @@ -169,9 +167,8 @@ void DFA::Step(State *from, Node *p, BitArray *stepped) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); if (p->state != from) { - BitArray 
*newStepped = new BitArray(tab->nodes.Count); - Step(p->state, p, newStepped); - delete newStepped; + BitArray newStepped(tab->nodes.Count); + Step(p->state, p, &newStepped); } } else if (p->typ == Node::opt) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); @@ -212,9 +209,8 @@ void DFA::FindTrans (Node *p, bool start, BitArray *marked) { if (p == NULL || (*marked)[p->n]) return; marked->Set(p->n, true); if (start) { - BitArray *stepped = new BitArray(tab->nodes.Count); - Step(p->state, p, stepped); // start of group of equally numbered nodes - delete stepped; + BitArray stepped(tab->nodes.Count); + Step(p->state, p, &stepped); // start of group of equally numbered nodes } if (p->typ == Node::clas || p->typ == Node::chr) { @@ -235,11 +231,11 @@ void DFA::ConvertToStates(Node *p, Symbol *sym) { return; } NumberNodes(curGraph, firstState, true); - FindTrans(curGraph, true, new BitArray(tab->nodes.Count)); + BitArray ba(tab->nodes.Count); + FindTrans(curGraph, true, &ba); if (p->typ == Node::iter) { - BitArray *stepped = new BitArray(tab->nodes.Count); - Step(firstState, p, stepped); - delete stepped; + ba.SetAll(false); + Step(firstState, p, &ba); } } @@ -349,6 +345,7 @@ void DFA::MeltStates(State *state) { do {changed = MakeUnique(s);} while (changed); melt = NewMelted(targets, s); } + else delete targets; action->target->next = NULL; action->target->state = melt->state; } @@ -378,6 +375,7 @@ void DFA::MakeDeterministic() { void DFA::PrintStates() { fwprintf(trace, L"\n"); fwprintf(trace, L"---------- states ----------\n"); + wchar_t_10 fmt; for (State *state = firstState; state != NULL; state = state->next) { bool first = true; if (state->endOf == NULL) fwprintf(trace, L" "); @@ -392,7 +390,7 @@ void DFA::PrintStates() { if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)tab->classes[action->sym])->name); - else fwprintf(trace, L"%3s", 
Ch((wchar_t)action->sym)); + else fwprintf(trace, L"%3s", DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, L"%3d", targ->state->nr); } @@ -479,7 +477,7 @@ Melted* DFA::StateWithSet(BitArray *s) { //------------------------ comments -------------------------------- wchar_t* DFA::CommentStr(Node *p) { - StringBuilder s = StringBuilder(); + StringBuilder s; while (p != NULL) { if (p->typ == Node::chr) { s.Append((wchar_t)p->val); @@ -510,10 +508,10 @@ void DFA::NewComment(Node *from, Node *to, bool nested) { void DFA::GenComBody(Comment *com) { fwprintf(gen, L"\t\tfor(;;) {\n"); - wchar_t* res = ChCond(com->stop[0]); + wchar_t_20 fmt; + wchar_t* res = DFAChCond(com->stop[0], fmt); fwprintf(gen, L"\t\t\tif (%ls) ", res); fwprintf(gen, L"{\n"); - delete [] res; if (coco_string_length(com->stop) == 1) { fwprintf(gen, L"\t\t\t\tlevel--;\n"); @@ -521,9 +519,8 @@ void DFA::GenComBody(Comment *com) { fwprintf(gen, L"\t\t\t\tNextCh();\n"); } else { fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = ChCond(com->stop[1]); + wchar_t* res = DFAChCond(com->stop[1], fmt); fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res); - delete [] res; fwprintf(gen, L"\t\t\t\t\tlevel--;\n"); fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); fwprintf(gen, L"\t\t\t\t\tNextCh();\n"); @@ -531,17 +528,15 @@ void DFA::GenComBody(Comment *com) { } if (com->nested) { fwprintf(gen, L"\t\t\t}"); - wchar_t* res = ChCond(com->start[0]); + wchar_t* res = DFAChCond(com->start[0], fmt); fwprintf(gen, L" else if (%ls) ", res); - delete [] res; fwprintf(gen, L"{\n"); if (coco_string_length(com->stop) == 1) fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n"); else { fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = ChCond(com->start[1]); + wchar_t* res = DFAChCond(com->start[1], fmt); fwprintf(gen, L"\t\t\t\tif (%ls) ", res); - delete [] res; fwprintf(gen, L"{\n"); fwprintf(gen, 
L"\t\t\t\t\tlevel++; NextCh();\n"); fwprintf(gen, L"\t\t\t\t}\n"); @@ -561,14 +556,14 @@ void DFA::GenComment(Comment *com, int i) { fwprintf(gen, L"bool Scanner::Comment%d() ", i); fwprintf(gen, L"{\n"); fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n"); + wchar_t_20 fmt; if (coco_string_length(com->start) == 1) { fwprintf(gen, L"\tNextCh();\n"); GenComBody(com); } else { fwprintf(gen, L"\tNextCh();\n"); - wchar_t* res = ChCond(com->start[1]); + wchar_t* res = DFAChCond(com->start[1], fmt); fwprintf(gen, L"\tif (%ls) ", res); - delete [] res; fwprintf(gen, L"{\n"); fwprintf(gen, L"\t\tNextCh();\n"); @@ -589,7 +584,7 @@ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literal Iterator *iter = tab->literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - if (e->val == sym) { return e->key; } + if (e->val == sym) { delete iter; return e->key; } } delete iter; } @@ -684,13 +679,13 @@ void DFA::WriteState(State *state) { } bool ctxEnd = state->ctx; + wchar_t_20 fmt; for (Action *action = state->firstAction; action != NULL; action = action->next) { if (action == state->firstAction) fwprintf(gen, L"\t\t\tif ("); else fwprintf(gen, L"\t\t\telse if ("); if (action->typ == Node::chr) { - wchar_t* res = ChCond((wchar_t)action->sym); + wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); fwprintf(gen, L"%ls", res); - delete [] res; } else PutRange(tab->CharClassSet(action->sym)); fwprintf(gen, L") {"); @@ -751,7 +746,7 @@ void DFA::WriteStartTab() { } void DFA::WriteScanner() { - Generator g = Generator(tab, errors); + Generator g(tab, errors); fram = g.OpenFrame(L"Scanner.frame"); gen = g.OpenGen(L"Scanner.h"); if (dirtyDFA) MakeDeterministic(); @@ -825,10 +820,10 @@ void DFA::WriteScanner() { if (firstComment != NULL) { fwprintf(gen, L"\tif ("); com = firstComment; cmdIdx = 0; + wchar_t_20 fmt; while (com != NULL) { - wchar_t* res = ChCond(com->start[0]); + wchar_t* res = 
DFAChCond(com->start[0], fmt); fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx); - delete [] res; if (com->next != NULL) { fwprintf(gen, L" || "); } @@ -866,4 +861,10 @@ DFA::DFA(Parser *parser) { hasCtxMoves = false; } +DFA::~DFA() { + delete firstState; + delete firstComment; + delete firstMelted; +} + }; // namespace diff --git a/src/DFA.h b/src/DFA.h index 57ed846..8abebc9 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -72,8 +72,6 @@ class DFA Comment *firstComment; // list of comments //---------- Output primitives - wchar_t* Ch(wchar_t ch); - wchar_t* ChCond(wchar_t ch); void PutRange(CharSet *s); //---------- State handling @@ -125,6 +123,7 @@ class DFA void OpenGen(const wchar_t* genName, bool backUp); /* pdt */ void WriteScanner(); DFA(Parser *parser); + ~DFA(); }; }; // namespace From eac5f1ecf301fac765298f3293f0ee68facc683e Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 18:10:35 +0200 Subject: [PATCH 24/95] Convert ArrayList to a templated one for future simplifications --- src/ArrayList.cpp | 83 ------------------------------------------- src/ArrayList.h | 90 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 96 deletions(-) delete mode 100644 src/ArrayList.cpp diff --git a/src/ArrayList.cpp b/src/ArrayList.cpp deleted file mode 100644 index 9e94c05..0000000 --- a/src/ArrayList.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/*------------------------------------------------------------------------- -Compiler Generator Coco/R, -Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz -extended by M. Loeberbauer & A. Woess, Univ. of Linz -ported to C++ by Csaba Balazs, University of Szeged -with improvements by Pat Terry, Rhodes University - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. 
- -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -As an exception, it is allowed to write an extension of Coco/R that is -used as a plugin in non-free software. - -If not otherwise stated, any source code generated by Coco/R (other than -Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ - -#include -#include "ArrayList.h" - -namespace Coco { - -ArrayList::ArrayList() { - Count = 0; - Capacity = 10; - Data = new void*[ Capacity ]; -} - -ArrayList::~ArrayList() { - delete [] Data; -} - -void ArrayList::Clear() { - Count = 0; -} - -void ArrayList::Add(void *value) { - if (Count < Capacity) { - Data[Count] = value; - Count++; - } else { - Capacity *= 2; - void** newData = new void*[Capacity]; - for (int i=0; i + namespace Coco { -class ArrayList +template +class TArrayList { + T** Data; public: - ArrayList(); - virtual ~ArrayList(); - - void Add(void *value); - void Remove(void *value); - void Clear(); - void* operator[](int index); - - int Count; - int Capacity; -private: - void** Data; + typedef int tsize_t; + tsize_t Count; + tsize_t Capacity; + + TArrayList() { + Count = 0; + Capacity = 10; + Data = new T*[ Capacity ]; + } + virtual ~TArrayList() { + delete [] Data; + } + + void Add(T *value) { + if (Count < Capacity) { + Data[Count] = value; + Count++; + } else { + Capacity *= 2; + T** newData = new T*[Capacity]; + for (tsize_t i=0; i ArrayList; + }; // namespace #endif // !defined(COCO_ARRAYLIST_H__) From 4f22deafabf11a8d25184a273af470f643552426 Mon Sep 17 00:00:00 2001 From: 
mingodad Date: Thu, 3 Jun 2021 18:12:18 +0200 Subject: [PATCH 25/95] Add a basic AST generator based on https://github.com/rochus-keller/EbnfStudio --- src/Coco.atg | 8 + src/Parser.cpp | 700 ++++++++++++++++++++++++++++++++++++---------- src/Parser.frame | 126 ++++++++- src/Parser.h | 51 +++- src/ParserGen.cpp | 39 ++- src/ParserGen.h | 1 + src/Scanner.cpp | 108 +++---- src/Scanner.frame | 44 +-- src/Scanner.h | 11 +- src/Tab.h | 1 - 10 files changed, 860 insertions(+), 229 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index ca24a38..f946d35 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -35,6 +35,7 @@ $namespace=Coco #include "Tab.h" #include "DFA.h" #include "ParserGen.h" +#define COCO_FRAME_PARSER COMPILER Coco @@ -124,6 +125,13 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra .) { ANY } (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .) [ "IGNORECASE" (. dfa->ignoreCase = true; .) ] /* pdt */ + [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); + if (sym != NULL) SemErr(L"name declared twice"); + else { + sym = tab->NewSym(Node::t, t->val, t->line); + sym->tokenKind = Symbol::fixedToken; + }.) + } ] /*from cocoxml*/ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] [ "PRAGMAS" { TokenDecl }] diff --git a/src/Parser.cpp b/src/Parser.cpp index a45602b..54598ca 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -35,6 +35,30 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ((SynTree*)ast_stack.Top())->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ((SynTree*)ast_stack.Top())->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + void Parser::SynErr(int n) { if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); errDist = 0; @@ -97,6 +121,9 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { void Parser::Coco() { Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create("Coco");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif int beg = la->pos; int line = la->line; while (StartOf(1)) { Get(); @@ -106,9 +133,15 @@ void Parser::Coco() { } Expect(6 /* "COMPILER" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif genScanner = true; tab->ignored = new CharSet(); Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif gramName = coco_string_create(t->val); beg = la->pos; line = la->line; @@ -119,51 +152,102 @@ void Parser::Coco() { tab->semDeclPos = new Position(beg, la->pos, 0, line); if (la->kind == 7 /* "IGNORECASE" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif dfa->ignoreCase = true; } - if (la->kind == 8 /* "CHARACTERS" */) { + if (la->kind == 8 /* "TERMINALS" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (la->kind == _ident) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + sym = tab->FindSym(t->val); + if (sym != NULL) SemErr(L"name declared 
twice"); + else { + sym = tab->NewSym(Node::t, t->val, t->line); + sym->tokenKind = Symbol::fixedToken; + } + } + } + if (la->kind == 9 /* "CHARACTERS" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif while (la->kind == _ident) { SetDecl(); } } - if (la->kind == 9 /* "TOKENS" */) { + if (la->kind == 10 /* "TOKENS" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif while (la->kind == _ident || la->kind == _string || la->kind == _char) { TokenDecl(Node::t); } } - if (la->kind == 10 /* "PRAGMAS" */) { + if (la->kind == 11 /* "PRAGMAS" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif while (la->kind == _ident || la->kind == _string || la->kind == _char) { TokenDecl(Node::pr); } } - while (la->kind == 11 /* "COMMENTS" */) { + while (la->kind == 12 /* "COMMENTS" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif bool nested = false; - Expect(12 /* "FROM" */); + Expect(13 /* "FROM" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g1); - Expect(13 /* "TO" */); + Expect(14 /* "TO" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g2); - if (la->kind == 14 /* "NESTED" */) { + if (la->kind == 15 /* "NESTED" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif nested = true; } dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; } - while (la->kind == 15 /* "IGNORE" */) { + while (la->kind == 16 /* "IGNORE" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Set(s); tab->ignored->Or(s); delete s; } - while (!(la->kind == _EOF || la->kind == 16 /* "PRODUCTIONS" */)) {SynErr(42); Get();} - Expect(16 /* "PRODUCTIONS" */); + while (!(la->kind == _EOF || la->kind == 17 /* "PRODUCTIONS" */)) {SynErr(43); Get();} + Expect(17 /* "PRODUCTIONS" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); while (la->kind == _ident) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif sym = 
tab->FindSym(t->val); bool undef = (sym == NULL); if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); @@ -176,26 +260,32 @@ void Parser::Coco() { bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; - if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { + if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { AttrDecl(sym); } if (!undef) if (noAttrs != (sym->attrPos == NULL)) SemErr(L"attribute mismatch between declaration and use of this symbol"); - if (la->kind == 39 /* "(." */) { + if (la->kind == 40 /* "(." */) { SemText(sym->semPos); } - ExpectWeak(17 /* "=" */, 3); + ExpectWeak(18 /* "=" */, 3); Expression(g); sym->graph = g->l; tab->Finish(g); delete g; - ExpectWeak(18 /* "." */, 4); + ExpectWeak(19 /* "." */, 4); } - Expect(19 /* "END" */); + Expect(20 /* "END" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (!coco_string_equal(gramName, t->val)) SemErr(L"name does not match grammar name"); tab->gramSy = tab->FindSym(gramName); @@ -229,27 +319,51 @@ void Parser::Coco() { } if (tab->ddt[6]) tab->PrintSymbolTable(); - Expect(18 /* "." */); + Expect(19 /* "." */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif } void Parser::SetDecl() { CharSet *s; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SetDecl, "SetDecl", la->line); +#endif Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); if (c != NULL) SemErr(L"name declared twice"); - Expect(17 /* "=" */); + Expect(18 /* "=" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Set(s); if (s->Elements() == 0) SemErr(L"character set must not be empty"); tab->NewCharClass(name, s); coco_string_delete(name); - Expect(18 /* "." */); + Expect(19 /* "." 
*/); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::TokenDecl(int typ) { wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, "TokenDecl", la->line); +#endif Sym(name, kind); sym = tab->FindSym(name); if (sym != NULL) SemErr(L"name declared twice"); @@ -260,11 +374,17 @@ void Parser::TokenDecl(int typ) { coco_string_delete(name); coco_string_delete(tokenString); - while (!(StartOf(5))) {SynErr(43); Get();} - if (la->kind == 17 /* "=" */) { + while (!(StartOf(5))) {SynErr(44); Get();} + if (la->kind == 18 /* "=" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g); - Expect(18 /* "." */); + Expect(19 /* "." */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (kind == str) SemErr(L"a literal must not be declared with a structure"); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) @@ -281,113 +401,191 @@ void Parser::TokenDecl(int typ) { if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); - } else SynErr(44); - if (la->kind == 39 /* "(." */) { + } else SynErr(45); + if (la->kind == 40 /* "(." 
*/) { SemText(sym->semPos); if (typ != Node::pr) SemErr(L"semantic action not allowed here"); } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::TokenExpr(Graph* &g) { Graph *g2; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenExpr, "TokenExpr", la->line); +#endif TokenTerm(g); bool first = true; - while (WeakSeparator(28 /* "|" */,8,7) ) { + while (WeakSeparator(29 /* "|" */,8,7) ) { TokenTerm(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Set(CharSet* &s) { CharSet *s2; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, "Set", la->line); +#endif SimSet(s); - while (la->kind == 20 /* "+" */ || la->kind == 21 /* "-" */) { - if (la->kind == 20 /* "+" */) { + while (la->kind == 21 /* "+" */ || la->kind == 22 /* "-" */) { + if (la->kind == 21 /* "+" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SimSet(s2); s->Or(s2); delete s2; } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SimSet(s2); s->Subtract(s2); delete s2; } } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::AttrDecl(Symbol *sym) { - if (la->kind == 24 /* "<" */) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, "AttrDecl", la->line); +#endif + if (la->kind == 25 /* "<" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(9)) { if (StartOf(10)) { Get(); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"bad string in attributes"); } } - Expect(25 /* ">" */); + Expect(26 /* ">" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else if (la->kind == 26 /* "<." 
*/) { + } else if (la->kind == 27 /* "<." */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(11)) { if (StartOf(12)) { Get(); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"bad string in attributes"); } } - Expect(27 /* ".>" */); + Expect(28 /* ".>" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else SynErr(45); + } else SynErr(46); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::SemText(Position* &pos) { - Expect(39 /* "(." */); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, "SemText", la->line); +#endif + Expect(40 /* "(." */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = t->line; while (StartOf(13)) { if (StartOf(14)) { Get(); } else if (la->kind == _badString) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"bad string in semantic action"); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"missing end of previous semantic action"); } } - Expect(40 /* ".)" */); + Expect(41 /* ".)" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif pos = new Position(beg, t->pos, col, line); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Expression(Graph* &g) { Graph *g2; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Expression, "Expression", la->line); +#endif Term(g); bool first = true; - while (WeakSeparator(28 /* "|" */,16,15) ) { + while (WeakSeparator(29 /* "|" */,16,15) ) { Term(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::SimSet(CharSet* &s) { int n1, n2; +#ifdef PARSER_WITH_AST + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_SimSet, "SimSet", la->line); +#endif s = new CharSet(); if (la->kind == _ident) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif CharClass *c = tab->FindCharClass(t->val); if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); } else if (la->kind == _string) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t *name = tab->Unescape(subName2); coco_string_delete(subName2); @@ -405,19 +603,34 @@ void Parser::SimSet(CharSet* &s) { } else if (la->kind == _char) { Char(n1); s->Set(n1); - if (la->kind == 22 /* ".." */) { + if (la->kind == 23 /* ".." */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Char(n2); for (int i = n1; i <= n2; i++) s->Set(i); } - } else if (la->kind == 23 /* "ANY" */) { + } else if (la->kind == 24 /* "ANY" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif delete s; s = new CharSet(); s->Fill(); - } else SynErr(46); + } else SynErr(47); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Char(int &n) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Char, "Char", la->line); +#endif Expect(_char); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif n = 0; wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t* name = tab->Unescape(subName); @@ -429,19 +642,34 @@ void Parser::Char(int &n) { coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Sym(wchar_t* &name, int &kind) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, "Sym", la->line); +#endif name = coco_string_create(L"???"); kind = id; if (la->kind == _ident) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif kind = id; coco_string_delete(name); name = 
coco_string_create(t->val); } else if (la->kind == _string || la->kind == _char) { if (la->kind == _string) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif coco_string_delete(name); name = coco_string_create(t->val); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); name = coco_string_create_append(L"\"", subName); @@ -457,13 +685,19 @@ void Parser::Sym(wchar_t* &name, int &kind) { } if (coco_string_indexof(name, ' ') >= 0) SemErr(L"literal tokens must not contain blanks"); - } else SynErr(47); + } else SynErr(48); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Term(Graph* &g) { Graph *g2; Node *rslv = NULL; g = NULL; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, "Term", la->line); +#endif if (StartOf(17)) { - if (la->kind == 37 /* "IF" */) { + if (la->kind == 38 /* "IF" */) { rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); Resolver(rslv->pos); g = new Graph(rslv); @@ -477,27 +711,48 @@ void Parser::Term(Graph* &g) { } } else if (StartOf(19)) { g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); - } else SynErr(48); + } else SynErr(49); if (g == NULL) // invalid start of Term g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Resolver(Position* &pos) { - Expect(37 /* "IF" */); - Expect(30 /* "(" */); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Resolver, "Resolver", la->line); +#endif + Expect(38 /* "IF" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(31 /* "(" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; Condition(); pos = new Position(beg, t->pos, col, line); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void 
Parser::Factor(Graph* &g) { wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Factor, "Factor", la->line); +#endif switch (la->kind) { - case _ident: case _string: case _char: case 29 /* "WEAK" */: { - if (la->kind == 29 /* "WEAK" */) { + case _ident: case _string: case _char: case 30 /* "WEAK" */: { + if (la->kind == 30 /* "WEAK" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif weak = true; } Sym(name, kind); @@ -527,7 +782,7 @@ void Parser::Factor(Graph* &g) { Node *p = tab->NewNode(typ, sym, t->line); g = new Graph(p); - if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { + if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { Attribs(p); if (kind != id) SemErr(L"a literal must not have attributes"); } @@ -538,27 +793,45 @@ void Parser::Factor(Graph* &g) { break; } - case 30 /* "(" */: { + case 31 /* "(" */: { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Expression(g); - Expect(31 /* ")" */); + Expect(32 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif break; } - case 32 /* "[" */: { + case 33 /* "[" */: { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Expression(g); - Expect(33 /* "]" */); + Expect(34 /* "]" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeOption(g); break; } - case 34 /* "{" */: { + case 35 /* "{" */: { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Expression(g); - Expect(35 /* "}" */); + Expect(36 /* "}" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeIteration(g); break; } - case 39 /* "(." */: { + case 40 /* "(." 
*/: { SemText(pos); Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); p->pos = pos; @@ -566,88 +839,151 @@ void Parser::Factor(Graph* &g) { break; } - case 23 /* "ANY" */: { + case 24 /* "ANY" */: { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys g = new Graph(p); break; } - case 36 /* "SYNC" */: { + case 37 /* "SYNC" */: { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); g = new Graph(p); break; } - default: SynErr(49); break; + default: SynErr(50); break; } if (g == NULL) // invalid start of Factor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Attribs(Node *p) { - if (la->kind == 24 /* "<" */) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, "Attribs", la->line); +#endif + if (la->kind == 25 /* "<" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(9)) { if (StartOf(10)) { Get(); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"bad string in attributes"); } } - Expect(25 /* ">" */); + Expect(26 /* ">" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else if (la->kind == 26 /* "<." */) { + } else if (la->kind == 27 /* "<." 
*/) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(11)) { if (StartOf(12)) { Get(); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif SemErr(L"bad string in attributes"); } } - Expect(27 /* ".>" */); + Expect(28 /* ".>" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else SynErr(50); + } else SynErr(51); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::Condition() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, "Condition", la->line); +#endif while (StartOf(20)) { - if (la->kind == 30 /* "(" */) { + if (la->kind == 31 /* "(" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Condition(); } else { Get(); } } - Expect(31 /* ")" */); + Expect(32 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::TokenTerm(Graph* &g) { Graph *g2; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, "TokenTerm", la->line); +#endif TokenFactor(g); while (StartOf(8)) { TokenFactor(g2); tab->MakeSequence(g, g2); delete g2; } - if (la->kind == 38 /* "CONTEXT" */) { + if (la->kind == 39 /* "CONTEXT" */) { Get(); - Expect(30 /* "(" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(31 /* "(" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; tab->MakeSequence(g, g2); delete g2; - Expect(31 /* ")" */); + Expect(32 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } void Parser::TokenFactor(Graph* &g) { wchar_t* name = NULL; int kind; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, 
"TokenFactor", la->line); +#endif g = NULL; if (la->kind == _ident || la->kind == _string || la->kind == _char) { Sym(name, kind); @@ -670,23 +1006,44 @@ void Parser::TokenFactor(Graph* &g) { } coco_string_delete(name); - } else if (la->kind == 30 /* "(" */) { + } else if (la->kind == 31 /* "(" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g); - Expect(31 /* ")" */); - } else if (la->kind == 32 /* "[" */) { + Expect(32 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 33 /* "[" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g); - Expect(33 /* "]" */); + Expect(34 /* "]" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); - } else if (la->kind == 34 /* "{" */) { + } else if (la->kind == 35 /* "{" */) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif TokenExpr(g); - Expect(35 /* "}" */); + Expect(36 /* "}" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); - } else SynErr(51); + } else SynErr(52); if (g == NULL) // invalid start of TokenFactor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } @@ -706,7 +1063,7 @@ struct ParserInitExistsRecognizer { struct InitIsMissingType { char dummy1; }; - + struct InitExistsType { char dummy1; char dummy2; }; @@ -730,7 +1087,7 @@ struct ParserDestroyExistsRecognizer { struct DestroyIsMissingType { char dummy1; }; - + struct DestroyExistsType { char dummy1; char dummy2; }; @@ -790,7 +1147,7 @@ void Parser::Parse() { } Parser::Parser(Scanner *scanner) { - maxT = 41; + maxT = 42; ParserInitCaller::CallInit(this); dummyToken = NULL; @@ -804,28 +1161,28 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool 
set[21][43] = { - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,x, x,x,x,x, T,T,T,x, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,T,x, x,x,x,T, x,x,x,x, T,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,T, x,T,T,T, T,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, T,x,T,x, x,x,x,x, x,x,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, x,T,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,T, x,x,x,x, T,T,T,T, T,T,T,T, T,T,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,x,x,T, x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, 
x,x,x,x, T,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,x} + static bool set[21][44] = { + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, + {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {x,T,T,T, T,T,T,x, x,x,x,x, x,T,T,T, x,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,T, x,x,x,x, T,x,x,x, x,T,T,T, x,T,x,T, x,T,T,x, T,x,x,x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, + {x,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, + {x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x,x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,x,T,x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, T,x,x,x, x,T,T,T, T,T,T,T, T,T,T,x, T,x,x,x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,T,T, 
x,T,x,T, x,T,T,x, T,x,x,x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,T,T, x,T,x,T, x,T,x,x, T,x,x,x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,T,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,x} }; @@ -836,8 +1193,14 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete dummyToken; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER coco_string_delete(noString); coco_string_delete(tokenString); +#endif } Errors::Errors() { @@ -857,50 +1220,51 @@ void Errors::SynErr(int line, int col, int n) { case 5: s = L"char expected"; break; case 6: s = L"\"COMPILER\" expected"; break; case 7: s = L"\"IGNORECASE\" expected"; break; - case 8: s = L"\"CHARACTERS\" expected"; break; - case 9: s = L"\"TOKENS\" expected"; break; - case 10: s = L"\"PRAGMAS\" expected"; break; - case 11: s = L"\"COMMENTS\" expected"; break; - case 12: s = L"\"FROM\" expected"; break; - case 13: s = L"\"TO\" expected"; break; - case 14: s = L"\"NESTED\" expected"; break; - case 15: s = L"\"IGNORE\" expected"; break; - case 16: s = L"\"PRODUCTIONS\" expected"; break; - case 17: s = L"\"=\" expected"; break; - case 18: s = L"\".\" expected"; break; - case 19: s = L"\"END\" expected"; break; - case 20: s = L"\"+\" expected"; break; - case 21: s = L"\"-\" expected"; break; - case 22: s = L"\"..\" expected"; break; - case 23: s = L"\"ANY\" expected"; break; - case 24: s = L"\"<\" expected"; break; - case 25: s = L"\">\" expected"; break; - case 26: s = L"\"<.\" expected"; break; - case 27: s = L"\".>\" expected"; break; - case 28: s = L"\"|\" expected"; break; - case 29: s = L"\"WEAK\" expected"; break; - case 30: s = L"\"(\" expected"; break; - case 31: s = L"\")\" expected"; break; - case 32: s = L"\"[\" expected"; break; - case 33: s = L"\"]\" expected"; break; - case 34: s = L"\"{\" expected"; 
break; - case 35: s = L"\"}\" expected"; break; - case 36: s = L"\"SYNC\" expected"; break; - case 37: s = L"\"IF\" expected"; break; - case 38: s = L"\"CONTEXT\" expected"; break; - case 39: s = L"\"(.\" expected"; break; - case 40: s = L"\".)\" expected"; break; - case 41: s = L"??? expected"; break; - case 42: s = L"this symbol not expected in Coco"; break; - case 43: s = L"this symbol not expected in TokenDecl"; break; - case 44: s = L"invalid TokenDecl"; break; - case 45: s = L"invalid AttrDecl"; break; - case 46: s = L"invalid SimSet"; break; - case 47: s = L"invalid Sym"; break; - case 48: s = L"invalid Term"; break; - case 49: s = L"invalid Factor"; break; - case 50: s = L"invalid Attribs"; break; - case 51: s = L"invalid TokenFactor"; break; + case 8: s = L"\"TERMINALS\" expected"; break; + case 9: s = L"\"CHARACTERS\" expected"; break; + case 10: s = L"\"TOKENS\" expected"; break; + case 11: s = L"\"PRAGMAS\" expected"; break; + case 12: s = L"\"COMMENTS\" expected"; break; + case 13: s = L"\"FROM\" expected"; break; + case 14: s = L"\"TO\" expected"; break; + case 15: s = L"\"NESTED\" expected"; break; + case 16: s = L"\"IGNORE\" expected"; break; + case 17: s = L"\"PRODUCTIONS\" expected"; break; + case 18: s = L"\"=\" expected"; break; + case 19: s = L"\".\" expected"; break; + case 20: s = L"\"END\" expected"; break; + case 21: s = L"\"+\" expected"; break; + case 22: s = L"\"-\" expected"; break; + case 23: s = L"\"..\" expected"; break; + case 24: s = L"\"ANY\" expected"; break; + case 25: s = L"\"<\" expected"; break; + case 26: s = L"\">\" expected"; break; + case 27: s = L"\"<.\" expected"; break; + case 28: s = L"\".>\" expected"; break; + case 29: s = L"\"|\" expected"; break; + case 30: s = L"\"WEAK\" expected"; break; + case 31: s = L"\"(\" expected"; break; + case 32: s = L"\")\" expected"; break; + case 33: s = L"\"[\" expected"; break; + case 34: s = L"\"]\" expected"; break; + case 35: s = L"\"{\" expected"; break; + case 36: s = L"\"}\" 
expected"; break; + case 37: s = L"\"SYNC\" expected"; break; + case 38: s = L"\"IF\" expected"; break; + case 39: s = L"\"CONTEXT\" expected"; break; + case 40: s = L"\"(.\" expected"; break; + case 41: s = L"\".)\" expected"; break; + case 42: s = L"??? expected"; break; + case 43: s = L"this symbol not expected in Coco"; break; + case 44: s = L"this symbol not expected in TokenDecl"; break; + case 45: s = L"invalid TokenDecl"; break; + case 46: s = L"invalid AttrDecl"; break; + case 47: s = L"invalid SimSet"; break; + case 48: s = L"invalid Sym"; break; + case 49: s = L"invalid Term"; break; + case 50: s = L"invalid Factor"; break; + case 51: s = L"invalid Attribs"; break; + case 52: s = L"invalid TokenFactor"; break; default: { @@ -927,9 +1291,61 @@ void Errors::Warning(const wchar_t *s) { } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(L"%ls", s); exit(1); } +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(L" "); +} + +SynTree::~SynTree() { + //wprintf(L"Token %ls : %d : %d : %d : %d\n", tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + } // namespace diff --git a/src/Parser.frame b/src/Parser.frame index a61dd7b..95c6349 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -41,6 +41,21 @@ Parser.h Specification -->namespace_open +#ifdef PARSER_WITH_AST + +struct SynTree { + SynTree(Token *t ): tok(t){} + ~SynTree(); + + Token *tok; + ArrayList children; + + void dump(int indent=0, bool isLast=false); + void dump2(int maxT, int indent=0, bool isLast=false); +}; + +#endif + class Errors { public: int count; // number of errors detected @@ -61,6 +76,11 @@ private: int errDist; int minErrDist; +#ifdef PARSER_WITH_AST + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + void AstPopNonTerminal(); +#endif void SynErr(int n); void Get(); void Expect(int n); @@ -105,6 +125,30 @@ Parser.cpp Specification -->namespace_open +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ((SynTree*)ast_stack.Top())->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ((SynTree*)ast_stack.Top())->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + 
ast_stack.Pop(); +} + +#endif + void Parser::SynErr(int n) { if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); errDist = 0; @@ -176,7 +220,7 @@ struct ParserInitExistsRecognizer { struct InitIsMissingType { char dummy1; }; - + struct InitExistsType { char dummy1; char dummy2; }; @@ -200,7 +244,7 @@ struct ParserDestroyExistsRecognizer { struct DestroyIsMissingType { char dummy1; }; - + struct DestroyExistsType { char dummy1; char dummy2; }; @@ -280,8 +324,14 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete dummyToken; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER coco_string_delete(noString); coco_string_delete(tokenString); +#endif } Errors::Errors() { @@ -319,8 +369,60 @@ void Errors::Warning(const wchar_t *s) { } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(L"%ls", s); exit(1); } +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(L" "); +} + +SynTree::~SynTree() { + //wprintf(L"Token %ls : %d : %d : %d : %d\n", tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + -->namespace_close diff --git a/src/Parser.h b/src/Parser.h index 3087562..87b68bc 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -33,6 +33,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include "Tab.h" #include "DFA.h" #include "ParserGen.h" +#define COCO_FRAME_PARSER #include "Scanner.h" @@ -40,6 +41,21 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { +#ifdef PARSER_WITH_AST + +struct SynTree { + SynTree(Token *t ): tok(t){} + ~SynTree(); + + Token *tok; + ArrayList children; + + void dump(int indent=0, bool isLast=false); + void dump2(int maxT, int indent=0, bool isLast=false); +}; + +#endif + class Errors { public: int count; // number of errors detected @@ -62,15 +78,42 @@ class Parser { _string=3, _badString=4, _char=5, - _ddtSym=42, - _optionSym=43 + _ddtSym=43, + _optionSym=44 + }; +#ifdef PARSER_WITH_AST + enum eNonTerminals{ + _Coco=0, + _SetDecl=1, + _TokenDecl=2, + _TokenExpr=3, + _Set=4, + _AttrDecl=5, + _SemText=6, + _Expression=7, + _SimSet=8, + _Char=9, + _Sym=10, + _Term=11, + _Resolver=12, + _Factor=13, + _Attribs=14, + _Condition=15, + _TokenTerm=16, + _TokenFactor=17 }; +#endif int maxT; Token *dummyToken; int errDist; int minErrDist; +#ifdef PARSER_WITH_AST + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + void AstPopNonTerminal(); +#endif void SynErr(int n); void Get(); void Expect(int n); @@ -85,6 +128,10 @@ class 
Parser { Token *t; // last recognized token Token *la; // lookahead token +#ifdef PARSER_WITH_AST + SynTree *ast_root; + ArrayList ast_stack; +#endif int id; int str; diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 0f3e739..4adca5f 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -196,11 +196,17 @@ void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { } else if (p->typ == Node::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n - if ((*isChecked)[p->sym->n]) fwprintf(gen, L"Get();\n"); + if ((*isChecked)[p->sym->n]) { + fwprintf(gen, L"Get();\n"); + //copy and pasted bellow + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"); + } else { fwprintf(gen, L"Expect("); WriteSymbolOrCode(gen, p->sym); fwprintf(gen, L");\n"); + //copy and pasted from above + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"); } } if (p->typ == Node::wt) { Indent(indent); @@ -337,6 +343,19 @@ void ParserGen::GenTokensHeader() { } fwprintf(gen, L"\n\t};\n"); + + // nonterminals + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"); + isFirst = true; + for (i=0; inonterminals.Count; i++) { + sym = (Symbol*)tab->nonterminals[i]; + if (isFirst) { isFirst = false; } + else { fwprintf(gen , L",\n"); } + + fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + } + fwprintf(gen, L"\n\t};\n#endif\n"); + } void ParserGen::GenCodePragmas() { @@ -380,9 +399,19 @@ void ParserGen::GenProductions() { CopySourcePart(sym->attrPos, 0); fwprintf(gen, L") {\n"); CopySourcePart(sym->semPos, 2); + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n"); + if(i == 0) fwprintf(gen, L"\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n", sym->name, sym->name); + else { + fwprintf(gen, L"\t\tbool ntAdded = 
AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n", sym->name, sym->name); + } + fwprintf(gen, L"#endif\n"); ba.SetAll(false); GenCode(sym->graph, 2, &ba); - fwprintf(gen, L"}\n"); fwprintf(gen, L"\n"); + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n"); + if(i == 0) fwprintf(gen, L"\t\tAstPopNonTerminal();\n"); + else fwprintf(gen, L"\t\tif(ntAdded) AstPopNonTerminal();\n"); + fwprintf(gen, L"#endif\n"); + fwprintf(gen, L"}\n\n"); } } @@ -405,6 +434,10 @@ void ParserGen::InitSets() { fwprintf(gen, L"\t};\n\n"); } +void ParserGen::CheckAstGen() { + fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tSynTree *ast_root;\n\tArrayList ast_stack;\n#endif\n"); +} + void ParserGen::WriteParser () { Generator g(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart @@ -437,7 +470,7 @@ void ParserGen::WriteParser () { g.CopyFramePart(L"-->constantsheader"); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ fwprintf(gen, L"\tint maxT;\n"); - g.CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0); + g.CopyFramePart(L"-->declarations"); CheckAstGen(); CopySourcePart(tab->semDeclPos, 0); g.CopyFramePart(L"-->productionsheader"); GenProductionsHeader(); g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); diff --git a/src/ParserGen.h b/src/ParserGen.h index 2c25357..267c63b 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -90,6 +90,7 @@ class ParserGen void WriteParser(); void WriteStatistics(); void WriteSymbolOrCode(FILE *gen, const Symbol *sym); + void CheckAstGen(); ParserGen (Parser *parser); ~ParserGen(); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 520468d..9f3cc11 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -244,6 +244,17 @@ Token::Token() { next = NULL; } +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + Token::~Token() { 
coco_string_delete(val); } @@ -264,7 +275,7 @@ Buffer::Buffer(FILE* s, bool isUserStream) { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; - buf = new unsigned char[bufCapacity]; + buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); @@ -294,7 +305,7 @@ Buffer::Buffer(const unsigned char* buf, int len) { } Buffer::~Buffer() { - Close(); + Close(); if (buf != NULL) { delete [] buf; buf = NULL; @@ -469,8 +480,8 @@ Scanner::~Scanner() { void Scanner::Init() { EOL = '\n'; eofSym = 0; - maxT = 41; - noSym = 41; + maxT = 42; + noSym = 42; int i; for (i = 65; i <= 90; ++i) start.set(i, 1); for (i = 95; i <= 95; ++i) start.set(i, 1); @@ -495,21 +506,22 @@ void Scanner::Init() { start.set(Buffer::EoF, -1); keywords.set(L"COMPILER", 6); keywords.set(L"IGNORECASE", 7); - keywords.set(L"CHARACTERS", 8); - keywords.set(L"TOKENS", 9); - keywords.set(L"PRAGMAS", 10); - keywords.set(L"COMMENTS", 11); - keywords.set(L"FROM", 12); - keywords.set(L"TO", 13); - keywords.set(L"NESTED", 14); - keywords.set(L"IGNORE", 15); - keywords.set(L"PRODUCTIONS", 16); - keywords.set(L"END", 19); - keywords.set(L"ANY", 23); - keywords.set(L"WEAK", 29); - keywords.set(L"SYNC", 36); - keywords.set(L"IF", 37); - keywords.set(L"CONTEXT", 38); + keywords.set(L"TERMINALS", 8); + keywords.set(L"CHARACTERS", 9); + keywords.set(L"TOKENS", 10); + keywords.set(L"PRAGMAS", 11); + keywords.set(L"COMMENTS", 12); + keywords.set(L"FROM", 13); + keywords.set(L"TO", 14); + keywords.set(L"NESTED", 15); + keywords.set(L"IGNORE", 16); + keywords.set(L"PRODUCTIONS", 17); + keywords.set(L"END", 20); + keywords.set(L"ANY", 24); + keywords.set(L"WEAK", 30); + keywords.set(L"SYNC", 37); + keywords.set(L"IF", 38); + keywords.set(L"CONTEXT", 39); tvalLength = 128; @@ -729,14 +741,14 @@ Token* 
Scanner::NextToken() { {t->kind = 5; break;} case 10: case_10: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} - else {t->kind = 42; break;} + else {t->kind = 43; break;} case 11: case_11: - recEnd = pos; recKind = 43; + recEnd = pos; recKind = 44; if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} - else {t->kind = 43; break;} + else {t->kind = 44; break;} case 12: case_12: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} @@ -745,70 +757,70 @@ Token* Scanner::NextToken() { else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else {t->kind = 42; break;} + else {t->kind = 43; break;} case 14: case_14: if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} else if (ch == L'=') {AddCh(); goto case_11;} - else {t->kind = 42; break;} + else {t->kind = 43; break;} case 16: - {t->kind = 17; break;} + {t->kind = 18; break;} case 17: - {t->kind = 20; break;} - case 18: {t->kind = 21; break;} + case 18: + {t->kind = 22; break;} case 19: case_19: - {t->kind = 22; break;} + {t->kind = 23; break;} case 20: - {t->kind = 25; break;} + {t->kind = 26; break;} case 21: case_21: - {t->kind = 26; break;} + {t->kind = 27; break;} case 
22: case_22: - {t->kind = 27; break;} - case 23: {t->kind = 28; break;} + case 23: + {t->kind = 29; break;} case 24: - {t->kind = 31; break;} - case 25: {t->kind = 32; break;} - case 26: + case 25: {t->kind = 33; break;} - case 27: + case 26: {t->kind = 34; break;} - case 28: + case 27: {t->kind = 35; break;} + case 28: + {t->kind = 36; break;} case 29: case_29: - {t->kind = 39; break;} + {t->kind = 40; break;} case 30: case_30: - {t->kind = 40; break;} + {t->kind = 41; break;} case 31: - recEnd = pos; recKind = 18; + recEnd = pos; recKind = 19; if (ch == L'.') {AddCh(); goto case_19;} else if (ch == L'>') {AddCh(); goto case_22;} else if (ch == L')') {AddCh(); goto case_30;} - else {t->kind = 18; break;} + else {t->kind = 19; break;} case 32: - recEnd = pos; recKind = 24; + recEnd = pos; recKind = 25; if (ch == L'.') {AddCh(); goto case_21;} - else {t->kind = 24; break;} + else {t->kind = 25; break;} case 33: - recEnd = pos; recKind = 30; + recEnd = pos; recKind = 31; if (ch == L'.') {AddCh(); goto case_29;} - else {t->kind = 30; break;} + else {t->kind = 31; break;} } AppendVal(t); diff --git a/src/Scanner.frame b/src/Scanner.frame index 5f9ecb4..3f2dde7 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ @@ -95,7 +95,7 @@ char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); -class Token +class Token { public: int kind; // token kind @@ -107,6 +107,7 @@ public: Token *next; // ML 2005-03-11 Peek tokens are kept in linked list Token(); + Token *Clone(); ~Token(); }; @@ -125,10 +126,10 @@ private: int bufPos; // current position in buffer FILE* stream; // input stream (seekable) bool isUserStream; // was the stream opened by the user? 
- + int ReadNextStreamChunk(); bool CanSeek(); // true if stream can be seeked otherwise false - + public: static const int EoF = COCO_WCHAR_MAX + 1; @@ -136,7 +137,7 @@ public: Buffer(const unsigned char* buf, int len); Buffer(Buffer *b); virtual ~Buffer(); - + virtual void Close(); virtual int Read(); virtual int Peek(); @@ -284,7 +285,7 @@ private: public: Buffer *buffer; // scanner buffer - + Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); @@ -523,6 +524,17 @@ Token::Token() { next = NULL; } +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + Token::~Token() { coco_string_delete(val); } @@ -543,7 +555,7 @@ Buffer::Buffer(FILE* s, bool isUserStream) { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; - buf = new unsigned char[bufCapacity]; + buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); @@ -573,7 +585,7 @@ Buffer::Buffer(const unsigned char* buf, int len) { } Buffer::~Buffer() { - Close(); + Close(); if (buf != NULL) { delete [] buf; buf = NULL; diff --git a/src/Scanner.h b/src/Scanner.h index 09498db..050bb06 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -91,7 +91,7 @@ char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); -class Token +class Token { public: int kind; // token kind @@ -103,6 +103,7 @@ class Token Token *next; // ML 2005-03-11 Peek tokens are kept in linked list Token(); + Token *Clone(); ~Token(); }; @@ -121,10 +122,10 @@ class Buffer { int bufPos; // current position in buffer FILE* stream; // input stream (seekable) bool isUserStream; // was the stream opened by the user? 
- + int ReadNextStreamChunk(); bool CanSeek(); // true if stream can be seeked otherwise false - + public: static const int EoF = COCO_WCHAR_MAX + 1; @@ -132,7 +133,7 @@ class Buffer { Buffer(const unsigned char* buf, int len); Buffer(Buffer *b); virtual ~Buffer(); - + virtual void Close(); virtual int Read(); virtual int Peek(); @@ -282,7 +283,7 @@ class Scanner { public: Buffer *buffer; // scanner buffer - + Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); diff --git a/src/Tab.h b/src/Tab.h index 4ee6816..b1a808c 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -88,7 +88,6 @@ class Tab { ArrayList classes; int dummyName; - Tab(Parser *parser); ~Tab(); From 8fe04c03c048e38ec3d414ddc286577c28af4732 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 3 Jun 2021 18:47:55 +0200 Subject: [PATCH 26/95] Convert ArrayList to TArrayList --- src/DFA.cpp | 4 +-- src/Parser.cpp | 4 +-- src/Parser.frame | 19 ++++++----- src/Parser.h | 15 ++++----- src/ParserGen.cpp | 30 ++++++++--------- src/ParserGen.h | 2 +- src/Tab.cpp | 84 +++++++++++++++++++++++------------------------ src/Tab.h | 10 +++--- 8 files changed, 83 insertions(+), 85 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 1e2ca34..6f560a7 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -389,7 +389,7 @@ void DFA::PrintStates() { for (Action *action = state->firstAction; action != NULL; action = action->next) { if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); - if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)tab->classes[action->sym])->name); + if (action->typ == Node::clas) fwprintf(trace, L"%ls", tab->classes[action->sym]->name); else fwprintf(trace, L"%3s", DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, L"%3d", targ->state->nr); @@ -594,7 +594,7 @@ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literal void DFA::GenLiterals 
() { Symbol *sym; - ArrayList *ts[2]; + TArrayList *ts[2]; ts[0] = &tab->terminals; ts[1] = &tab->pragmas; diff --git a/src/Parser.cpp b/src/Parser.cpp index 54598ca..30294b6 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -39,7 +39,7 @@ namespace Coco { void Parser::AstAddTerminal() { SynTree *st_t = new SynTree( t->Clone() ); - ((SynTree*)ast_stack.Top())->children.Add(st_t); + ast_stack.Top()->children.Add(st_t); } bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { @@ -48,7 +48,7 @@ bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line ntTok->line = line; ntTok->val = coco_string_create(nt_name); SynTree *st = new SynTree( ntTok ); - ((SynTree*)ast_stack.Top())->children.Add(st); + ast_stack.Top()->children.Add(st); ast_stack.Add(st); return true; } diff --git a/src/Parser.frame b/src/Parser.frame index 95c6349..f3b9193 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -48,7 +48,7 @@ struct SynTree { ~SynTree(); Token *tok; - ArrayList children; + TArrayList children; void dump(int indent=0, bool isLast=false); void dump2(int maxT, int indent=0, bool isLast=false); @@ -76,11 +76,6 @@ private: int errDist; int minErrDist; -#ifdef PARSER_WITH_AST - void AstAddTerminal(); - bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); - void AstPopNonTerminal(); -#endif void SynErr(int n); void Get(); void Expect(int n); @@ -95,6 +90,14 @@ public: Token *t; // last recognized token Token *la; // lookahead token +#ifdef PARSER_WITH_AST + SynTree *ast_root; + TArrayList ast_stack; + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + void AstPopNonTerminal(); +#endif + -->declarations Parser(Scanner *scanner); @@ -129,7 +132,7 @@ Parser.cpp Specification void Parser::AstAddTerminal() { SynTree *st_t = new SynTree( t->Clone() ); - ((SynTree*)ast_stack.Top())->children.Add(st_t); + ast_stack.Top()->children.Add(st_t); } bool 
Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { @@ -138,7 +141,7 @@ bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line ntTok->line = line; ntTok->val = coco_string_create(nt_name); SynTree *st = new SynTree( ntTok ); - ((SynTree*)ast_stack.Top())->children.Add(st); + ast_stack.Top()->children.Add(st); ast_stack.Add(st); return true; } diff --git a/src/Parser.h b/src/Parser.h index 87b68bc..5110271 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -48,7 +48,7 @@ struct SynTree { ~SynTree(); Token *tok; - ArrayList children; + TArrayList children; void dump(int indent=0, bool isLast=false); void dump2(int maxT, int indent=0, bool isLast=false); @@ -109,11 +109,6 @@ class Parser { int errDist; int minErrDist; -#ifdef PARSER_WITH_AST - void AstAddTerminal(); - bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); - void AstPopNonTerminal(); -#endif void SynErr(int n); void Get(); void Expect(int n); @@ -129,9 +124,13 @@ class Parser { Token *la; // lookahead token #ifdef PARSER_WITH_AST - SynTree *ast_root; - ArrayList ast_stack; + SynTree *ast_root; + TArrayList ast_stack; + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + void AstPopNonTerminal(); #endif + int id; int str; diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 4adca5f..9027329 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -146,7 +146,7 @@ void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { int ParserGen::NewCondSet (BitArray *s) { for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) - if (Sets::Equals(s, (BitArray*)symSet[i])) return i; + if (Sets::Equals(s, symSet[i])) return i; symSet.Add(s->Clone()); return symSet.Count - 1; } @@ -175,7 +175,7 @@ void ParserGen::GenCond (BitArray *s, Node *p) { void ParserGen::PutCaseLabels (BitArray *s) { Symbol *sym; for (int i=0; iterminals.Count; i++) { - sym = 
(Symbol*)tab->terminals[i]; + sym = tab->terminals[i]; if ((*s)[sym->n]) { fwprintf(gen, L"case "); WriteSymbolOrCode(gen, sym); @@ -324,7 +324,7 @@ void ParserGen::GenTokensHeader() { // tokens for (i=0; iterminals.Count; i++) { - sym = (Symbol*)tab->terminals[i]; + sym = tab->terminals[i]; if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } @@ -338,7 +338,7 @@ void ParserGen::GenTokensHeader() { if (isFirst) { isFirst = false; } else { fwprintf(gen , L",\n"); } - sym = (Symbol*)tab->pragmas[i]; + sym = tab->pragmas[i]; fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); } @@ -348,7 +348,7 @@ void ParserGen::GenTokensHeader() { fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"); isFirst = true; for (i=0; inonterminals.Count; i++) { - sym = (Symbol*)tab->nonterminals[i]; + sym = tab->nonterminals[i]; if (isFirst) { isFirst = false; } else { fwprintf(gen , L",\n"); } @@ -361,7 +361,7 @@ void ParserGen::GenTokensHeader() { void ParserGen::GenCodePragmas() { Symbol *sym; for (int i=0; ipragmas.Count; i++) { - sym = (Symbol*)tab->pragmas[i]; + sym = tab->pragmas[i]; fwprintf(gen, L"\t\tif (la->kind == "); WriteSymbolOrCode(gen, sym); fwprintf(gen, L") {\n"); @@ -381,7 +381,7 @@ void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { void ParserGen::GenProductionsHeader() { Symbol *sym; for (int i=0; inonterminals.Count; i++) { - sym = (Symbol*)tab->nonterminals[i]; + sym = tab->nonterminals[i]; curSy = sym; fwprintf(gen, L"\tvoid %ls(", sym->name); CopySourcePart(sym->attrPos, 0); @@ -393,7 +393,7 @@ void ParserGen::GenProductions() { Symbol *sym; BitArray ba(tab->terminals.Count); for (int i=0; inonterminals.Count; i++) { - sym = (Symbol*)tab->nonterminals[i]; + sym = tab->nonterminals[i]; curSy = sym; fwprintf(gen, L"void Parser::%ls(", sym->name); CopySourcePart(sym->attrPos, 0); @@ -419,12 +419,12 @@ void ParserGen::InitSets() { fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet.Count, tab->terminals.Count+1); 
for (int i = 0; i < symSet.Count; i++) { - BitArray *s = (BitArray*)symSet[i]; + BitArray *s = symSet[i]; fwprintf(gen, L"\t\t{"); int j = 0; Symbol *sym; for (int k=0; kterminals.Count; k++) { - sym = (Symbol*)tab->terminals[k]; + sym = tab->terminals[k]; if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,"); ++j; if (j%4 == 0) fwprintf(gen, L" "); @@ -434,10 +434,6 @@ void ParserGen::InitSets() { fwprintf(gen, L"\t};\n\n"); } -void ParserGen::CheckAstGen() { - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tSynTree *ast_root;\n\tArrayList ast_stack;\n#endif\n"); -} - void ParserGen::WriteParser () { Generator g(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart @@ -448,7 +444,7 @@ void ParserGen::WriteParser () { Symbol *sym; for (int i=0; iterminals.Count; i++) { - sym = (Symbol*)tab->terminals[i]; + sym = tab->terminals[i]; GenErrorMsg(tErr, sym); } @@ -470,7 +466,7 @@ void ParserGen::WriteParser () { g.CopyFramePart(L"-->constantsheader"); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ fwprintf(gen, L"\tint maxT;\n"); - g.CopyFramePart(L"-->declarations"); CheckAstGen(); CopySourcePart(tab->semDeclPos, 0); + g.CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0); g.CopyFramePart(L"-->productionsheader"); GenProductionsHeader(); g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); @@ -529,7 +525,7 @@ ParserGen::ParserGen (Parser *parser) { } ParserGen::~ParserGen () { - for(int i=0; i symSet; Tab *tab; // other Coco objects FILE* trace; diff --git a/src/Tab.cpp b/src/Tab.cpp index 8874f17..e8b7076 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -58,11 +58,11 @@ Tab::Tab(Parser *parser) { } Tab::~Tab() { - for(int i=0; iname, name)) return s; } for (i=0; iname, name)) return s; } return NULL; @@ -136,15 +136,15 @@ void Tab::PrintSymbolTable() { Symbol *sym; int i; for (i=0; in]) { len = coco_string_length(sym->name); if (col + len >= 80) { @@ -264,7 +264,7 @@ void Tab::Finish(Graph 
*g) { } void Tab::DeleteNodes() { - for(int i=0; in, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { - CharClass *c = (CharClass*)classes[p->val]; + CharClass *c = classes[p->val]; wchar_t *paddedName = Name(c->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); @@ -415,7 +415,7 @@ CharClass* Tab::NewCharClass(const wchar_t* name, CharSet *s) { CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass *c; for (int i=0; iname, name)) return c; } return NULL; @@ -424,14 +424,14 @@ CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass* Tab::FindCharClass(CharSet *s) { CharClass *c; for (int i=0; iEquals(c->set)) return c; } return NULL; } CharSet* Tab::CharClassSet(int i) { - return ((CharClass*)classes[i])->set; + return classes[i]->set; } //----------- character class printing @@ -464,7 +464,7 @@ void Tab::WriteCharSet(CharSet *s) { void Tab::WriteCharClasses () { CharClass *c; for (int i=0; iname, L" "); wchar_t* format = coco_string_create(format2, 0, 10); @@ -540,13 +540,13 @@ void Tab::CompFirstSets() { Symbol *sym; int i; for (i=0; ifirst; sym->first = new BitArray(terminals.Count); sym->firstReady = false; } for (i=0; ifirst; sym->first = First(sym->graph); delete saved; @@ -577,7 +577,7 @@ void Tab::Complete(Symbol *sym) { visited->Set(sym->n, true); Symbol *s; for (int i=0; ints))[s->n]) { Complete(s); sym->follow->Or(s->follow); @@ -591,7 +591,7 @@ void Tab::CompFollowSets() { Symbol *sym; int i; for (i=0; ifollow = new BitArray(terminals.Count); sym->nts = new BitArray(nonterminals.Count); } @@ -599,13 +599,13 @@ void Tab::CompFollowSets() { delete visited; visited = new BitArray(nodes.Count); for (i=0; igraph); } for (i=0; igraph); } } @@ -719,7 +719,7 @@ void Tab::CompSyncSets() { Symbol *sym; for (int i=0; igraph); } @@ -728,7 +728,7 @@ void Tab::CompSyncSets() { void 
Tab::SetupAnys() { Node *p; for (int i=0; ityp == Node::any) { p->set = new BitArray(terminals.Count, true); p->set->Set(eofSy->n, false); @@ -743,7 +743,7 @@ void Tab::CompDeletableSymbols() { do { changed = false; for (i=0; ideletable && sym->graph != NULL && DelGraph(sym->graph)) { sym->deletable = true; changed = true; } @@ -751,7 +751,7 @@ void Tab::CompDeletableSymbols() { } while (changed); for (i=0; ideletable) wprintf(L" %ls deletable\n", sym->name); } @@ -761,7 +761,7 @@ void Tab::RenumberPragmas() { int n = terminals.Count; Symbol *sym; for (int i=0; in = n++; } } @@ -779,7 +779,7 @@ void Tab::CompSymbolSets() { Symbol *sym; for (int i=0; iname); fwprintf(trace, L"first: "); PrintSet(sym->first, 10); fwprintf(trace, L"follow: "); PrintSet(sym->follow, 10); @@ -793,7 +793,7 @@ void Tab::CompSymbolSets() { Node *p; for (int i=0; ityp == Node::any || p->typ == Node::sync) { fwprintf(trace, L"%4d %4s ", p->n, nTyp[p->typ]); PrintSet(p->set, 11); @@ -930,7 +930,7 @@ bool Tab::NoCircularProductions() { Symbol *sym; int i; for (i=0; igraph, &singles, sym->graph); // get nonterminals s such that sym-->s Symbol *s; @@ -987,7 +987,7 @@ void Tab::LL1Error(int cond, Symbol *sym) { void Tab::CheckOverlap(BitArray *s1, BitArray *s2, int cond) { Symbol *sym; for (int i=0; in] && (*s2)[sym->n]) { LL1Error(cond, sym); } @@ -1029,7 +1029,7 @@ void Tab::CheckAlts(Node *p) { void Tab::CheckLL1() { Symbol *sym; for (int i=0; igraph); } @@ -1092,7 +1092,7 @@ void Tab::CheckRes(Node *p, bool rslvAllowed) { void Tab::CheckResolvers() { for (int i=0; igraph, false); } } @@ -1104,7 +1104,7 @@ bool Tab::NtsComplete() { bool complete = true; Symbol *sym; for (int i=0; igraph == NULL) { complete = false; errors->count++; wprintf(L" No production for %ls\n", sym->name); @@ -1137,7 +1137,7 @@ bool Tab::AllNtReached() { MarkReachedNts(gramSy->graph); Symbol *sym; for (int i=0; in])) { ok = false; errors->count++; wprintf(L" %ls cannot be reached\n", sym->name); @@ -1170,14 +1170,14 @@ 
bool Tab::AllNtToTerm() { changed = false; for (i=0; in] && IsTerm(sym->graph, &mark)) { mark.Set(sym->n, true); changed = true; } } } while (changed); for (i=0; in]) { ok = false; errors->count++; wprintf(L" %ls cannot be derived to terminals\n", sym->name); @@ -1196,7 +1196,7 @@ void Tab::XRef() { Symbol *sym; int i, j; for (i=0; iAdd((void*)(ssize_t)(-sym->line)); @@ -1204,7 +1204,7 @@ void Tab::XRef() { // collect lines where symbols have been referenced Node *n; for (i=0; ityp == Node::t || n->typ == Node::wt || n->typ == Node::nt) { ArrayList *list = (ArrayList*)(xref.Get(n->sym)); if (list == NULL) {list = new ArrayList(); xref.Set(n->sym, list);} diff --git a/src/Tab.h b/src/Tab.h index b1a808c..7af087a 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -76,16 +76,16 @@ class Tab { Errors *errors; - ArrayList terminals; - ArrayList pragmas; - ArrayList nonterminals; + TArrayList terminals; + TArrayList pragmas; + TArrayList nonterminals; - ArrayList nodes; + TArrayList nodes; static const char* nTyp[]; Node *dummyNode; - ArrayList classes; + TArrayList classes; int dummyName; Tab(Parser *parser); From 1750865f94320c06480c2b58d2b26acf30e20aa8 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 09:51:09 +0200 Subject: [PATCH 27/95] Add 'const' qualifier in several places --- src/BitArray.cpp | 2 +- src/BitArray.h | 2 +- src/CharSet.cpp | 12 +++++------ src/CharSet.h | 12 +++++------ src/DFA.cpp | 32 ++++++++++++++-------------- src/DFA.h | 32 ++++++++++++++-------------- src/HashTable.cpp | 6 +++--- src/HashTable.h | 8 +++---- src/Melted.h | 2 +- src/ParserGen.cpp | 16 +++++++------- src/ParserGen.h | 14 ++++++------- src/Sets.h | 12 +++++------ src/SortedList.cpp | 12 +++++------ src/SortedList.h | 14 ++++++------- src/Symbol.cpp | 8 +++---- src/Symbol.h | 8 +++---- src/Tab.cpp | 52 +++++++++++++++++++++++----------------------- src/Tab.h | 48 +++++++++++++++++++++--------------------- 18 files changed, 146 insertions(+), 146 deletions(-) diff --git 
a/src/BitArray.cpp b/src/BitArray.cpp index ede7270..e01fec5 100644 --- a/src/BitArray.cpp +++ b/src/BitArray.cpp @@ -58,7 +58,7 @@ BitArray::~BitArray() Data = NULL; } -int BitArray::getCount() { +int BitArray::getCount() const { return Count; } diff --git a/src/BitArray.h b/src/BitArray.h index 31d0617..3694540 100644 --- a/src/BitArray.h +++ b/src/BitArray.h @@ -38,7 +38,7 @@ class BitArray BitArray(const BitArray © ); virtual ~BitArray(); - int getCount(); + int getCount() const; bool Get(const int index) const; void Set(const int index, const bool value); diff --git a/src/CharSet.cpp b/src/CharSet.cpp index fa7d2d8..f9f8f0c 100644 --- a/src/CharSet.cpp +++ b/src/CharSet.cpp @@ -76,7 +76,7 @@ CharSet* CharSet::Clone() const { return s; } -bool CharSet::Equals(CharSet *s) const { +bool CharSet::Equals(const CharSet *s) const { Range *p = head, *q = s->head; while (p != NULL && q != NULL) { if (p->from != q->from || p->to != q->to) return false; @@ -96,12 +96,12 @@ int CharSet::First() const { return -1; } -void CharSet::Or(CharSet *s) { +void CharSet::Or(const CharSet *s) { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) Set(i); } -void CharSet::And(CharSet *s) { +void CharSet::And(const CharSet *s) { CharSet x; Range *p = head; while (p != NULL) { @@ -115,7 +115,7 @@ void CharSet::And(CharSet *s) { x.head = NULL; } -void CharSet::Subtract(CharSet *s) { +void CharSet::Subtract(const CharSet *s) { CharSet x; Range *p = head; while (p != NULL) { @@ -129,14 +129,14 @@ void CharSet::Subtract(CharSet *s) { x.head = NULL; } -bool CharSet::Includes(CharSet *s) const { +bool CharSet::Includes(const CharSet *s) const { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) if (!Get(i)) return false; return true; } -bool CharSet::Intersects(CharSet *s) const { +bool CharSet::Intersects(const CharSet *s) const { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) if 
(Get(i)) return true; diff --git a/src/CharSet.h b/src/CharSet.h index 4164d2d..d549fea 100644 --- a/src/CharSet.h +++ b/src/CharSet.h @@ -51,14 +51,14 @@ class CharSet { bool Get(int i) const; void Set(int i); CharSet* Clone() const; - bool Equals(CharSet *s) const; + bool Equals(const CharSet *s) const; int Elements() const; int First() const; - void Or(CharSet *s); - void And(CharSet *s); - void Subtract(CharSet *s); - bool Includes(CharSet *s) const; - bool Intersects(CharSet *s) const; + void Or(const CharSet *s); + void And(const CharSet *s); + void Subtract(const CharSet *s); + bool Includes(const CharSet *s) const; + bool Intersects(const CharSet *s) const; void Clear(); void Fill(); }; diff --git a/src/DFA.cpp b/src/DFA.cpp index 6f560a7..0001217 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -111,7 +111,7 @@ void DFA::CombineShifts() { } } -void DFA::FindUsedStates(State *state, BitArray *used) { +void DFA::FindUsedStates(const State *state, BitArray *used) { if ((*used)[state->nr]) return; used->Set(state->nr, true); for (Action *a = state->firstAction; a != NULL; a = a->next) @@ -145,13 +145,13 @@ void DFA::DeleteRedundantStates() { free (newState); } -State* DFA::TheState(Node *p) { +State* DFA::TheState(const Node *p) { State *state; if (p == NULL) {state = NewState(); state->endOf = curSy; return state;} else return p->state; } -void DFA::Step(State *from, Node *p, BitArray *stepped) { +void DFA::Step(State *from, const Node *p, BitArray *stepped) { if (p == NULL) return; stepped->Set(p->n, true); @@ -205,7 +205,7 @@ void DFA::NumberNodes(Node *p, State *state, bool renumIter) { } } -void DFA::FindTrans (Node *p, bool start, BitArray *marked) { +void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { if (p == NULL || (*marked)[p->n]) return; marked->Set(p->n, true); if (start) { @@ -307,7 +307,7 @@ void DFA::SplitActions(State *state, Action *a, Action *b) { delete seta; delete setb; } -bool DFA::Overlap(Action *a, Action *b) { +bool 
DFA::Overlap(const Action *a, const Action *b) { CharSet *seta, *setb; if (a->typ == Node::chr) if (b->typ == Node::chr) return (a->sym == b->sym); @@ -403,7 +403,7 @@ void DFA::PrintStates() { //---------------------------- actions -------------------------------- -Action* DFA::FindAction(State *state, wchar_t ch) { +Action* DFA::FindAction(const State *state, wchar_t ch) { for (Action *a = state->firstAction; a != NULL; a = a->next) if (a->typ == Node::chr && ch == a->sym) return a; else if (a->typ == Node::clas) { @@ -414,7 +414,7 @@ Action* DFA::FindAction(State *state, wchar_t ch) { } -void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) { +void DFA::GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) { // compute the set of target states targets = new BitArray(maxStates); endOf = NULL; ctx = false; @@ -457,7 +457,7 @@ Melted* DFA::NewMelted(BitArray *set, State *state) { } -BitArray* DFA::MeltedSet(int nr) { +const BitArray* DFA::MeltedSet(int nr) { Melted *m = firstMelted; while (m != NULL) { if (m->state->nr == nr) return m->set; else m = m->next; @@ -467,7 +467,7 @@ BitArray* DFA::MeltedSet(int nr) { return NULL; } -Melted* DFA::StateWithSet(BitArray *s) { +Melted* DFA::StateWithSet(const BitArray *s) { for (Melted *m = firstMelted; m != NULL; m = m->next) if (Sets::Equals(s, m->set)) return m; return NULL; @@ -476,7 +476,7 @@ Melted* DFA::StateWithSet(BitArray *s) { //------------------------ comments -------------------------------- -wchar_t* DFA::CommentStr(Node *p) { +wchar_t* DFA::CommentStr(const Node *p) { StringBuilder s; while (p != NULL) { if (p->typ == Node::chr) { @@ -497,7 +497,7 @@ wchar_t* DFA::CommentStr(Node *p) { } -void DFA::NewComment(Node *from, Node *to, bool nested) { +void DFA::NewComment(const Node *from, const Node *to, bool nested) { Comment *c = new Comment(CommentStr(from), CommentStr(to), nested, false); c->next = firstComment; firstComment = c; } @@ -505,7 +505,7 @@ 
void DFA::NewComment(Node *from, Node *to, bool nested) { //------------------------ scanner generation ---------------------- -void DFA::GenComBody(Comment *com) { +void DFA::GenComBody(const Comment *com) { fwprintf(gen, L"\t\tfor(;;) {\n"); wchar_t_20 fmt; @@ -547,11 +547,11 @@ void DFA::GenComBody(Comment *com) { fwprintf(gen, L"\t\t}\n"); } -void DFA::GenCommentHeader(Comment *com, int i) { +void DFA::GenCommentHeader(const Comment *com, int i) { fwprintf(gen, L"\tbool Comment%d();\n", i); } -void DFA::GenComment(Comment *com, int i) { +void DFA::GenComment(const Comment *com, int i) { fwprintf(gen, L"\n"); fwprintf(gen, L"bool Scanner::Comment%d() ", i); fwprintf(gen, L"{\n"); @@ -577,7 +577,7 @@ void DFA::GenComment(Comment *com, int i) { fwprintf(gen, L"}\n"); } -wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literals +const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in Tab.literals if (('a'<=sym->name[0] && sym->name[0]<='z') || ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0]) @@ -668,7 +668,7 @@ void DFA::CheckLabels() { } } -void DFA::WriteState(State *state) { +void DFA::WriteState(const State *state) { Symbol *endOf = state->endOf; fwprintf(gen, L"\t\tcase %d:\n", state->nr); if (existLabel[state->nr]) diff --git a/src/DFA.h b/src/DFA.h index 8abebc9..3a5f38c 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -78,17 +78,17 @@ class DFA State* NewState(); void NewTransition(State *from, State *to, int typ, int sym, int tc); void CombineShifts(); - void FindUsedStates(State *state, BitArray *used); + void FindUsedStates(const State *state, BitArray *used); void DeleteRedundantStates(); - State* TheState(Node *p); - void Step(State *from, Node *p, BitArray *stepped); + State* TheState(const Node *p); + void Step(State *from, const Node *p, BitArray *stepped); void NumberNodes(Node *p, State *state, bool renumIter); - void FindTrans (Node *p, bool start, BitArray *marked); + void 
FindTrans (const Node *p, bool start, BitArray *marked); void ConvertToStates(Node *p, Symbol *sym); // match string against current automaton; store it either as a fixedToken or as a litToken void MatchLiteral(wchar_t* s, Symbol *sym); void SplitActions(State *state, Action *a, Action *b); - bool Overlap(Action *a, Action *b); + bool Overlap(const Action *a, const Action *b); bool MakeUnique(State *state); // return true if actions were split void MeltStates(State *state); void FindCtxStates(); @@ -97,28 +97,28 @@ class DFA void CheckLabels(); //---------------------------- actions -------------------------------- - Action* FindAction(State *state, wchar_t ch); - void GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); + Action* FindAction(const State *state, wchar_t ch); + void GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); //------------------------- melted states ------------------------------ Melted* NewMelted(BitArray *set, State *state); - BitArray* MeltedSet(int nr); - Melted* StateWithSet(BitArray *s); + const BitArray* MeltedSet(int nr); + Melted* StateWithSet(const BitArray *s); //------------------------ comments -------------------------------- - wchar_t* CommentStr(Node *p); - void NewComment(Node *from, Node *to, bool nested); + wchar_t* CommentStr(const Node *p); + void NewComment(const Node *from, const Node *to, bool nested); //------------------------ scanner generation ---------------------- - void GenComBody(Comment *com); - void GenCommentHeader(Comment *com, int i); - void GenComment(Comment *com, int i); + void GenComBody(const Comment *com); + void GenCommentHeader(const Comment *com, int i); + void GenComment(const Comment *com, int i); void CopyFramePart(const wchar_t* stop); - wchar_t* SymName(Symbol *sym); // real name value is stored in Tab.literals + const wchar_t* SymName(const Symbol *sym); // real name value is stored in Tab.literals void GenLiterals (); int 
GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); - void WriteState(State *state); + void WriteState(const State *state); void WriteStartTab(); void OpenGen(const wchar_t* genName, bool backUp); /* pdt */ void WriteScanner(); diff --git a/src/HashTable.cpp b/src/HashTable.cpp index 08b97d1..012cf4a 100644 --- a/src/HashTable.cpp +++ b/src/HashTable.cpp @@ -53,7 +53,7 @@ HashTable::~HashTable() { data = NULL; }; -HashTable::Obj* HashTable::Get0(wchar_t *key) const { +HashTable::Obj* HashTable::Get0(const wchar_t *key) const { int k = coco_string_hash(key) % size; HashTable::Obj *o = data[k]; while (o != NULL && !coco_string_equal(key, o->key)) { @@ -62,7 +62,7 @@ HashTable::Obj* HashTable::Get0(wchar_t *key) const { return o; } -void HashTable::Set(wchar_t *key, void *val) { +void HashTable::Set(const wchar_t *key, void *val) { HashTable::Obj *o = Get0(key); if (o == NULL) { // new entry @@ -78,7 +78,7 @@ void HashTable::Set(wchar_t *key, void *val) { } } -void* HashTable::Get(wchar_t *key) const { +void* HashTable::Get(const wchar_t *key) const { HashTable::Obj *o = Get0(key); if (o != NULL) { return o->val; diff --git a/src/HashTable.h b/src/HashTable.h index c9ba572..2d44cf0 100644 --- a/src/HashTable.h +++ b/src/HashTable.h @@ -52,9 +52,9 @@ class HashTable HashTable(int size = 128); virtual ~HashTable(); - virtual void Set(wchar_t *key, void *value); - virtual void* Get(wchar_t *key) const; - inline void* operator[](wchar_t *key) const { return Get(key); }; + virtual void Set(const wchar_t *key, void *value); + virtual void* Get(const wchar_t *key) const; + inline void* operator[](const wchar_t *key) const { return Get(key); }; virtual Iterator* GetIterator(); private: @@ -75,7 +75,7 @@ class HashTable virtual DictionaryEntry* Next(); }; - Obj* Get0(wchar_t *key) const; + Obj* Get0(const wchar_t *key) const; Obj **data; int size; }; diff --git a/src/Melted.h b/src/Melted.h index a5bbfe0..08137e6 100644 --- a/src/Melted.h +++ 
b/src/Melted.h @@ -39,7 +39,7 @@ class BitArray; class Melted // info about melted states { public: - BitArray *set; // set of old states + const BitArray *set; // set of old states State *state; // new state Melted *next; diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 9027329..5011ca1 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -42,7 +42,7 @@ void ParserGen::Indent (int n) { } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning -bool ParserGen::UseSwitch (Node *p) { +bool ParserGen::UseSwitch (const Node *p) { BitArray *s2; if (p->typ != Node::alt) return false; int nAlts = 0; @@ -89,7 +89,7 @@ void ParserGen::GenNamespaceClose(int nrOfNs) { } } -void ParserGen::CopySourcePart (Position *pos, int indent) { +void ParserGen::CopySourcePart (const Position *pos, int indent) { // Copy text described by pos from atg to gen int ch, i; if (pos != NULL) { @@ -117,7 +117,7 @@ void ParserGen::CopySourcePart (Position *pos, int indent) { } } -void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { +void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { errorNr++; const int formatLen = 1000; wchar_t format[formatLen]; @@ -144,14 +144,14 @@ void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { coco_string_merge(err, format); } -int ParserGen::NewCondSet (BitArray *s) { +int ParserGen::NewCondSet (const BitArray *s) { for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) if (Sets::Equals(s, symSet[i])) return i; symSet.Add(s->Clone()); return symSet.Count - 1; } -void ParserGen::GenCond (BitArray *s, Node *p) { +void ParserGen::GenCond (const BitArray *s, const Node *p) { if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); @@ -172,7 +172,7 @@ void ParserGen::GenCond (BitArray *s, Node *p) { } } -void ParserGen::PutCaseLabels (BitArray *s) { +void ParserGen::PutCaseLabels (const BitArray *s) { Symbol *sym; for (int i=0; 
iterminals.Count; i++) { sym = tab->terminals[i]; @@ -184,8 +184,8 @@ void ParserGen::PutCaseLabels (BitArray *s) { } } -void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { - Node *p2; +void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { + const Node *p2; BitArray *s1, *s2; while (p != NULL) { if (p->typ == Node::nt) { diff --git a/src/ParserGen.h b/src/ParserGen.h index c07dd27..e8ceb14 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -68,16 +68,16 @@ class ParserGen Buffer *buffer; void Indent(int n); - bool UseSwitch(Node *p); + bool UseSwitch(const Node *p); void CopyFramePart(const wchar_t* stop); - void CopySourcePart(Position *pos, int indent); + void CopySourcePart(const Position *pos, int indent); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); - void GenErrorMsg(int errTyp, Symbol *sym); - int NewCondSet(BitArray *s); - void GenCond(BitArray *s, Node *p); - void PutCaseLabels(BitArray *s); - void GenCode(Node *p, int indent, BitArray *isChecked); + void GenErrorMsg(int errTyp, const Symbol *sym); + int NewCondSet(const BitArray *s); + void GenCond(const BitArray *s, const Node *p); + void PutCaseLabels(const BitArray *s); + void GenCode(const Node *p, int indent, BitArray *isChecked); void GenTokens(); void GenTokensHeader(); void GenPragmas(); diff --git a/src/Sets.h b/src/Sets.h index 4acd050..c09f407 100644 --- a/src/Sets.h +++ b/src/Sets.h @@ -35,14 +35,14 @@ namespace Coco { class Sets { public: - static int First(BitArray *s) { + static int First(const BitArray *s) { int max = s->getCount(); for (int i=0; igetCount(); int n = 0; for (int i=0; igetCount(); for (int i=0; i b ? + static bool Includes(const BitArray *a, const BitArray *b) { // a > b ? 
int max = a->getCount(); for (int i=0; igetCount(); for (int i=0; iClone(); c->Not(); a->And(c); diff --git a/src/SortedList.cpp b/src/SortedList.cpp index b1673fb..0ad855c 100644 --- a/src/SortedList.cpp +++ b/src/SortedList.cpp @@ -32,11 +32,11 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { -int Compare(Symbol *x, Symbol *y) { +int Compare(const Symbol *x, const Symbol *y) { return coco_string_compareto(x->name, y->name); } -SortedEntry::SortedEntry(Symbol* Key, void* Value) { +SortedEntry::SortedEntry(const Symbol* Key, const void* Value) { this->Key = Key; this->Value = Value; this->next = NULL; @@ -55,7 +55,7 @@ SortedList::~SortedList() { delete Data; } -bool SortedList::Find(Symbol* key) { +bool SortedList::Find(const Symbol* key) { SortedEntry* pSortedEntry = Data; while (pSortedEntry) { if (!Compare(pSortedEntry->Key, key)) @@ -65,7 +65,7 @@ bool SortedList::Find(Symbol* key) { return false; } -void SortedList::Set(Symbol *key, void *value) { +void SortedList::Set(const Symbol *key, const void *value) { if (!Find(key)) { // new entry SortedEntry* pSortedEntry = Data; @@ -103,7 +103,7 @@ void SortedList::Set(Symbol *key, void *value) { } } -void* SortedList::Get( Symbol* key ) const // Value +const void* SortedList::Get( const Symbol* key ) const // Value { SortedEntry* pSortedEntry = Data; while (pSortedEntry) { @@ -115,7 +115,7 @@ void* SortedList::Get( Symbol* key ) const // Value } -void* SortedList::GetKey( int index ) const // Key +const void* SortedList::GetKey( int index ) const // Key { if (0 <= index && index < Count) { SortedEntry* pSortedEntry = Data; diff --git a/src/SortedList.h b/src/SortedList.h index 5939675..2008d8f 100644 --- a/src/SortedList.h +++ b/src/SortedList.h @@ -36,11 +36,11 @@ class Symbol; class SortedEntry { public: - Symbol* Key; - void* Value; + const Symbol* Key; + const void* Value; SortedEntry* next; - SortedEntry(Symbol* Key, void* Value); + SortedEntry(const Symbol* Key, const 
void* Value); virtual ~SortedEntry(); }; @@ -50,14 +50,14 @@ class SortedList SortedList(); virtual ~SortedList(); - void Set(Symbol *key, void *value); - void* Get( Symbol* key ) const; // Value - void* GetKey( int index ) const ;// Key + void Set(const Symbol *key, const void *value); + const void* Get( const Symbol* key ) const; // Value + const void* GetKey( int index ) const ;// Key SortedEntry* operator[]( int index ) const; int Count; private: - bool Find(Symbol* key); + bool Find(const Symbol* key); SortedEntry *Data; diff --git a/src/Symbol.cpp b/src/Symbol.cpp index 37deeda..dfd05f4 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -32,10 +32,10 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { -int Symbol::fixedToken = 0; -int Symbol::classToken = 1; -int Symbol::litToken = 2; -int Symbol::classLitToken = 3; +const int Symbol::fixedToken = 0; +const int Symbol::classToken = 1; +const int Symbol::litToken = 2; +const int Symbol::classLitToken = 3; Symbol::Symbol(int typ, const wchar_t* name, int line) { diff --git a/src/Symbol.h b/src/Symbol.h index 63cb8e8..01517ae 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -40,10 +40,10 @@ class BitArray; class Symbol { public: // token kinds - static int fixedToken; // e.g. 'a' ('b' | 'c') (structure of literals) - static int classToken; // e.g. digit {digit} (at least one char class) - static int litToken; // e.g. "while" - static int classLitToken; // e.g. letter {letter} but without literals that have the same structure*/ + static const int fixedToken; // e.g. 'a' ('b' | 'c') (structure of literals) + static const int classToken; // e.g. digit {digit} (at least one char class) + static const int litToken; // e.g. "while" + static const int classLitToken; // e.g. 
letter {letter} but without literals that have the same structure*/ int n; // symbol number int typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ diff --git a/src/Tab.cpp b/src/Tab.cpp index e8b7076..0db9417 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -109,11 +109,11 @@ Symbol* Tab::FindSym(const wchar_t* name) { return NULL; } -int Tab::Num(Node *p) { +int Tab::Num(const Node *p) { if (p == NULL) return 0; else return p->n; } -void Tab::PrintSym(Symbol *sym) { +void Tab::PrintSym(const Symbol *sym) { wchar_t *paddedName = Name(sym->name); fwprintf(trace, L"%3d %14s %s", sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); @@ -161,7 +161,7 @@ void Tab::PrintSymbolTable() { fwprintf(trace, L"\n"); } -void Tab::PrintSet(BitArray *s, int indent) { +void Tab::PrintSet(const BitArray *s, int indent) { int col, len; col = indent; Symbol *sym; @@ -302,15 +302,15 @@ void Tab::SetContextTrans(Node *p) { // set transition code in the graph rooted //------------ graph deletability check ----------------- -bool Tab::DelGraph(Node* p) { +bool Tab::DelGraph(const Node* p) { return p == NULL || (DelNode(p) && DelGraph(p->next)); } -bool Tab::DelSubGraph(Node* p) { +bool Tab::DelSubGraph(const Node* p) { return p == NULL || (DelNode(p) && (p->up || DelSubGraph(p->next))); } -bool Tab::DelNode(Node* p) { +bool Tab::DelNode(const Node* p) { if (p->typ == Node::nt) { return p->sym->deletable; } @@ -325,7 +325,7 @@ bool Tab::DelNode(Node* p) { //----------------- graph printing ---------------------- -int Tab::Ptr(Node *p, bool up) { +int Tab::Ptr(const Node *p, bool up) { if (p == NULL) return 0; else if (up) return -(p->n); else return p->n; @@ -421,7 +421,7 @@ CharClass* Tab::FindCharClass(const wchar_t* name) { return NULL; } -CharClass* Tab::FindCharClass(CharSet *s) { +CharClass* Tab::FindCharClass(const CharSet *s) { CharClass *c; for (int i=0; ihead; r != NULL; r = r->next) { if (r->from < r->to) { @@ -484,7 +484,7 @@ void 
Tab::WriteCharClasses () { //--------------------------------------------------------------------- /* Computes the first set for the given Node. */ -BitArray* Tab::First0(Node *p, BitArray *mark) { +BitArray* Tab::First0(const Node *p, BitArray *mark) { BitArray *fs = new BitArray(terminals.Count); while (p != NULL && !((*mark)[p->n])) { mark->Set(p->n, true); @@ -523,7 +523,7 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { return fs; } -BitArray* Tab::First(Node *p) { +BitArray* Tab::First(const Node *p) { BitArray mark(nodes.Count); BitArray *fs = First0(p, &mark); if (ddt[3]) { @@ -613,9 +613,9 @@ void Tab::CompFollowSets() { } } -Node* Tab::LeadingAny(Node *p) { +const Node* Tab::LeadingAny(const Node *p) { if (p == NULL) return NULL; - Node *a = NULL; + const Node *a = NULL; if (p->typ == Node::any) a = p; else if (p->typ == Node::alt) { a = LeadingAny(p->sub); @@ -626,8 +626,8 @@ Node* Tab::LeadingAny(Node *p) { return a; } -void Tab::FindAS(Node *p) { // find ANY sets - Node *a; +void Tab::FindAS(const Node *p) { // find ANY sets + const Node *a; while (p != NULL) { if (p->typ == Node::opt || p->typ == Node::iter) { FindAS(p->sub); @@ -637,7 +637,7 @@ void Tab::FindAS(Node *p) { // find ANY sets delete ba; } else if (p->typ == Node::alt) { BitArray s1(terminals.Count); - Node *q = p; + const Node *q = p; while (q != NULL) { FindAS(q->sub); a = LeadingAny(q->sub); @@ -682,7 +682,7 @@ void Tab::CompAnySets() { } } -BitArray* Tab::Expected(Node *p, Symbol *curSy) { +BitArray* Tab::Expected(const Node *p, const Symbol *curSy) { BitArray *s = First(p); if (DelGraph(p)) s->Or(curSy->follow); @@ -690,7 +690,7 @@ BitArray* Tab::Expected(Node *p, Symbol *curSy) { } // does not look behind resolvers; only called during LL(1) test and in CheckRes -BitArray* Tab::Expected0(Node *p, Symbol *curSy) { +BitArray* Tab::Expected0(const Node *p, const Symbol *curSy) { if (p->typ == Node::rslv) return new BitArray(terminals.Count); else return Expected(p, curSy); } @@ 
-909,7 +909,7 @@ bool Tab::GrammarOk() { //--------------- check for circular productions ---------------------- -void Tab::GetSingles(Node *p, ArrayList *singles, Node *rule) { +void Tab::GetSingles(const Node *p, ArrayList *singles, const Node *rule) { if (p == NULL) return; // end of graph if (p->typ == Node::nt) { if (p->up || DelGraph(p->next) || p->sym->graph == rule) singles->Add(p->sym); @@ -972,7 +972,7 @@ bool Tab::NoCircularProductions() { //--------------- check for LL(1) errors ---------------------- -void Tab::LL1Error(int cond, Symbol *sym) { +void Tab::LL1Error(int cond, const Symbol *sym) { wprintf(L" LL1 warning in %ls: ", curSy->name); if (sym != NULL) wprintf(L"%ls is ", sym->name); switch (cond) { @@ -984,7 +984,7 @@ void Tab::LL1Error(int cond, Symbol *sym) { } -void Tab::CheckOverlap(BitArray *s1, BitArray *s2, int cond) { +void Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { Symbol *sym; for (int i=0; iWarning(p->line, p->pos->col, msg); } -void Tab::CheckRes(Node *p, bool rslvAllowed) { +void Tab::CheckRes(const Node *p, bool rslvAllowed) { BitArray expected(terminals.Count), soFar(terminals.Count); while (p != NULL) { - Node *q; + const Node *q; if (p->typ == Node::alt) { expected.SetAll(false); for (q = p; q != NULL; q = q->down) { @@ -1115,7 +1115,7 @@ bool Tab::NtsComplete() { //-------------- check if every nts can be reached ----------------- -void Tab::MarkReachedNts(Node *p) { +void Tab::MarkReachedNts(const Node *p) { while (p != NULL) { if (p->typ == Node::nt && !((*visited)[p->sym->n])) { // new nt reached visited->Set(p->sym->n, true); @@ -1148,7 +1148,7 @@ bool Tab::AllNtReached() { //--------- check if every nts can be derived to terminals ------------ -bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to terminals +bool Tab::IsTerm(const Node *p, const BitArray *mark) { // true if graph can be derived to terminals while (p != NULL) { if (p->typ == Node::nt && 
!((*mark)[p->sym->n])) return false; if (p->typ == Node::alt && !IsTerm(p->sub, mark) diff --git a/src/Tab.h b/src/Tab.h index 7af087a..178e3bc 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -100,10 +100,10 @@ class Tab { Symbol* NewSym(int typ, const wchar_t* name, int line); Symbol* FindSym(const wchar_t* name); - int Num(Node *p); - void PrintSym(Symbol *sym); + int Num(const Node *p); + void PrintSym(const Symbol *sym); void PrintSymbolTable(); - void PrintSet(BitArray *s, int indent); + void PrintSet(const BitArray *s, int indent); //--------------------------------------------------------------------- // Syntax graph management @@ -124,13 +124,13 @@ class Tab { //------------ graph deletability check ----------------- - bool DelGraph(Node* p); - bool DelSubGraph(Node* p); - bool DelNode(Node* p); + bool DelGraph(const Node* p); + bool DelSubGraph(const Node* p); + bool DelNode(const Node* p); //----------------- graph printing ---------------------- - int Ptr(Node *p, bool up); + int Ptr(const Node *p, bool up); wchar_t* Name(const wchar_t* name); void PrintNodes(); @@ -140,12 +140,12 @@ class Tab { CharClass* NewCharClass(const wchar_t* name, CharSet *s); CharClass* FindCharClass(const wchar_t* name); - CharClass* FindCharClass(CharSet *s); + CharClass* FindCharClass(const CharSet *s); CharSet* CharClassSet(int i); //----------- character class printing - void WriteCharSet(CharSet *s); + void WriteCharSet(const CharSet *s); void WriteCharClasses (); //--------------------------------------------------------------------- @@ -153,18 +153,18 @@ class Tab { //--------------------------------------------------------------------- /* Computes the first set for the given Node. 
*/ - BitArray* First0(Node *p, BitArray *mark); - BitArray* First(Node *p); + BitArray* First0(const Node *p, BitArray *mark); + BitArray* First(const Node *p); void CompFirstSets(); void CompFollow(Node *p); void Complete(Symbol *sym); void CompFollowSets(); - Node* LeadingAny(Node *p); - void FindAS(Node *p); // find ANY sets + const Node* LeadingAny(const Node *p); + void FindAS(const Node *p); // find ANY sets void CompAnySets(); - BitArray* Expected(Node *p, Symbol *curSy); + BitArray* Expected(const Node *p, const Symbol *curSy); // does not look behind resolvers; only called during LL(1) test and in CheckRes - BitArray* Expected0(Node *p, Symbol *curSy); + BitArray* Expected0(const Node *p, const Symbol *curSy); void CompSync(Node *p); void CompSyncSets(); void SetupAnys(); @@ -190,27 +190,27 @@ class Tab { class CNode { // node of list for finding circular productions public: - Symbol *left, *right; + const Symbol *left, *right; - CNode (Symbol *l, Symbol *r) { + CNode (const Symbol *l, const Symbol *r) { left = l; right = r; } }; - void GetSingles(Node *p, ArrayList *singles, Node *rule); + void GetSingles(const Node *p, ArrayList *singles, const Node *rule); bool NoCircularProductions(); //--------------- check for LL(1) errors ---------------------- - void LL1Error(int cond, Symbol *sym); - void CheckOverlap(BitArray *s1, BitArray *s2, int cond); + void LL1Error(int cond, const Symbol *sym); + void CheckOverlap(const BitArray *s1, const BitArray *s2, int cond); void CheckAlts(Node *p); void CheckLL1(); //------------- check if resolvers are legal -------------------- - void ResErr(Node *p, const wchar_t* msg); - void CheckRes(Node *p, bool rslvAllowed); + void ResErr(const Node *p, const wchar_t* msg); + void CheckRes(const Node *p, bool rslvAllowed); void CheckResolvers(); //------------- check if every nts has a production -------------------- @@ -219,12 +219,12 @@ class Tab { //-------------- check if every nts can be reached ----------------- - void 
MarkReachedNts(Node *p); + void MarkReachedNts(const Node *p); bool AllNtReached(); //--------- check if every nts can be derived to terminals ------------ - bool IsTerm(Node *p, BitArray *mark); // true if graph can be derived to terminals + bool IsTerm(const Node *p, const BitArray *mark); // true if graph can be derived to terminals bool AllNtToTerm(); //--------------------------------------------------------------------- From 076d923480e769d54315d84b9dc64ed454dc6629 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 10:05:06 +0200 Subject: [PATCH 28/95] Replace recursive calls to 'Scanner::NextToken()' with iteration --- src/DFA.cpp | 5 +++-- src/Scanner.cpp | 10 +++++++--- src/Scanner.frame | 8 ++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 0001217..5a5a5dc 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -818,7 +818,7 @@ void DFA::WriteScanner() { g.CopyFramePart(L"-->scan2"); if (firstComment != NULL) { - fwprintf(gen, L"\tif ("); + fwprintf(gen, L"\t\tif ("); com = firstComment; cmdIdx = 0; wchar_t_20 fmt; while (com != NULL) { @@ -829,8 +829,9 @@ void DFA::WriteScanner() { } com = com->next; cmdIdx++; } - fwprintf(gen, L") return NextToken();"); + fwprintf(gen, L") continue;"); } + g.CopyFramePart(L"-->scan22"); if (hasCtxMoves) { fwprintf(gen, L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */ g.CopyFramePart(L"-->scan3"); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 9f3cc11..5bb283d 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -682,10 +682,14 @@ void Scanner::AppendVal(Token *t) { } Token* Scanner::NextToken() { - while (ch == ' ' || + while(true) { + while (ch == ' ' || (ch >= 9 && ch <= 10) || ch == 13 - ) NextCh(); - if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken(); + ) NextCh(); + if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = CreateToken(); diff --git 
a/src/Scanner.frame b/src/Scanner.frame index 3f2dde7..429ad2e 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -874,10 +874,14 @@ void Scanner::AppendVal(Token *t) { } Token* Scanner::NextToken() { - while (ch == ' ' || + while(true) { + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = CreateToken(); From 2c69a1d950bbe460e3c5b88df026a9311f86946c Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 11:52:09 +0200 Subject: [PATCH 29/95] Allow till 8 characters for multiline comment delimiters --- src/DFA.cpp | 102 +++++++++++++++++++++++++++------------------- src/DFA.h | 1 + src/Scanner.cpp | 18 ++++---- src/Scanner.frame | 6 +-- 4 files changed, 72 insertions(+), 55 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 5a5a5dc..49cfe7d 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -489,8 +489,8 @@ wchar_t* DFA::CommentStr(const Node *p) { else parser->SemErr(L"comment delimiters may not be structured"); p = p->next; } - if (s.GetLength() == 0 || s.GetLength() > 2) { - parser->SemErr(L"comment delimiters must be 1 or 2 characters long"); + if (s.GetLength() == 0 || s.GetLength() > 8) { + parser->SemErr(L"comment delimiters must be 1 or 8 characters long"); s = StringBuilder(L"?"); } return s.ToString(); @@ -505,46 +505,64 @@ void DFA::NewComment(const Node *from, const Node *to, bool nested) { //------------------------ scanner generation ---------------------- +void DFA::GenCommentIndented(int n, const wchar_t *s) { + for(int i= 1; i < n; ++i) fwprintf(gen, L"\t"); + fwprintf(gen, s); +} + void DFA::GenComBody(const Comment *com) { - fwprintf(gen, L"\t\tfor(;;) {\n"); + int imax = coco_string_length(com->start)-1; + int imaxStop = coco_string_length(com->stop)-1; + GenCommentIndented(imax, L"\t\tfor(;;) {\n"); - wchar_t_20 fmt; + wchar_t_20 fmt; wchar_t* res = DFAChCond(com->stop[0], fmt); - fwprintf(gen, L"\t\t\tif (%ls) ", res); - fwprintf(gen, L"{\n"); + 
GenCommentIndented(imax, L"\t\t\tif ("); + fwprintf(gen, L"%ls) {\n", res); - if (coco_string_length(com->stop) == 1) { + if (imaxStop == 0) { fwprintf(gen, L"\t\t\t\tlevel--;\n"); fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); fwprintf(gen, L"\t\t\t\tNextCh();\n"); } else { - fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = DFAChCond(com->stop[1], fmt); - fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res); - fwprintf(gen, L"\t\t\t\t\tlevel--;\n"); - fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); - fwprintf(gen, L"\t\t\t\t\tNextCh();\n"); - fwprintf(gen, L"\t\t\t\t}\n"); + int currIndent, indent = imax - 1; + for(int sidx = 1; sidx <= imaxStop; ++sidx) { + currIndent = indent + sidx; + GenCommentIndented(currIndent, L"\t\t\t\tNextCh();\n"); + GenCommentIndented(currIndent, L"\t\t\t\tif ("); + fwprintf(gen, L"%ls) {\n", DFAChCond(com->stop[sidx], fmt)); + } + currIndent = indent + imax; + GenCommentIndented(currIndent, L"\t\t\tlevel--;\n"); + GenCommentIndented(currIndent, L"\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n"); + GenCommentIndented(currIndent, L"\t\t\tNextCh();\n"); + for(int sidx = imaxStop; sidx > 0; --sidx) { + GenCommentIndented(indent + sidx, L"\t\t\t\t}\n"); + } } if (com->nested) { - fwprintf(gen, L"\t\t\t}"); - wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, L" else if (%ls) ", res); - fwprintf(gen, L"{\n"); - if (coco_string_length(com->stop) == 1) - fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n"); + GenCommentIndented(imax, L"\t\t\t}"); + wchar_t* res = DFAChCond(com->start[0], fmt); + fwprintf(gen, L" else if (%ls) {\n", res); + if (imaxStop == 0) + fwprintf(gen, L"\t\t\tlevel++; NextCh();\n"); else { - fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = DFAChCond(com->start[1], fmt); - fwprintf(gen, L"\t\t\t\tif (%ls) ", res); - fwprintf(gen, L"{\n"); - fwprintf(gen, L"\t\t\t\t\tlevel++; 
NextCh();\n"); - fwprintf(gen, L"\t\t\t\t}\n"); + int indent = imax - 1; + for(int sidx = 1; sidx <= imax; ++sidx) { + int loopIndent = indent + sidx; + GenCommentIndented(loopIndent, L"\t\t\t\tNextCh();\n"); + GenCommentIndented(loopIndent, L"\t\t\t\tif ("); + fwprintf(gen, L"%ls) {\n", DFAChCond(com->start[sidx], fmt)); + } + GenCommentIndented(indent + imax, L"\t\t\t\t\tlevel++; NextCh();\n"); + for(int sidx = imax; sidx > 0; --sidx) { + GenCommentIndented(indent + sidx, L"\t\t\t\t}\n"); + } } } - fwprintf(gen, L"\t\t\t} else if (ch == buffer->EoF) return false;\n"); - fwprintf(gen, L"\t\t\telse NextCh();\n"); - fwprintf(gen, L"\t\t}\n"); + GenCommentIndented(imax, L"\t\t\t} else if (ch == buffer->EoF) return false;\n"); + GenCommentIndented(imax, L"\t\t\telse NextCh();\n"); + GenCommentIndented(imax, L"\t\t}\n"); } void DFA::GenCommentHeader(const Comment *com, int i) { @@ -557,21 +575,21 @@ void DFA::GenComment(const Comment *com, int i) { fwprintf(gen, L"{\n"); fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n"); wchar_t_20 fmt; - if (coco_string_length(com->start) == 1) { - fwprintf(gen, L"\tNextCh();\n"); + fwprintf(gen, L"\tNextCh();\n"); + int imax = coco_string_length(com->start)-1; + if (imax == 0) { GenComBody(com); } else { - fwprintf(gen, L"\tNextCh();\n"); - wchar_t* res = DFAChCond(com->start[1], fmt); - fwprintf(gen, L"\tif (%ls) ", res); - fwprintf(gen, L"{\n"); - - fwprintf(gen, L"\t\tNextCh();\n"); - GenComBody(com); - - fwprintf(gen, L"\t} else {\n"); - fwprintf(gen, L"\t\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n"); - fwprintf(gen, L"\t}\n"); + for(int sidx = 1; sidx <= imax; ++sidx) { + GenCommentIndented(sidx, L"\tif ("); + fwprintf(gen, L"%ls) {\n", DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(sidx, L"\t\tNextCh();\n"); + } + GenComBody(com); + for(int sidx = imax; sidx > 0; --sidx) { + GenCommentIndented(sidx, L"\t}\n"); + } + fwprintf(gen, 
L"\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n"); fwprintf(gen, L"\treturn false;\n"); } fwprintf(gen, L"}\n"); diff --git a/src/DFA.h b/src/DFA.h index 3a5f38c..110278e 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -110,6 +110,7 @@ class DFA void NewComment(const Node *from, const Node *to, bool nested); //------------------------ scanner generation ---------------------- + void GenCommentIndented(int n, const wchar_t *s); void GenComBody(const Comment *com); void GenCommentHeader(const Comment *com, int i); void GenComment(const Comment *com, int i); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 5bb283d..d2ff7fc 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -600,9 +600,8 @@ bool Scanner::Comment0() { } else if (ch == buffer->EoF) return false; else NextCh(); } - } else { - buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -615,9 +614,9 @@ bool Scanner::Comment1() { if (ch == L'*') { NextCh(); if (ch == L'/') { - level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } - NextCh(); + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); } } else if (ch == L'/') { NextCh(); @@ -627,9 +626,8 @@ bool Scanner::Comment1() { } else if (ch == buffer->EoF) return false; else NextCh(); } - } else { - buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -683,11 +681,11 @@ void Scanner::AppendVal(Token *t) { Token* Scanner::NextToken() { while(true) { - while (ch == ' ' || + while (ch == ' ' || (ch >= 9 && ch <= 10) || ch == 13 - ) NextCh(); + ) NextCh(); if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) continue; - break; + break; } int recKind = noSym; diff --git a/src/Scanner.frame b/src/Scanner.frame 
index 429ad2e..7139e2e 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -875,11 +875,11 @@ void Scanner::AppendVal(Token *t) { Token* Scanner::NextToken() { while(true) { - while (ch == ' ' || + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 - break; + break; } -->scan22 int recKind = noSym; From e0a955fc3266ecd7e77e6ad402fd109887b40b73 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 13:01:01 +0200 Subject: [PATCH 30/95] Add a limited semantic action to TokenDecl to allow for example parsing custom strings like in Lua --- src/Coco.atg | 2 +- src/DFA.cpp | 19 ++++++++++-- src/DFA.h | 1 + src/Parser.cpp | 2 +- src/Scanner.cpp | 79 ++++++++++++++++++++++++----------------------- src/Scanner.frame | 25 ++++++++------- 6 files changed, 74 insertions(+), 54 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index f946d35..0069ad1 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -310,7 +310,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .) + [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(L"Warning semantic action on token declarations require a custom Scanner"); .) //(. if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .) ] . 
diff --git a/src/DFA.cpp b/src/DFA.cpp index 49cfe7d..d6cd7af 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -686,6 +686,17 @@ void DFA::CheckLabels() { } } +/* TODO better interface for CopySourcePart */ +void DFA::CopySourcePart (const Position *pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser->pgen->buffer->GetPos(); // Pos is modified by CopySourcePart + FILE* prevGen = parser->pgen->gen; + parser->pgen->gen = gen; + parser->pgen->CopySourcePart(pos, 0); + parser->pgen->gen = prevGen; + parser->pgen->buffer->SetPos(oldPos); +} + void DFA::WriteState(const State *state) { Symbol *endOf = state->endOf; fwprintf(gen, L"\t\tcase %d:\n", state->nr); @@ -733,12 +744,14 @@ void DFA::WriteState(const State *state) { fwprintf(gen, L"t->kind = %d; ", endOf->n); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, true); break;}\n"); + fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); } else { - fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, false); break;}\n"); + fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); } } else { - fwprintf(gen, L"break;}\n"); + fwprintf(gen, L"loopState = false;"); + if(endOf->semPos && endOf->typ == Node::t) CopySourcePart(endOf->semPos, 0); + fwprintf(gen, L" break;}\n"); } } } diff --git a/src/DFA.h b/src/DFA.h index 110278e..a4a9646 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -119,6 +119,7 @@ class DFA void GenLiterals (); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); + void CopySourcePart (const Position *pos, int indent); void WriteState(const State *state); void WriteStartTab(); void OpenGen(const wchar_t* genName, bool backUp); /* pdt */ diff --git a/src/Parser.cpp b/src/Parser.cpp index 30294b6..dc15551 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -404,7 +404,7 @@ void 
Parser::TokenDecl(int typ) { } else SynErr(45); if (la->kind == 40 /* "(." */) { SemText(sym->semPos); - if (typ != Node::pr) SemErr(L"semantic action not allowed here"); + if (typ == Node::t) errors.Warning(L"Warning semantic action on token declarations require a custom Scanner"); } #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index d2ff7fc..5c9008a 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -695,32 +695,34 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - switch (state) { - case -1: { t->kind = eofSym; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; break; - } // NextCh already done + bool loopState = true; + while(loopState) { + switch (state) { + case -1: { t->kind = eofSym; loopState = false; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; loopState = false; break; + } // NextCh already done case 1: case_1: recEnd = pos; recKind = 1; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} - else {t->kind = 1; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + else {t->kind = 1; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} case 2: case_2: recEnd = pos; recKind = 2; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;} - else {t->kind = 2; break;} + else {t->kind = 2; loopState = false; break;} case 3: case_3: - {t->kind = 3; break;} + {t->kind = 3; loopState = false; break;} case 4: case_4: - {t->kind = 4; break;} + {t->kind = 4; loopState = false; break;} case 5: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_6;} else if (ch == 92) 
{AddCh(); goto case_7;} @@ -740,17 +742,17 @@ Token* Scanner::NextToken() { else {goto case_0;} case 9: case_9: - {t->kind = 5; break;} + {t->kind = 5; loopState = false; break;} case 10: case_10: recEnd = pos; recKind = 43; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} - else {t->kind = 43; break;} + else {t->kind = 43; loopState = false; break;} case 11: case_11: recEnd = pos; recKind = 44; if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} - else {t->kind = 44; break;} + else {t->kind = 44; loopState = false; break;} case 12: case_12: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} @@ -762,7 +764,7 @@ Token* Scanner::NextToken() { recEnd = pos; recKind = 43; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else {t->kind = 43; break;} + else {t->kind = 43; loopState = false; break;} case 14: case_14: if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} @@ -773,58 +775,59 @@ Token* Scanner::NextToken() { if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} else if (ch == L'=') {AddCh(); goto case_11;} - else {t->kind = 43; break;} + else {t->kind = 43; loopState = false; break;} case 16: - {t->kind = 18; break;} + {t->kind = 18; loopState = false; break;} case 17: - {t->kind = 21; break;} + {t->kind = 21; loopState = false; break;} case 18: - {t->kind = 22; break;} + {t->kind = 22; loopState = false; break;} case 19: case_19: - {t->kind = 23; break;} + {t->kind = 23; loopState = false; break;} case 20: - {t->kind = 26; break;} + {t->kind = 26; 
loopState = false; break;} case 21: case_21: - {t->kind = 27; break;} + {t->kind = 27; loopState = false; break;} case 22: case_22: - {t->kind = 28; break;} + {t->kind = 28; loopState = false; break;} case 23: - {t->kind = 29; break;} + {t->kind = 29; loopState = false; break;} case 24: - {t->kind = 32; break;} + {t->kind = 32; loopState = false; break;} case 25: - {t->kind = 33; break;} + {t->kind = 33; loopState = false; break;} case 26: - {t->kind = 34; break;} + {t->kind = 34; loopState = false; break;} case 27: - {t->kind = 35; break;} + {t->kind = 35; loopState = false; break;} case 28: - {t->kind = 36; break;} + {t->kind = 36; loopState = false; break;} case 29: case_29: - {t->kind = 40; break;} + {t->kind = 40; loopState = false; break;} case 30: case_30: - {t->kind = 41; break;} + {t->kind = 41; loopState = false; break;} case 31: recEnd = pos; recKind = 19; if (ch == L'.') {AddCh(); goto case_19;} else if (ch == L'>') {AddCh(); goto case_22;} else if (ch == L')') {AddCh(); goto case_30;} - else {t->kind = 19; break;} + else {t->kind = 19; loopState = false; break;} case 32: recEnd = pos; recKind = 25; if (ch == L'.') {AddCh(); goto case_21;} - else {t->kind = 25; break;} + else {t->kind = 25; loopState = false; break;} case 33: recEnd = pos; recKind = 31; if (ch == L'.') {AddCh(); goto case_29;} - else {t->kind = 31; break;} + else {t->kind = 31; loopState = false; break;} - } + } + } AppendVal(t); return t; } diff --git a/src/Scanner.frame b/src/Scanner.frame index 7139e2e..f0fb4c2 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -889,18 +889,21 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - switch (state) { - case -1: { t->kind = eofSym; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; break; - } // NextCh already done + bool loopState = true; + while(loopState) { + switch (state) { + case -1: { t->kind = 
eofSym; loopState = false; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; loopState = false; break; + } // NextCh already done -->scan3 - } + } + } AppendVal(t); return t; } From 42922ccaf9831be2da2b7fde26ca291cd7374154 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 13:45:19 +0200 Subject: [PATCH 31/95] Add column info to Node and Symbol to create better diagnostics, also add an option to ignore grammar errors for example rules not reachable. --- src/Coco.atg | 43 +++++++++++++++++++++++++------------------ src/Coco.cpp | 5 ++++- src/Node.cpp | 4 ++-- src/Node.h | 3 ++- src/Parser.cpp | 40 +++++++++++++++++++++++----------------- src/Parser.h | 3 ++- src/Symbol.cpp | 3 ++- src/Symbol.h | 3 ++- src/Tab.cpp | 31 ++++++++++++++++++++----------- src/Tab.h | 7 ++++--- 10 files changed, 86 insertions(+), 56 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 0069ad1..61634bd 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -47,7 +47,7 @@ COMPILER Coco DFA *dfa; ParserGen *pgen; - bool genScanner; + bool genScanner, ignoreGammarErrors; wchar_t* tokenString; // used in declarations of literal tokens wchar_t* noString; // used in declarations of literal tokens @@ -61,6 +61,7 @@ COMPILER Coco str = 1; tokenString = NULL; noString = coco_string_create(L"-none-"); + ignoreGammarErrors = false; } // Uncomment this method if cleanup is necessary, @@ -128,7 +129,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); if (sym != NULL) SemErr(L"name declared twice"); else { - sym = tab->NewSym(Node::t, t->val, t->line); + sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; }.) } ] /*from cocoxml*/ @@ -150,7 +151,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra .) { ident (. 
sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); + if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { if (sym->graph != NULL) SemErr(L"name declared twice"); @@ -184,7 +185,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra if (sym->attrPos != NULL) SemErr(L"grammar symbol must not have attributes"); } - tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, L"???", 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); @@ -192,7 +193,13 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); - if (tab->GrammarOk()) { + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab->GrammarCheckAll(); + } + else doGenCode = tab->GrammarOk(); + if (doGenCode) { wprintf(L"parser"); pgen->WriteParser(); if (genScanner) { @@ -287,7 +294,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; Sym (. sym = tab->FindSym(name); if (sym != NULL) SemErr(L"name declared twice"); else { - sym = tab->NewSym(typ, name, t->line); + sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } coco_string_delete(name); @@ -349,16 +356,16 @@ Expression (. Graph *g2; .) Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) = -( [ (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); .) +( [ (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); .) Resolver<.rslv->pos.> (. g = new Graph(rslv); .) ] Factor (. if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; .) { Factor (. tab->MakeSequence(g, g2); delete g2; .) } -| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) +| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) ) (. 
if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) . /*------------------------------------------------------------------------------------*/ @@ -375,9 +382,9 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0); // forward nt + sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line); + sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(L"undefined string in production"); @@ -392,7 +399,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; if (typ == Node::t) typ = Node::wt; else SemErr(L"only terminals may be weak"); } - Node *p = tab->NewNode(typ, sym, t->line); + Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); .) [ Attribs

(. if (kind != id) SemErr(L"a literal must not have attributes"); .) @@ -404,18 +411,18 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) | '{' Expression '}' (. tab->MakeIteration(g); .) -| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); +| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); p->pos = pos; g = new Graph(p); .) -| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys +| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0, 0); // p.set is set in tab->SetupAnys g = new Graph(p); .) -| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); +| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0, 0); g = new Graph(p); .) ) (. if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) . @@ -469,7 +476,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) SemErr(L"undefined name"); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; + Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -486,7 +493,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) | '[' TokenExpr ']' (. tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) | '{' TokenExpr '}' (. tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) ) (. if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) . 
/*------------------------------------------------------------------------------------*/ diff --git a/src/Coco.cpp b/src/Coco.cpp index 2d2851b..bb232c6 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -66,7 +66,7 @@ int main(int argc, char *argv_[]) { wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; char *chTrFileName = NULL; - bool emitLines = false; + bool emitLines = false, ignoreGammarErrors = false; for (int i = 1; i < argc; i++) { if (coco_string_equal(argv[i], L"-namespace") && i < argc - 1) nsName = coco_string_create(argv[++i]); @@ -74,6 +74,7 @@ int main(int argc, char *argv_[]) { else if (coco_string_equal(argv[i], L"-trace") && i < argc - 1) ddtString = coco_string_create(argv[++i]); else if (coco_string_equal(argv[i], L"-o") && i < argc - 1) outDir = coco_string_create_append(argv[++i], L"/"); else if (coco_string_equal(argv[i], L"-lines")) emitLines = true; + else if (coco_string_equal(argv[i], L"-ignoreGammarErrors")) ignoreGammarErrors = true; else srcName = coco_string_create(argv[i]); } @@ -108,6 +109,7 @@ int main(int argc, char *argv_[]) { tab.frameDir = coco_string_create(frameDir); tab.outDir = coco_string_create(outDir != NULL ? 
outDir : srcDir); tab.emitLines = emitLines; + parser.ignoreGammarErrors = ignoreGammarErrors; if (ddtString != NULL) tab.SetDDT(ddtString); parser.tab = &tab; @@ -147,6 +149,7 @@ int main(int argc, char *argv_[]) { wprintf(L" -trace \n"); wprintf(L" -o \n"); wprintf(L" -lines\n"); + wprintf(L" -ignoreGammarErrors\n"); wprintf(L"Valid characters in the trace string:\n"); wprintf(L" A trace automaton\n"); wprintf(L" F list first/follow sets\n"); diff --git a/src/Node.cpp b/src/Node.cpp index 591576a..919fcc6 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -51,7 +51,7 @@ int Node::normalTrans = 0; // transition codes int Node::contextTrans = 1; -Node::Node(int typ, Symbol *sym, int line) { +Node::Node(int typ, Symbol *sym, int line, int col) { this->n = 0; this->next = NULL; this->down = NULL; @@ -63,7 +63,7 @@ Node::Node(int typ, Symbol *sym, int line) { this->pos = NULL; this->state = NULL; - this->typ = typ; this->sym = sym; this->line = line; + this->typ = typ; this->sym = sym; this->line = line; this->col = col; } Node::~Node() { diff --git a/src/Node.h b/src/Node.h index f97f4d3..4746f21 100644 --- a/src/Node.h +++ b/src/Node.h @@ -75,10 +75,11 @@ class Node { // sem: pos of semantic action in source text // rslv: pos of resolver in source text int line; // source text line number of item in this node + int col; // source text line column number of item in this node State *state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) - Node(int typ, Symbol *sym, int line); + Node(int typ, Symbol *sym, int line, int col); ~Node(); }; diff --git a/src/Parser.cpp b/src/Parser.cpp index dc15551..0a38356 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -170,7 +170,7 @@ void Parser::Coco() { sym = tab->FindSym(t->val); if (sym != NULL) SemErr(L"name declared twice"); else { - sym = tab->NewSym(Node::t, t->val, t->line); + sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } } @@ -250,7 +250,7 @@ void 
Parser::Coco() { #endif sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); + if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { if (sym->graph != NULL) SemErr(L"name declared twice"); @@ -297,7 +297,7 @@ void Parser::Coco() { if (sym->attrPos != NULL) SemErr(L"grammar symbol must not have attributes"); } - tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, L"???", 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); @@ -305,7 +305,13 @@ void Parser::Coco() { wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); - if (tab->GrammarOk()) { + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab->GrammarCheckAll(); + } + else doGenCode = tab->GrammarOk(); + if (doGenCode) { wprintf(L"parser"); pgen->WriteParser(); if (genScanner) { @@ -368,7 +374,7 @@ void Parser::TokenDecl(int typ) { sym = tab->FindSym(name); if (sym != NULL) SemErr(L"name declared twice"); else { - sym = tab->NewSym(typ, name, t->line); + sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } coco_string_delete(name); @@ -698,7 +704,7 @@ void Parser::Term(Graph* &g) { #endif if (StartOf(17)) { if (la->kind == 38 /* "IF" */) { - rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); + rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); Resolver(rslv->pos); g = new Graph(rslv); } @@ -710,10 +716,10 @@ void Parser::Term(Graph* &g) { tab->MakeSequence(g, g2); delete g2; } } else if (StartOf(19)) { - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); } else SynErr(49); if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(Node::eps, 
(Symbol*)NULL, 0, 0)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -762,9 +768,9 @@ void Parser::Factor(Graph* &g) { bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0); // forward nt + sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line); + sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(L"undefined string in production"); @@ -779,7 +785,7 @@ void Parser::Factor(Graph* &g) { if (typ == Node::t) typ = Node::wt; else SemErr(L"only terminals may be weak"); } - Node *p = tab->NewNode(typ, sym, t->line); + Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { @@ -833,7 +839,7 @@ void Parser::Factor(Graph* &g) { } case 40 /* "(." */: { SemText(pos); - Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); + Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); p->pos = pos; g = new Graph(p); @@ -844,7 +850,7 @@ void Parser::Factor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys + Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0, 0); // p.set is set in tab->SetupAnys g = new Graph(p); break; @@ -854,7 +860,7 @@ void Parser::Factor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); + Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0, 0); g = new Graph(p); break; @@ -862,7 +868,7 @@ void Parser::Factor(Graph* &g) { default: SynErr(50); break; } if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -993,7 +999,7 @@ void 
Parser::TokenFactor(Graph* &g) { SemErr(L"undefined name"); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; + Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -1040,7 +1046,7 @@ void Parser::TokenFactor(Graph* &g) { tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else SynErr(52); if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif diff --git a/src/Parser.h b/src/Parser.h index 5110271..dc06aab 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -139,7 +139,7 @@ int id; DFA *dfa; ParserGen *pgen; - bool genScanner; + bool genScanner, ignoreGammarErrors; wchar_t* tokenString; // used in declarations of literal tokens wchar_t* noString; // used in declarations of literal tokens @@ -153,6 +153,7 @@ int id; str = 1; tokenString = NULL; noString = coco_string_create(L"-none-"); + ignoreGammarErrors = false; } // Uncomment this method if cleanup is necessary, diff --git a/src/Symbol.cpp b/src/Symbol.cpp index dfd05f4..ce3b682 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -38,7 +38,7 @@ const int Symbol::litToken = 2; const int Symbol::classLitToken = 3; -Symbol::Symbol(int typ, const wchar_t* name, int line) { +Symbol::Symbol(int typ, const wchar_t* name, int line, int col) { n = 0; graph = NULL; tokenKind = 0; @@ -53,6 +53,7 @@ Symbol::Symbol(int typ, const wchar_t* name, int line) { this->typ = typ; this->name = coco_string_create(name); this->line = line; + this->col = col; } Symbol::~Symbol() { diff --git a/src/Symbol.h b/src/Symbol.h index 01517ae..e6b5e16 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -56,12 +56,13 @@ class Symbol 
{ BitArray *follow; // nt: terminal followers BitArray *nts; // nt: nonterminals whose followers have to be added to this sym int line; // source text line number of item in this node + int col; // source text line column number of item in this node Position *attrPos; // nt: position of attributes in source text (or null) Position *semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) - Symbol(int typ, const wchar_t* name, int line); + Symbol(int typ, const wchar_t* name, int line, int col); virtual ~Symbol(); }; diff --git a/src/Tab.cpp b/src/Tab.cpp index 0db9417..8971c26 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -50,8 +50,8 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; errors = &parser->errors; - eofSy = NewSym(Node::t, L"EOF", 0); - dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); + eofSy = NewSym(Node::t, L"EOF", 0, 0); + dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); checkEOF = true; visited = allSyncSets = NULL; srcName = srcDir = nsName = frameDir = outDir = NULL; @@ -77,12 +77,12 @@ Tab::~Tab() { } -Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { +Symbol* Tab::NewSym(int typ, const wchar_t* name, int line, int col) { if (coco_string_length(name) == 2 && name[0] == '"') { parser->SemErr(L"empty token not allowed"); name = coco_string_create(L"???"); } - Symbol *sym = new Symbol(typ, name, line); + Symbol *sym = new Symbol(typ, name, line, col); if (typ == Node::t) { sym->n = terminals.Count; terminals.Add(sym); @@ -185,8 +185,8 @@ void Tab::PrintSet(const BitArray *s, int indent) { // Syntax graph management //--------------------------------------------------------------------- -Node* Tab::NewNode(int typ, Symbol *sym, int line) { - Node* node = new Node(typ, sym, line); +Node* Tab::NewNode(int typ, Symbol *sym, int line, int col) { + Node* node = new Node(typ, sym, line, col); node->n = nodes.Count; nodes.Add(node); return node; @@ 
-194,13 +194,13 @@ Node* Tab::NewNode(int typ, Symbol *sym, int line) { Node* Tab::NewNode(int typ, Node* sub) { - Node* node = NewNode(typ, (Symbol*)NULL, 0); + Node* node = NewNode(typ, (Symbol*)NULL, 0, 0); node->sub = sub; return node; } -Node* Tab::NewNode(int typ, int val, int line) { - Node* node = NewNode(typ, (Symbol*)NULL, line); +Node* Tab::NewNode(int typ, int val, int line, int col) { + Node* node = NewNode(typ, (Symbol*)NULL, line, col); node->val = val; return node; } @@ -266,7 +266,7 @@ void Tab::Finish(Graph *g) { void Tab::DeleteNodes() { for(int i=0; ir = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { - Node *p = NewNode(Node::chr, (int)s[i], 0); + Node *p = NewNode(Node::chr, (int)s[i], 0, 0); g->r->next = p; g->r = p; } g->l = dummyNode->next; dummyNode->next = NULL; @@ -906,6 +906,15 @@ bool Tab::GrammarOk() { return ok; } +bool Tab::GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) ++errors; + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; +} //--------------- check for circular productions ---------------------- diff --git a/src/Tab.h b/src/Tab.h index 178e3bc..6ea687a 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -98,7 +98,7 @@ class Tab { static const char* tKind[]; - Symbol* NewSym(int typ, const wchar_t* name, int line); + Symbol* NewSym(int typ, const wchar_t* name, int line, int col); Symbol* FindSym(const wchar_t* name); int Num(const Node *p); void PrintSym(const Symbol *sym); @@ -109,9 +109,9 @@ class Tab { // Syntax graph management //--------------------------------------------------------------------- - Node* NewNode(int typ, Symbol *sym, int line); + Node* NewNode(int typ, Symbol *sym, int line, int col); Node* NewNode(int typ, Node* sub); - Node* NewNode(int typ, int val, int line); + Node* NewNode(int typ, int val, int line, int col); void MakeFirstAlt(Graph *g); void MakeAlternative(Graph *g1, Graph 
*g2); void MakeSequence(Graph *g1, Graph *g2); @@ -185,6 +185,7 @@ class Tab { //--------------------------------------------------------------------- bool GrammarOk(); + bool GrammarCheckAll(); //--------------- check for circular productions ---------------------- From 6b258cacecd6874ee79df5e9c05fb228e1cda425 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 14:02:27 +0200 Subject: [PATCH 32/95] Small code reformat --- src/Coco.atg | 66 +++++++++++++++++++++++++------------------------- src/Parser.cpp | 24 +++++++++--------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 61634bd..1c94d95 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -249,20 +249,20 @@ SimSet (. int n1, n2; .) if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); .) | string (. - wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - wchar_t *name = tab->Unescape(subName2); - coco_string_delete(subName2); - wchar_t ch; - int len = coco_string_length(name); - for(int i=0; i < len; i++) { - ch = name[i]; - if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() - } - s->Set(ch); - } - coco_string_delete(name); - .) + wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + wchar_t *name = tab->Unescape(subName2); + coco_string_delete(subName2); + wchar_t ch; + int len = coco_string_length(name); + for(int i=0; i < len; i++) { + ch = name[i]; + if (dfa->ignoreCase) { + if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() + } + s->Set(ch); + } + coco_string_delete(name); + .) | Char (. s->Set(n1); .) [ ".." Char (. for (int i = n1; i <= n2; i++) s->Set(i); .) ] @@ -275,15 +275,15 @@ SimSet (. int n1, n2; .) Char = char (. 
n = 0; - wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - wchar_t* name = tab->Unescape(subName); - coco_string_delete(subName); - - // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ - if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(L"unacceptable character value"); - coco_string_delete(name); - if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; + wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + wchar_t* name = tab->Unescape(subName); + coco_string_delete(subName); + + // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ + if (coco_string_length(name) <= 1) n = name[0]; + else SemErr(L"unacceptable character value"); + coco_string_delete(name); + if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; .) . @@ -503,18 +503,18 @@ Sym ( ident (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .) | (string (. coco_string_delete(name); name = coco_string_create(t->val); .) | char (. - wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - coco_string_delete(name); - name = coco_string_create_append(L"\"", subName); - coco_string_delete(subName); - coco_string_merge(name, L"\""); - .) + wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + coco_string_delete(name); + name = coco_string_create_append(L"\"", subName); + coco_string_delete(subName); + coco_string_merge(name, L"\""); + .) ) (. kind = str; if (dfa->ignoreCase) { - wchar_t *oldName = name; - name = coco_string_create_lower(name); - coco_string_delete(oldName); - } + wchar_t *oldName = name; + name = coco_string_create_lower(name); + coco_string_delete(oldName); + } if (coco_string_indexof(name, ' ') >= 0) SemErr(L"literal tokens must not contain blanks"); .) 
) diff --git a/src/Parser.cpp b/src/Parser.cpp index 0a38356..e77f0bc 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -595,17 +595,17 @@ void Parser::SimSet(CharSet* &s) { wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t *name = tab->Unescape(subName2); coco_string_delete(subName2); - wchar_t ch; - int len = coco_string_length(name); - for(int i=0; i < len; i++) { - ch = name[i]; - if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() - } - s->Set(ch); - } + wchar_t ch; + int len = coco_string_length(name); + for(int i=0; i < len; i++) { + ch = name[i]; + if (dfa->ignoreCase) { + if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() + } + s->Set(ch); + } coco_string_delete(name); - + } else if (la->kind == _char) { Char(n1); s->Set(n1); @@ -677,10 +677,10 @@ void Parser::Sym(wchar_t* &name, int &kind) { AstAddTerminal(); #endif wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - coco_string_delete(name); + coco_string_delete(name); name = coco_string_create_append(L"\"", subName); coco_string_delete(subName); - coco_string_merge(name, L"\""); + coco_string_merge(name, L"\""); } kind = str; From f2e7af539ca6c96bd45f42ebcbbdb382a8cb1466 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 14:54:32 +0200 Subject: [PATCH 33/95] Replace constants for node kinds by enum --- src/Node.cpp | 19 ------------------ src/Node.h | 57 +++++++++++++++++++++++++++------------------------- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/src/Node.cpp b/src/Node.cpp index 919fcc6..7688cf0 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -31,25 +31,6 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { -// constants for node kinds -int Node::t = 1; // terminal symbol -int Node::pr = 2; // pragma -int Node::nt = 3; // nonterminal symbol -int Node::clas = 4; // character class -int Node::chr = 5; // character -int Node::wt = 6; // weak terminal symbol -int Node::any = 7; // -int Node::eps = 8; // empty -int Node::sync = 9; // synchronization symbol -int Node::sem = 10; // semantic action: (. .) -int Node::alt = 11; // alternative: | -int Node::iter = 12; // iteration: { } -int Node::opt = 13; // option: [ ] -int Node::rslv = 14; // resolver expr - -int Node::normalTrans = 0; // transition codes -int Node::contextTrans = 1; - Node::Node(int typ, Symbol *sym, int line, int col) { this->n = 0; diff --git a/src/Node.h b/src/Node.h index 4746f21..6367b3c 100644 --- a/src/Node.h +++ b/src/Node.h @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -42,23 +42,26 @@ class BitArray; class Node { public: // constants for node kinds - static int t; // terminal symbol - static int pr; // pragma - static int nt; // nonterminal symbol - static int clas; // character class - static int chr; // character - static int wt; // weak terminal symbol - static int any; // - static int eps; // empty - static int sync; // synchronization symbol - static int sem; // semantic action: (. .) - static int alt; // alternative: | - static int iter; // iteration: { } - static int opt; // option: [ ] - static int rslv; // resolver expr - - static int normalTrans; // transition codes - static int contextTrans; + enum { + t = 1, // terminal symbol + pr, // pragma + nt, // nonterminal symbol + clas, // character class + chr, // character + wt, // weak terminal symbol + any, // + eps, // empty + sync, // synchronization symbol + sem, // semantic action: (. .) 
+ alt, // alternative: | + iter, // iteration: { } + opt, // option: [ ] + rslv, // resolver expr + }; + enum { + normalTrans, // transition codes + contextTrans, + }; int n; // node number int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv @@ -81,7 +84,7 @@ class Node { Node(int typ, Symbol *sym, int line, int col); ~Node(); -}; +}; }; // namespace From 3b4c868e28983e8a697dfdce555621d52b0936f4 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 14:56:21 +0200 Subject: [PATCH 34/95] Initial implementation of a kind of TreeView for LL1 errors --- src/Tab.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++++++---- src/Tab.h | 5 ++-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 8971c26..796ef4a 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -982,7 +982,7 @@ bool Tab::NoCircularProductions() { //--------------- check for LL(1) errors ---------------------- void Tab::LL1Error(int cond, const Symbol *sym) { - wprintf(L" LL1 warning in %ls: ", curSy->name); + wprintf(L" LL1 warning in %ls:%d:%d: ", curSy->name, curSy->line, curSy->col); if (sym != NULL) wprintf(L"%ls is ", sym->name); switch (cond) { case 1: wprintf(L"start of several alternatives\n"); break; @@ -993,17 +993,65 @@ void Tab::LL1Error(int cond, const Symbol *sym) { } -void Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { +int Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { + int overlaped = 0; Symbol *sym; for (int i=0; in] && (*s2)[sym->n]) { LL1Error(cond, sym); + ++overlaped; } } + return overlaped; +} + +/* print the path for first set that contains token tok for the graph rooted at p */ +void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { + while (p != NULL) { + //if(p->sym) wprintf(L"%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); + switch (p->typ) { + case Node::nt: { + if (p->sym->firstReady) { + if(p->sym->first->Get(tok)) { + 
if(coco_string_length(indent) == 1) + wprintf(L"%ls=> %ls:%d:%d:\n", indent, p->sym->name, p->line, p->col); + wprintf(L"%ls-> %ls:%d:%d:\n", indent, p->sym->name, p->sym->line, p->sym->col); + if(p->sym->graph) { + wchar_t *new_indent = coco_string_create_append(indent, L" "); + PrintFirstPath(p->sym->graph, tok, new_indent); + coco_string_delete(new_indent); + } + return; + } + } + break; + } + case Node::t: case Node::wt: { + if(p->sym->n == tok) + wprintf(L"%ls= %ls:%d:%d:\n", indent, p->sym->name, p->line, p->col); + break; + } + case Node::any: { + break; + } + case Node::alt: { + PrintFirstPath(p->sub, tok, indent); + PrintFirstPath(p->down, tok, indent); + break; + } + case Node::iter: case Node::opt: { + PrintFirstPath(p->sub, tok, indent); + break; + } + } + if (!DelNode(p)) break; + p = p->next; + } } -void Tab::CheckAlts(Node *p) { +int Tab::CheckAlts(Node *p) { + int rc = 0; BitArray s0(terminals.Count), *s1, *s2; while (p != NULL) { if (p->typ == Node::alt) { @@ -1011,7 +1059,18 @@ void Tab::CheckAlts(Node *p) { s0.SetAll(false); while (q != NULL) { // for all alternatives s2 = Expected0(q->sub, curSy); - CheckOverlap(&s0, s2, 1); + int overlaped = CheckOverlap(&s0, s2, 1); + if(overlaped > 0) { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i=0; in) && s2->Get(sym->n)) {overlapToken = sym->n; break;} + } + //print(format("\t-> %s:%d: %d", first_overlap.sub.sym.name, first_overlap.sub.sym.line, overlaped)); + PrintFirstPath( p, overlapToken); + rc += overlaped; + } s0.Or(s2); delete s2; CheckAlts(q->sub); @@ -1022,7 +1081,18 @@ void Tab::CheckAlts(Node *p) { else { s1 = Expected0(p->sub, curSy); s2 = Expected(p->next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if(overlaped > 0) { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i=0; iGet(sym->n) && s2->Get(sym->n)) {overlapToken = sym->n; break;} + } + //print(format("\t=>:%d: %d", p.line, overlaped)); + 
PrintFirstPath(p, overlapToken); + rc += overlaped; + } delete s1; delete s2; } CheckAlts(p->sub); @@ -1033,6 +1103,7 @@ void Tab::CheckAlts(Node *p) { if (p->up) break; p = p->next; } + return rc; } void Tab::CheckLL1() { diff --git a/src/Tab.h b/src/Tab.h index 6ea687a..590e08f 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -204,8 +204,9 @@ class Tab { //--------------- check for LL(1) errors ---------------------- void LL1Error(int cond, const Symbol *sym); - void CheckOverlap(const BitArray *s1, const BitArray *s2, int cond); - void CheckAlts(Node *p); + int CheckOverlap(const BitArray *s1, const BitArray *s2, int cond); + void PrintFirstPath(const Node *p, int tok, const wchar_t *indent=L"\t"); + int CheckAlts(Node *p); void CheckLL1(); //------------- check if resolvers are legal -------------------- From a658cca2161b97a3e8a8310f55fc9ed897edaa80 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 20:17:21 +0200 Subject: [PATCH 35/95] Add the token names between comments in several places to make easier to read the generated code --- src/DFA.cpp | 4 +-- src/Parser.cpp | 38 ++++++++++++------------- src/ParserGen.cpp | 2 +- src/Scanner.cpp | 72 +++++++++++++++++++++++------------------------ 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index d6cd7af..abf7271 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -704,7 +704,7 @@ void DFA::WriteState(const State *state) { fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr); if (endOf != NULL && state->firstAction != NULL) { - fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d;\n", endOf->n); + fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d /* %ls */;\n", endOf->n, endOf->name); } bool ctxEnd = state->ctx; @@ -741,7 +741,7 @@ void DFA::WriteState(const State *state) { if (endOf == NULL) { fwprintf(gen, L"goto case_0;}\n"); } else { - fwprintf(gen, L"t->kind = %d; ", endOf->n); + fwprintf(gen, L"t->kind = %d /* %ls */; ", endOf->n, endOf->name); if (endOf->tokenKind == 
Symbol::classLitToken) { if (ignoreCase) { fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); diff --git a/src/Parser.cpp b/src/Parser.cpp index e77f0bc..2e5ebe9 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -125,7 +125,7 @@ void Parser::Coco() { Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create("Coco");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif int beg = la->pos; int line = la->line; - while (StartOf(1)) { + while (StartOf(1 /* any */)) { Get(); } if (la->pos != beg) { @@ -146,7 +146,7 @@ void Parser::Coco() { beg = la->pos; line = la->line; - while (StartOf(2)) { + while (StartOf(2 /* any */)) { Get(); } tab->semDeclPos = new Position(beg, la->pos, 0, line); @@ -380,7 +380,7 @@ void Parser::TokenDecl(int typ) { coco_string_delete(name); coco_string_delete(tokenString); - while (!(StartOf(5))) {SynErr(44); Get();} + while (!(StartOf(5 /* sync */))) {SynErr(44); Get();} if (la->kind == 18 /* "=" */) { Get(); #ifdef PARSER_WITH_AST @@ -403,7 +403,7 @@ void Parser::TokenDecl(int typ) { } delete g; - } else if (StartOf(6)) { + } else if (StartOf(6 /* sem */)) { if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); @@ -473,8 +473,8 @@ void Parser::AttrDecl(Symbol *sym) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(9)) { - if (StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); @@ -496,8 +496,8 @@ void Parser::AttrDecl(Symbol *sym) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); @@ -528,8 +528,8 @@ void Parser::SemText(Position* &pos) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = t->line; - 
while (StartOf(13)) { - if (StartOf(14)) { + while (StartOf(13 /* alt */)) { + if (StartOf(14 /* any */)) { Get(); } else if (la->kind == _badString) { Get(); @@ -702,7 +702,7 @@ void Parser::Term(Graph* &g) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, "Term", la->line); #endif - if (StartOf(17)) { + if (StartOf(17 /* opt */)) { if (la->kind == 38 /* "IF" */) { rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); Resolver(rslv->pos); @@ -711,11 +711,11 @@ void Parser::Term(Graph* &g) { Factor(g2); if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; - while (StartOf(18)) { + while (StartOf(18 /* nt */)) { Factor(g2); tab->MakeSequence(g, g2); delete g2; } - } else if (StartOf(19)) { + } else if (StartOf(19 /* sem */)) { g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); } else SynErr(49); if (g == NULL) // invalid start of Term @@ -885,8 +885,8 @@ void Parser::Attribs(Node *p) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(9)) { - if (StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); @@ -907,8 +907,8 @@ void Parser::Attribs(Node *p) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); @@ -933,7 +933,7 @@ void Parser::Condition() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, "Condition", la->line); #endif - while (StartOf(20)) { + while (StartOf(20 /* alt */)) { if (la->kind == 31 /* "(" */) { Get(); #ifdef PARSER_WITH_AST @@ -959,7 +959,7 @@ void Parser::TokenTerm(Graph* &g) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, "TokenTerm", la->line); #endif TokenFactor(g); - while (StartOf(8)) { + while (StartOf(8 /* nt */)) { TokenFactor(g2); tab->MakeSequence(g, g2); 
delete g2; } diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 5011ca1..a63b79f 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -168,7 +168,7 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { } } } else - fwprintf(gen, L"StartOf(%d)", NewCondSet(s)); + fwprintf(gen, L"StartOf(%d /* %s */)", NewCondSet(s), (tab->nTyp[p->typ])); } } diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 5c9008a..a1ea891 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -709,20 +709,20 @@ Token* Scanner::NextToken() { } // NextCh already done case 1: case_1: - recEnd = pos; recKind = 1; + recEnd = pos; recKind = 1 /* ident */; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} - else {t->kind = 1; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} case 2: case_2: - recEnd = pos; recKind = 2; + recEnd = pos; recKind = 2 /* number */; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;} - else {t->kind = 2; loopState = false; break;} + else {t->kind = 2 /* number */; loopState = false; break;} case 3: case_3: - {t->kind = 3; loopState = false; break;} + {t->kind = 3 /* string */; loopState = false; break;} case 4: case_4: - {t->kind = 4; loopState = false; break;} + {t->kind = 4 /* badString */; loopState = false; break;} case 5: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} @@ -742,17 +742,17 @@ Token* Scanner::NextToken() { else {goto case_0;} case 9: case_9: - {t->kind = 5; loopState = false; break;} + {t->kind = 5 /* char */; loopState = false; break;} case 10: case_10: - recEnd = pos; recKind = 43; + recEnd = pos; recKind = 43 /* ddtSym */; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' 
&& ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} - else {t->kind = 43; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 11: case_11: - recEnd = pos; recKind = 44; + recEnd = pos; recKind = 44 /* optionSym */; if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} - else {t->kind = 44; loopState = false; break;} + else {t->kind = 44 /* optionSym */; loopState = false; break;} case 12: case_12: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} @@ -761,70 +761,70 @@ Token* Scanner::NextToken() { else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: - recEnd = pos; recKind = 43; + recEnd = pos; recKind = 43 /* ddtSym */; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else {t->kind = 43; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 14: case_14: if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: - recEnd = pos; recKind = 43; + recEnd = pos; recKind = 43 /* ddtSym */; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} else if (ch == L'=') {AddCh(); goto case_11;} - else {t->kind = 43; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 16: - {t->kind = 18; loopState = false; break;} + {t->kind = 18 /* "=" */; loopState = false; break;} case 17: - {t->kind = 21; loopState = false; break;} + {t->kind = 21 /* "+" */; loopState = false; break;} case 18: - {t->kind = 22; loopState = false; break;} + {t->kind = 22 /* 
"-" */; loopState = false; break;} case 19: case_19: - {t->kind = 23; loopState = false; break;} + {t->kind = 23 /* ".." */; loopState = false; break;} case 20: - {t->kind = 26; loopState = false; break;} + {t->kind = 26 /* ">" */; loopState = false; break;} case 21: case_21: - {t->kind = 27; loopState = false; break;} + {t->kind = 27 /* "<." */; loopState = false; break;} case 22: case_22: - {t->kind = 28; loopState = false; break;} + {t->kind = 28 /* ".>" */; loopState = false; break;} case 23: - {t->kind = 29; loopState = false; break;} + {t->kind = 29 /* "|" */; loopState = false; break;} case 24: - {t->kind = 32; loopState = false; break;} + {t->kind = 32 /* ")" */; loopState = false; break;} case 25: - {t->kind = 33; loopState = false; break;} + {t->kind = 33 /* "[" */; loopState = false; break;} case 26: - {t->kind = 34; loopState = false; break;} + {t->kind = 34 /* "]" */; loopState = false; break;} case 27: - {t->kind = 35; loopState = false; break;} + {t->kind = 35 /* "{" */; loopState = false; break;} case 28: - {t->kind = 36; loopState = false; break;} + {t->kind = 36 /* "}" */; loopState = false; break;} case 29: case_29: - {t->kind = 40; loopState = false; break;} + {t->kind = 40 /* "(." */; loopState = false; break;} case 30: case_30: - {t->kind = 41; loopState = false; break;} + {t->kind = 41 /* ".)" */; loopState = false; break;} case 31: - recEnd = pos; recKind = 19; + recEnd = pos; recKind = 19 /* "." */; if (ch == L'.') {AddCh(); goto case_19;} else if (ch == L'>') {AddCh(); goto case_22;} else if (ch == L')') {AddCh(); goto case_30;} - else {t->kind = 19; loopState = false; break;} + else {t->kind = 19 /* "." 
*/; loopState = false; break;} case 32: - recEnd = pos; recKind = 25; + recEnd = pos; recKind = 25 /* "<" */; if (ch == L'.') {AddCh(); goto case_21;} - else {t->kind = 25; loopState = false; break;} + else {t->kind = 25 /* "<" */; loopState = false; break;} case 33: - recEnd = pos; recKind = 31; + recEnd = pos; recKind = 31 /* "(" */; if (ch == L'.') {AddCh(); goto case_29;} - else {t->kind = 31; loopState = false; break;} + else {t->kind = 31 /* "(" */; loopState = false; break;} } } From d13715ba212733407e2e23fd5296806bc4eb2d4a Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 4 Jun 2021 23:11:21 +0200 Subject: [PATCH 36/95] Start the refactoring to allow compile with and without wchar_t --- src/Action.cpp | 20 +-- src/Coco.atg | 76 ++++++------ src/Coco.cpp | 60 ++++----- src/DFA.cpp | 276 +++++++++++++++++++++--------------------- src/Generator.cpp | 26 ++-- src/Parser.cpp | 208 +++++++++++++++---------------- src/Parser.frame | 28 ++--- src/Parser.h | 2 +- src/ParserGen.cpp | 232 +++++++++++++++++------------------ src/Scanner.cpp | 118 +++++++++--------- src/Scanner.frame | 65 +++++++--- src/Scanner.h | 37 +++++- src/StringBuilder.cpp | 2 +- src/Tab.cpp | 266 ++++++++++++++++++++-------------------- 14 files changed, 743 insertions(+), 673 deletions(-) diff --git a/src/Action.cpp b/src/Action.cpp index 9c44edd..b2b7806 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ @@ -80,7 +80,7 @@ bool Action::ShiftWith(CharSet *s, Tab *tab) { //return true if it used the Char } else { CharClass *c = tab->FindCharClass(s); if (c == NULL) { - c = tab->NewCharClass(L"#", s); // class with dummy name + c = tab->NewCharClass(STRL("#"), s); // class with dummy name rc = true; } typ = Node::clas; sym = c->n; diff --git a/src/Coco.atg b/src/Coco.atg index 1c94d95..6e2b6c0 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -60,7 +60,7 @@ COMPILER Coco id = 0; str = 1; tokenString = NULL; - noString = coco_string_create(L"-none-"); + noString = coco_string_create(STRL("-none-")); ignoreGammarErrors = false; } @@ -127,7 +127,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra { ANY } (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .) [ "IGNORECASE" (. dfa->ignoreCase = true; .) ] /* pdt */ [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(STRL("name declared twice")); else { sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -154,8 +154,8 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(L"name declared twice"); - } else SemErr(L"this symbol kind not allowed on left side of production"); + if (sym->graph != NULL) SemErr(STRL("name declared twice")); + } else SemErr(STRL("this symbol kind not allowed on left side of production")); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); @@ -163,7 +163,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra .) [ AttrDecl ] (. if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(STRL("attribute mismatch between declaration and use of this symbol")); .) 
[ SemText<.sym->semPos.> ] WEAK '=' @@ -175,22 +175,22 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra '.' } "END" ident (. if (!coco_string_equal(gramName, t->val)) - SemErr(L"name does not match grammar name"); + SemErr(STRL("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(L"missing production for grammar name"); + SemErr(STRL("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(L"grammar symbol must not have attributes"); + SemErr(STRL("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, L"???", 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, STRL("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors.count == 0) { - wprintf(L"checking\n"); + wprintf(STRL("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); bool doGenCode = false; @@ -200,14 +200,14 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra } else doGenCode = tab->GrammarOk(); if (doGenCode) { - wprintf(L"parser"); + wprintf(STRL("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(L" + scanner"); + wprintf(STRL(" + scanner")); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(L" generated\n"); + wprintf(STRL(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } @@ -222,9 +222,9 @@ SetDecl (. CharSet *s; .) = ident (. wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(L"name declared twice"); + if (c != NULL) SemErr(STRL("name declared twice")); .) - '=' Set (. if (s->Elements() == 0) SemErr(L"character set must not be empty"); + '=' Set (. if (s->Elements() == 0) SemErr(STRL("character set must not be empty")); tab->NewCharClass(name, s); coco_string_delete(name); .) @@ -246,7 +246,7 @@ Set (. CharSet *s2; .) 
SimSet (. int n1, n2; .) = (. s = new CharSet(); .) ( ident (. CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); + if (c == NULL) SemErr(STRL("undefined name")); else s->Or(c->set); .) | string (. wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); @@ -257,7 +257,7 @@ SimSet (. int n1, n2; .) for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() + if ((CHL('A') <= ch) && (ch <= CHL('Z'))) ch = ch - (CHL('A') - CHL('a')); // ch.ToLower() } s->Set(ch); } @@ -281,7 +281,7 @@ Char // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(L"unacceptable character value"); + else SemErr(STRL("unacceptable character value")); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; .) @@ -292,7 +292,7 @@ Char TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .) = Sym (. sym = tab->FindSym(name); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(STRL("name declared twice")); else { sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -301,13 +301,13 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; coco_string_delete(tokenString); .) SYNC - ( '=' TokenExpr '.' (. if (kind == str) SemErr(L"a literal must not be declared with a structure"); + ( '=' TokenExpr '.' (. 
if (kind == str) SemErr(STRL("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if (tab->literals[tokenString] != NULL) - SemErr(L"token string declared twice"); + SemErr(STRL("token string declared twice")); tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } @@ -317,7 +317,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(L"Warning semantic action on token declarations require a custom Scanner"); .) //(. if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .) + [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(STRL("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(STRL("semantic action not allowed here")); .) ] . @@ -327,13 +327,13 @@ AttrDecl = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(STRL("bad string in attributes")); .) } '>' (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(STRL("bad string in attributes")); .) } ".>" (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) @@ -387,26 +387,26 @@ Factor (. 
wchar_t* name = NULL; int kind; Position *pos; sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(L"undefined string in production"); + SemErr(STRL("undefined string in production")); sym = tab->eofSy; // dummy } } coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) - SemErr(L"this symbol kind is not allowed in a production"); + SemErr(STRL("this symbol kind is not allowed in a production")); if (weak) { if (typ == Node::t) typ = Node::wt; - else SemErr(L"only terminals may be weak"); + else SemErr(STRL("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); .) - [ Attribs

(. if (kind != id) SemErr(L"a literal must not have attributes"); .) + [ Attribs

(. if (kind != id) SemErr(STRL("a literal must not have attributes")); .) ] (. if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(STRL("attribute mismatch between declaration and use of this symbol")); .) | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) @@ -473,7 +473,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) ( Sym (. if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(L"undefined name"); + SemErr(STRL("undefined name")); c = tab->NewCharClass(name, new CharSet()); } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; @@ -499,15 +499,15 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) /*------------------------------------------------------------------------------------*/ Sym -= (. name = coco_string_create(L"???"); kind = id; .) += (. name = coco_string_create(STRL("???")); kind = id; .) ( ident (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .) | (string (. coco_string_delete(name); name = coco_string_create(t->val); .) | char (. wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); - name = coco_string_create_append(L"\"", subName); + name = coco_string_create_append(STRL("\""), subName); coco_string_delete(subName); - coco_string_merge(name, L"\""); + coco_string_merge(name, STRL("\"")); .) ) (. kind = str; if (dfa->ignoreCase) { @@ -516,7 +516,7 @@ Sym coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) - SemErr(L"literal tokens must not contain blanks"); .) + SemErr(STRL("literal tokens must not contain blanks")); .) ) . @@ -526,12 +526,12 @@ Attribs = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(STRL("bad string in attributes")); .) 
} '>' (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(STRL("bad string in attributes")); .) } ".>" (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) . @@ -542,8 +542,8 @@ SemText = "(." (. int beg = la->pos; int col = la->col; int line = t->line; .) { ANY - | badString (. SemErr(L"bad string in semantic action"); .) - | "(." (. SemErr(L"missing end of previous semantic action"); .) + | badString (. SemErr(STRL("bad string in semantic action")); .) + | "(." (. SemErr(STRL("missing end of previous semantic action")); .) } ".)" (. pos = new Position(beg, t->pos, col, line); .) . diff --git a/src/Coco.cpp b/src/Coco.cpp index bb232c6..94a25af 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -61,7 +61,7 @@ int main(int argc, char *argv_[]) { #error unknown compiler! #endif - wprintf(L"Coco/R (Dec 01, 2018)\n"); + wprintf(STRL("Coco/R (Dec 01, 2018)\n")); wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; @@ -69,12 +69,12 @@ int main(int argc, char *argv_[]) { bool emitLines = false, ignoreGammarErrors = false; for (int i = 1; i < argc; i++) { - if (coco_string_equal(argv[i], L"-namespace") && i < argc - 1) nsName = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-frames") && i < argc - 1) frameDir = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-trace") && i < argc - 1) ddtString = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-o") && i < argc - 1) outDir = coco_string_create_append(argv[++i], L"/"); - else if (coco_string_equal(argv[i], L"-lines")) emitLines = true; - else if (coco_string_equal(argv[i], L"-ignoreGammarErrors")) ignoreGammarErrors = true; + if (coco_string_equal(argv[i], STRL("-namespace")) && i < argc - 1) 
nsName = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], STRL("-frames")) && i < argc - 1) frameDir = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], STRL("-trace")) && i < argc - 1) ddtString = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], STRL("-o")) && i < argc - 1) outDir = coco_string_create_append(argv[++i], STRL("/")); + else if (coco_string_equal(argv[i], STRL("-lines"))) emitLines = true; + else if (coco_string_equal(argv[i], STRL("-ignoreGammarErrors"))) ignoreGammarErrors = true; else srcName = coco_string_create(argv[i]); } @@ -94,11 +94,11 @@ int main(int argc, char *argv_[]) { Coco::Scanner scanner(file); Coco::Parser parser(&scanner); - traceFileName = coco_string_create_append(srcDir, L"trace.txt"); + traceFileName = coco_string_create_append(srcDir, STRL("trace.txt")); chTrFileName = coco_string_create_char(traceFileName); if ((parser.trace = fopen(chTrFileName, "w")) == NULL) { - wprintf(L"-- could not open %hs\n", chTrFileName); + wprintf(STRL("-- could not open %hs\n"), chTrFileName); exit(1); } @@ -130,37 +130,37 @@ int main(int argc, char *argv_[]) { if (fileSize == 0) { remove(chTrFileName); } else { - wprintf(L"trace output is in %hs\n", chTrFileName); + wprintf(STRL("trace output is in %hs\n"), chTrFileName); } coco_string_delete(file); coco_string_delete(srcDir); - wprintf(L"%d errors detected\n", parser.errors.count); + wprintf(STRL("%d errors detected\n"), parser.errors.count); if (parser.errors.count != 0) { exit(1); } } else { - wprintf(L"Usage: Coco Grammar.ATG {Option}\n"); - wprintf(L"Options:\n"); - wprintf(L" -namespace \n"); - wprintf(L" -frames \n"); - wprintf(L" -trace \n"); - wprintf(L" -o \n"); - wprintf(L" -lines\n"); - wprintf(L" -ignoreGammarErrors\n"); - wprintf(L"Valid characters in the trace string:\n"); - wprintf(L" A trace automaton\n"); - wprintf(L" F list first/follow sets\n"); - wprintf(L" G print syntax graph\n"); - wprintf(L" I trace computation of 
first sets\n"); - wprintf(L" J list ANY and SYNC sets\n"); - wprintf(L" P print statistics\n"); - wprintf(L" S list symbol table\n"); - wprintf(L" X list cross reference table\n"); - wprintf(L"Scanner.frame and Parser.frame files needed in ATG directory\n"); - wprintf(L"or in a directory specified in the -frames option.\n"); + wprintf(STRL("Usage: Coco Grammar.ATG {Option}\n")); + wprintf(STRL("Options:\n")); + wprintf(STRL(" -namespace \n")); + wprintf(STRL(" -frames \n")); + wprintf(STRL(" -trace \n")); + wprintf(STRL(" -o \n")); + wprintf(STRL(" -lines\n")); + wprintf(STRL(" -ignoreGammarErrors\n")); + wprintf(STRL("Valid characters in the trace string:\n")); + wprintf(STRL(" A trace automaton\n")); + wprintf(STRL(" F list first/follow sets\n")); + wprintf(STRL(" G print syntax graph\n")); + wprintf(STRL(" I trace computation of first sets\n")); + wprintf(STRL(" J list ANY and SYNC sets\n")); + wprintf(STRL(" P print statistics\n")); + wprintf(STRL(" S list symbol table\n")); + wprintf(STRL(" X list cross reference table\n")); + wprintf(STRL("Scanner.frame and Parser.frame files needed in ATG directory\n")); + wprintf(STRL("or in a directory specified in the -frames option.\n")); } coco_string_delete(srcName); diff --git a/src/DFA.cpp b/src/DFA.cpp index abf7271..c87a919 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -43,17 +43,17 @@ typedef wchar_t wchar_t_20[20]; //---------- Output primitives static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { - if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') - coco_swprintf(format, 10, L"%d\0", (int) ch); + if (ch < CHL(' ') || ch >= 127 || ch == CHL('\'') || ch == CHL('\\')) + coco_swprintf(format, 10, STRL("%d\0"), (int) ch); else - coco_swprintf(format, 10, L"L'%lc'\0", (int) ch); + coco_swprintf(format, 10, STRL("CHL('%lc')\0"), (int) ch); return format; } static wchar_t* DFAChCond(wchar_t ch, wchar_t_20 &format) { wchar_t_10 fmt; wchar_t* res = DFACh(ch, fmt); - coco_swprintf(format, 20, L"ch == %ls\0", 
res); + coco_swprintf(format, 20, STRL("ch == %ls\0"), res); return format; } @@ -62,16 +62,16 @@ void DFA::PutRange(CharSet *s) { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { wchar_t *from = DFACh((wchar_t) r->from, fmt1); - fwprintf(gen, L"ch == %ls", from); + fwprintf(gen, STRL("ch == %ls"), from); } else if (r->from == 0) { wchar_t *to = DFACh((wchar_t) r->to, fmt1); - fwprintf(gen, L"ch <= %ls", to); + fwprintf(gen, STRL("ch <= %ls"), to); } else { wchar_t *from = DFACh((wchar_t) r->from, fmt1); wchar_t *to = DFACh((wchar_t) r->to, fmt2); - fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to); + fwprintf(gen, STRL("(ch >= %ls && ch <= %ls)"), from, to); } - if (r->next != NULL) fwprintf(gen, L" || "); + if (r->next != NULL) fwprintf(gen, STRL(" || ")); } } @@ -161,7 +161,7 @@ void DFA::Step(State *from, const Node *p, BitArray *stepped) { Step(from, p->sub, stepped); Step(from, p->down, stepped); } else if (p->typ == Node::iter) { if (tab->DelSubGraph(p->sub)) { - parser->SemErr(L"contents of {...} must not be deletable"); + parser->SemErr(STRL("contents of {...} must not be deletable")); return; } if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); @@ -227,7 +227,7 @@ void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { void DFA::ConvertToStates(Node *p, Symbol *sym) { curGraph = p; curSy = sym; if (tab->DelGraph(curGraph)) { - parser->SemErr(L"token might be empty"); + parser->SemErr(STRL("token might be empty")); return; } NumberNodes(curGraph, firstState, true); @@ -269,7 +269,7 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off wchar_t format[200]; - coco_swprintf(format, 200, L"tokens %ls and %ls cannot be distinguished", sym->name, matchedSym->name); + coco_swprintf(format, 
200, STRL("tokens %ls and %ls cannot be distinguished"), sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken matchedSym->tokenKind = Symbol::classLitToken; @@ -373,31 +373,31 @@ void DFA::MakeDeterministic() { } void DFA::PrintStates() { - fwprintf(trace, L"\n"); - fwprintf(trace, L"---------- states ----------\n"); + fwprintf(trace, STRL("\n")); + fwprintf(trace, STRL("---------- states ----------\n")); wchar_t_10 fmt; for (State *state = firstState; state != NULL; state = state->next) { bool first = true; - if (state->endOf == NULL) fwprintf(trace, L" "); + if (state->endOf == NULL) fwprintf(trace, STRL(" ")); else { wchar_t *paddedName = tab->Name(state->endOf->name); - fwprintf(trace, L"E(%12s)", paddedName); + fwprintf(trace, STRL("E(%12s)"), paddedName); coco_string_delete(paddedName); } - fwprintf(trace, L"%3d:", state->nr); - if (state->firstAction == NULL) fwprintf(trace, L"\n"); + fwprintf(trace, STRL("%3d:"), state->nr); + if (state->firstAction == NULL) fwprintf(trace, STRL("\n")); for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); + if (first) {fwprintf(trace, STRL(" ")); first = false;} else fwprintf(trace, STRL(" ")); - if (action->typ == Node::clas) fwprintf(trace, L"%ls", tab->classes[action->sym]->name); - else fwprintf(trace, L"%3s", DFACh((wchar_t)action->sym, fmt)); + if (action->typ == Node::clas) fwprintf(trace, STRL("%ls"), tab->classes[action->sym]->name); + else fwprintf(trace, STRL("%3s"), DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { - fwprintf(trace, L"%3d", targ->state->nr); + fwprintf(trace, STRL("%3d"), targ->state->nr); } - if (action->tc == Node::contextTrans) fwprintf(trace, L" context\n"); else fwprintf(trace, L"\n"); + if (action->tc == Node::contextTrans) fwprintf(trace, STRL(" context\n")); else 
fwprintf(trace, STRL("\n")); } } - fwprintf(trace, L"\n---------- character classes ----------\n"); + fwprintf(trace, STRL("\n---------- character classes ----------\n")); tab->WriteCharClasses(); } @@ -427,7 +427,7 @@ void DFA::GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, b endOf = t->state->endOf; } else { - wprintf(L"Tokens %ls and %ls cannot be distinguished\n", endOf->name, t->state->endOf->name); + wprintf(STRL("Tokens %ls and %ls cannot be distinguished\n"), endOf->name, t->state->endOf->name); errors->count++; } } @@ -483,15 +483,15 @@ wchar_t* DFA::CommentStr(const Node *p) { s.Append((wchar_t)p->val); } else if (p->typ == Node::clas) { CharSet *set = tab->CharClassSet(p->val); - if (set->Elements() != 1) parser->SemErr(L"character set contains more than 1 character"); + if (set->Elements() != 1) parser->SemErr(STRL("character set contains more than 1 character")); s.Append((wchar_t) set->First()); } - else parser->SemErr(L"comment delimiters may not be structured"); + else parser->SemErr(STRL("comment delimiters may not be structured")); p = p->next; } if (s.GetLength() == 0 || s.GetLength() > 8) { - parser->SemErr(L"comment delimiters must be 1 or 8 characters long"); - s = StringBuilder(L"?"); + parser->SemErr(STRL("comment delimiters must be 1 or 8 characters long")); + s = StringBuilder(STRL("?")); } return s.ToString(); } @@ -506,93 +506,93 @@ void DFA::NewComment(const Node *from, const Node *to, bool nested) { //------------------------ scanner generation ---------------------- void DFA::GenCommentIndented(int n, const wchar_t *s) { - for(int i= 1; i < n; ++i) fwprintf(gen, L"\t"); + for(int i= 1; i < n; ++i) fwprintf(gen, STRL("\t")); fwprintf(gen, s); } void DFA::GenComBody(const Comment *com) { int imax = coco_string_length(com->start)-1; int imaxStop = coco_string_length(com->stop)-1; - GenCommentIndented(imax, L"\t\tfor(;;) {\n"); + GenCommentIndented(imax, STRL("\t\tfor(;;) {\n")); wchar_t_20 fmt; wchar_t* res = 
DFAChCond(com->stop[0], fmt); - GenCommentIndented(imax, L"\t\t\tif ("); - fwprintf(gen, L"%ls) {\n", res); + GenCommentIndented(imax, STRL("\t\t\tif (")); + fwprintf(gen, STRL("%ls) {\n"), res); if (imaxStop == 0) { - fwprintf(gen, L"\t\t\t\tlevel--;\n"); - fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); - fwprintf(gen, L"\t\t\t\tNextCh();\n"); + fwprintf(gen, STRL("\t\t\t\tlevel--;\n")); + fwprintf(gen, STRL("\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n")); + fwprintf(gen, STRL("\t\t\t\tNextCh();\n")); } else { int currIndent, indent = imax - 1; for(int sidx = 1; sidx <= imaxStop; ++sidx) { currIndent = indent + sidx; - GenCommentIndented(currIndent, L"\t\t\t\tNextCh();\n"); - GenCommentIndented(currIndent, L"\t\t\t\tif ("); - fwprintf(gen, L"%ls) {\n", DFAChCond(com->stop[sidx], fmt)); + GenCommentIndented(currIndent, STRL("\t\t\t\tNextCh();\n")); + GenCommentIndented(currIndent, STRL("\t\t\t\tif (")); + fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->stop[sidx], fmt)); } currIndent = indent + imax; - GenCommentIndented(currIndent, L"\t\t\tlevel--;\n"); - GenCommentIndented(currIndent, L"\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n"); - GenCommentIndented(currIndent, L"\t\t\tNextCh();\n"); + GenCommentIndented(currIndent, STRL("\t\t\tlevel--;\n")); + GenCommentIndented(currIndent, STRL("\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n")); + GenCommentIndented(currIndent, STRL("\t\t\tNextCh();\n")); for(int sidx = imaxStop; sidx > 0; --sidx) { - GenCommentIndented(indent + sidx, L"\t\t\t\t}\n"); + GenCommentIndented(indent + sidx, STRL("\t\t\t\t}\n")); } } if (com->nested) { - GenCommentIndented(imax, L"\t\t\t}"); + GenCommentIndented(imax, STRL("\t\t\t}")); wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, L" else if (%ls) {\n", res); + fwprintf(gen, STRL(" else if (%ls) {\n"), res); if (imaxStop == 0) - 
fwprintf(gen, L"\t\t\tlevel++; NextCh();\n"); + fwprintf(gen, STRL("\t\t\tlevel++; NextCh();\n")); else { int indent = imax - 1; for(int sidx = 1; sidx <= imax; ++sidx) { int loopIndent = indent + sidx; - GenCommentIndented(loopIndent, L"\t\t\t\tNextCh();\n"); - GenCommentIndented(loopIndent, L"\t\t\t\tif ("); - fwprintf(gen, L"%ls) {\n", DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(loopIndent, STRL("\t\t\t\tNextCh();\n")); + GenCommentIndented(loopIndent, STRL("\t\t\t\tif (")); + fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); } - GenCommentIndented(indent + imax, L"\t\t\t\t\tlevel++; NextCh();\n"); + GenCommentIndented(indent + imax, STRL("\t\t\t\t\tlevel++; NextCh();\n")); for(int sidx = imax; sidx > 0; --sidx) { - GenCommentIndented(indent + sidx, L"\t\t\t\t}\n"); + GenCommentIndented(indent + sidx, STRL("\t\t\t\t}\n")); } } } - GenCommentIndented(imax, L"\t\t\t} else if (ch == buffer->EoF) return false;\n"); - GenCommentIndented(imax, L"\t\t\telse NextCh();\n"); - GenCommentIndented(imax, L"\t\t}\n"); + GenCommentIndented(imax, STRL("\t\t\t} else if (ch == buffer->EoF) return false;\n")); + GenCommentIndented(imax, STRL("\t\t\telse NextCh();\n")); + GenCommentIndented(imax, STRL("\t\t}\n")); } void DFA::GenCommentHeader(const Comment *com, int i) { - fwprintf(gen, L"\tbool Comment%d();\n", i); + fwprintf(gen, STRL("\tbool Comment%d();\n"), i); } void DFA::GenComment(const Comment *com, int i) { - fwprintf(gen, L"\n"); - fwprintf(gen, L"bool Scanner::Comment%d() ", i); - fwprintf(gen, L"{\n"); - fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n"); + fwprintf(gen, STRL("\n")); + fwprintf(gen, STRL("bool Scanner::Comment%d() "), i); + fwprintf(gen, STRL("{\n")); + fwprintf(gen, STRL("\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n")); wchar_t_20 fmt; - fwprintf(gen, L"\tNextCh();\n"); + fwprintf(gen, STRL("\tNextCh();\n")); int imax = 
coco_string_length(com->start)-1; if (imax == 0) { GenComBody(com); } else { for(int sidx = 1; sidx <= imax; ++sidx) { - GenCommentIndented(sidx, L"\tif ("); - fwprintf(gen, L"%ls) {\n", DFAChCond(com->start[sidx], fmt)); - GenCommentIndented(sidx, L"\t\tNextCh();\n"); + GenCommentIndented(sidx, STRL("\tif (")); + fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(sidx, STRL("\t\tNextCh();\n")); } GenComBody(com); for(int sidx = imax; sidx > 0; --sidx) { - GenCommentIndented(sidx, L"\t}\n"); + GenCommentIndented(sidx, STRL("\t}\n")); } - fwprintf(gen, L"\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n"); - fwprintf(gen, L"\treturn false;\n"); + fwprintf(gen, STRL("\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n")); + fwprintf(gen, STRL("\treturn false;\n")); } - fwprintf(gen, L"}\n"); + fwprintf(gen, STRL("}\n")); } const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in Tab.literals @@ -628,13 +628,13 @@ void DFA::GenLiterals () { } // sym.name stores literals with quotes, e.g. "\"Literal\"" - fwprintf(gen, L"\tkeywords.set(L"); + fwprintf(gen, STRL("\tkeywords.set(STRL(")); // write keyword, escape non printable characters - for (int k = 0; name[k] != L'\0'; k++) { + for (int k = 0; name[k] != CHL('\0'); k++) { wchar_t c = name[k]; - fwprintf(gen, (c >= 32 && c <= 127) ? L"%lc" : L"\\x%04x", c); + fwprintf(gen, (c >= 32 && c <= 127) ? 
STRL("%lc") : STRL("\\x%04x"), c); } - fwprintf(gen, L", %d);\n", sym->n); + fwprintf(gen, STRL("), %d);\n"), sym->n); coco_string_delete(name); } @@ -653,7 +653,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"namespace %ls {\n", curNs); + fwprintf(gen, STRL("namespace %ls {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -666,7 +666,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { void DFA::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, L"} // namespace\n"); + fwprintf(gen, STRL("} // namespace\n")); } } @@ -699,59 +699,59 @@ void DFA::CopySourcePart (const Position *pos, int indent) { void DFA::WriteState(const State *state) { Symbol *endOf = state->endOf; - fwprintf(gen, L"\t\tcase %d:\n", state->nr); + fwprintf(gen, STRL("\t\tcase %d:\n"), state->nr); if (existLabel[state->nr]) - fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr); + fwprintf(gen, STRL("\t\t\tcase_%d:\n"), state->nr); if (endOf != NULL && state->firstAction != NULL) { - fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d /* %ls */;\n", endOf->n, endOf->name); + fwprintf(gen, STRL("\t\t\trecEnd = pos; recKind = %d /* %ls */;\n"), endOf->n, endOf->name); } bool ctxEnd = state->ctx; wchar_t_20 fmt; for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (action == state->firstAction) fwprintf(gen, L"\t\t\tif ("); - else fwprintf(gen, L"\t\t\telse if ("); + if (action == state->firstAction) fwprintf(gen, STRL("\t\t\tif (")); + else fwprintf(gen, STRL("\t\t\telse if (")); if (action->typ == Node::chr) { wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); - fwprintf(gen, L"%ls", res); + fwprintf(gen, STRL("%ls"), res); 
} else PutRange(tab->CharClassSet(action->sym)); - fwprintf(gen, L") {"); + fwprintf(gen, STRL(") {")); if (action->tc == Node::contextTrans) { - fwprintf(gen, L"apx++; "); ctxEnd = false; + fwprintf(gen, STRL("apx++; ")); ctxEnd = false; } else if (state->ctx) - fwprintf(gen, L"apx = 0; "); - fwprintf(gen, L"AddCh(); goto case_%d;", action->target->state->nr); - fwprintf(gen, L"}\n"); + fwprintf(gen, STRL("apx = 0; ")); + fwprintf(gen, STRL("AddCh(); goto case_%d;"), action->target->state->nr); + fwprintf(gen, STRL("}\n")); } if (state->firstAction == NULL) - fwprintf(gen, L"\t\t\t{"); + fwprintf(gen, STRL("\t\t\t{")); else - fwprintf(gen, L"\t\t\telse {"); + fwprintf(gen, STRL("\t\t\telse {")); if (ctxEnd) { // final context state: cut appendix - fwprintf(gen, L"\n"); - fwprintf(gen, L"\t\t\t\ttlen -= apx;\n"); - fwprintf(gen, L"\t\t\t\tSetScannerBehindT();"); + fwprintf(gen, STRL("\n")); + fwprintf(gen, STRL("\t\t\t\ttlen -= apx;\n")); + fwprintf(gen, STRL("\t\t\t\tSetScannerBehindT();")); - fwprintf(gen, L"\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n"); - fwprintf(gen, L"\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n"); - fwprintf(gen, L"\t\t\t\t"); + fwprintf(gen, STRL("\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n")); + fwprintf(gen, STRL("\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n")); + fwprintf(gen, STRL("\t\t\t\t")); } if (endOf == NULL) { - fwprintf(gen, L"goto case_0;}\n"); + fwprintf(gen, STRL("goto case_0;}\n")); } else { - fwprintf(gen, L"t->kind = %d /* %ls */; ", endOf->n, endOf->name); + fwprintf(gen, STRL("t->kind = %d /* %ls */; "), endOf->n, endOf->name); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, L"t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); + fwprintf(gen, STRL("t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n")); } else { - fwprintf(gen, L"t->kind = 
keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); + fwprintf(gen, STRL("t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n")); } } else { - fwprintf(gen, L"loopState = false;"); + fwprintf(gen, STRL("loopState = false;")); if(endOf->semPos && endOf->typ == Node::t) CopySourcePart(endOf->semPos, 0); - fwprintf(gen, L" break;}\n"); + fwprintf(gen, STRL(" break;}\n")); } } } @@ -761,45 +761,45 @@ void DFA::WriteStartTab() { for (Action *action = firstState->firstAction; action != NULL; action = action->next) { int targetState = action->target->state->nr; if (action->typ == Node::chr) { - fwprintf(gen, L"\tstart.set(%d, %d);\n", action->sym, targetState); + fwprintf(gen, STRL("\tstart.set(%d, %d);\n"), action->sym, targetState); } else { CharSet *s = tab->CharClassSet(action->sym); for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (firstRange) { firstRange = false; - fwprintf(gen, L"\tint i;\n"); + fwprintf(gen, STRL("\tint i;\n")); } - fwprintf(gen, L"\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n", r->from, r->to, targetState); + fwprintf(gen, STRL("\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n"), r->from, r->to, targetState); } } } - fwprintf(gen, L"\t\tstart.set(Buffer::EoF, -1);\n"); + fwprintf(gen, STRL("\t\tstart.set(Buffer::EoF, -1);\n")); } void DFA::WriteScanner() { Generator g(tab, errors); - fram = g.OpenFrame(L"Scanner.frame"); - gen = g.OpenGen(L"Scanner.h"); + fram = g.OpenFrame(STRL("Scanner.frame")); + gen = g.OpenGen(STRL("Scanner.h")); if (dirtyDFA) MakeDeterministic(); // Header g.GenCopyright(); - g.SkipFramePart(L"-->begin"); + g.SkipFramePart(STRL("-->begin")); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(STRL("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(STRL("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->namespace_open"); + g.CopyFramePart(STRL("-->namespace_open")); int nrOfNs = 
GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->casing0"); + g.CopyFramePart(STRL("-->casing0")); if (ignoreCase) { - fwprintf(gen, L"\twchar_t valCh; // current input character (for token.val)\n"); + fwprintf(gen, STRL("\twchar_t valCh; // current input character (for token.val)\n")); } - g.CopyFramePart(L"-->commentsheader"); + g.CopyFramePart(STRL("-->commentsheader")); Comment *com = firstComment; int cmdIdx = 0; while (com != NULL) { @@ -807,64 +807,64 @@ void DFA::WriteScanner() { com = com->next; cmdIdx++; } - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(STRL("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(L"-->implementation"); + g.CopyFramePart(STRL("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(L"Scanner.cpp"); + gen = g.OpenGen(STRL("Scanner.cpp")); g.GenCopyright(); - g.SkipFramePart(L"-->begin"); - g.CopyFramePart(L"-->namespace_open"); + g.SkipFramePart(STRL("-->begin")); + g.CopyFramePart(STRL("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->declarations"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals.Count - 1); - fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n); + g.CopyFramePart(STRL("-->declarations")); + fwprintf(gen, STRL("\tmaxT = %d;\n"), tab->terminals.Count - 1); + fwprintf(gen, STRL("\tnoSym = %d;\n"), tab->noSym->n); WriteStartTab(); GenLiterals(); - g.CopyFramePart(L"-->initialization"); - g.CopyFramePart(L"-->casing1"); + g.CopyFramePart(STRL("-->initialization")); + g.CopyFramePart(STRL("-->casing1")); if (ignoreCase) { - fwprintf(gen, L"\t\tvalCh = ch;\n"); - fwprintf(gen, L"\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); + fwprintf(gen, STRL("\t\tvalCh = ch;\n")); + fwprintf(gen, STRL("\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()")); } - g.CopyFramePart(L"-->casing2"); - fwprintf(gen, L"\t\ttval[tlen++] = "); - if (ignoreCase) fwprintf(gen, L"valCh;"); else fwprintf(gen, L"ch;"); + 
g.CopyFramePart(STRL("-->casing2")); + fwprintf(gen, STRL("\t\ttval[tlen++] = ")); + if (ignoreCase) fwprintf(gen, STRL("valCh;")); else fwprintf(gen, STRL("ch;")); - g.CopyFramePart(L"-->comments"); + g.CopyFramePart(STRL("-->comments")); com = firstComment; cmdIdx = 0; while (com != NULL) { GenComment(com, cmdIdx); com = com->next; cmdIdx++; } - g.CopyFramePart(L"-->scan1"); - fwprintf(gen, L"\t\t\t"); - if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, L"false"); } + g.CopyFramePart(STRL("-->scan1")); + fwprintf(gen, STRL("\t\t\t")); + if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, STRL("false")); } - g.CopyFramePart(L"-->scan2"); + g.CopyFramePart(STRL("-->scan2")); if (firstComment != NULL) { - fwprintf(gen, L"\t\tif ("); + fwprintf(gen, STRL("\t\tif (")); com = firstComment; cmdIdx = 0; wchar_t_20 fmt; while (com != NULL) { wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx); + fwprintf(gen, STRL("(%ls && Comment%d())"), res, cmdIdx); if (com->next != NULL) { - fwprintf(gen, L" || "); + fwprintf(gen, STRL(" || ")); } com = com->next; cmdIdx++; } - fwprintf(gen, L") continue;"); + fwprintf(gen, STRL(") continue;")); } - g.CopyFramePart(L"-->scan22"); - if (hasCtxMoves) { fwprintf(gen, L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */ - g.CopyFramePart(L"-->scan3"); + g.CopyFramePart(STRL("-->scan22")); + if (hasCtxMoves) { fwprintf(gen, STRL("\n")); fwprintf(gen, STRL("\tint apx = 0;")); } /* pdt */ + g.CopyFramePart(STRL("-->scan3")); /* CSB 02-10-05 check the Labels */ existLabel = new bool[lastStateNr+1]; @@ -873,7 +873,7 @@ void DFA::WriteScanner() { WriteState(state); delete [] existLabel; - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(STRL("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); diff --git a/src/Generator.cpp b/src/Generator.cpp index bb6ca63..ee75458 100644 --- a/src/Generator.cpp +++ 
b/src/Generator.cpp @@ -46,7 +46,7 @@ namespace Coco { FILE* Generator::OpenFrame(const wchar_t* frame) { if (coco_string_length(tab->frameDir) != 0) { - frameFile = coco_string_create_append(tab->frameDir, L"/"); + frameFile = coco_string_create_append(tab->frameDir, STRL("/")); coco_string_merge(frameFile, frame); char *chFrameFile = coco_string_create_char(frameFile); fram = fopen(chFrameFile, "r"); @@ -60,7 +60,7 @@ namespace Coco { delete [] chFrameFile; } if (fram == NULL) { - wchar_t *message = coco_string_create_append(L"-- Cannot find : ", frame); + wchar_t *message = coco_string_create_append(STRL("-- Cannot find : "), frame); errors->Exception(message); delete [] message; } @@ -75,14 +75,14 @@ namespace Coco { if ((gen = fopen(chFn, "r")) != NULL) { fclose(gen); - wchar_t *oldName = coco_string_create_append(fn, L".old"); + wchar_t *oldName = coco_string_create_append(fn, STRL(".old")); char *chOldName = coco_string_create_char(oldName); remove(chOldName); rename(chFn, chOldName); // copy with overwrite coco_string_delete(chOldName); coco_string_delete(oldName); } if ((gen = fopen(chFn, "w")) == NULL) { - wchar_t *message = coco_string_create_append(L"-- Cannot generate : ", genName); + wchar_t *message = coco_string_create_append(STRL("-- Cannot generate : "), genName); errors->Exception(message); delete [] message; } @@ -97,14 +97,14 @@ namespace Coco { FILE *file = NULL; if (coco_string_length(tab->frameDir) != 0) { - wchar_t *copyFr = coco_string_create_append(tab->frameDir, L"/Copyright.frame"); + wchar_t *copyFr = coco_string_create_append(tab->frameDir, STRL("/Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; delete [] chCopyFr; } if (file == NULL) { - wchar_t *copyFr = coco_string_create_append(tab->srcDir, L"Copyright.frame"); + wchar_t *copyFr = coco_string_create_append(tab->srcDir, STRL("Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = 
fopen(chCopyFr, "r"); delete [] copyFr; @@ -134,7 +134,7 @@ namespace Coco { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"%ls_", curNs); + fwprintf(gen, STRL("%ls_"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; } while (startPos < len); @@ -158,27 +158,27 @@ namespace Coco { endOfStopString = coco_string_length(stop)-1; } - fwscanf(fram, L"%lc", &ch); // fram.ReadByte(); + fwscanf(fram, STRL("%lc"), &ch); // fram.ReadByte(); while (!feof(fram)) { // ch != EOF if (stop != NULL && ch == startCh) { int i = 0; do { if (i == endOfStopString) return; // stop[0..i] found - fwscanf(fram, L"%lc", &ch); i++; + fwscanf(fram, STRL("%lc"), &ch); i++; } while (ch == stop[i]); // stop[0..i-1] found; continue with last read character if (generateOutput) { wchar_t *subStop = coco_string_create(stop, 0, i); - fwprintf(gen, L"%ls", subStop); + fwprintf(gen, STRL("%ls"), subStop); coco_string_delete(subStop); } } else { - if (generateOutput) { fwprintf(gen, L"%lc", ch); } - fwscanf(fram, L"%lc", &ch); + if (generateOutput) { fwprintf(gen, STRL("%lc"), ch); } + fwscanf(fram, STRL("%lc"), &ch); } } if (stop != NULL) { - wchar_t *message = coco_string_create_append(L" -- Incomplete or corrupt frame file: ", frameFile); + wchar_t *message = coco_string_create_append(STRL(" -- Incomplete or corrupt frame file: "), frameFile); errors->Exception(message); delete [] message; } diff --git a/src/Parser.cpp b/src/Parser.cpp index 2e5ebe9..bdfe1fd 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -168,7 +168,7 @@ void Parser::Coco() { AstAddTerminal(); #endif sym = tab->FindSym(t->val); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(STRL("name declared twice")); else { sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -253,8 +253,8 @@ 
void Parser::Coco() { if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(L"name declared twice"); - } else SemErr(L"this symbol kind not allowed on left side of production"); + if (sym->graph != NULL) SemErr(STRL("name declared twice")); + } else SemErr(STRL("this symbol kind not allowed on left side of production")); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); @@ -265,7 +265,7 @@ void Parser::Coco() { } if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(STRL("attribute mismatch between declaration and use of this symbol")); if (la->kind == 40 /* "(." */) { SemText(sym->semPos); @@ -287,22 +287,22 @@ void Parser::Coco() { AstAddTerminal(); #endif if (!coco_string_equal(gramName, t->val)) - SemErr(L"name does not match grammar name"); + SemErr(STRL("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(L"missing production for grammar name"); + SemErr(STRL("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(L"grammar symbol must not have attributes"); + SemErr(STRL("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, L"???", 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, STRL("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors.count == 0) { - wprintf(L"checking\n"); + wprintf(STRL("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); bool doGenCode = false; @@ -312,14 +312,14 @@ void Parser::Coco() { } else doGenCode = tab->GrammarOk(); if (doGenCode) { - wprintf(L"parser"); + wprintf(STRL("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(L" + scanner"); + wprintf(STRL(" + scanner")); 
dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(L" generated\n"); + wprintf(STRL(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } @@ -345,14 +345,14 @@ void Parser::SetDecl() { #endif wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(L"name declared twice"); + if (c != NULL) SemErr(STRL("name declared twice")); Expect(18 /* "=" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif Set(s); - if (s->Elements() == 0) SemErr(L"character set must not be empty"); + if (s->Elements() == 0) SemErr(STRL("character set must not be empty")); tab->NewCharClass(name, s); coco_string_delete(name); @@ -372,7 +372,7 @@ void Parser::TokenDecl(int typ) { #endif Sym(name, kind); sym = tab->FindSym(name); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(STRL("name declared twice")); else { sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -391,13 +391,13 @@ void Parser::TokenDecl(int typ) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - if (kind == str) SemErr(L"a literal must not be declared with a structure"); + if (kind == str) SemErr(STRL("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if (tab->literals[tokenString] != NULL) - SemErr(L"token string declared twice"); + SemErr(STRL("token string declared twice")); tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } @@ -410,7 +410,7 @@ void Parser::TokenDecl(int typ) { } else SynErr(45); if (la->kind == 40 /* "(." 
*/) { SemText(sym->semPos); - if (typ == Node::t) errors.Warning(L"Warning semantic action on token declarations require a custom Scanner"); + if (typ == Node::t) errors.Warning(STRL("Warning semantic action on token declarations require a custom Scanner")); } #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -481,7 +481,7 @@ void Parser::AttrDecl(Symbol *sym) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"bad string in attributes"); + SemErr(STRL("bad string in attributes")); } } Expect(26 /* ">" */); @@ -504,7 +504,7 @@ void Parser::AttrDecl(Symbol *sym) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"bad string in attributes"); + SemErr(STRL("bad string in attributes")); } } Expect(28 /* ".>" */); @@ -536,13 +536,13 @@ void Parser::SemText(Position* &pos) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"bad string in semantic action"); + SemErr(STRL("bad string in semantic action")); } else { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"missing end of previous semantic action"); + SemErr(STRL("missing end of previous semantic action")); } } Expect(41 /* ".)" */); @@ -585,7 +585,7 @@ void Parser::SimSet(CharSet* &s) { AstAddTerminal(); #endif CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); + if (c == NULL) SemErr(STRL("undefined name")); else s->Or(c->set); } else if (la->kind == _string) { Get(); @@ -600,7 +600,7 @@ void Parser::SimSet(CharSet* &s) { for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() + if ((CHL('A') <= ch) && (ch <= CHL('Z'))) ch = ch - (CHL('A') - CHL('a')); // ch.ToLower() } s->Set(ch); } @@ -644,7 +644,7 @@ void Parser::Char(int &n) { // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(L"unacceptable character value"); + else 
SemErr(STRL("unacceptable character value")); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; @@ -657,7 +657,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, "Sym", la->line); #endif - name = coco_string_create(L"???"); kind = id; + name = coco_string_create(STRL("???")); kind = id; if (la->kind == _ident) { Get(); #ifdef PARSER_WITH_AST @@ -678,9 +678,9 @@ void Parser::Sym(wchar_t* &name, int &kind) { #endif wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); - name = coco_string_create_append(L"\"", subName); + name = coco_string_create_append(STRL("\""), subName); coco_string_delete(subName); - coco_string_merge(name, L"\""); + coco_string_merge(name, STRL("\"")); } kind = str; @@ -690,7 +690,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) - SemErr(L"literal tokens must not contain blanks"); + SemErr(STRL("literal tokens must not contain blanks")); } else SynErr(48); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -773,29 +773,29 @@ void Parser::Factor(Graph* &g) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(L"undefined string in production"); + SemErr(STRL("undefined string in production")); sym = tab->eofSy; // dummy } } coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) - SemErr(L"this symbol kind is not allowed in a production"); + SemErr(STRL("this symbol kind is not allowed in a production")); if (weak) { if (typ == Node::t) typ = Node::wt; - else SemErr(L"only terminals may be weak"); + else SemErr(STRL("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); if (la->kind == 25 /* "<" */ || la->kind == 27 /* 
"<." */) { Attribs(p); - if (kind != id) SemErr(L"a literal must not have attributes"); + if (kind != id) SemErr(STRL("a literal must not have attributes")); } if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(STRL("attribute mismatch between declaration and use of this symbol")); break; } @@ -893,7 +893,7 @@ void Parser::Attribs(Node *p) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"bad string in attributes"); + SemErr(STRL("bad string in attributes")); } } Expect(26 /* ">" */); @@ -915,7 +915,7 @@ void Parser::Attribs(Node *p) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(L"bad string in attributes"); + SemErr(STRL("bad string in attributes")); } } Expect(28 /* ".>" */); @@ -996,7 +996,7 @@ void Parser::TokenFactor(Graph* &g) { if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(L"undefined name"); + SemErr(STRL("undefined name")); c = tab->NewCharClass(name, new CharSet()); } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; @@ -1146,7 +1146,7 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(L"Dummy Token"); + la->val = coco_string_create(STRL("Dummy Token")); Get(); Coco(); Expect(0); @@ -1218,97 +1218,97 @@ void Errors::SynErr(int line, int col, int n) { const size_t format_size = 20; wchar_t format[format_size]; switch (n) { - case 0: s = L"EOF expected"; break; - case 1: s = L"ident expected"; break; - case 2: s = L"number expected"; break; - case 3: s = L"string expected"; break; - case 4: s = L"badString expected"; break; - case 5: s = L"char expected"; break; - case 6: s = L"\"COMPILER\" expected"; break; - case 7: s = L"\"IGNORECASE\" expected"; break; - case 8: s = L"\"TERMINALS\" expected"; break; - case 9: s = L"\"CHARACTERS\" expected"; break; - case 10: s = 
L"\"TOKENS\" expected"; break; - case 11: s = L"\"PRAGMAS\" expected"; break; - case 12: s = L"\"COMMENTS\" expected"; break; - case 13: s = L"\"FROM\" expected"; break; - case 14: s = L"\"TO\" expected"; break; - case 15: s = L"\"NESTED\" expected"; break; - case 16: s = L"\"IGNORE\" expected"; break; - case 17: s = L"\"PRODUCTIONS\" expected"; break; - case 18: s = L"\"=\" expected"; break; - case 19: s = L"\".\" expected"; break; - case 20: s = L"\"END\" expected"; break; - case 21: s = L"\"+\" expected"; break; - case 22: s = L"\"-\" expected"; break; - case 23: s = L"\"..\" expected"; break; - case 24: s = L"\"ANY\" expected"; break; - case 25: s = L"\"<\" expected"; break; - case 26: s = L"\">\" expected"; break; - case 27: s = L"\"<.\" expected"; break; - case 28: s = L"\".>\" expected"; break; - case 29: s = L"\"|\" expected"; break; - case 30: s = L"\"WEAK\" expected"; break; - case 31: s = L"\"(\" expected"; break; - case 32: s = L"\")\" expected"; break; - case 33: s = L"\"[\" expected"; break; - case 34: s = L"\"]\" expected"; break; - case 35: s = L"\"{\" expected"; break; - case 36: s = L"\"}\" expected"; break; - case 37: s = L"\"SYNC\" expected"; break; - case 38: s = L"\"IF\" expected"; break; - case 39: s = L"\"CONTEXT\" expected"; break; - case 40: s = L"\"(.\" expected"; break; - case 41: s = L"\".)\" expected"; break; - case 42: s = L"??? 
expected"; break; - case 43: s = L"this symbol not expected in Coco"; break; - case 44: s = L"this symbol not expected in TokenDecl"; break; - case 45: s = L"invalid TokenDecl"; break; - case 46: s = L"invalid AttrDecl"; break; - case 47: s = L"invalid SimSet"; break; - case 48: s = L"invalid Sym"; break; - case 49: s = L"invalid Term"; break; - case 50: s = L"invalid Factor"; break; - case 51: s = L"invalid Attribs"; break; - case 52: s = L"invalid TokenFactor"; break; + case 0: s = STRL("EOF expected"); break; + case 1: s = STRL("ident expected"); break; + case 2: s = STRL("number expected"); break; + case 3: s = STRL("string expected"); break; + case 4: s = STRL("badString expected"); break; + case 5: s = STRL("char expected"); break; + case 6: s = STRL("\"COMPILER\" expected"); break; + case 7: s = STRL("\"IGNORECASE\" expected"); break; + case 8: s = STRL("\"TERMINALS\" expected"); break; + case 9: s = STRL("\"CHARACTERS\" expected"); break; + case 10: s = STRL("\"TOKENS\" expected"); break; + case 11: s = STRL("\"PRAGMAS\" expected"); break; + case 12: s = STRL("\"COMMENTS\" expected"); break; + case 13: s = STRL("\"FROM\" expected"); break; + case 14: s = STRL("\"TO\" expected"); break; + case 15: s = STRL("\"NESTED\" expected"); break; + case 16: s = STRL("\"IGNORE\" expected"); break; + case 17: s = STRL("\"PRODUCTIONS\" expected"); break; + case 18: s = STRL("\"=\" expected"); break; + case 19: s = STRL("\".\" expected"); break; + case 20: s = STRL("\"END\" expected"); break; + case 21: s = STRL("\"+\" expected"); break; + case 22: s = STRL("\"-\" expected"); break; + case 23: s = STRL("\"..\" expected"); break; + case 24: s = STRL("\"ANY\" expected"); break; + case 25: s = STRL("\"<\" expected"); break; + case 26: s = STRL("\">\" expected"); break; + case 27: s = STRL("\"<.\" expected"); break; + case 28: s = STRL("\".>\" expected"); break; + case 29: s = STRL("\"|\" expected"); break; + case 30: s = STRL("\"WEAK\" expected"); break; + case 31: s = 
STRL("\"(\" expected"); break; + case 32: s = STRL("\")\" expected"); break; + case 33: s = STRL("\"[\" expected"); break; + case 34: s = STRL("\"]\" expected"); break; + case 35: s = STRL("\"{\" expected"); break; + case 36: s = STRL("\"}\" expected"); break; + case 37: s = STRL("\"SYNC\" expected"); break; + case 38: s = STRL("\"IF\" expected"); break; + case 39: s = STRL("\"CONTEXT\" expected"); break; + case 40: s = STRL("\"(.\" expected"); break; + case 41: s = STRL("\".)\" expected"); break; + case 42: s = STRL("??? expected"); break; + case 43: s = STRL("this symbol not expected in Coco"); break; + case 44: s = STRL("this symbol not expected in TokenDecl"); break; + case 45: s = STRL("invalid TokenDecl"); break; + case 46: s = STRL("invalid AttrDecl"); break; + case 47: s = STRL("invalid SimSet"); break; + case 48: s = STRL("invalid Sym"); break; + case 49: s = STRL("invalid Term"); break; + case 50: s = STRL("invalid Factor"); break; + case 51: s = STRL("invalid Attribs"); break; + case 52: s = STRL("invalid TokenFactor"); break; default: { - coco_swprintf(format, format_size, L"error %d", n); + coco_swprintf(format, format_size, STRL("error %d"), n); s = format; } break; } - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(L"%ls\n", s); + wprintf(STRL("%ls\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(STRL("%ls"), s); exit(1); } #ifdef PARSER_WITH_AST static void printIndent(int n) { - for(int i=0; i < n; ++i) wprintf(L" "); + for(int i=0; i < 
n; ++i) wprintf(STRL(" ")); } SynTree::~SynTree() { - //wprintf(L"Token %ls : %d : %d : %d : %d\n", tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(STRL("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -1332,18 +1332,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/Parser.frame b/src/Parser.frame index f3b9193..038452d 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -300,7 +300,7 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(L"Dummy Token"); + la->val = coco_string_create(STRL("Dummy Token")); Get(); -->parseRoot } @@ -349,41 +349,41 @@ void Errors::SynErr(int line, int col, int n) { -->errors default: { - coco_swprintf(format, format_size, L"error %d", n); + coco_swprintf(format, format_size, STRL("error %d"), n); s = format; } break; } - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(L"%ls\n", s); + wprintf(STRL("%ls\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(STRL("%ls"), s); exit(1); } #ifdef PARSER_WITH_AST static void printIndent(int n) { - for(int i=0; i < n; ++i) wprintf(L" "); + for(int i=0; i < n; ++i) wprintf(STRL(" ")); } SynTree::~SynTree() { - //wprintf(L"Token %ls 
: %d : %d : %d : %d\n", tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(STRL("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -407,18 +407,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(L"%s\t%d\t%d\t%d\t%ls\n", ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(L"%d\t%d\t%d\t%ls\n", children.Count, tok->line, tok->kind, tok->val); + wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/Parser.h b/src/Parser.h index dc06aab..90ff828 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -152,7 +152,7 @@ int id; id = 0; str = 1; tokenString = NULL; - noString = coco_string_create(L"-none-"); + noString = coco_string_create(STRL("-none-")); ignoreGammarErrors = false; } diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index a63b79f..7ace38b 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -38,7 +38,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { void ParserGen::Indent (int n) { - for (int i = 1; i <= n; i++) fwprintf(gen, L"\t"); + for (int i = 1; i <= n; i++) fwprintf(gen, STRL("\t")); } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning @@ -72,7 +72,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"namespace %ls {\n", curNs); + fwprintf(gen, STRL("namespace %ls {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -85,7 +85,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { void ParserGen::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, L"} // namespace\n"); + fwprintf(gen, STRL("} // namespace\n")); } } @@ -95,12 +95,12 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { if (pos != NULL) { buffer->SetPos(pos->beg); ch = buffer->Read(); if (tab->emitLines && pos->line) { - fwprintf(gen, L"\n#line %d \"%ls\"\n", pos->line, tab->srcName); + fwprintf(gen, STRL("\n#line %d \"%ls\"\n"), pos->line, tab->srcName); } Indent(indent); while (buffer->GetPos() <= pos->end) { while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF - fwprintf(gen, L"\n"); Indent(indent); + fwprintf(gen, STRL("\n")); Indent(indent); if (ch == CR) { ch = buffer->Read(); } // skip CR if (ch == LF) { ch = buffer->Read(); } // skip LF for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) { @@ -109,11 +109,11 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { } if (buffer->GetPos() > pos->end) goto done; } - fwprintf(gen, L"%lc", ch); + fwprintf(gen, STRL("%lc"), ch); ch = buffer->Read(); } done: - if (indent > 0) fwprintf(gen, L"\n"); + if (indent > 0) fwprintf(gen, 
STRL("\n")); } } @@ -121,26 +121,26 @@ void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { errorNr++; const int formatLen = 1000; wchar_t format[formatLen]; - coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = L\"", errorNr); + coco_swprintf(format, formatLen, STRL("\t\t\tcase %d: s = STRL(\""), errorNr); coco_string_merge(err, format); if (errTyp == tErr) { - if (sym->name[0] == L'"') { + if (sym->name[0] == CHL('"')) { wchar_t *se = tab->Escape(sym->name); - coco_swprintf(format, formatLen, L"%ls expected", se); + coco_swprintf(format, formatLen, STRL("%ls expected"), se); coco_string_merge(err, format); coco_string_delete(se); } else { - coco_swprintf(format, formatLen, L"%ls expected", sym->name); + coco_swprintf(format, formatLen, STRL("%ls expected"), sym->name); coco_string_merge(err, format); } } else if (errTyp == altErr) { - coco_swprintf(format, formatLen, L"invalid %ls", sym->name); + coco_swprintf(format, formatLen, STRL("invalid %ls"), sym->name); coco_string_merge(err, format); } else if (errTyp == syncErr) { - coco_swprintf(format, formatLen, L"this symbol not expected in %ls", sym->name); + coco_swprintf(format, formatLen, STRL("this symbol not expected in %ls"), sym->name); coco_string_merge(err, format); } - coco_swprintf(format, formatLen, L"\"; break;\n"); + coco_swprintf(format, formatLen, STRL("\"); break;\n")); coco_string_merge(err, format); } @@ -155,20 +155,20 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); - if (n == 0) fwprintf(gen, L"false"); // happens if an ANY set matches no symbol + if (n == 0) fwprintf(gen, STRL("false")); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; for (int i=0; iterminals.Count; i++) { sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, L"la->kind == "); + fwprintf(gen, STRL("la->kind == ")); WriteSymbolOrCode(gen, sym); --n; - if (n > 0) 
fwprintf(gen, L" || "); + if (n > 0) fwprintf(gen, STRL(" || ")); } } } else - fwprintf(gen, L"StartOf(%d /* %s */)", NewCondSet(s), (tab->nTyp[p->typ])); + fwprintf(gen, STRL("StartOf(%d /* %s */)"), NewCondSet(s), (tab->nTyp[p->typ])); } } @@ -177,9 +177,9 @@ void ParserGen::PutCaseLabels (const BitArray *s) { for (int i=0; iterminals.Count; i++) { sym = tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, L"case "); + fwprintf(gen, STRL("case ")); WriteSymbolOrCode(gen, sym); - fwprintf(gen, L": "); + fwprintf(gen, STRL(": ")); } } } @@ -190,43 +190,43 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { while (p != NULL) { if (p->typ == Node::nt) { Indent(indent); - fwprintf(gen, L"%ls(", p->sym->name); + fwprintf(gen, STRL("%ls("), p->sym->name); CopySourcePart(p->pos, 0); - fwprintf(gen, L");\n"); + fwprintf(gen, STRL(");\n")); } else if (p->typ == Node::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) { - fwprintf(gen, L"Get();\n"); + fwprintf(gen, STRL("Get();\n")); //copy and pasted bellow - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"); + fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n")); } else { - fwprintf(gen, L"Expect("); + fwprintf(gen, STRL("Expect(")); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, L");\n"); + fwprintf(gen, STRL(");\n")); //copy and pasted from above - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"); + fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n")); } } if (p->typ == Node::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); - fwprintf(gen, L"ExpectWeak("); + fwprintf(gen, STRL("ExpectWeak(")); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, L", %d);\n", NewCondSet(s1)); + fwprintf(gen, STRL(", %d);\n"), NewCondSet(s1)); delete s1; } if (p->typ == Node::any) { Indent(indent); int acc = 
Sets::Elements(p->set); if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here - fwprintf(gen, L"Get();\n"); + fwprintf(gen, STRL("Get();\n")); } else { GenErrorMsg(altErr, curSy); if (acc > 0) { - fwprintf(gen, L"if ("); GenCond(p->set, p); fwprintf(gen, L") Get(); else SynErr(%d);\n", errorNr); - } else fwprintf(gen, L"SynErr(%d); // ANY node that matches no symbol\n", errorNr); + fwprintf(gen, STRL("if (")); GenCond(p->set, p); fwprintf(gen, STRL(") Get(); else SynErr(%d);\n"), errorNr); + } else fwprintf(gen, STRL("SynErr(%d); // ANY node that matches no symbol\n"), errorNr); } } if (p->typ == Node::eps) { // nothing } if (p->typ == Node::rslv) { // nothing @@ -236,57 +236,57 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); - fwprintf(gen, L"while (!("); GenCond(s1, p); fwprintf(gen, L")) {"); - fwprintf(gen, L"SynErr(%d); Get();", errorNr); fwprintf(gen, L"}\n"); + fwprintf(gen, STRL("while (!(")); GenCond(s1, p); fwprintf(gen, STRL(")) {")); + fwprintf(gen, STRL("SynErr(%d); Get();"), errorNr); fwprintf(gen, STRL("}\n")); delete s1; } if (p->typ == Node::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); delete s1; bool useSwitch = UseSwitch(p); - if (useSwitch) { Indent(indent); fwprintf(gen, L"switch (la->kind) {\n"); } + if (useSwitch) { Indent(indent); fwprintf(gen, STRL("switch (la->kind) {\n")); } p2 = p; while (p2 != NULL) { s1 = tab->Expected(p2->sub, curSy); Indent(indent); if (useSwitch) { - PutCaseLabels(s1); fwprintf(gen, L"{\n"); + PutCaseLabels(s1); fwprintf(gen, STRL("{\n")); } else if (p2 == p) { - fwprintf(gen, L"if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); - } else if (p2->down == NULL && equal) { fwprintf(gen, L"} else {\n"); + fwprintf(gen, STRL("if (")); GenCond(s1, p2->sub); fwprintf(gen, 
STRL(") {\n")); + } else if (p2->down == NULL && equal) { fwprintf(gen, STRL("} else {\n")); } else { - fwprintf(gen, L"} else if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); + fwprintf(gen, STRL("} else if (")); GenCond(s1, p2->sub); fwprintf(gen, STRL(") {\n")); } GenCode(p2->sub, indent + 1, s1); if (useSwitch) { - Indent(indent); fwprintf(gen, L"\tbreak;\n"); - Indent(indent); fwprintf(gen, L"}\n"); + Indent(indent); fwprintf(gen, STRL("\tbreak;\n")); + Indent(indent); fwprintf(gen, STRL("}\n")); } p2 = p2->down; delete s1; } Indent(indent); if (equal) { - fwprintf(gen, L"}\n"); + fwprintf(gen, STRL("}\n")); } else { GenErrorMsg(altErr, curSy); if (useSwitch) { - fwprintf(gen, L"default: SynErr(%d); break;\n", errorNr); - Indent(indent); fwprintf(gen, L"}\n"); + fwprintf(gen, STRL("default: SynErr(%d); break;\n"), errorNr); + Indent(indent); fwprintf(gen, STRL("}\n")); } else { - fwprintf(gen, L"} "); fwprintf(gen, L"else SynErr(%d);\n", errorNr); + fwprintf(gen, STRL("} ")); fwprintf(gen, STRL("else SynErr(%d);\n"), errorNr); } } } if (p->typ == Node::iter) { Indent(indent); p2 = p->sub; - fwprintf(gen, L"while ("); + fwprintf(gen, STRL("while (")); if (p2->typ == Node::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); - fwprintf(gen, L"WeakSeparator("); + fwprintf(gen, STRL("WeakSeparator(")); WriteSymbolOrCode(gen, p2->sym); - fwprintf(gen, L",%d,%d) ", NewCondSet(s1), NewCondSet(s2)); + fwprintf(gen, STRL(",%d,%d) "), NewCondSet(s1), NewCondSet(s2)); delete s1; delete s2; s1 = new BitArray(tab->terminals.Count); // for inner structure @@ -295,16 +295,16 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { s1 = tab->First(p2); GenCond(s1, p2); } - fwprintf(gen, L") {\n"); + fwprintf(gen, STRL(") {\n")); GenCode(p2, indent + 1, s1); - Indent(indent); fwprintf(gen, L"}\n"); + Indent(indent); fwprintf(gen, STRL("}\n")); delete s1; } if (p->typ == Node::opt) { s1 = tab->First(p->sub); Indent(indent); - 
fwprintf(gen, L"if ("); GenCond(s1, p->sub); fwprintf(gen, L") {\n"); + fwprintf(gen, STRL("if (")); GenCond(s1, p->sub); fwprintf(gen, STRL(") {\n")); GenCode(p->sub, indent + 1, s1); - Indent(indent); fwprintf(gen, L"}\n"); + Indent(indent); fwprintf(gen, STRL("}\n")); delete s1; } if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) @@ -320,7 +320,7 @@ void ParserGen::GenTokensHeader() { int i; bool isFirst = true; - fwprintf(gen, L"\tenum {\n"); + fwprintf(gen, STRL("\tenum {\n")); // tokens for (i=0; iterminals.Count; i++) { @@ -328,33 +328,33 @@ void ParserGen::GenTokensHeader() { if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } - else { fwprintf(gen , L",\n"); } + else { fwprintf(gen , STRL(",\n")); } - fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } // pragmas for (i=0; ipragmas.Count; i++) { if (isFirst) { isFirst = false; } - else { fwprintf(gen , L",\n"); } + else { fwprintf(gen , STRL(",\n")); } sym = tab->pragmas[i]; - fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } - fwprintf(gen, L"\n\t};\n"); + fwprintf(gen, STRL("\n\t};\n")); // nonterminals - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"); + fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n")); isFirst = true; for (i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; if (isFirst) { isFirst = false; } - else { fwprintf(gen , L",\n"); } + else { fwprintf(gen , STRL(",\n")); } - fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } - fwprintf(gen, L"\n\t};\n#endif\n"); + fwprintf(gen, STRL("\n\t};\n#endif\n")); } @@ -362,19 +362,19 @@ void ParserGen::GenCodePragmas() { Symbol *sym; for (int i=0; ipragmas.Count; i++) { sym = tab->pragmas[i]; - fwprintf(gen, L"\t\tif (la->kind == "); + fwprintf(gen, STRL("\t\tif (la->kind == ")); 
WriteSymbolOrCode(gen, sym); - fwprintf(gen, L") {\n"); + fwprintf(gen, STRL(") {\n")); CopySourcePart(sym->semPos, 4); - fwprintf(gen, L"\t\t}\n"); + fwprintf(gen, STRL("\t\t}\n")); } } void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { if (!isalpha(sym->name[0])) { - fwprintf(gen, L"%d /* %ls */", sym->n, sym->name); + fwprintf(gen, STRL("%d /* %ls */"), sym->n, sym->name); } else { - fwprintf(gen, L"_%ls", sym->name); + fwprintf(gen, STRL("_%ls"), sym->name); } } @@ -383,9 +383,9 @@ void ParserGen::GenProductionsHeader() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, L"\tvoid %ls(", sym->name); + fwprintf(gen, STRL("\tvoid %ls("), sym->name); CopySourcePart(sym->attrPos, 0); - fwprintf(gen, L");\n"); + fwprintf(gen, STRL(");\n")); } } @@ -395,43 +395,43 @@ void ParserGen::GenProductions() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, L"void Parser::%ls(", sym->name); + fwprintf(gen, STRL("void Parser::%ls("), sym->name); CopySourcePart(sym->attrPos, 0); - fwprintf(gen, L") {\n"); + fwprintf(gen, STRL(") {\n")); CopySourcePart(sym->semPos, 2); - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n"); - if(i == 0) fwprintf(gen, L"\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n", sym->name, sym->name); + fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n")); + if(i == 0) fwprintf(gen, STRL("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); else { - fwprintf(gen, L"\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n", sym->name, sym->name); + fwprintf(gen, STRL("\t\tbool ntAdded = 
AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n"), sym->name, sym->name); } - fwprintf(gen, L"#endif\n"); + fwprintf(gen, STRL("#endif\n")); ba.SetAll(false); GenCode(sym->graph, 2, &ba); - fwprintf(gen, L"#ifdef PARSER_WITH_AST\n"); - if(i == 0) fwprintf(gen, L"\t\tAstPopNonTerminal();\n"); - else fwprintf(gen, L"\t\tif(ntAdded) AstPopNonTerminal();\n"); - fwprintf(gen, L"#endif\n"); - fwprintf(gen, L"}\n\n"); + fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n")); + if(i == 0) fwprintf(gen, STRL("\t\tAstPopNonTerminal();\n")); + else fwprintf(gen, STRL("\t\tif(ntAdded) AstPopNonTerminal();\n")); + fwprintf(gen, STRL("#endif\n")); + fwprintf(gen, STRL("}\n\n")); } } void ParserGen::InitSets() { - fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet.Count, tab->terminals.Count+1); + fwprintf(gen, STRL("\tstatic bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); for (int i = 0; i < symSet.Count; i++) { BitArray *s = symSet[i]; - fwprintf(gen, L"\t\t{"); + fwprintf(gen, STRL("\t\t{")); int j = 0; Symbol *sym; for (int k=0; kterminals.Count; k++) { sym = tab->terminals[k]; - if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,"); + if ((*s)[sym->n]) fwprintf(gen, STRL("T,")); else fwprintf(gen, STRL("x,")); ++j; - if (j%4 == 0) fwprintf(gen, L" "); + if (j%4 == 0) fwprintf(gen, STRL(" ")); } - if (i == symSet.Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n"); + if (i == symSet.Count-1) fwprintf(gen, STRL("x}\n")); else fwprintf(gen, STRL("x},\n")); } - fwprintf(gen, L"\t};\n\n"); + fwprintf(gen, STRL("\t};\n\n")); } void ParserGen::WriteParser () { @@ -439,8 +439,8 @@ void ParserGen::WriteParser () { int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart symSet.Add(tab->allSyncSets); - fram = g.OpenFrame(L"Parser.frame"); - gen = g.OpenGen(L"Parser.h"); + fram = g.OpenFrame(STRL("Parser.frame")); + gen = g.OpenGen(STRL("Parser.h")); Symbol *sym; for (int i=0; iterminals.Count; i++) { @@ -449,47 +449,47 @@ 
void ParserGen::WriteParser () { } g.GenCopyright(); - g.SkipFramePart(L"-->begin"); + g.SkipFramePart(STRL("-->begin")); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(STRL("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(STRL("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->headerdef"); + g.CopyFramePart(STRL("-->headerdef")); - if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, L"\n");} - g.CopyFramePart(L"-->namespace_open"); + if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, STRL("\n"));} + g.CopyFramePart(STRL("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->constantsheader"); + g.CopyFramePart(STRL("-->constantsheader")); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ - fwprintf(gen, L"\tint maxT;\n"); - g.CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0); - g.CopyFramePart(L"-->productionsheader"); GenProductionsHeader(); - g.CopyFramePart(L"-->namespace_close"); + fwprintf(gen, STRL("\tint maxT;\n")); + g.CopyFramePart(STRL("-->declarations")); CopySourcePart(tab->semDeclPos, 0); + g.CopyFramePart(STRL("-->productionsheader")); GenProductionsHeader(); + g.CopyFramePart(STRL("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(L"-->implementation"); + g.CopyFramePart(STRL("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(L"Parser.cpp"); + gen = g.OpenGen(STRL("Parser.cpp")); g.GenCopyright(); - g.SkipFramePart(L"-->begin"); - g.CopyFramePart(L"-->namespace_open"); + g.SkipFramePart(STRL("-->begin")); + g.CopyFramePart(STRL("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->pragmas"); GenCodePragmas(); - g.CopyFramePart(L"-->productions"); GenProductions(); - g.CopyFramePart(L"-->parseRoot"); fwprintf(gen, L"\t%ls();\n", tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, L"\tExpect(0);"); - 
g.CopyFramePart(L"-->constants"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals.Count-1); - g.CopyFramePart(L"-->initialization"); InitSets(); - g.CopyFramePart(L"-->errors"); fwprintf(gen, L"%ls", err); - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(STRL("-->pragmas")); GenCodePragmas(); + g.CopyFramePart(STRL("-->productions")); GenProductions(); + g.CopyFramePart(STRL("-->parseRoot")); fwprintf(gen, STRL("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, STRL("\tExpect(0);")); + g.CopyFramePart(STRL("-->constants")); + fwprintf(gen, STRL("\tmaxT = %d;\n"), tab->terminals.Count-1); + g.CopyFramePart(STRL("-->initialization")); InitSets(); + g.CopyFramePart(STRL("-->errors")); fwprintf(gen, STRL("%ls"), err); + g.CopyFramePart(STRL("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); fclose(gen); @@ -498,12 +498,12 @@ void ParserGen::WriteParser () { void ParserGen::WriteStatistics () { - fwprintf(trace, L"\n"); - fwprintf(trace, L"%d terminals\n", tab->terminals.Count); - fwprintf(trace, L"%d symbols\n", tab->terminals.Count + tab->pragmas.Count + + fwprintf(trace, STRL("\n")); + fwprintf(trace, STRL("%d terminals\n"), tab->terminals.Count); + fwprintf(trace, STRL("%d symbols\n"), tab->terminals.Count + tab->pragmas.Count + tab->nonterminals.Count); - fwprintf(trace, L"%d nodes\n", tab->nodes.Count); - fwprintf(trace, L"%d sets\n", symSet.Count); + fwprintf(trace, STRL("%d nodes\n"), tab->nodes.Count); + fwprintf(trace, STRL("%d sets\n"), symSet.Count); } diff --git a/src/Scanner.cpp b/src/Scanner.cpp index a1ea891..32eda16 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -75,13 +75,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((L'a' <= data[i]) && (data[i] <= L'z')) { - newData[i] = data[i] + (L'A' - L'a'); + if ((CHL('a') <= data[i]) && (data[i] <= CHL('z'))) { + newData[i] = data[i] + (CHL('A') - 
CHL('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = L'\0'; + newData[dataLen] = CHL('\0'); return newData; } @@ -98,12 +98,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((L'A' <= ch) && (ch <= L'Z')) { - newData[i] = ch - (L'A' - L'a'); + if ((CHL('A') <= ch) && (ch <= CHL('Z'))) { + newData[i] = ch - (CHL('A') - CHL('a')); } else { newData[i] = ch; } } - newData[dataLen] = L'\0'; + newData[dataLen] = CHL('\0'); return newData; } @@ -367,7 +367,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(L"--- buffer out of bounds access, position: %d\n", value); + wprintf(STRL("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -452,7 +452,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(L"--- Cannot open file %ls\n", fileName); + wprintf(STRL("--- Cannot open file %ls\n"), fileName); exit(1); } coco_string_delete(chFileName); @@ -504,24 +504,24 @@ void Scanner::Init() { start.set(123, 27); start.set(125, 28); start.set(Buffer::EoF, -1); - keywords.set(L"COMPILER", 6); - keywords.set(L"IGNORECASE", 7); - keywords.set(L"TERMINALS", 8); - keywords.set(L"CHARACTERS", 9); - keywords.set(L"TOKENS", 10); - keywords.set(L"PRAGMAS", 11); - keywords.set(L"COMMENTS", 12); - keywords.set(L"FROM", 13); - keywords.set(L"TO", 14); - keywords.set(L"NESTED", 15); - keywords.set(L"IGNORE", 16); - keywords.set(L"PRODUCTIONS", 17); - keywords.set(L"END", 20); - keywords.set(L"ANY", 24); - keywords.set(L"WEAK", 30); - keywords.set(L"SYNC", 37); - keywords.set(L"IF", 38); - keywords.set(L"CONTEXT", 39); + keywords.set(STRL("COMPILER"), 6); + keywords.set(STRL("IGNORECASE"), 7); + keywords.set(STRL("TERMINALS"), 8); + keywords.set(STRL("CHARACTERS"), 9); + 
keywords.set(STRL("TOKENS"), 10); + keywords.set(STRL("PRAGMAS"), 11); + keywords.set(STRL("COMMENTS"), 12); + keywords.set(STRL("FROM"), 13); + keywords.set(STRL("TO"), 14); + keywords.set(STRL("NESTED"), 15); + keywords.set(STRL("IGNORE"), 16); + keywords.set(STRL("PRODUCTIONS"), 17); + keywords.set(STRL("END"), 20); + keywords.set(STRL("ANY"), 24); + keywords.set(STRL("WEAK"), 30); + keywords.set(STRL("SYNC"), 37); + keywords.set(STRL("IF"), 38); + keywords.set(STRL("CONTEXT"), 39); tvalLength = 128; @@ -534,7 +534,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + wprintf(STRL("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -545,7 +545,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(L"Illegal byte order mark at start of file"); + wprintf(STRL("Illegal byte order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -566,7 +566,7 @@ void Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == CHL('\r') && buffer->Peek() != CHL('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } @@ -590,7 +590,7 @@ void Scanner::AddCh() { bool Scanner::Comment0() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == L'/') { + if (ch == CHL('/')) { NextCh(); for(;;) { if (ch == 10) { @@ -608,19 +608,19 @@ bool Scanner::Comment0() { bool Scanner::Comment1() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == L'*') { + if (ch == CHL('*')) { NextCh(); for(;;) { - if (ch == L'*') { + if (ch == CHL('*')) { NextCh(); - if (ch == L'/') { + if (ch == CHL('/')) { level--; if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return 
true; } NextCh(); } - } else if (ch == L'/') { + } else if (ch == CHL('/')) { NextCh(); - if (ch == L'*') { + if (ch == CHL('*')) { level++; NextCh(); } } else if (ch == buffer->EoF) return false; @@ -667,7 +667,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too long token value\n"); + wprintf(STRL("--- Too long token value\n")); exit(1); } CreateHeapBlock(); @@ -676,15 +676,15 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = L'\0'; + t->val[tlen] = CHL('\0'); } Token* Scanner::NextToken() { while(true) { - while (ch == ' ' || + while (ch == CHL(' ') || (ch >= 9 && ch <= 10) || ch == 13 ) NextCh(); - if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) continue; + if ((ch == CHL('/') && Comment0()) || (ch == CHL('/') && Comment1())) continue; break; } @@ -710,12 +710,12 @@ Token* Scanner::NextToken() { case 1: case_1: recEnd = pos; recKind = 1 /* ident */; - if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} + if ((ch >= CHL('0') && ch <= CHL('9')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_1;} else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} case 2: case_2: recEnd = pos; recKind = 2 /* number */; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;} + if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_2;} else {t->kind = 2 /* number */; loopState = false; break;} case 3: case_3: @@ -724,7 +724,7 @@ Token* Scanner::NextToken() { case_4: {t->kind = 4 /* badString */; loopState = false; break;} case 5: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && 
ch <= 65535)) {AddCh(); goto case_6;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= CHL('&')) || (ch >= CHL('(') && ch <= CHL('[')) || (ch >= CHL(']') && ch <= 65535)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} else {goto case_0;} case 6: @@ -733,11 +733,11 @@ Token* Scanner::NextToken() { else {goto case_0;} case 7: case_7: - if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_8;} + if ((ch >= CHL(' ') && ch <= CHL('~'))) {AddCh(); goto case_8;} else {goto case_0;} case 8: case_8: - if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_8;} + if ((ch >= CHL('0') && ch <= CHL('9')) || (ch >= CHL('a') && ch <= CHL('f'))) {AddCh(); goto case_8;} else if (ch == 39) {AddCh(); goto case_9;} else {goto case_0;} case 9: @@ -746,35 +746,35 @@ Token* Scanner::NextToken() { case 10: case_10: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} + if ((ch >= CHL('0') && ch <= CHL('9')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_10;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 11: case_11: recEnd = pos; recKind = 44 /* optionSym */; - if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} + if ((ch >= CHL('-') && ch <= CHL('.')) || (ch >= CHL('0') && ch <= CHL(':')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_11;} else {t->kind = 44 /* optionSym */; loopState = false; break;} case 12: case_12: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= CHL('!')) || (ch >= 
CHL('#') && ch <= CHL('[')) || (ch >= CHL(']') && ch <= 65535)) {AddCh(); goto case_12;} else if (ch == 10 || ch == 13) {AddCh(); goto case_4;} - else if (ch == L'"') {AddCh(); goto case_3;} + else if (ch == CHL('"')) {AddCh(); goto case_3;} else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} - else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} + if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_10;} + else if ((ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_15;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 14: case_14: - if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} + if ((ch >= CHL(' ') && ch <= CHL('~'))) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} - else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else if (ch == L'=') {AddCh(); goto case_11;} + if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_10;} + else if ((ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_15;} + else if (ch == CHL('=')) {AddCh(); goto case_11;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 16: {t->kind = 18 /* "=" */; loopState = false; break;} @@ -813,17 +813,17 @@ Token* Scanner::NextToken() { {t->kind = 41 /* ".)" */; loopState = false; break;} case 31: recEnd = pos; recKind = 19 /* "." 
*/; - if (ch == L'.') {AddCh(); goto case_19;} - else if (ch == L'>') {AddCh(); goto case_22;} - else if (ch == L')') {AddCh(); goto case_30;} + if (ch == CHL('.')) {AddCh(); goto case_19;} + else if (ch == CHL('>')) {AddCh(); goto case_22;} + else if (ch == CHL(')')) {AddCh(); goto case_30;} else {t->kind = 19 /* "." */; loopState = false; break;} case 32: recEnd = pos; recKind = 25 /* "<" */; - if (ch == L'.') {AddCh(); goto case_21;} + if (ch == CHL('.')) {AddCh(); goto case_21;} else {t->kind = 25 /* "<" */; loopState = false; break;} case 33: recEnd = pos; recKind = 31 /* "(" */; - if (ch == L'.') {AddCh(); goto case_29;} + if (ch == CHL('.')) {AddCh(); goto case_29;} else {t->kind = 31 /* "(" */; loopState = false; break;} } diff --git a/src/Scanner.frame b/src/Scanner.frame index f0fb4c2..3c2c5fb 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -47,6 +47,41 @@ Scanner.h Specification #include #endif +//#define WITHOUT_WCHAR + +#ifdef WITHOUT_WCHAR +#define wchar_t char +#define SFMT_LCHR "%c" +#define SFMT_SLCHR "%c" +#define SFMT_HSTR "%s" +#define SFMT_LSTR "%s" +#define SFMT_SLSTR "%s" +#define SFMT_LS "s" +#define STRL(s) s +#define CHL(s) s +#define wprintf printf +#define swprintf snprintf +#define fwprintf fprintf +#define fwscanf fscanf +#define swscanf scanf +#define wcslen strlen +#define wcscpy strcpy +#define wcsncpy strncpy +#define wcscmp strcmp +#define wcschr strchr +#define wcsrchr strrchr +#else +#include +#define SFMT_HSTR "%hs" +#define SFMT_LSTR "%ls" +#define SFMT_SLSTR L"%ls" +#define SFMT_LS "ls" +#define SFMT_LCHR "%lc" +#define SFMT_SLCHR L"%lc" +#define STRL(s) L##s +#define CHL(s) L##s +#endif + #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 @@ -62,7 +97,7 @@ Scanner.h Specification #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR L':' +#define 
COCO_CPP_NAMESPACE_SEPARATOR CHL(':') -->namespace_open @@ -355,13 +390,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((L'a' <= data[i]) && (data[i] <= L'z')) { - newData[i] = data[i] + (L'A' - L'a'); + if ((CHL('a') <= data[i]) && (data[i] <= CHL('z'))) { + newData[i] = data[i] + (CHL('A') - CHL('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = L'\0'; + newData[dataLen] = CHL('\0'); return newData; } @@ -378,12 +413,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((L'A' <= ch) && (ch <= L'Z')) { - newData[i] = ch - (L'A' - L'a'); + if ((CHL('A') <= ch) && (ch <= CHL('Z'))) { + newData[i] = ch - (CHL('A') - CHL('a')); } else { newData[i] = ch; } } - newData[dataLen] = L'\0'; + newData[dataLen] = CHL('\0'); return newData; } @@ -647,7 +682,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(L"--- buffer out of bounds access, position: %d\n", value); + wprintf(STRL("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -732,7 +767,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(L"--- Cannot open file %ls\n", fileName); + wprintf(STRL("--- Cannot open file %ls\n"), fileName); exit(1); } coco_string_delete(chFileName); @@ -772,7 +807,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + wprintf(STRL("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -783,7 +818,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(L"Illegal byte order mark at start of file"); + wprintf(STRL("Illegal byte 
order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -804,7 +839,7 @@ void Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == CHL('\r') && buffer->Peek() != CHL('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } -->casing1 @@ -861,7 +896,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too long token value\n"); + wprintf(STRL("--- Too long token value\n")); exit(1); } CreateHeapBlock(); @@ -870,12 +905,12 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = L'\0'; + t->val[tlen] = CHL('\0'); } Token* Scanner::NextToken() { while(true) { - while (ch == ' ' || + while (ch == CHL(' ') || -->scan1 ) NextCh(); -->scan2 diff --git a/src/Scanner.h b/src/Scanner.h index 050bb06..0431ac9 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -42,6 +42,41 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include #endif +//#define WITHOUT_WCHAR + +#ifdef WITHOUT_WCHAR +#define wchar_t char +#define SFMT_LCHR "%c" +#define SFMT_SLCHR "%c" +#define SFMT_HSTR "%s" +#define SFMT_LSTR "%s" +#define SFMT_SLSTR "%s" +#define SFMT_LS "s" +#define STRL(s) s +#define CHL(s) s +#define wprintf printf +#define swprintf snprintf +#define fwprintf fprintf +#define fwscanf fscanf +#define swscanf scanf +#define wcslen strlen +#define wcscpy strcpy +#define wcsncpy strncpy +#define wcscmp strcmp +#define wcschr strchr +#define wcsrchr strrchr +#else +#include +#define SFMT_HSTR "%hs" +#define SFMT_LSTR "%ls" +#define SFMT_SLSTR L"%ls" +#define SFMT_LS "ls" +#define SFMT_LCHR "%lc" +#define SFMT_SLCHR L"%lc" +#define STRL(s) L##s +#define CHL(s) L##s +#endif + #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 @@ -57,7 +92,7 @@ Coco/R itself) does not fall under the GNU General Public License. #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR L':' +#define COCO_CPP_NAMESPACE_SEPARATOR CHL(':') namespace Coco { diff --git a/src/StringBuilder.cpp b/src/StringBuilder.cpp index a9cf4b4..203f69d 100644 --- a/src/StringBuilder.cpp +++ b/src/StringBuilder.cpp @@ -70,7 +70,7 @@ void StringBuilder::Append(const wchar_t value) { data[length] = value; length++; - data[length] = '\0'; + data[length] = CHL('\0'); } void StringBuilder::Append(const wchar_t *value) { diff --git a/src/Tab.cpp b/src/Tab.cpp index 796ef4a..fb2e727 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -50,7 +50,7 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; errors = &parser->errors; - eofSy = NewSym(Node::t, L"EOF", 0, 0); + eofSy = NewSym(Node::t, STRL("EOF"), 0, 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); checkEOF = true; visited = allSyncSets = NULL; @@ -79,8 +79,8 @@ Tab::~Tab() { Symbol* Tab::NewSym(int typ, const wchar_t* name, 
int line, int col) { if (coco_string_length(name) == 2 && name[0] == '"') { - parser->SemErr(L"empty token not allowed"); - name = coco_string_create(L"???"); + parser->SemErr(STRL("empty token not allowed")); + name = coco_string_create(STRL("???")); } Symbol *sym = new Symbol(typ, name, line, col); @@ -115,23 +115,23 @@ int Tab::Num(const Node *p) { void Tab::PrintSym(const Symbol *sym) { wchar_t *paddedName = Name(sym->name); - fwprintf(trace, L"%3d %14s %s", sym->n, paddedName, nTyp[sym->typ]); + fwprintf(trace, STRL("%3d %14s %s"), sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); - if (sym->attrPos==NULL) fwprintf(trace, L" false "); else fwprintf(trace, L" true "); + if (sym->attrPos==NULL) fwprintf(trace, STRL(" false ")); else fwprintf(trace, STRL(" true ")); if (sym->typ == Node::nt) { - fwprintf(trace, L"%5d", Num(sym->graph)); - if (sym->deletable) fwprintf(trace, L" true "); else fwprintf(trace, L" false "); + fwprintf(trace, STRL("%5d"), Num(sym->graph)); + if (sym->deletable) fwprintf(trace, STRL(" true ")); else fwprintf(trace, STRL(" false ")); } else - fwprintf(trace, L" "); + fwprintf(trace, STRL(" ")); - fwprintf(trace, L"%5d %s\n", sym->line, tKind[sym->tokenKind]); + fwprintf(trace, STRL("%5d %s\n"), sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { - fwprintf(trace, L"Symbol Table:\n"); - fwprintf(trace, L"------------\n\n"); - fwprintf(trace, L" nr name typ hasAt graph del line tokenKind\n"); + fwprintf(trace, STRL("Symbol Table:\n")); + fwprintf(trace, STRL("------------\n\n")); + fwprintf(trace, STRL(" nr name typ hasAt graph del line tokenKind\n")); Symbol *sym; int i; @@ -149,16 +149,16 @@ void Tab::PrintSymbolTable() { } - fwprintf(trace, L"\nLiteral Tokens:\n"); - fwprintf(trace, L"--------------\n"); + fwprintf(trace, STRL("\nLiteral Tokens:\n")); + fwprintf(trace, STRL("--------------\n")); Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - 
fwprintf(trace, L"_%ls = %ls.\n", ((Symbol*) (e->val))->name, e->key); + fwprintf(trace, STRL("_%ls = %ls.\n"), ((Symbol*) (e->val))->name, e->key); } delete iter; - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("\n")); } void Tab::PrintSet(const BitArray *s, int indent) { @@ -170,15 +170,15 @@ void Tab::PrintSet(const BitArray *s, int indent) { if ((*s)[sym->n]) { len = coco_string_length(sym->name); if (col + len >= 80) { - fwprintf(trace, L"\n"); - for (col = 1; col < indent; col++) fwprintf(trace, L" "); + fwprintf(trace, STRL("\n")); + for (col = 1; col < indent; col++) fwprintf(trace, STRL(" ")); } - fwprintf(trace, L"%ls ", sym->name); + fwprintf(trace, STRL("%ls "), sym->name); col += len + 1; } } - if (col == indent) fwprintf(trace, L"-- empty set --"); - fwprintf(trace, L"\n"); + if (col == indent) fwprintf(trace, STRL("-- empty set --")); + fwprintf(trace, STRL("\n")); } //--------------------------------------------------------------------- @@ -273,7 +273,7 @@ Graph* Tab::StrToGraph(const wchar_t* str) { wchar_t *subStr = coco_string_create(str, 1, coco_string_length(str)-2); wchar_t *s = Unescape(subStr); coco_string_delete(subStr); - if (coco_string_length(s) == 0) parser->SemErr(L"empty token not allowed"); + if (coco_string_length(s) == 0) parser->SemErr(STRL("empty token not allowed")); Graph *g = new Graph(); g->r = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { @@ -335,15 +335,15 @@ typedef wchar_t wchar_t_10[10]; static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { - coco_swprintf(format, 10, L" "); + coco_swprintf(format, 10, STRL(" ")); } else { - coco_swprintf(format, 10, L"%5d", pos->beg); + coco_swprintf(format, 10, STRL("%5d"), pos->beg); } return format; } wchar_t* Tab::Name(const wchar_t *name) { - wchar_t *name2 = coco_string_create_append(name, L" "); + wchar_t *name2 = coco_string_create_append(name, STRL(" ")); wchar_t *subName2 = coco_string_create(name2, 0, 12); coco_string_delete(name2); 
return subName2; @@ -352,45 +352,45 @@ wchar_t* Tab::Name(const wchar_t *name) { } void Tab::PrintNodes() { - fwprintf(trace, L"Graph nodes:\n"); - fwprintf(trace, L"----------------------------------------------------\n"); - fwprintf(trace, L" n type name next down sub pos line\n"); - fwprintf(trace, L" val code\n"); - fwprintf(trace, L"----------------------------------------------------\n"); + fwprintf(trace, STRL("Graph nodes:\n")); + fwprintf(trace, STRL("----------------------------------------------------\n")); + fwprintf(trace, STRL(" n type name next down sub pos line\n")); + fwprintf(trace, STRL(" val code\n")); + fwprintf(trace, STRL("----------------------------------------------------\n")); Node *p; wchar_t_10 format; for (int i=0; in, (nTyp[p->typ])); + fwprintf(trace, STRL("%4d %s "), p->n, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); - fwprintf(trace, L"%12s ", paddedName); + fwprintf(trace, STRL("%12s "), paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { CharClass *c = classes[p->val]; wchar_t *paddedName = Name(c->name); - fwprintf(trace, L"%12s ", paddedName); + fwprintf(trace, STRL("%12s "), paddedName); coco_string_delete(paddedName); - } else fwprintf(trace, L" "); - fwprintf(trace, L"%5d ", Ptr(p->next, p->up)); + } else fwprintf(trace, STRL(" ")); + fwprintf(trace, STRL("%5d "), Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { - fwprintf(trace, L" %5s", TabPos(p->pos, format)); + fwprintf(trace, STRL(" %5s"), TabPos(p->pos, format)); } if (p->typ == Node::chr) { - fwprintf(trace, L"%5d %5d ", p->val, p->code); + fwprintf(trace, STRL("%5d %5d "), p->val, p->code); } if (p->typ == Node::clas) { - fwprintf(trace, L" %5d ", p->code); + fwprintf(trace, STRL(" %5d "), p->code); } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { - fwprintf(trace, L"%5d %5d ", Ptr(p->down, false), Ptr(p->sub, false)); + fwprintf(trace, 
STRL("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); } if (p->typ == Node::sem) { - fwprintf(trace, L" %5s", TabPos(p->pos, format)); + fwprintf(trace, STRL(" %5s"), TabPos(p->pos, format)); } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { - fwprintf(trace, L" "); + fwprintf(trace, STRL(" ")); } - fwprintf(trace, L"%5d\n", p->line); + fwprintf(trace, STRL("%5d\n"), p->line); } - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("\n")); } //--------------------------------------------------------------------- @@ -400,7 +400,7 @@ void Tab::PrintNodes() { CharClass* Tab::NewCharClass(const wchar_t* name, CharSet *s) { CharClass *c; - if (coco_string_equal(name, L"#")) { + if (coco_string_equal(name, STRL("#"))) { wchar_t* temp = coco_string_create_append(name, (wchar_t) dummyName++); c = new CharClass(temp, s); coco_string_delete(temp); @@ -437,11 +437,11 @@ CharSet* Tab::CharClassSet(int i) { //----------- character class printing wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { - if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') { - coco_swprintf(format, 10, L"%d", ch); + if (ch < CHL(' ') || ch >= 127 || ch == CHL('\'') || ch == CHL('\\')) { + coco_swprintf(format, 10, STRL("%d"), ch); return format; } else { - coco_swprintf(format, 10, L"'%lc'", ch); + coco_swprintf(format, 10, STRL("'%lc'"), ch); return format; } } @@ -452,11 +452,11 @@ void Tab::WriteCharSet(const CharSet *s) { if (r->from < r->to) { wchar_t *from = TabCh(r->from, fmt1); wchar_t *to = TabCh(r->to, fmt2); - fwprintf(trace, L"%ls .. %ls ", from, to); + fwprintf(trace, STRL("%ls .. 
%ls "), from, to); } else { wchar_t *from = TabCh(r->from, fmt1); - fwprintf(trace, L"%ls ", from); + fwprintf(trace, STRL("%ls "), from); } } } @@ -466,17 +466,17 @@ void Tab::WriteCharClasses () { for (int i=0; iname, L" "); + wchar_t* format2 = coco_string_create_append(c->name, STRL(" ")); wchar_t* format = coco_string_create(format2, 0, 10); - coco_string_merge(format, L": "); + coco_string_merge(format, STRL(": ")); fwprintf(trace, format); WriteCharSet(c->set); - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("\n")); coco_string_delete(format); coco_string_delete(format2); } - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("\n")); } //--------------------------------------------------------------------- @@ -527,9 +527,9 @@ BitArray* Tab::First(const Node *p) { BitArray mark(nodes.Count); BitArray *fs = First0(p, &mark); if (ddt[3]) { - fwprintf(trace, L"\n"); - if (p != NULL) fwprintf(trace, L"First: node = %d\n", p->n ); - else fwprintf(trace, L"First: node = null\n"); + fwprintf(trace, STRL("\n")); + if (p != NULL) fwprintf(trace, STRL("First: node = %d\n"), p->n ); + else fwprintf(trace, STRL("First: node = null\n")); PrintSet(fs, 0); } return fs; @@ -753,7 +753,7 @@ void Tab::CompDeletableSymbols() { for (i=0; ideletable) - wprintf(L" %ls deletable\n", sym->name); + wprintf(STRL(" %ls deletable\n"), sym->name); } } @@ -773,29 +773,29 @@ void Tab::CompSymbolSets() { CompFollowSets(); CompSyncSets(); if (ddt[1]) { - fwprintf(trace, L"\n"); - fwprintf(trace, L"First & follow symbols:\n"); - fwprintf(trace, L"----------------------\n\n"); + fwprintf(trace, STRL("\n")); + fwprintf(trace, STRL("First & follow symbols:\n")); + fwprintf(trace, STRL("----------------------\n\n")); Symbol *sym; for (int i=0; iname); - fwprintf(trace, L"first: "); PrintSet(sym->first, 10); - fwprintf(trace, L"follow: "); PrintSet(sym->follow, 10); - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("%ls\n"), sym->name); + fwprintf(trace, STRL("first: ")); PrintSet(sym->first, 10); + 
fwprintf(trace, STRL("follow: ")); PrintSet(sym->follow, 10); + fwprintf(trace, STRL("\n")); } } if (ddt[4]) { - fwprintf(trace, L"\n"); - fwprintf(trace, L"ANY and SYNC sets:\n"); - fwprintf(trace, L"-----------------\n"); + fwprintf(trace, STRL("\n")); + fwprintf(trace, STRL("ANY and SYNC sets:\n")); + fwprintf(trace, STRL("-----------------\n")); Node *p; for (int i=0; ityp == Node::any || p->typ == Node::sync) { - fwprintf(trace, L"%4d %4s ", p->n, nTyp[p->typ]); + fwprintf(trace, STRL("%4d %4s "), p->n, nTyp[p->typ]); PrintSet(p->set, 11); } } @@ -813,16 +813,16 @@ wchar_t Tab::Hex2Char(const wchar_t* s, int len) { if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0'); else if ('a' <= ch && ch <= 'f') val = 16 * val + (10 + ch - 'a'); else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A'); - else parser->SemErr(L"bad escape sequence in string or character"); + else parser->SemErr(STRL("bad escape sequence in string or character")); } if (val >= COCO_WCHAR_MAX) {/* pdt */ - parser->SemErr(L"bad escape sequence in string or character"); + parser->SemErr(STRL("bad escape sequence in string or character")); } return (wchar_t) val; } static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { - coco_swprintf(format, 10, L"\\0x%04x", ch); + coco_swprintf(format, 10, STRL("\\0x%04x"), ch); return format; } @@ -832,28 +832,28 @@ wchar_t* Tab::Unescape (const wchar_t* s) { int i = 0; int len = coco_string_length(s); while (i < len) { - if (s[i] == '\\') { + if (s[i] == CHL('\\')) { switch (s[i+1]) { - case L'\\': buf.Append(L'\\'); i += 2; break; - case L'\'': buf.Append(L'\''); i += 2; break; - case L'\"': buf.Append(L'\"'); i += 2; break; - case L'r': buf.Append(L'\r'); i += 2; break; - case L'n': buf.Append(L'\n'); i += 2; break; - case L't': buf.Append(L'\t'); i += 2; break; - case L'0': buf.Append(L'\0'); i += 2; break; - case L'a': buf.Append(L'\a'); i += 2; break; - case L'b': buf.Append(L'\b'); i += 2; break; - case L'f': buf.Append(L'\f'); 
i += 2; break; - case L'v': buf.Append(L'\v'); i += 2; break; - case L'u': case L'x': + case CHL('\\'): buf.Append(CHL('\\')); i += 2; break; + case CHL('\''): buf.Append(CHL('\'')); i += 2; break; + case CHL('\"'): buf.Append(CHL('\"')); i += 2; break; + case CHL('r'): buf.Append(CHL('\r')); i += 2; break; + case CHL('n'): buf.Append(CHL('\n')); i += 2; break; + case CHL('t'): buf.Append(CHL('\t')); i += 2; break; + case CHL('0'): buf.Append(CHL('\0')); i += 2; break; + case CHL('a'): buf.Append(CHL('\a')); i += 2; break; + case CHL('b'): buf.Append(CHL('\b')); i += 2; break; + case CHL('f'): buf.Append(CHL('\f')); i += 2; break; + case CHL('v'): buf.Append(CHL('\v')); i += 2; break; + case CHL('u'): case CHL('x'): if (i + 6 <= coco_string_length(s)) { buf.Append(Hex2Char(s +i+2, 4)); i += 6; break; } else { - parser->SemErr(L"bad escape sequence in string or character"); + parser->SemErr(STRL("bad escape sequence in string or character")); i = coco_string_length(s); break; } default: - parser->SemErr(L"bad escape sequence in string or character"); + parser->SemErr(STRL("bad escape sequence in string or character")); i += 2; break; } } else { @@ -874,14 +874,14 @@ wchar_t* Tab::Escape (const wchar_t* s) { for (int i=0; i < len; i++) { ch = s[i]; switch(ch) { - case L'\\': buf.Append(L"\\\\"); break; - case L'\'': buf.Append(L"\\'"); break; - case L'\"': buf.Append(L"\\\""); break; - case L'\t': buf.Append(L"\\t"); break; - case L'\r': buf.Append(L"\\r"); break; - case L'\n': buf.Append(L"\\n"); break; + case CHL('\\'): buf.Append(STRL("\\\\")); break; + case CHL('\''): buf.Append(STRL("\\'")); break; + case CHL('\"'): buf.Append(STRL("\\\"")); break; + case CHL('\t'): buf.Append(STRL("\\t")); break; + case CHL('\r'): buf.Append(STRL("\\r")); break; + case CHL('\n'): buf.Append(STRL("\\n")); break; default: - if ((ch < L' ') || (ch > 0x7f)) { + if ((ch < CHL(' ')) || (ch > 0x7f)) { wchar_t* res = TabChar2Hex(ch, fmt); buf.Append(res); } else @@ -972,7 +972,7 @@ 
bool Tab::NoCircularProductions() { for (i=0; icount++; - wprintf(L" %ls --> %ls", n->left->name, n->right->name); + wprintf(STRL(" %ls --> %ls"), n->left->name, n->right->name); } for(int i=0; iname, curSy->line, curSy->col); - if (sym != NULL) wprintf(L"%ls is ", sym->name); + wprintf(STRL(" LL1 warning in %ls:%d:%d: "), curSy->name, curSy->line, curSy->col); + if (sym != NULL) wprintf(STRL("%ls is "), sym->name); switch (cond) { - case 1: wprintf(L"start of several alternatives\n"); break; - case 2: wprintf(L"start & successor of deletable structure\n"); break; - case 3: wprintf(L"an ANY node that matches no symbol\n"); break; - case 4: wprintf(L"contents of [...] or {...} must not be deletable\n"); break; + case 1: wprintf(STRL("start of several alternatives\n")); break; + case 2: wprintf(STRL("start & successor of deletable structure\n")); break; + case 3: wprintf(STRL("an ANY node that matches no symbol\n")); break; + case 4: wprintf(STRL("contents of [...] or {...} must not be deletable\n")); break; } } @@ -1009,16 +1009,16 @@ int Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { /* print the path for first set that contains token tok for the graph rooted at p */ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { while (p != NULL) { - //if(p->sym) wprintf(L"%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); + //if(p->sym) wprintf(STRL("%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); switch (p->typ) { case Node::nt: { if (p->sym->firstReady) { if(p->sym->first->Get(tok)) { if(coco_string_length(indent) == 1) - wprintf(L"%ls=> %ls:%d:%d:\n", indent, p->sym->name, p->line, p->col); - wprintf(L"%ls-> %ls:%d:%d:\n", indent, p->sym->name, p->sym->line, p->sym->col); + wprintf(STRL("%ls=> %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(STRL("%ls-> %ls:%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); if(p->sym->graph) { - wchar_t *new_indent = coco_string_create_append(indent, L" 
"); + wchar_t *new_indent = coco_string_create_append(indent, STRL(" ")); PrintFirstPath(p->sym->graph, tok, new_indent); coco_string_delete(new_indent); } @@ -1029,7 +1029,7 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { } case Node::t: case Node::wt: { if(p->sym->n == tok) - wprintf(L"%ls= %ls:%d:%d:\n", indent, p->sym->name, p->line, p->col); + wprintf(STRL("%ls= %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); break; } case Node::any: { @@ -1138,9 +1138,9 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { if (q->sub->typ == Node::rslv) { BitArray *fs = Expected(q->sub->next, curSy); if (Sets::Intersect(fs, &soFar)) - ResErr(q->sub, L"Warning: Resolver will never be evaluated. Place it at previous conflicting alternative."); + ResErr(q->sub, STRL("Warning: Resolver will never be evaluated. Place it at previous conflicting alternative.")); if (!Sets::Intersect(fs, &expected)) - ResErr(q->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); + ResErr(q->sub, STRL("Warning: Misplaced resolver: no LL(1) conflict.")); delete fs; } else { BitArray *ba = Expected(q->sub, curSy); @@ -1156,12 +1156,12 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { bool bsi = Sets::Intersect(fs, fsNext); delete fs; delete fsNext; if (!bsi) - ResErr(p->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); + ResErr(p->sub, STRL("Warning: Misplaced resolver: no LL(1) conflict.")); } CheckRes(p->sub, true); } else if (p->typ == Node::rslv) { if (!rslvAllowed) - ResErr(p, L"Warning: Misplaced resolver: no alternative."); + ResErr(p, STRL("Warning: Misplaced resolver: no alternative.")); } if (p->up) break; @@ -1187,7 +1187,7 @@ bool Tab::NtsComplete() { sym = nonterminals[i]; if (sym->graph == NULL) { complete = false; errors->count++; - wprintf(L" No production for %ls\n", sym->name); + wprintf(STRL(" No production for %ls\n"), sym->name); } } return complete; @@ -1220,7 +1220,7 @@ bool Tab::AllNtReached() { sym = nonterminals[i]; 
if (!((*visited)[sym->n])) { ok = false; errors->count++; - wprintf(L" %ls cannot be reached\n", sym->name); + wprintf(STRL(" %ls cannot be reached\n"), sym->name); } } return ok; @@ -1260,7 +1260,7 @@ bool Tab::AllNtToTerm() { sym = nonterminals[i]; if (!mark[sym->n]) { ok = false; errors->count++; - wprintf(L" %ls cannot be derived to terminals\n", sym->name); + wprintf(STRL(" %ls cannot be derived to terminals\n"), sym->name); } } return ok; @@ -1292,14 +1292,14 @@ void Tab::XRef() { } } // print cross reference list - fwprintf(trace, L"\n"); - fwprintf(trace, L"Cross reference list:\n"); - fwprintf(trace, L"--------------------\n\n"); + fwprintf(trace, STRL("\n")); + fwprintf(trace, STRL("Cross reference list:\n")); + fwprintf(trace, STRL("--------------------\n\n")); for (i=0; iname); - fwprintf(trace, L" %12ls", paddedName); + fwprintf(trace, STRL(" %12ls"), paddedName); coco_string_delete(paddedName); ArrayList *list = (ArrayList*)(xref.Get(sym)); int col = 14; @@ -1307,14 +1307,14 @@ void Tab::XRef() { for (j=0; jCount; j++) { line = (int)(ssize_t)((*list)[j]); if (col + 5 > 80) { - fwprintf(trace, L"\n"); - for (col = 1; col <= 14; col++) fwprintf(trace, L" "); + fwprintf(trace, STRL("\n")); + for (col = 1; col <= 14; col++) fwprintf(trace, STRL(" ")); } - fwprintf(trace, L"%5d", line); col += 5; + fwprintf(trace, STRL("%5d"), line); col += 5; } - fwprintf(trace, L"\n"); + fwprintf(trace, STRL("\n")); } - fwprintf(trace, L"\n\n"); + fwprintf(trace, STRL("\n\n")); for(int i=0; i < xref.Count; ++i) { SortedEntry *se = xref[i]; /* @@ -1334,16 +1334,16 @@ void Tab::SetDDT(const wchar_t* s) { int len = coco_string_length(st); for (int i = 0; i < len; i++) { ch = st[i]; - if (L'0' <= ch && ch <= L'9') ddt[ch - L'0'] = true; + if (CHL('0') <= ch && ch <= CHL('9')) ddt[ch - CHL('0')] = true; else switch (ch) { - case L'A' : ddt[0] = true; break; // trace automaton - case L'F' : ddt[1] = true; break; // list first/follow sets - case L'G' : ddt[2] = true; break; // 
print syntax graph - case L'I' : ddt[3] = true; break; // trace computation of first sets - case L'J' : ddt[4] = true; break; // print ANY and SYNC sets - case L'P' : ddt[8] = true; break; // print statistics - case L'S' : ddt[6] = true; break; // list symbol table - case L'X' : ddt[7] = true; break; // list cross reference table + case CHL('A') : ddt[0] = true; break; // trace automaton + case CHL('F') : ddt[1] = true; break; // list first/follow sets + case CHL('G') : ddt[2] = true; break; // print syntax graph + case CHL('I') : ddt[3] = true; break; // trace computation of first sets + case CHL('J') : ddt[4] = true; break; // print ANY and SYNC sets + case CHL('P') : ddt[8] = true; break; // print statistics + case CHL('S') : ddt[6] = true; break; // list symbol table + case CHL('X') : ddt[7] = true; break; // list cross reference table default : break; } } @@ -1362,10 +1362,10 @@ void Tab::SetOption(const wchar_t* s) { wchar_t *name = coco_string_create(s, 0, nameLenght); wchar_t *value = coco_string_create(s, valueIndex); - if (coco_string_equal(L"$namespace", name)) { + if (coco_string_equal(STRL("$namespace"), name)) { if (nsName == NULL) nsName = coco_string_create(value); - } else if (coco_string_equal(L"$checkEOF", name)) { - checkEOF = coco_string_equal(L"true", value); + } else if (coco_string_equal(STRL("$checkEOF"), name)) { + checkEOF = coco_string_equal(STRL("true"), value); } delete [] name; From 28d98093486cfa33bb0d6820fdac45ba8b3c0c4a Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 10:32:35 +0200 Subject: [PATCH 37/95] Remove several unneeded calls to 'printf' family functions --- src/Coco.cpp | 45 ++++++++-------- src/DFA.cpp | 116 ++++++++++++++++++++--------------------- src/ParserGen.cpp | 128 +++++++++++++++++++++++----------------------- src/Scanner.frame | 3 +- src/Scanner.h | 3 +- src/Tab.cpp | 96 ++++++++++++++++++---------------- 6 files changed, 197 insertions(+), 194 deletions(-) diff --git a/src/Coco.cpp b/src/Coco.cpp 
index 94a25af..fce8b14 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -61,7 +61,7 @@ int main(int argc, char *argv_[]) { #error unknown compiler! #endif - wprintf(STRL("Coco/R (Dec 01, 2018)\n")); + wprintf(STRL("%s"), "Coco/R (Dec 01, 2018)\n"); wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; @@ -98,7 +98,7 @@ int main(int argc, char *argv_[]) { chTrFileName = coco_string_create_char(traceFileName); if ((parser.trace = fopen(chTrFileName, "w")) == NULL) { - wprintf(STRL("-- could not open %hs\n"), chTrFileName); + wprintf(STRL("-- could not open %s\n"), chTrFileName); exit(1); } @@ -130,7 +130,7 @@ int main(int argc, char *argv_[]) { if (fileSize == 0) { remove(chTrFileName); } else { - wprintf(STRL("trace output is in %hs\n"), chTrFileName); + wprintf(STRL("trace output is in %s\n"), chTrFileName); } coco_string_delete(file); @@ -142,25 +142,26 @@ int main(int argc, char *argv_[]) { } } else { - wprintf(STRL("Usage: Coco Grammar.ATG {Option}\n")); - wprintf(STRL("Options:\n")); - wprintf(STRL(" -namespace \n")); - wprintf(STRL(" -frames \n")); - wprintf(STRL(" -trace \n")); - wprintf(STRL(" -o \n")); - wprintf(STRL(" -lines\n")); - wprintf(STRL(" -ignoreGammarErrors\n")); - wprintf(STRL("Valid characters in the trace string:\n")); - wprintf(STRL(" A trace automaton\n")); - wprintf(STRL(" F list first/follow sets\n")); - wprintf(STRL(" G print syntax graph\n")); - wprintf(STRL(" I trace computation of first sets\n")); - wprintf(STRL(" J list ANY and SYNC sets\n")); - wprintf(STRL(" P print statistics\n")); - wprintf(STRL(" S list symbol table\n")); - wprintf(STRL(" X list cross reference table\n")); - wprintf(STRL("Scanner.frame and Parser.frame files needed in ATG directory\n")); - wprintf(STRL("or in a directory specified in the -frames option.\n")); + wprintf(STRL("%s"), + "Usage: Coco Grammar.ATG {Option}\n" + "Options:\n" + " -namespace \n" + " -frames \n" + " -trace \n" + " -o \n" 
+ " -lines\n" + " -ignoreGammarErrors\n" + "Valid characters in the trace string:\n" + " A trace automaton\n" + " F list first/follow sets\n" + " G print syntax graph\n" + " I trace computation of first sets\n" + " J list ANY and SYNC sets\n" + " P print statistics\n" + " S list symbol table\n" + " X list cross reference table\n" + "Scanner.frame and Parser.frame files needed in ATG directory\n" + "or in a directory specified in the -frames option.\n"); } coco_string_delete(srcName); diff --git a/src/DFA.cpp b/src/DFA.cpp index c87a919..4789fac 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -71,7 +71,7 @@ void DFA::PutRange(CharSet *s) { wchar_t *to = DFACh((wchar_t) r->to, fmt2); fwprintf(gen, STRL("(ch >= %ls && ch <= %ls)"), from, to); } - if (r->next != NULL) fwprintf(gen, STRL(" || ")); + if (r->next != NULL) fputws(STRL(" || "), gen); } } @@ -373,31 +373,30 @@ void DFA::MakeDeterministic() { } void DFA::PrintStates() { - fwprintf(trace, STRL("\n")); - fwprintf(trace, STRL("---------- states ----------\n")); + fwprintf(trace, STRL("\n---------- states ----------\n")); wchar_t_10 fmt; for (State *state = firstState; state != NULL; state = state->next) { bool first = true; - if (state->endOf == NULL) fwprintf(trace, STRL(" ")); + if (state->endOf == NULL) fputws(STRL(" "), trace); else { wchar_t *paddedName = tab->Name(state->endOf->name); fwprintf(trace, STRL("E(%12s)"), paddedName); coco_string_delete(paddedName); } fwprintf(trace, STRL("%3d:"), state->nr); - if (state->firstAction == NULL) fwprintf(trace, STRL("\n")); + if (state->firstAction == NULL) fputws(STRL("\n"), trace); for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (first) {fwprintf(trace, STRL(" ")); first = false;} else fwprintf(trace, STRL(" ")); + if (first) {fputws(STRL(" "), trace); first = false;} else fputws(STRL(" "), trace); if (action->typ == Node::clas) fwprintf(trace, STRL("%ls"), tab->classes[action->sym]->name); else fwprintf(trace, STRL("%3s"), 
DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, STRL("%3d"), targ->state->nr); } - if (action->tc == Node::contextTrans) fwprintf(trace, STRL(" context\n")); else fwprintf(trace, STRL("\n")); + if (action->tc == Node::contextTrans) fputws(STRL(" context\n"), trace); else fputws(STRL("\n"), trace); } } - fwprintf(trace, STRL("\n---------- character classes ----------\n")); + fputws(STRL("\n---------- character classes ----------\n"), trace); tab->WriteCharClasses(); } @@ -506,8 +505,8 @@ void DFA::NewComment(const Node *from, const Node *to, bool nested) { //------------------------ scanner generation ---------------------- void DFA::GenCommentIndented(int n, const wchar_t *s) { - for(int i= 1; i < n; ++i) fwprintf(gen, STRL("\t")); - fwprintf(gen, s); + for(int i= 1; i < n; ++i) fputws(STRL("\t"), gen); + fputws(s, gen); } void DFA::GenComBody(const Comment *com) { @@ -521,9 +520,10 @@ void DFA::GenComBody(const Comment *com) { fwprintf(gen, STRL("%ls) {\n"), res); if (imaxStop == 0) { - fwprintf(gen, STRL("\t\t\t\tlevel--;\n")); - fwprintf(gen, STRL("\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n")); - fwprintf(gen, STRL("\t\t\t\tNextCh();\n")); + fwprintf(gen, STRL("%s"), + "\t\t\t\tlevel--;\n" + "\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n" + "\t\t\t\tNextCh();\n"); } else { int currIndent, indent = imax - 1; for(int sidx = 1; sidx <= imaxStop; ++sidx) { @@ -545,7 +545,7 @@ void DFA::GenComBody(const Comment *com) { wchar_t* res = DFAChCond(com->start[0], fmt); fwprintf(gen, STRL(" else if (%ls) {\n"), res); if (imaxStop == 0) - fwprintf(gen, STRL("\t\t\tlevel++; NextCh();\n")); + fputws(STRL("\t\t\tlevel++; NextCh();\n"), gen); else { int indent = imax - 1; for(int sidx = 1; sidx <= imax; ++sidx) { @@ -570,12 +570,11 @@ void DFA::GenCommentHeader(const Comment *com, int i) { } void DFA::GenComment(const Comment *com, int i) { 
- fwprintf(gen, STRL("\n")); - fwprintf(gen, STRL("bool Scanner::Comment%d() "), i); - fwprintf(gen, STRL("{\n")); - fwprintf(gen, STRL("\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n")); wchar_t_20 fmt; - fwprintf(gen, STRL("\tNextCh();\n")); + fwprintf(gen, STRL("\nbool Scanner::Comment%d() {\n"), i); + fwprintf(gen, STRL("%s"), + "\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n" + "\tNextCh();\n"); int imax = coco_string_length(com->start)-1; if (imax == 0) { GenComBody(com); @@ -589,10 +588,11 @@ void DFA::GenComment(const Comment *com, int i) { for(int sidx = imax; sidx > 0; --sidx) { GenCommentIndented(sidx, STRL("\t}\n")); } - fwprintf(gen, STRL("\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n")); - fwprintf(gen, STRL("\treturn false;\n")); + fwprintf(gen, STRL("%s"), + "\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n" + "\treturn false;\n"); } - fwprintf(gen, STRL("}\n")); + fputws(STRL("}\n"), gen); } const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in Tab.literals @@ -628,7 +628,7 @@ void DFA::GenLiterals () { } // sym.name stores literals with quotes, e.g. 
"\"Literal\"" - fwprintf(gen, STRL("\tkeywords.set(STRL(")); + fputws(STRL("\tkeywords.set(STRL("), gen); // write keyword, escape non printable characters for (int k = 0; name[k] != CHL('\0'); k++) { wchar_t c = name[k]; @@ -666,7 +666,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { void DFA::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, STRL("} // namespace\n")); + fputws(STRL("} // namespace\n"), gen); } } @@ -710,48 +710,47 @@ void DFA::WriteState(const State *state) { wchar_t_20 fmt; for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (action == state->firstAction) fwprintf(gen, STRL("\t\t\tif (")); - else fwprintf(gen, STRL("\t\t\telse if (")); + if (action == state->firstAction) fputws(STRL("\t\t\tif ("), gen); + else fputws(STRL("\t\t\telse if ("), gen); if (action->typ == Node::chr) { wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); fwprintf(gen, STRL("%ls"), res); } else PutRange(tab->CharClassSet(action->sym)); - fwprintf(gen, STRL(") {")); + fputws(STRL(") {"), gen); if (action->tc == Node::contextTrans) { - fwprintf(gen, STRL("apx++; ")); ctxEnd = false; + fputws(STRL("apx++; "), gen); ctxEnd = false; } else if (state->ctx) - fwprintf(gen, STRL("apx = 0; ")); - fwprintf(gen, STRL("AddCh(); goto case_%d;"), action->target->state->nr); - fwprintf(gen, STRL("}\n")); + fputws(STRL("apx = 0; "), gen); + fwprintf(gen, STRL("AddCh(); goto case_%d;}\n"), action->target->state->nr); } if (state->firstAction == NULL) - fwprintf(gen, STRL("\t\t\t{")); + fputws(STRL("\t\t\t{"), gen); else - fwprintf(gen, STRL("\t\t\telse {")); + fputws(STRL("\t\t\telse {"), gen); if (ctxEnd) { // final context state: cut appendix - fwprintf(gen, STRL("\n")); - fwprintf(gen, STRL("\t\t\t\ttlen -= apx;\n")); - fwprintf(gen, STRL("\t\t\t\tSetScannerBehindT();")); - - fwprintf(gen, STRL("\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n")); - fwprintf(gen, STRL("\t\t\t\tfor 
(int i = 0; i < tlen; i++) NextCh();\n")); - fwprintf(gen, STRL("\t\t\t\t")); + fwprintf(gen, STRL("%s"), + "\n" + "\t\t\t\ttlen -= apx;\n" + "\t\t\t\tSetScannerBehindT();" + "\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n" + "\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n" + "\t\t\t\t"); } if (endOf == NULL) { - fwprintf(gen, STRL("goto case_0;}\n")); + fputws(STRL("goto case_0;}\n"), gen); } else { fwprintf(gen, STRL("t->kind = %d /* %ls */; "), endOf->n, endOf->name); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, STRL("t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n")); + fwprintf(gen, STRL("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); } else { - fwprintf(gen, STRL("t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n")); + fwprintf(gen, STRL("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); } } else { - fwprintf(gen, STRL("loopState = false;")); + fputws(STRL("loopState = false;"), gen); if(endOf->semPos && endOf->typ == Node::t) CopySourcePart(endOf->semPos, 0); - fwprintf(gen, STRL(" break;}\n")); + fputws(STRL(" break;}\n"), gen); } } } @@ -767,13 +766,13 @@ void DFA::WriteStartTab() { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (firstRange) { firstRange = false; - fwprintf(gen, STRL("\tint i;\n")); + fputws(STRL("\tint i;\n"), gen); } fwprintf(gen, STRL("\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n"), r->from, r->to, targetState); } } } - fwprintf(gen, STRL("\t\tstart.set(Buffer::EoF, -1);\n")); + fwprintf(gen, STRL("%s"), "\t\tstart.set(Buffer::EoF, -1);\n"); } void DFA::WriteScanner() { @@ -797,7 +796,7 @@ void DFA::WriteScanner() { g.CopyFramePart(STRL("-->casing0")); if (ignoreCase) { - fwprintf(gen, STRL("\twchar_t valCh; // current input character (for token.val)\n")); + fwprintf(gen, STRL("%s"), "\twchar_t valCh; // 
current input character (for token.val)\n"); } g.CopyFramePart(STRL("-->commentsheader")); Comment *com = firstComment; @@ -829,12 +828,13 @@ void DFA::WriteScanner() { g.CopyFramePart(STRL("-->initialization")); g.CopyFramePart(STRL("-->casing1")); if (ignoreCase) { - fwprintf(gen, STRL("\t\tvalCh = ch;\n")); - fwprintf(gen, STRL("\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()")); + fwprintf(gen, STRL("%s"), + "\t\tvalCh = ch;\n" + "\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); } g.CopyFramePart(STRL("-->casing2")); - fwprintf(gen, STRL("\t\ttval[tlen++] = ")); - if (ignoreCase) fwprintf(gen, STRL("valCh;")); else fwprintf(gen, STRL("ch;")); + fputws(STRL("\t\ttval[tlen++] = "), gen); + if (ignoreCase) fputws(STRL("valCh;"), gen); else fputws(STRL("ch;"), gen); g.CopyFramePart(STRL("-->comments")); com = firstComment; cmdIdx = 0; @@ -844,26 +844,26 @@ void DFA::WriteScanner() { } g.CopyFramePart(STRL("-->scan1")); - fwprintf(gen, STRL("\t\t\t")); - if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, STRL("false")); } + fputws(STRL("\t\t\t"), gen); + if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fputws(STRL("false"), gen); } g.CopyFramePart(STRL("-->scan2")); if (firstComment != NULL) { - fwprintf(gen, STRL("\t\tif (")); + fputws(STRL("\t\tif ("), gen); com = firstComment; cmdIdx = 0; wchar_t_20 fmt; while (com != NULL) { wchar_t* res = DFAChCond(com->start[0], fmt); fwprintf(gen, STRL("(%ls && Comment%d())"), res, cmdIdx); if (com->next != NULL) { - fwprintf(gen, STRL(" || ")); + fputws(STRL(" || "), gen); } com = com->next; cmdIdx++; } - fwprintf(gen, STRL(") continue;")); + fputws(STRL(") continue;"), gen); } g.CopyFramePart(STRL("-->scan22")); - if (hasCtxMoves) { fwprintf(gen, STRL("\n")); fwprintf(gen, STRL("\tint apx = 0;")); } /* pdt */ + if (hasCtxMoves) { fputws(STRL("\n\tint apx = 0;"), gen); } /* pdt */ g.CopyFramePart(STRL("-->scan3")); /* CSB 02-10-05 
check the Labels */ diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 7ace38b..9aeb73b 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -38,7 +38,7 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { void ParserGen::Indent (int n) { - for (int i = 1; i <= n; i++) fwprintf(gen, STRL("\t")); + for (int i = 1; i <= n; i++) fputws(STRL("\t"), gen); } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning @@ -85,7 +85,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { void ParserGen::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, STRL("} // namespace\n")); + fputws(STRL("} // namespace\n"), gen); } } @@ -100,7 +100,7 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { Indent(indent); while (buffer->GetPos() <= pos->end) { while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF - fwprintf(gen, STRL("\n")); Indent(indent); + fputws(STRL("\n"), gen); Indent(indent); if (ch == CR) { ch = buffer->Read(); } // skip CR if (ch == LF) { ch = buffer->Read(); } // skip LF for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) { @@ -113,7 +113,7 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { ch = buffer->Read(); } done: - if (indent > 0) fwprintf(gen, STRL("\n")); + if (indent > 0) fputws(STRL("\n"), gen); } } @@ -155,16 +155,16 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); - if (n == 0) fwprintf(gen, STRL("false")); // happens if an ANY set matches no symbol + if (n == 0) fputws(STRL("false"), gen); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; for (int i=0; iterminals.Count; i++) { sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, STRL("la->kind == ")); + fputws(STRL("la->kind == "), gen); WriteSymbolOrCode(gen, sym); --n; - if (n 
> 0) fwprintf(gen, STRL(" || ")); + if (n > 0) fputws(STRL(" || "), gen); } } } else @@ -177,9 +177,9 @@ void ParserGen::PutCaseLabels (const BitArray *s) { for (int i=0; iterminals.Count; i++) { sym = tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, STRL("case ")); + fputws(STRL("case "), gen); WriteSymbolOrCode(gen, sym); - fwprintf(gen, STRL(": ")); + fputws(STRL(": "), gen); } } } @@ -192,27 +192,27 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); fwprintf(gen, STRL("%ls("), p->sym->name); CopySourcePart(p->pos, 0); - fwprintf(gen, STRL(");\n")); + fputws(STRL(");\n"), gen); } else if (p->typ == Node::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) { - fwprintf(gen, STRL("Get();\n")); + fputws(STRL("Get();\n"), gen); //copy and pasted bellow - fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n")); + fputws(STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } else { - fwprintf(gen, STRL("Expect(")); + fputws(STRL("Expect("), gen); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, STRL(");\n")); + fputws(STRL(");\n"), gen); //copy and pasted from above - fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n")); + fputws(STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } } if (p->typ == Node::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); - fwprintf(gen, STRL("ExpectWeak(")); + fputws(STRL("ExpectWeak("), gen); WriteSymbolOrCode(gen, p->sym); fwprintf(gen, STRL(", %d);\n"), NewCondSet(s1)); delete s1; @@ -221,11 +221,11 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { int acc = Sets::Elements(p->set); if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here - 
fwprintf(gen, STRL("Get();\n")); + fputws(STRL("Get();\n"), gen); } else { GenErrorMsg(altErr, curSy); if (acc > 0) { - fwprintf(gen, STRL("if (")); GenCond(p->set, p); fwprintf(gen, STRL(") Get(); else SynErr(%d);\n"), errorNr); + fputws(STRL("if ("), gen); GenCond(p->set, p); fwprintf(gen, STRL(") Get(); else SynErr(%d);\n"), errorNr); } else fwprintf(gen, STRL("SynErr(%d); // ANY node that matches no symbol\n"), errorNr); } } if (p->typ == Node::eps) { // nothing @@ -236,55 +236,55 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); - fwprintf(gen, STRL("while (!(")); GenCond(s1, p); fwprintf(gen, STRL(")) {")); - fwprintf(gen, STRL("SynErr(%d); Get();"), errorNr); fwprintf(gen, STRL("}\n")); + fputws(STRL("while (!("), gen); GenCond(s1, p); fputws(STRL(")) {"), gen); + fwprintf(gen, STRL("SynErr(%d); Get();"), errorNr); fputws(STRL("}\n"), gen); delete s1; } if (p->typ == Node::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); delete s1; bool useSwitch = UseSwitch(p); - if (useSwitch) { Indent(indent); fwprintf(gen, STRL("switch (la->kind) {\n")); } + if (useSwitch) { Indent(indent); fputws(STRL("switch (la->kind) {\n"), gen); } p2 = p; while (p2 != NULL) { s1 = tab->Expected(p2->sub, curSy); Indent(indent); if (useSwitch) { - PutCaseLabels(s1); fwprintf(gen, STRL("{\n")); + PutCaseLabels(s1); fputws(STRL("{\n"), gen); } else if (p2 == p) { - fwprintf(gen, STRL("if (")); GenCond(s1, p2->sub); fwprintf(gen, STRL(") {\n")); - } else if (p2->down == NULL && equal) { fwprintf(gen, STRL("} else {\n")); + fputws(STRL("if ("), gen); GenCond(s1, p2->sub); fputws(STRL(") {\n"), gen); + } else if (p2->down == NULL && equal) { fputws(STRL("} else {\n"), gen); } else { - fwprintf(gen, STRL("} else if (")); GenCond(s1, p2->sub); fwprintf(gen, STRL(") {\n")); + fputws(STRL("} else if ("), gen); GenCond(s1, p2->sub); fputws(STRL(") {\n"), gen); } GenCode(p2->sub, 
indent + 1, s1); if (useSwitch) { - Indent(indent); fwprintf(gen, STRL("\tbreak;\n")); - Indent(indent); fwprintf(gen, STRL("}\n")); + Indent(indent); fputws(STRL("\tbreak;\n"), gen); + Indent(indent); fputws(STRL("}\n"), gen); } p2 = p2->down; delete s1; } Indent(indent); if (equal) { - fwprintf(gen, STRL("}\n")); + fputws(STRL("}\n"), gen); } else { GenErrorMsg(altErr, curSy); if (useSwitch) { fwprintf(gen, STRL("default: SynErr(%d); break;\n"), errorNr); - Indent(indent); fwprintf(gen, STRL("}\n")); + Indent(indent); fputws(STRL("}\n"), gen); } else { - fwprintf(gen, STRL("} ")); fwprintf(gen, STRL("else SynErr(%d);\n"), errorNr); + fputws(STRL("} "), gen); fwprintf(gen, STRL("else SynErr(%d);\n"), errorNr); } } } if (p->typ == Node::iter) { Indent(indent); p2 = p->sub; - fwprintf(gen, STRL("while (")); + fputws(STRL("while ("), gen); if (p2->typ == Node::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); - fwprintf(gen, STRL("WeakSeparator(")); + fputws(STRL("WeakSeparator("), gen); WriteSymbolOrCode(gen, p2->sym); fwprintf(gen, STRL(",%d,%d) "), NewCondSet(s1), NewCondSet(s2)); delete s1; @@ -295,16 +295,16 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { s1 = tab->First(p2); GenCond(s1, p2); } - fwprintf(gen, STRL(") {\n")); + fputws(STRL(") {\n"), gen); GenCode(p2, indent + 1, s1); - Indent(indent); fwprintf(gen, STRL("}\n")); + Indent(indent); fputws(STRL("}\n"), gen); delete s1; } if (p->typ == Node::opt) { s1 = tab->First(p->sub); Indent(indent); - fwprintf(gen, STRL("if (")); GenCond(s1, p->sub); fwprintf(gen, STRL(") {\n")); + fputws(STRL("if ("), gen); GenCond(s1, p->sub); fputws(STRL(") {\n"), gen); GenCode(p->sub, indent + 1, s1); - Indent(indent); fwprintf(gen, STRL("}\n")); + Indent(indent); fputws(STRL("}\n"), gen); delete s1; } if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) @@ -320,7 +320,7 @@ void ParserGen::GenTokensHeader() { int i; bool isFirst = true; - 
fwprintf(gen, STRL("\tenum {\n")); + fputws(STRL("\tenum {\n"), gen); // tokens for (i=0; iterminals.Count; i++) { @@ -328,7 +328,7 @@ void ParserGen::GenTokensHeader() { if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } - else { fwprintf(gen , STRL(",\n")); } + else { fputws(STRL(",\n"), gen); } fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } @@ -336,25 +336,25 @@ void ParserGen::GenTokensHeader() { // pragmas for (i=0; ipragmas.Count; i++) { if (isFirst) { isFirst = false; } - else { fwprintf(gen , STRL(",\n")); } + else { fputws(STRL(",\n"), gen); } sym = tab->pragmas[i]; fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } - fwprintf(gen, STRL("\n\t};\n")); + fputws(STRL("\n\t};\n"), gen); // nonterminals - fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n")); + fputws(STRL("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"), gen); isFirst = true; for (i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; if (isFirst) { isFirst = false; } - else { fwprintf(gen , STRL(",\n")); } + else { fputws(STRL(",\n"), gen); } fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); } - fwprintf(gen, STRL("\n\t};\n#endif\n")); + fputws(STRL("\n\t};\n#endif\n"), gen); } @@ -362,11 +362,11 @@ void ParserGen::GenCodePragmas() { Symbol *sym; for (int i=0; ipragmas.Count; i++) { sym = tab->pragmas[i]; - fwprintf(gen, STRL("\t\tif (la->kind == ")); + fputws(STRL("\t\tif (la->kind == "), gen); WriteSymbolOrCode(gen, sym); - fwprintf(gen, STRL(") {\n")); + fputws(STRL(") {\n"), gen); CopySourcePart(sym->semPos, 4); - fwprintf(gen, STRL("\t\t}\n")); + fputws(STRL("\t\t}\n"), gen); } } @@ -385,7 +385,7 @@ void ParserGen::GenProductionsHeader() { curSy = sym; fwprintf(gen, STRL("\tvoid %ls("), sym->name); CopySourcePart(sym->attrPos, 0); - fwprintf(gen, STRL(");\n")); + fputws(STRL(");\n"), gen); } } @@ -397,21 +397,20 @@ void ParserGen::GenProductions() { curSy = sym; fwprintf(gen, STRL("void Parser::%ls("), sym->name); 
CopySourcePart(sym->attrPos, 0); - fwprintf(gen, STRL(") {\n")); + fputws(STRL(") {\n"), gen); CopySourcePart(sym->semPos, 2); - fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n")); + fputws(STRL("#ifdef PARSER_WITH_AST\n"), gen); if(i == 0) fwprintf(gen, STRL("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); else { fwprintf(gen, STRL("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n"), sym->name, sym->name); } - fwprintf(gen, STRL("#endif\n")); + fputws(STRL("#endif\n"), gen); ba.SetAll(false); GenCode(sym->graph, 2, &ba); - fwprintf(gen, STRL("#ifdef PARSER_WITH_AST\n")); - if(i == 0) fwprintf(gen, STRL("\t\tAstPopNonTerminal();\n")); - else fwprintf(gen, STRL("\t\tif(ntAdded) AstPopNonTerminal();\n")); - fwprintf(gen, STRL("#endif\n")); - fwprintf(gen, STRL("}\n\n")); + fputws(STRL("#ifdef PARSER_WITH_AST\n"), gen); + if(i == 0) fputws(STRL("\t\tAstPopNonTerminal();\n"), gen); + else fputws(STRL("\t\tif(ntAdded) AstPopNonTerminal();\n"), gen); + fputws(STRL("#endif\n}\n\n"), gen); } } @@ -420,18 +419,18 @@ void ParserGen::InitSets() { for (int i = 0; i < symSet.Count; i++) { BitArray *s = symSet[i]; - fwprintf(gen, STRL("\t\t{")); + fputws(STRL("\t\t{"), gen); int j = 0; Symbol *sym; for (int k=0; kterminals.Count; k++) { sym = tab->terminals[k]; - if ((*s)[sym->n]) fwprintf(gen, STRL("T,")); else fwprintf(gen, STRL("x,")); + fputws(((*s)[sym->n]) ? 
STRL("T,") : STRL("x,"), gen); ++j; - if (j%4 == 0) fwprintf(gen, STRL(" ")); + if (j%4 == 0) fputws(STRL(" "), gen); } - if (i == symSet.Count-1) fwprintf(gen, STRL("x}\n")); else fwprintf(gen, STRL("x},\n")); + if (i == symSet.Count-1) fputws(STRL("x}\n"), gen); else fputws(STRL("x},\n"), gen); } - fwprintf(gen, STRL("\t};\n\n")); + fputws(STRL("\t};\n\n"), gen); } void ParserGen::WriteParser () { @@ -459,13 +458,13 @@ void ParserGen::WriteParser () { g.CopyFramePart(STRL("-->headerdef")); - if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, STRL("\n"));} + if (usingPos != NULL) {CopySourcePart(usingPos, 0); fputws(STRL("\n"), gen);} g.CopyFramePart(STRL("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(STRL("-->constantsheader")); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ - fwprintf(gen, STRL("\tint maxT;\n")); + fputws(STRL("\tint maxT;\n"), gen); g.CopyFramePart(STRL("-->declarations")); CopySourcePart(tab->semDeclPos, 0); g.CopyFramePart(STRL("-->productionsheader")); GenProductionsHeader(); g.CopyFramePart(STRL("-->namespace_close")); @@ -484,7 +483,7 @@ void ParserGen::WriteParser () { g.CopyFramePart(STRL("-->pragmas")); GenCodePragmas(); g.CopyFramePart(STRL("-->productions")); GenProductions(); - g.CopyFramePart(STRL("-->parseRoot")); fwprintf(gen, STRL("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, STRL("\tExpect(0);")); + g.CopyFramePart(STRL("-->parseRoot")); fwprintf(gen, STRL("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(STRL("\tExpect(0);"), gen); g.CopyFramePart(STRL("-->constants")); fwprintf(gen, STRL("\tmaxT = %d;\n"), tab->terminals.Count-1); g.CopyFramePart(STRL("-->initialization")); InitSets(); @@ -498,8 +497,7 @@ void ParserGen::WriteParser () { void ParserGen::WriteStatistics () { - fwprintf(trace, STRL("\n")); - fwprintf(trace, STRL("%d terminals\n"), tab->terminals.Count); + fwprintf(trace, STRL("\n%d terminals\n"), 
tab->terminals.Count); fwprintf(trace, STRL("%d symbols\n"), tab->terminals.Count + tab->pragmas.Count + tab->nonterminals.Count); fwprintf(trace, STRL("%d nodes\n"), tab->nodes.Count); diff --git a/src/Scanner.frame b/src/Scanner.frame index 3c2c5fb..4e111ba 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -53,12 +53,12 @@ Scanner.h Specification #define wchar_t char #define SFMT_LCHR "%c" #define SFMT_SLCHR "%c" -#define SFMT_HSTR "%s" #define SFMT_LSTR "%s" #define SFMT_SLSTR "%s" #define SFMT_LS "s" #define STRL(s) s #define CHL(s) s +#define fputws fputs #define wprintf printf #define swprintf snprintf #define fwprintf fprintf @@ -72,7 +72,6 @@ Scanner.h Specification #define wcsrchr strrchr #else #include -#define SFMT_HSTR "%hs" #define SFMT_LSTR "%ls" #define SFMT_SLSTR L"%ls" #define SFMT_LS "ls" diff --git a/src/Scanner.h b/src/Scanner.h index 0431ac9..1ad2a10 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -48,12 +48,12 @@ Coco/R itself) does not fall under the GNU General Public License. #define wchar_t char #define SFMT_LCHR "%c" #define SFMT_SLCHR "%c" -#define SFMT_HSTR "%s" #define SFMT_LSTR "%s" #define SFMT_SLSTR "%s" #define SFMT_LS "s" #define STRL(s) s #define CHL(s) s +#define fputws fputs #define wprintf printf #define swprintf snprintf #define fwprintf fprintf @@ -67,7 +67,6 @@ Coco/R itself) does not fall under the GNU General Public License. 
#define wcsrchr strrchr #else #include -#define SFMT_HSTR "%hs" #define SFMT_LSTR "%ls" #define SFMT_SLSTR L"%ls" #define SFMT_LS "ls" diff --git a/src/Tab.cpp b/src/Tab.cpp index fb2e727..6411e84 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -118,20 +118,21 @@ void Tab::PrintSym(const Symbol *sym) { fwprintf(trace, STRL("%3d %14s %s"), sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); - if (sym->attrPos==NULL) fwprintf(trace, STRL(" false ")); else fwprintf(trace, STRL(" true ")); + if (sym->attrPos==NULL) fputws(STRL(" false "), trace); else fputws(STRL(" true "), trace); if (sym->typ == Node::nt) { fwprintf(trace, STRL("%5d"), Num(sym->graph)); - if (sym->deletable) fwprintf(trace, STRL(" true ")); else fwprintf(trace, STRL(" false ")); + if (sym->deletable) fputws(STRL(" true "), trace); else fputws(STRL(" false "), trace); } else - fwprintf(trace, STRL(" ")); + fputws(STRL(" "), trace); fwprintf(trace, STRL("%5d %s\n"), sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { - fwprintf(trace, STRL("Symbol Table:\n")); - fwprintf(trace, STRL("------------\n\n")); - fwprintf(trace, STRL(" nr name typ hasAt graph del line tokenKind\n")); + fwprintf(trace, STRL("%s"), + "Symbol Table:\n" + "------------\n\n" + " nr name typ hasAt graph del line tokenKind\n"); Symbol *sym; int i; @@ -149,8 +150,9 @@ void Tab::PrintSymbolTable() { } - fwprintf(trace, STRL("\nLiteral Tokens:\n")); - fwprintf(trace, STRL("--------------\n")); + fwprintf(trace, STRL("%s"), + "\nLiteral Tokens:\n" + "--------------\n"); Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { @@ -158,7 +160,7 @@ void Tab::PrintSymbolTable() { fwprintf(trace, STRL("_%ls = %ls.\n"), ((Symbol*) (e->val))->name, e->key); } delete iter; - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); } void Tab::PrintSet(const BitArray *s, int indent) { @@ -170,15 +172,15 @@ void Tab::PrintSet(const BitArray *s, int indent) { if ((*s)[sym->n]) { len = 
coco_string_length(sym->name); if (col + len >= 80) { - fwprintf(trace, STRL("\n")); - for (col = 1; col < indent; col++) fwprintf(trace, STRL(" ")); + fputws(STRL("\n"), trace); + for (col = 1; col < indent; col++) fputws(STRL(" "), trace); } fwprintf(trace, STRL("%ls "), sym->name); col += len + 1; } } - if (col == indent) fwprintf(trace, STRL("-- empty set --")); - fwprintf(trace, STRL("\n")); + if (col == indent) fputws(STRL("-- empty set --"), trace); + fputws(STRL("\n"), trace); } //--------------------------------------------------------------------- @@ -352,11 +354,12 @@ wchar_t* Tab::Name(const wchar_t *name) { } void Tab::PrintNodes() { - fwprintf(trace, STRL("Graph nodes:\n")); - fwprintf(trace, STRL("----------------------------------------------------\n")); - fwprintf(trace, STRL(" n type name next down sub pos line\n")); - fwprintf(trace, STRL(" val code\n")); - fwprintf(trace, STRL("----------------------------------------------------\n")); + fwprintf(trace, STRL("%s"), + "Graph nodes:\n" + "----------------------------------------------------\n" + " n type name next down sub pos line\n" + " val code\n" + "----------------------------------------------------\n"); Node *p; wchar_t_10 format; @@ -372,7 +375,7 @@ void Tab::PrintNodes() { wchar_t *paddedName = Name(c->name); fwprintf(trace, STRL("%12s "), paddedName); coco_string_delete(paddedName); - } else fwprintf(trace, STRL(" ")); + } else fputws(STRL(" "), trace); fwprintf(trace, STRL("%5d "), Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { @@ -390,7 +393,7 @@ void Tab::PrintNodes() { } fwprintf(trace, STRL("%5d\n"), p->line); } - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); } //--------------------------------------------------------------------- @@ -469,14 +472,14 @@ void Tab::WriteCharClasses () { wchar_t* format2 = coco_string_create_append(c->name, STRL(" ")); wchar_t* format = coco_string_create(format2, 0, 10); coco_string_merge(format, 
STRL(": ")); - fwprintf(trace, format); + fputws(format, trace); WriteCharSet(c->set); - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); coco_string_delete(format); coco_string_delete(format2); } - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); } //--------------------------------------------------------------------- @@ -527,9 +530,9 @@ BitArray* Tab::First(const Node *p) { BitArray mark(nodes.Count); BitArray *fs = First0(p, &mark); if (ddt[3]) { - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); if (p != NULL) fwprintf(trace, STRL("First: node = %d\n"), p->n ); - else fwprintf(trace, STRL("First: node = null\n")); + else fputws(STRL("First: node = null\n"), trace); PrintSet(fs, 0); } return fs; @@ -773,23 +776,25 @@ void Tab::CompSymbolSets() { CompFollowSets(); CompSyncSets(); if (ddt[1]) { - fwprintf(trace, STRL("\n")); - fwprintf(trace, STRL("First & follow symbols:\n")); - fwprintf(trace, STRL("----------------------\n\n")); + fwprintf(trace, STRL("%s"), + "\n" + "First & follow symbols:\n" + "----------------------\n\n"); Symbol *sym; for (int i=0; iname); - fwprintf(trace, STRL("first: ")); PrintSet(sym->first, 10); - fwprintf(trace, STRL("follow: ")); PrintSet(sym->follow, 10); - fwprintf(trace, STRL("\n")); + fputws(STRL("first: "), trace); PrintSet(sym->first, 10); + fputws(STRL("follow: "), trace); PrintSet(sym->follow, 10); + fputws(STRL("\n"), trace); } } if (ddt[4]) { - fwprintf(trace, STRL("\n")); - fwprintf(trace, STRL("ANY and SYNC sets:\n")); - fwprintf(trace, STRL("-----------------\n")); + fwprintf(trace, STRL("%s"), + "\n" + "ANY and SYNC sets:\n" + "-----------------\n"); Node *p; for (int i=0; iname, curSy->line, curSy->col); if (sym != NULL) wprintf(STRL("%ls is "), sym->name); switch (cond) { - case 1: wprintf(STRL("start of several alternatives\n")); break; - case 2: wprintf(STRL("start & successor of deletable structure\n")); break; - case 3: wprintf(STRL("an ANY node that matches no symbol\n")); break; - 
case 4: wprintf(STRL("contents of [...] or {...} must not be deletable\n")); break; + case 1: wprintf(STRL("%s"), "start of several alternatives\n"); break; + case 2: wprintf(STRL("%s"), "start & successor of deletable structure\n"); break; + case 3: wprintf(STRL("%s"), "an ANY node that matches no symbol\n"); break; + case 4: wprintf(STRL("%s"), "contents of [...] or {...} must not be deletable\n"); break; } } @@ -1292,9 +1297,10 @@ void Tab::XRef() { } } // print cross reference list - fwprintf(trace, STRL("\n")); - fwprintf(trace, STRL("Cross reference list:\n")); - fwprintf(trace, STRL("--------------------\n\n")); + fwprintf(trace, STRL("%s"), + "\n" + "Cross reference list:\n" + "--------------------\n\n"); for (i=0; iCount; j++) { line = (int)(ssize_t)((*list)[j]); if (col + 5 > 80) { - fwprintf(trace, STRL("\n")); - for (col = 1; col <= 14; col++) fwprintf(trace, STRL(" ")); + fputws(STRL("\n"), trace); + for (col = 1; col <= 14; col++) fputws(STRL(" "), trace); } fwprintf(trace, STRL("%5d"), line); col += 5; } - fwprintf(trace, STRL("\n")); + fputws(STRL("\n"), trace); } - fwprintf(trace, STRL("\n\n")); + fputws(STRL("\n\n"), trace); for(int i=0; i < xref.Count; ++i) { SortedEntry *se = xref[i]; /* From d795f694c801f3919421794da790d222d7d56c8d Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 11:34:39 +0200 Subject: [PATCH 38/95] Move 'ArrayList' to Scanner.frame to use in the AST (parser tree) generation also replace any ArrayList with typed ones. 
--- src/ArrayList.h | 117 ---------------------------------------------- src/DFA.cpp | 2 +- src/Parser.frame | 4 +- src/Parser.h | 4 +- src/ParserGen.cpp | 1 - src/ParserGen.h | 2 +- src/Scanner.frame | 77 ++++++++++++++++++++++++++++++ src/Scanner.h | 77 ++++++++++++++++++++++++++++++ src/Tab.cpp | 38 +++++++-------- src/Tab.h | 13 +++--- 10 files changed, 185 insertions(+), 150 deletions(-) delete mode 100644 src/ArrayList.h diff --git a/src/ArrayList.h b/src/ArrayList.h deleted file mode 100644 index e3b7c4d..0000000 --- a/src/ArrayList.h +++ /dev/null @@ -1,117 +0,0 @@ -/*------------------------------------------------------------------------- -Compiler Generator Coco/R, -Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz -extended by M. Loeberbauer & A. Woess, Univ. of Linz -ported to C++ by Csaba Balazs, University of Szeged -with improvements by Pat Terry, Rhodes University - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -As an exception, it is allowed to write an extension of Coco/R that is -used as a plugin in non-free software. - -If not otherwise stated, any source code generated by Coco/R (other than -Coco/R itself) does not fall under the GNU General Public License. 
--------------------------------------------------------------------------*/ - -#if !defined(COCO_ARRAYLIST_H__) -#define COCO_ARRAYLIST_H__ - -#include - -namespace Coco { - -template -class TArrayList -{ - T** Data; -public: - typedef int tsize_t; - tsize_t Count; - tsize_t Capacity; - - TArrayList() { - Count = 0; - Capacity = 10; - Data = new T*[ Capacity ]; - } - virtual ~TArrayList() { - delete [] Data; - } - - void Add(T *value) { - if (Count < Capacity) { - Data[Count] = value; - Count++; - } else { - Capacity *= 2; - T** newData = new T*[Capacity]; - for (tsize_t i=0; i ArrayList; - -}; // namespace - -#endif // !defined(COCO_ARRAYLIST_H__) diff --git a/src/DFA.cpp b/src/DFA.cpp index 4789fac..44630f2 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -612,7 +612,7 @@ const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in void DFA::GenLiterals () { Symbol *sym; - TArrayList *ts[2]; + TArrayList *ts[2]; ts[0] = &tab->terminals; ts[1] = &tab->pragmas; diff --git a/src/Parser.frame b/src/Parser.frame index 038452d..068a42e 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -48,7 +48,7 @@ struct SynTree { ~SynTree(); Token *tok; - TArrayList children; + TArrayList children; void dump(int indent=0, bool isLast=false); void dump2(int maxT, int indent=0, bool isLast=false); @@ -92,7 +92,7 @@ public: #ifdef PARSER_WITH_AST SynTree *ast_root; - TArrayList ast_stack; + TArrayList ast_stack; void AstAddTerminal(); bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); void AstPopNonTerminal(); diff --git a/src/Parser.h b/src/Parser.h index 90ff828..02515d8 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -48,7 +48,7 @@ struct SynTree { ~SynTree(); Token *tok; - TArrayList children; + TArrayList children; void dump(int indent=0, bool isLast=false); void dump2(int maxT, int indent=0, bool isLast=false); @@ -125,7 +125,7 @@ class Parser { #ifdef PARSER_WITH_AST SynTree *ast_root; - TArrayList ast_stack; + TArrayList 
ast_stack; void AstAddTerminal(); bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); void AstPopNonTerminal(); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 9aeb73b..46badf9 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -28,7 +28,6 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include -#include "ArrayList.h" #include "ParserGen.h" #include "Parser.h" #include "BitArray.h" diff --git a/src/ParserGen.h b/src/ParserGen.h index e8ceb14..24c7514 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -60,7 +60,7 @@ class ParserGen FILE* fram; // parser frame file FILE* gen; // generated parser source file wchar_t* err; // generated parser error messages - TArrayList symSet; + TArrayList symSet; Tab *tab; // other Coco objects FILE* trace; diff --git a/src/Scanner.frame b/src/Scanner.frame index 4e111ba..8a63358 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -39,6 +39,7 @@ Scanner.h Specification #include #include #include +#include #include // io.h and fcntl are used to ensure binary read from streams on windows @@ -128,6 +129,82 @@ wchar_t* coco_string_create(const char *value); char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); +template +class TArrayList +{ + T *Data; +public: + typedef int tsize_t; + tsize_t Count; + tsize_t Capacity; + + TArrayList() { + Count = 0; + Capacity = 10; + Data = new T[ Capacity ]; + } + virtual ~TArrayList() { + delete [] Data; + } + + void Add(T value) { + if (Count < Capacity) { + Data[Count] = value; + Count++; + } else { + Capacity *= 2; + T* newData = new T[Capacity]; + for (tsize_t i=0; i #include #include +#include #include // io.h and fcntl are used to ensure binary read from streams on windows @@ -124,6 +125,82 @@ wchar_t* coco_string_create(const char *value); char* coco_string_create_char(const wchar_t *value); void 
coco_string_delete(char* &data); +template +class TArrayList +{ + T *Data; +public: + typedef int tsize_t; + tsize_t Count; + tsize_t Capacity; + + TArrayList() { + Count = 0; + Capacity = 10; + Data = new T[ Capacity ]; + } + virtual ~TArrayList() { + delete [] Data; + } + + void Add(T value) { + if (Count < Capacity) { + Data[Count] = value; + Count++; + } else { + Capacity *= 2; + T* newData = new T[Capacity]; + for (tsize_t i=0; i &singles, const Node *rule) { if (p == NULL) return; // end of graph if (p->typ == Node::nt) { - if (p->up || DelGraph(p->next) || p->sym->graph == rule) singles->Add(p->sym); + if (p->up || DelGraph(p->next) || p->sym->graph == rule) singles.Add(p->sym); } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { if (p->up || DelGraph(p->next)) { GetSingles(p->sub, singles, rule); @@ -938,18 +938,18 @@ void Tab::GetSingles(const Node *p, ArrayList *singles, const Node *rule) { bool Tab::NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; - ArrayList list; + TArrayList list; Symbol *sym; int i; for (i=0; igraph, &singles, sym->graph); // get nonterminals s such that sym-->s + TArrayList singles; + GetSingles(sym->graph, singles, sym->graph); // get nonterminals s such that sym-->s Symbol *s; for (int j=0; jleft == m->right) onRightSide = true; if (n->right == m->left) onLeftSide = true; } @@ -975,11 +975,11 @@ bool Tab::NoCircularProductions() { ok = true; for (i=0; icount++; wprintf(STRL(" %ls --> %ls"), n->left->name, n->right->name); } - for(int i=0; iAdd((void*)(ssize_t)(-sym->line)); + TArrayList *list = (TArrayList*)(xref.Get(sym)); + if (list == NULL) {list = new TArrayList(); xref.Set(sym, list);} + list->Add(-sym->line); } // collect lines where symbols have been referenced Node *n; for (i=0; ityp == Node::t || n->typ == Node::wt || n->typ == Node::nt) { - ArrayList *list = (ArrayList*)(xref.Get(n->sym)); - if (list == NULL) {list = new ArrayList(); xref.Set(n->sym, list);} - 
list->Add((void*)(ssize_t)n->line); + TArrayList *list = (TArrayList*)(xref.Get(n->sym)); + if (list == NULL) {list = new TArrayList(); xref.Set(n->sym, list);} + list->Add(n->line); } } // print cross reference list @@ -1307,11 +1307,11 @@ void Tab::XRef() { wchar_t *paddedName = Name(sym->name); fwprintf(trace, STRL(" %12ls"), paddedName); coco_string_delete(paddedName); - ArrayList *list = (ArrayList*)(xref.Get(sym)); + TArrayList *list = (TArrayList*)(xref.Get(sym)); int col = 14; int line; for (j=0; jCount; j++) { - line = (int)(ssize_t)((*list)[j]); + line = (*list)[j]; if (col + 5 > 80) { fputws(STRL("\n"), trace); for (col = 1; col <= 14; col++) fputws(STRL(" "), trace); @@ -1330,7 +1330,7 @@ void Tab::XRef() { se->next = tmp; } */ - delete (ArrayList*)se->Value; + delete (TArrayList*)se->Value; } } diff --git a/src/Tab.h b/src/Tab.h index 590e08f..129fac7 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -30,7 +30,6 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_TAB_H__) #define COCO_TAB_H__ -#include "ArrayList.h" #include "HashTable.h" #include "StringBuilder.h" #include "SortedList.h" @@ -76,16 +75,16 @@ class Tab { Errors *errors; - TArrayList terminals; - TArrayList pragmas; - TArrayList nonterminals; + TArrayList terminals; + TArrayList pragmas; + TArrayList nonterminals; - TArrayList nodes; + TArrayList nodes; static const char* nTyp[]; Node *dummyNode; - TArrayList classes; + TArrayList classes; int dummyName; Tab(Parser *parser); @@ -198,7 +197,7 @@ class Tab { } }; - void GetSingles(const Node *p, ArrayList *singles, const Node *rule); + void GetSingles(const Node *p, TArrayList &singles, const Node *rule); bool NoCircularProductions(); //--------------- check for LL(1) errors ---------------------- From 88dc67fe2fe51ea8db54b4c74b915fcee0f06652 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 11:53:03 +0200 Subject: [PATCH 39/95] Replace STRL and CHL by the unified _SC macro --- src/Action.cpp | 2 +- 
src/Coco.atg | 76 ++++++------- src/Coco.cpp | 24 ++-- src/DFA.cpp | 248 +++++++++++++++++++++--------------------- src/Generator.cpp | 26 ++--- src/Makefile | 2 +- src/Parser.cpp | 208 +++++++++++++++++------------------ src/Parser.frame | 28 ++--- src/Parser.h | 2 +- src/ParserGen.cpp | 228 +++++++++++++++++++------------------- src/Scanner.cpp | 118 ++++++++++---------- src/Scanner.frame | 36 +++--- src/Scanner.h | 8 +- src/StringBuilder.cpp | 2 +- src/Tab.cpp | 240 ++++++++++++++++++++-------------------- 15 files changed, 622 insertions(+), 626 deletions(-) diff --git a/src/Action.cpp b/src/Action.cpp index b2b7806..b8c57e5 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -80,7 +80,7 @@ bool Action::ShiftWith(CharSet *s, Tab *tab) { //return true if it used the Char } else { CharClass *c = tab->FindCharClass(s); if (c == NULL) { - c = tab->NewCharClass(STRL("#"), s); // class with dummy name + c = tab->NewCharClass(_SC("#"), s); // class with dummy name rc = true; } typ = Node::clas; sym = c->n; diff --git a/src/Coco.atg b/src/Coco.atg index 6e2b6c0..31ad7a5 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -60,7 +60,7 @@ COMPILER Coco id = 0; str = 1; tokenString = NULL; - noString = coco_string_create(STRL("-none-")); + noString = coco_string_create(_SC("-none-")); ignoreGammarErrors = false; } @@ -127,7 +127,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra { ANY } (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .) [ "IGNORECASE" (. dfa->ignoreCase = true; .) ] /* pdt */ [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); - if (sym != NULL) SemErr(STRL("name declared twice")); + if (sym != NULL) SemErr(_SC("name declared twice")); else { sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -154,8 +154,8 @@ Coco (. 
Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(STRL("name declared twice")); - } else SemErr(STRL("this symbol kind not allowed on left side of production")); + if (sym->graph != NULL) SemErr(_SC("name declared twice")); + } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); @@ -163,7 +163,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra .) [ AttrDecl ] (. if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(STRL("attribute mismatch between declaration and use of this symbol")); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); .) [ SemText<.sym->semPos.> ] WEAK '=' @@ -175,22 +175,22 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra '.' } "END" ident (. if (!coco_string_equal(gramName, t->val)) - SemErr(STRL("name does not match grammar name")); + SemErr(_SC("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(STRL("missing production for grammar name")); + SemErr(_SC("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(STRL("grammar symbol must not have attributes")); + SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, STRL("???"), 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors.count == 0) { - wprintf(STRL("checking\n")); + wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); bool doGenCode = false; @@ -200,14 +200,14 @@ Coco (. 
Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra } else doGenCode = tab->GrammarOk(); if (doGenCode) { - wprintf(STRL("parser")); + wprintf(_SC("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(STRL(" + scanner")); + wprintf(_SC(" + scanner")); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(STRL(" generated\n")); + wprintf(_SC(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } @@ -222,9 +222,9 @@ SetDecl (. CharSet *s; .) = ident (. wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(STRL("name declared twice")); + if (c != NULL) SemErr(_SC("name declared twice")); .) - '=' Set (. if (s->Elements() == 0) SemErr(STRL("character set must not be empty")); + '=' Set (. if (s->Elements() == 0) SemErr(_SC("character set must not be empty")); tab->NewCharClass(name, s); coco_string_delete(name); .) @@ -246,7 +246,7 @@ Set (. CharSet *s2; .) SimSet (. int n1, n2; .) = (. s = new CharSet(); .) ( ident (. CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(STRL("undefined name")); else s->Or(c->set); + if (c == NULL) SemErr(_SC("undefined name")); else s->Or(c->set); .) | string (. wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); @@ -257,7 +257,7 @@ SimSet (. int n1, n2; .) for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { - if ((CHL('A') <= ch) && (ch <= CHL('Z'))) ch = ch - (CHL('A') - CHL('a')); // ch.ToLower() + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) ch = ch - (_SC('A') - _SC('a')); // ch.ToLower() } s->Set(ch); } @@ -281,7 +281,7 @@ Char // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(STRL("unacceptable character value")); + else SemErr(_SC("unacceptable character value")); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; .) @@ -292,7 +292,7 @@ Char TokenDecl (. 
wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .) = Sym (. sym = tab->FindSym(name); - if (sym != NULL) SemErr(STRL("name declared twice")); + if (sym != NULL) SemErr(_SC("name declared twice")); else { sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -301,13 +301,13 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; coco_string_delete(tokenString); .) SYNC - ( '=' TokenExpr '.' (. if (kind == str) SemErr(STRL("a literal must not be declared with a structure")); + ( '=' TokenExpr '.' (. if (kind == str) SemErr(_SC("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if (tab->literals[tokenString] != NULL) - SemErr(STRL("token string declared twice")); + SemErr(_SC("token string declared twice")); tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } @@ -317,7 +317,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(STRL("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(STRL("semantic action not allowed here")); .) + [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(_SC("semantic action not allowed here")); .) ] . @@ -327,13 +327,13 @@ AttrDecl = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(STRL("bad string in attributes")); .) + | badString (. SemErr(_SC("bad string in attributes")); .) } '>' (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. 
SemErr(STRL("bad string in attributes")); .) + | badString (. SemErr(_SC("bad string in attributes")); .) } ".>" (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) @@ -387,26 +387,26 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(STRL("undefined string in production")); + SemErr(_SC("undefined string in production")); sym = tab->eofSy; // dummy } } coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) - SemErr(STRL("this symbol kind is not allowed in a production")); + SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { if (typ == Node::t) typ = Node::wt; - else SemErr(STRL("only terminals may be weak")); + else SemErr(_SC("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); .) - [ Attribs

(. if (kind != id) SemErr(STRL("a literal must not have attributes")); .) + [ Attribs

(. if (kind != id) SemErr(_SC("a literal must not have attributes")); .) ] (. if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(STRL("attribute mismatch between declaration and use of this symbol")); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); .) | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) @@ -473,7 +473,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) ( Sym (. if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(STRL("undefined name")); + SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; @@ -499,15 +499,15 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) /*------------------------------------------------------------------------------------*/ Sym -= (. name = coco_string_create(STRL("???")); kind = id; .) += (. name = coco_string_create(_SC("???")); kind = id; .) ( ident (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .) | (string (. coco_string_delete(name); name = coco_string_create(t->val); .) | char (. wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); - name = coco_string_create_append(STRL("\""), subName); + name = coco_string_create_append(_SC("\""), subName); coco_string_delete(subName); - coco_string_merge(name, STRL("\"")); + coco_string_merge(name, _SC("\"")); .) ) (. kind = str; if (dfa->ignoreCase) { @@ -516,7 +516,7 @@ Sym coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) - SemErr(STRL("literal tokens must not contain blanks")); .) + SemErr(_SC("literal tokens must not contain blanks")); .) ) . @@ -526,12 +526,12 @@ Attribs = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(STRL("bad string in attributes")); .) + | badString (. 
SemErr(_SC("bad string in attributes")); .) } '>' (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(STRL("bad string in attributes")); .) + | badString (. SemErr(_SC("bad string in attributes")); .) } ".>" (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) . @@ -542,8 +542,8 @@ SemText = "(." (. int beg = la->pos; int col = la->col; int line = t->line; .) { ANY - | badString (. SemErr(STRL("bad string in semantic action")); .) - | "(." (. SemErr(STRL("missing end of previous semantic action")); .) + | badString (. SemErr(_SC("bad string in semantic action")); .) + | "(." (. SemErr(_SC("missing end of previous semantic action")); .) } ".)" (. pos = new Position(beg, t->pos, col, line); .) . diff --git a/src/Coco.cpp b/src/Coco.cpp index fce8b14..794bc8c 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -61,7 +61,7 @@ int main(int argc, char *argv_[]) { #error unknown compiler! 
#endif - wprintf(STRL("%s"), "Coco/R (Dec 01, 2018)\n"); + wprintf(_SC("%s"), "Coco/R (Dec 01, 2018)\n"); wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; @@ -69,12 +69,12 @@ int main(int argc, char *argv_[]) { bool emitLines = false, ignoreGammarErrors = false; for (int i = 1; i < argc; i++) { - if (coco_string_equal(argv[i], STRL("-namespace")) && i < argc - 1) nsName = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], STRL("-frames")) && i < argc - 1) frameDir = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], STRL("-trace")) && i < argc - 1) ddtString = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], STRL("-o")) && i < argc - 1) outDir = coco_string_create_append(argv[++i], STRL("/")); - else if (coco_string_equal(argv[i], STRL("-lines"))) emitLines = true; - else if (coco_string_equal(argv[i], STRL("-ignoreGammarErrors"))) ignoreGammarErrors = true; + if (coco_string_equal(argv[i], _SC("-namespace")) && i < argc - 1) nsName = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-frames")) && i < argc - 1) frameDir = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-trace")) && i < argc - 1) ddtString = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-o")) && i < argc - 1) outDir = coco_string_create_append(argv[++i], _SC("/")); + else if (coco_string_equal(argv[i], _SC("-lines"))) emitLines = true; + else if (coco_string_equal(argv[i], _SC("-ignoreGammarErrors"))) ignoreGammarErrors = true; else srcName = coco_string_create(argv[i]); } @@ -94,11 +94,11 @@ int main(int argc, char *argv_[]) { Coco::Scanner scanner(file); Coco::Parser parser(&scanner); - traceFileName = coco_string_create_append(srcDir, STRL("trace.txt")); + traceFileName = coco_string_create_append(srcDir, _SC("trace.txt")); chTrFileName = coco_string_create_char(traceFileName); if 
((parser.trace = fopen(chTrFileName, "w")) == NULL) { - wprintf(STRL("-- could not open %s\n"), chTrFileName); + wprintf(_SC("-- could not open %s\n"), chTrFileName); exit(1); } @@ -130,19 +130,19 @@ int main(int argc, char *argv_[]) { if (fileSize == 0) { remove(chTrFileName); } else { - wprintf(STRL("trace output is in %s\n"), chTrFileName); + wprintf(_SC("trace output is in %s\n"), chTrFileName); } coco_string_delete(file); coco_string_delete(srcDir); - wprintf(STRL("%d errors detected\n"), parser.errors.count); + wprintf(_SC("%d errors detected\n"), parser.errors.count); if (parser.errors.count != 0) { exit(1); } } else { - wprintf(STRL("%s"), + wprintf(_SC("%s"), "Usage: Coco Grammar.ATG {Option}\n" "Options:\n" " -namespace \n" diff --git a/src/DFA.cpp b/src/DFA.cpp index 44630f2..0c31117 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -43,17 +43,17 @@ typedef wchar_t wchar_t_20[20]; //---------- Output primitives static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { - if (ch < CHL(' ') || ch >= 127 || ch == CHL('\'') || ch == CHL('\\')) - coco_swprintf(format, 10, STRL("%d\0"), (int) ch); + if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) + coco_swprintf(format, 10, _SC("%d\0"), (int) ch); else - coco_swprintf(format, 10, STRL("CHL('%lc')\0"), (int) ch); + coco_swprintf(format, 10, _SC("_SC('%lc')\0"), (int) ch); return format; } static wchar_t* DFAChCond(wchar_t ch, wchar_t_20 &format) { wchar_t_10 fmt; wchar_t* res = DFACh(ch, fmt); - coco_swprintf(format, 20, STRL("ch == %ls\0"), res); + coco_swprintf(format, 20, _SC("ch == %ls\0"), res); return format; } @@ -62,16 +62,16 @@ void DFA::PutRange(CharSet *s) { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { wchar_t *from = DFACh((wchar_t) r->from, fmt1); - fwprintf(gen, STRL("ch == %ls"), from); + fwprintf(gen, _SC("ch == %ls"), from); } else if (r->from == 0) { wchar_t *to = DFACh((wchar_t) r->to, fmt1); - fwprintf(gen, STRL("ch <= %ls"), to); + 
fwprintf(gen, _SC("ch <= %ls"), to); } else { wchar_t *from = DFACh((wchar_t) r->from, fmt1); wchar_t *to = DFACh((wchar_t) r->to, fmt2); - fwprintf(gen, STRL("(ch >= %ls && ch <= %ls)"), from, to); + fwprintf(gen, _SC("(ch >= %ls && ch <= %ls)"), from, to); } - if (r->next != NULL) fputws(STRL(" || "), gen); + if (r->next != NULL) fputws(_SC(" || "), gen); } } @@ -161,7 +161,7 @@ void DFA::Step(State *from, const Node *p, BitArray *stepped) { Step(from, p->sub, stepped); Step(from, p->down, stepped); } else if (p->typ == Node::iter) { if (tab->DelSubGraph(p->sub)) { - parser->SemErr(STRL("contents of {...} must not be deletable")); + parser->SemErr(_SC("contents of {...} must not be deletable")); return; } if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); @@ -227,7 +227,7 @@ void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { void DFA::ConvertToStates(Node *p, Symbol *sym) { curGraph = p; curSy = sym; if (tab->DelGraph(curGraph)) { - parser->SemErr(STRL("token might be empty")); + parser->SemErr(_SC("token might be empty")); return; } NumberNodes(curGraph, firstState, true); @@ -269,7 +269,7 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off wchar_t format[200]; - coco_swprintf(format, 200, STRL("tokens %ls and %ls cannot be distinguished"), sym->name, matchedSym->name); + coco_swprintf(format, 200, _SC("tokens %ls and %ls cannot be distinguished"), sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken matchedSym->tokenKind = Symbol::classLitToken; @@ -373,30 +373,30 @@ void DFA::MakeDeterministic() { } void DFA::PrintStates() { - fwprintf(trace, STRL("\n---------- states ----------\n")); + fwprintf(trace, _SC("\n---------- states ----------\n")); wchar_t_10 fmt; for 
(State *state = firstState; state != NULL; state = state->next) { bool first = true; - if (state->endOf == NULL) fputws(STRL(" "), trace); + if (state->endOf == NULL) fputws(_SC(" "), trace); else { wchar_t *paddedName = tab->Name(state->endOf->name); - fwprintf(trace, STRL("E(%12s)"), paddedName); + fwprintf(trace, _SC("E(%12s)"), paddedName); coco_string_delete(paddedName); } - fwprintf(trace, STRL("%3d:"), state->nr); - if (state->firstAction == NULL) fputws(STRL("\n"), trace); + fwprintf(trace, _SC("%3d:"), state->nr); + if (state->firstAction == NULL) fputws(_SC("\n"), trace); for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (first) {fputws(STRL(" "), trace); first = false;} else fputws(STRL(" "), trace); + if (first) {fputws(_SC(" "), trace); first = false;} else fputws(_SC(" "), trace); - if (action->typ == Node::clas) fwprintf(trace, STRL("%ls"), tab->classes[action->sym]->name); - else fwprintf(trace, STRL("%3s"), DFACh((wchar_t)action->sym, fmt)); + if (action->typ == Node::clas) fwprintf(trace, _SC("%ls"), tab->classes[action->sym]->name); + else fwprintf(trace, _SC("%3s"), DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { - fwprintf(trace, STRL("%3d"), targ->state->nr); + fwprintf(trace, _SC("%3d"), targ->state->nr); } - if (action->tc == Node::contextTrans) fputws(STRL(" context\n"), trace); else fputws(STRL("\n"), trace); + if (action->tc == Node::contextTrans) fputws(_SC(" context\n"), trace); else fputws(_SC("\n"), trace); } } - fputws(STRL("\n---------- character classes ----------\n"), trace); + fputws(_SC("\n---------- character classes ----------\n"), trace); tab->WriteCharClasses(); } @@ -426,7 +426,7 @@ void DFA::GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, b endOf = t->state->endOf; } else { - wprintf(STRL("Tokens %ls and %ls cannot be distinguished\n"), endOf->name, t->state->endOf->name); + wprintf(_SC("Tokens %ls and %ls 
cannot be distinguished\n"), endOf->name, t->state->endOf->name); errors->count++; } } @@ -482,15 +482,15 @@ wchar_t* DFA::CommentStr(const Node *p) { s.Append((wchar_t)p->val); } else if (p->typ == Node::clas) { CharSet *set = tab->CharClassSet(p->val); - if (set->Elements() != 1) parser->SemErr(STRL("character set contains more than 1 character")); + if (set->Elements() != 1) parser->SemErr(_SC("character set contains more than 1 character")); s.Append((wchar_t) set->First()); } - else parser->SemErr(STRL("comment delimiters may not be structured")); + else parser->SemErr(_SC("comment delimiters may not be structured")); p = p->next; } if (s.GetLength() == 0 || s.GetLength() > 8) { - parser->SemErr(STRL("comment delimiters must be 1 or 8 characters long")); - s = StringBuilder(STRL("?")); + parser->SemErr(_SC("comment delimiters must be 1 or 8 characters long")); + s = StringBuilder(_SC("?")); } return s.ToString(); } @@ -505,22 +505,22 @@ void DFA::NewComment(const Node *from, const Node *to, bool nested) { //------------------------ scanner generation ---------------------- void DFA::GenCommentIndented(int n, const wchar_t *s) { - for(int i= 1; i < n; ++i) fputws(STRL("\t"), gen); + for(int i= 1; i < n; ++i) fputws(_SC("\t"), gen); fputws(s, gen); } void DFA::GenComBody(const Comment *com) { int imax = coco_string_length(com->start)-1; int imaxStop = coco_string_length(com->stop)-1; - GenCommentIndented(imax, STRL("\t\tfor(;;) {\n")); + GenCommentIndented(imax, _SC("\t\tfor(;;) {\n")); wchar_t_20 fmt; wchar_t* res = DFAChCond(com->stop[0], fmt); - GenCommentIndented(imax, STRL("\t\t\tif (")); - fwprintf(gen, STRL("%ls) {\n"), res); + GenCommentIndented(imax, _SC("\t\t\tif (")); + fwprintf(gen, _SC("%ls) {\n"), res); if (imaxStop == 0) { - fwprintf(gen, STRL("%s"), + fwprintf(gen, _SC("%s"), "\t\t\t\tlevel--;\n" "\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n" "\t\t\t\tNextCh();\n"); @@ -528,51 +528,51 @@ void DFA::GenComBody(const 
Comment *com) { int currIndent, indent = imax - 1; for(int sidx = 1; sidx <= imaxStop; ++sidx) { currIndent = indent + sidx; - GenCommentIndented(currIndent, STRL("\t\t\t\tNextCh();\n")); - GenCommentIndented(currIndent, STRL("\t\t\t\tif (")); - fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->stop[sidx], fmt)); + GenCommentIndented(currIndent, _SC("\t\t\t\tNextCh();\n")); + GenCommentIndented(currIndent, _SC("\t\t\t\tif (")); + fwprintf(gen, _SC("%ls) {\n"), DFAChCond(com->stop[sidx], fmt)); } currIndent = indent + imax; - GenCommentIndented(currIndent, STRL("\t\t\tlevel--;\n")); - GenCommentIndented(currIndent, STRL("\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n")); - GenCommentIndented(currIndent, STRL("\t\t\tNextCh();\n")); + GenCommentIndented(currIndent, _SC("\t\t\tlevel--;\n")); + GenCommentIndented(currIndent, _SC("\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n")); + GenCommentIndented(currIndent, _SC("\t\t\tNextCh();\n")); for(int sidx = imaxStop; sidx > 0; --sidx) { - GenCommentIndented(indent + sidx, STRL("\t\t\t\t}\n")); + GenCommentIndented(indent + sidx, _SC("\t\t\t\t}\n")); } } if (com->nested) { - GenCommentIndented(imax, STRL("\t\t\t}")); + GenCommentIndented(imax, _SC("\t\t\t}")); wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, STRL(" else if (%ls) {\n"), res); + fwprintf(gen, _SC(" else if (%ls) {\n"), res); if (imaxStop == 0) - fputws(STRL("\t\t\tlevel++; NextCh();\n"), gen); + fputws(_SC("\t\t\tlevel++; NextCh();\n"), gen); else { int indent = imax - 1; for(int sidx = 1; sidx <= imax; ++sidx) { int loopIndent = indent + sidx; - GenCommentIndented(loopIndent, STRL("\t\t\t\tNextCh();\n")); - GenCommentIndented(loopIndent, STRL("\t\t\t\tif (")); - fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(loopIndent, _SC("\t\t\t\tNextCh();\n")); + GenCommentIndented(loopIndent, _SC("\t\t\t\tif (")); + fwprintf(gen, _SC("%ls) {\n"), 
DFAChCond(com->start[sidx], fmt)); } - GenCommentIndented(indent + imax, STRL("\t\t\t\t\tlevel++; NextCh();\n")); + GenCommentIndented(indent + imax, _SC("\t\t\t\t\tlevel++; NextCh();\n")); for(int sidx = imax; sidx > 0; --sidx) { - GenCommentIndented(indent + sidx, STRL("\t\t\t\t}\n")); + GenCommentIndented(indent + sidx, _SC("\t\t\t\t}\n")); } } } - GenCommentIndented(imax, STRL("\t\t\t} else if (ch == buffer->EoF) return false;\n")); - GenCommentIndented(imax, STRL("\t\t\telse NextCh();\n")); - GenCommentIndented(imax, STRL("\t\t}\n")); + GenCommentIndented(imax, _SC("\t\t\t} else if (ch == buffer->EoF) return false;\n")); + GenCommentIndented(imax, _SC("\t\t\telse NextCh();\n")); + GenCommentIndented(imax, _SC("\t\t}\n")); } void DFA::GenCommentHeader(const Comment *com, int i) { - fwprintf(gen, STRL("\tbool Comment%d();\n"), i); + fwprintf(gen, _SC("\tbool Comment%d();\n"), i); } void DFA::GenComment(const Comment *com, int i) { wchar_t_20 fmt; - fwprintf(gen, STRL("\nbool Scanner::Comment%d() {\n"), i); - fwprintf(gen, STRL("%s"), + fwprintf(gen, _SC("\nbool Scanner::Comment%d() {\n"), i); + fwprintf(gen, _SC("%s"), "\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n" "\tNextCh();\n"); int imax = coco_string_length(com->start)-1; @@ -580,19 +580,19 @@ void DFA::GenComment(const Comment *com, int i) { GenComBody(com); } else { for(int sidx = 1; sidx <= imax; ++sidx) { - GenCommentIndented(sidx, STRL("\tif (")); - fwprintf(gen, STRL("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); - GenCommentIndented(sidx, STRL("\t\tNextCh();\n")); + GenCommentIndented(sidx, _SC("\tif (")); + fwprintf(gen, _SC("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(sidx, _SC("\t\tNextCh();\n")); } GenComBody(com); for(int sidx = imax; sidx > 0; --sidx) { - GenCommentIndented(sidx, STRL("\t}\n")); + GenCommentIndented(sidx, _SC("\t}\n")); } - fwprintf(gen, STRL("%s"), + fwprintf(gen, _SC("%s"), "\tbuffer->SetPos(pos0); NextCh(); line = 
line0; col = col0; charPos = charPos0;\n" "\treturn false;\n"); } - fputws(STRL("}\n"), gen); + fputws(_SC("}\n"), gen); } const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in Tab.literals @@ -628,13 +628,13 @@ void DFA::GenLiterals () { } // sym.name stores literals with quotes, e.g. "\"Literal\"" - fputws(STRL("\tkeywords.set(STRL("), gen); + fputws(_SC("\tkeywords.set(_SC("), gen); // write keyword, escape non printable characters - for (int k = 0; name[k] != CHL('\0'); k++) { + for (int k = 0; name[k] != _SC('\0'); k++) { wchar_t c = name[k]; - fwprintf(gen, (c >= 32 && c <= 127) ? STRL("%lc") : STRL("\\x%04x"), c); + fwprintf(gen, (c >= 32 && c <= 127) ? _SC("%lc") : _SC("\\x%04x"), c); } - fwprintf(gen, STRL("), %d);\n"), sym->n); + fwprintf(gen, _SC("), %d);\n"), sym->n); coco_string_delete(name); } @@ -653,7 +653,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, STRL("namespace %ls {\n"), curNs); + fwprintf(gen, _SC("namespace %ls {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -666,7 +666,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { void DFA::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fputws(STRL("} // namespace\n"), gen); + fputws(_SC("} // namespace\n"), gen); } } @@ -699,37 +699,37 @@ void DFA::CopySourcePart (const Position *pos, int indent) { void DFA::WriteState(const State *state) { Symbol *endOf = state->endOf; - fwprintf(gen, STRL("\t\tcase %d:\n"), state->nr); + fwprintf(gen, _SC("\t\tcase %d:\n"), state->nr); if (existLabel[state->nr]) - fwprintf(gen, STRL("\t\t\tcase_%d:\n"), state->nr); + fwprintf(gen, _SC("\t\t\tcase_%d:\n"), state->nr); if (endOf != NULL && 
state->firstAction != NULL) { - fwprintf(gen, STRL("\t\t\trecEnd = pos; recKind = %d /* %ls */;\n"), endOf->n, endOf->name); + fwprintf(gen, _SC("\t\t\trecEnd = pos; recKind = %d /* %ls */;\n"), endOf->n, endOf->name); } bool ctxEnd = state->ctx; wchar_t_20 fmt; for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (action == state->firstAction) fputws(STRL("\t\t\tif ("), gen); - else fputws(STRL("\t\t\telse if ("), gen); + if (action == state->firstAction) fputws(_SC("\t\t\tif ("), gen); + else fputws(_SC("\t\t\telse if ("), gen); if (action->typ == Node::chr) { wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); - fwprintf(gen, STRL("%ls"), res); + fwprintf(gen, _SC("%ls"), res); } else PutRange(tab->CharClassSet(action->sym)); - fputws(STRL(") {"), gen); + fputws(_SC(") {"), gen); if (action->tc == Node::contextTrans) { - fputws(STRL("apx++; "), gen); ctxEnd = false; + fputws(_SC("apx++; "), gen); ctxEnd = false; } else if (state->ctx) - fputws(STRL("apx = 0; "), gen); - fwprintf(gen, STRL("AddCh(); goto case_%d;}\n"), action->target->state->nr); + fputws(_SC("apx = 0; "), gen); + fwprintf(gen, _SC("AddCh(); goto case_%d;}\n"), action->target->state->nr); } if (state->firstAction == NULL) - fputws(STRL("\t\t\t{"), gen); + fputws(_SC("\t\t\t{"), gen); else - fputws(STRL("\t\t\telse {"), gen); + fputws(_SC("\t\t\telse {"), gen); if (ctxEnd) { // final context state: cut appendix - fwprintf(gen, STRL("%s"), + fwprintf(gen, _SC("%s"), "\n" "\t\t\t\ttlen -= apx;\n" "\t\t\t\tSetScannerBehindT();" @@ -738,19 +738,19 @@ void DFA::WriteState(const State *state) { "\t\t\t\t"); } if (endOf == NULL) { - fputws(STRL("goto case_0;}\n"), gen); + fputws(_SC("goto case_0;}\n"), gen); } else { - fwprintf(gen, STRL("t->kind = %d /* %ls */; "), endOf->n, endOf->name); + fwprintf(gen, _SC("t->kind = %d /* %ls */; "), endOf->n, endOf->name); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, STRL("%s"), "t->kind = 
keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); } else { - fwprintf(gen, STRL("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); } } else { - fputws(STRL("loopState = false;"), gen); + fputws(_SC("loopState = false;"), gen); if(endOf->semPos && endOf->typ == Node::t) CopySourcePart(endOf->semPos, 0); - fputws(STRL(" break;}\n"), gen); + fputws(_SC(" break;}\n"), gen); } } } @@ -760,45 +760,45 @@ void DFA::WriteStartTab() { for (Action *action = firstState->firstAction; action != NULL; action = action->next) { int targetState = action->target->state->nr; if (action->typ == Node::chr) { - fwprintf(gen, STRL("\tstart.set(%d, %d);\n"), action->sym, targetState); + fwprintf(gen, _SC("\tstart.set(%d, %d);\n"), action->sym, targetState); } else { CharSet *s = tab->CharClassSet(action->sym); for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (firstRange) { firstRange = false; - fputws(STRL("\tint i;\n"), gen); + fputws(_SC("\tint i;\n"), gen); } - fwprintf(gen, STRL("\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n"), r->from, r->to, targetState); + fwprintf(gen, _SC("\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n"), r->from, r->to, targetState); } } } - fwprintf(gen, STRL("%s"), "\t\tstart.set(Buffer::EoF, -1);\n"); + fwprintf(gen, _SC("%s"), "\t\tstart.set(Buffer::EoF, -1);\n"); } void DFA::WriteScanner() { Generator g(tab, errors); - fram = g.OpenFrame(STRL("Scanner.frame")); - gen = g.OpenGen(STRL("Scanner.h")); + fram = g.OpenFrame(_SC("Scanner.frame")); + gen = g.OpenGen(_SC("Scanner.h")); if (dirtyDFA) MakeDeterministic(); // Header g.GenCopyright(); - g.SkipFramePart(STRL("-->begin")); + g.SkipFramePart(_SC("-->begin")); - g.CopyFramePart(STRL("-->prefix")); + 
g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(STRL("-->prefix")); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(STRL("-->namespace_open")); + g.CopyFramePart(_SC("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(STRL("-->casing0")); + g.CopyFramePart(_SC("-->casing0")); if (ignoreCase) { - fwprintf(gen, STRL("%s"), "\twchar_t valCh; // current input character (for token.val)\n"); + fwprintf(gen, _SC("%s"), "\twchar_t valCh; // current input character (for token.val)\n"); } - g.CopyFramePart(STRL("-->commentsheader")); + g.CopyFramePart(_SC("-->commentsheader")); Comment *com = firstComment; int cmdIdx = 0; while (com != NULL) { @@ -806,65 +806,65 @@ void DFA::WriteScanner() { com = com->next; cmdIdx++; } - g.CopyFramePart(STRL("-->namespace_close")); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(STRL("-->implementation")); + g.CopyFramePart(_SC("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(STRL("Scanner.cpp")); + gen = g.OpenGen(_SC("Scanner.cpp")); g.GenCopyright(); - g.SkipFramePart(STRL("-->begin")); - g.CopyFramePart(STRL("-->namespace_open")); + g.SkipFramePart(_SC("-->begin")); + g.CopyFramePart(_SC("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(STRL("-->declarations")); - fwprintf(gen, STRL("\tmaxT = %d;\n"), tab->terminals.Count - 1); - fwprintf(gen, STRL("\tnoSym = %d;\n"), tab->noSym->n); + g.CopyFramePart(_SC("-->declarations")); + fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count - 1); + fwprintf(gen, _SC("\tnoSym = %d;\n"), tab->noSym->n); WriteStartTab(); GenLiterals(); - g.CopyFramePart(STRL("-->initialization")); - g.CopyFramePart(STRL("-->casing1")); + g.CopyFramePart(_SC("-->initialization")); + g.CopyFramePart(_SC("-->casing1")); if (ignoreCase) { - fwprintf(gen, STRL("%s"), + fwprintf(gen, _SC("%s"), "\t\tvalCh = ch;\n" "\t\tif ('A' <= 
ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); } - g.CopyFramePart(STRL("-->casing2")); - fputws(STRL("\t\ttval[tlen++] = "), gen); - if (ignoreCase) fputws(STRL("valCh;"), gen); else fputws(STRL("ch;"), gen); + g.CopyFramePart(_SC("-->casing2")); + fputws(_SC("\t\ttval[tlen++] = "), gen); + if (ignoreCase) fputws(_SC("valCh;"), gen); else fputws(_SC("ch;"), gen); - g.CopyFramePart(STRL("-->comments")); + g.CopyFramePart(_SC("-->comments")); com = firstComment; cmdIdx = 0; while (com != NULL) { GenComment(com, cmdIdx); com = com->next; cmdIdx++; } - g.CopyFramePart(STRL("-->scan1")); - fputws(STRL("\t\t\t"), gen); - if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fputws(STRL("false"), gen); } + g.CopyFramePart(_SC("-->scan1")); + fputws(_SC("\t\t\t"), gen); + if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fputws(_SC("false"), gen); } - g.CopyFramePart(STRL("-->scan2")); + g.CopyFramePart(_SC("-->scan2")); if (firstComment != NULL) { - fputws(STRL("\t\tif ("), gen); + fputws(_SC("\t\tif ("), gen); com = firstComment; cmdIdx = 0; wchar_t_20 fmt; while (com != NULL) { wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, STRL("(%ls && Comment%d())"), res, cmdIdx); + fwprintf(gen, _SC("(%ls && Comment%d())"), res, cmdIdx); if (com->next != NULL) { - fputws(STRL(" || "), gen); + fputws(_SC(" || "), gen); } com = com->next; cmdIdx++; } - fputws(STRL(") continue;"), gen); + fputws(_SC(") continue;"), gen); } - g.CopyFramePart(STRL("-->scan22")); - if (hasCtxMoves) { fputws(STRL("\n\tint apx = 0;"), gen); } /* pdt */ - g.CopyFramePart(STRL("-->scan3")); + g.CopyFramePart(_SC("-->scan22")); + if (hasCtxMoves) { fputws(_SC("\n\tint apx = 0;"), gen); } /* pdt */ + g.CopyFramePart(_SC("-->scan3")); /* CSB 02-10-05 check the Labels */ existLabel = new bool[lastStateNr+1]; @@ -873,7 +873,7 @@ void DFA::WriteScanner() { WriteState(state); delete [] existLabel; - g.CopyFramePart(STRL("-->namespace_close")); + 
g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); diff --git a/src/Generator.cpp b/src/Generator.cpp index ee75458..86652cd 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -46,7 +46,7 @@ namespace Coco { FILE* Generator::OpenFrame(const wchar_t* frame) { if (coco_string_length(tab->frameDir) != 0) { - frameFile = coco_string_create_append(tab->frameDir, STRL("/")); + frameFile = coco_string_create_append(tab->frameDir, _SC("/")); coco_string_merge(frameFile, frame); char *chFrameFile = coco_string_create_char(frameFile); fram = fopen(chFrameFile, "r"); @@ -60,7 +60,7 @@ namespace Coco { delete [] chFrameFile; } if (fram == NULL) { - wchar_t *message = coco_string_create_append(STRL("-- Cannot find : "), frame); + wchar_t *message = coco_string_create_append(_SC("-- Cannot find : "), frame); errors->Exception(message); delete [] message; } @@ -75,14 +75,14 @@ namespace Coco { if ((gen = fopen(chFn, "r")) != NULL) { fclose(gen); - wchar_t *oldName = coco_string_create_append(fn, STRL(".old")); + wchar_t *oldName = coco_string_create_append(fn, _SC(".old")); char *chOldName = coco_string_create_char(oldName); remove(chOldName); rename(chFn, chOldName); // copy with overwrite coco_string_delete(chOldName); coco_string_delete(oldName); } if ((gen = fopen(chFn, "w")) == NULL) { - wchar_t *message = coco_string_create_append(STRL("-- Cannot generate : "), genName); + wchar_t *message = coco_string_create_append(_SC("-- Cannot generate : "), genName); errors->Exception(message); delete [] message; } @@ -97,14 +97,14 @@ namespace Coco { FILE *file = NULL; if (coco_string_length(tab->frameDir) != 0) { - wchar_t *copyFr = coco_string_create_append(tab->frameDir, STRL("/Copyright.frame")); + wchar_t *copyFr = coco_string_create_append(tab->frameDir, _SC("/Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; delete [] chCopyFr; } if (file == NULL) { - wchar_t 
*copyFr = coco_string_create_append(tab->srcDir, STRL("Copyright.frame")); + wchar_t *copyFr = coco_string_create_append(tab->srcDir, _SC("Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; @@ -134,7 +134,7 @@ namespace Coco { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, STRL("%ls_"), curNs); + fwprintf(gen, _SC("%ls_"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; } while (startPos < len); @@ -158,27 +158,27 @@ namespace Coco { endOfStopString = coco_string_length(stop)-1; } - fwscanf(fram, STRL("%lc"), &ch); // fram.ReadByte(); + fwscanf(fram, _SC("%lc"), &ch); // fram.ReadByte(); while (!feof(fram)) { // ch != EOF if (stop != NULL && ch == startCh) { int i = 0; do { if (i == endOfStopString) return; // stop[0..i] found - fwscanf(fram, STRL("%lc"), &ch); i++; + fwscanf(fram, _SC("%lc"), &ch); i++; } while (ch == stop[i]); // stop[0..i-1] found; continue with last read character if (generateOutput) { wchar_t *subStop = coco_string_create(stop, 0, i); - fwprintf(gen, STRL("%ls"), subStop); + fwprintf(gen, _SC("%ls"), subStop); coco_string_delete(subStop); } } else { - if (generateOutput) { fwprintf(gen, STRL("%lc"), ch); } - fwscanf(fram, STRL("%lc"), &ch); + if (generateOutput) { fwprintf(gen, _SC("%lc"), ch); } + fwscanf(fram, _SC("%lc"), &ch); } } if (stop != NULL) { - wchar_t *message = coco_string_create_append(STRL(" -- Incomplete or corrupt frame file: "), frameFile); + wchar_t *message = coco_string_create_append(_SC(" -- Incomplete or corrupt frame file: "), frameFile); errors->Exception(message); delete [] message; } diff --git a/src/Makefile b/src/Makefile index b6fe18e..f70959d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ all: - g++ *.cpp -o Coco $(CFLAGS) + g++ -g -Wall *.cpp -o Coco 
$(CFLAGS) clean: rm -f Coco diff --git a/src/Parser.cpp b/src/Parser.cpp index bdfe1fd..aa75d75 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -168,7 +168,7 @@ void Parser::Coco() { AstAddTerminal(); #endif sym = tab->FindSym(t->val); - if (sym != NULL) SemErr(STRL("name declared twice")); + if (sym != NULL) SemErr(_SC("name declared twice")); else { sym = tab->NewSym(Node::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -253,8 +253,8 @@ void Parser::Coco() { if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); else { if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(STRL("name declared twice")); - } else SemErr(STRL("this symbol kind not allowed on left side of production")); + if (sym->graph != NULL) SemErr(_SC("name declared twice")); + } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); @@ -265,7 +265,7 @@ void Parser::Coco() { } if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(STRL("attribute mismatch between declaration and use of this symbol")); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); if (la->kind == 40 /* "(." 
*/) { SemText(sym->semPos); @@ -287,22 +287,22 @@ void Parser::Coco() { AstAddTerminal(); #endif if (!coco_string_equal(gramName, t->val)) - SemErr(STRL("name does not match grammar name")); + SemErr(_SC("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(STRL("missing production for grammar name")); + SemErr(_SC("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(STRL("grammar symbol must not have attributes")); + SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, STRL("???"), 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(Node::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors.count == 0) { - wprintf(STRL("checking\n")); + wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); bool doGenCode = false; @@ -312,14 +312,14 @@ void Parser::Coco() { } else doGenCode = tab->GrammarOk(); if (doGenCode) { - wprintf(STRL("parser")); + wprintf(_SC("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(STRL(" + scanner")); + wprintf(_SC(" + scanner")); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(STRL(" generated\n")); + wprintf(_SC(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } @@ -345,14 +345,14 @@ void Parser::SetDecl() { #endif wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(STRL("name declared twice")); + if (c != NULL) SemErr(_SC("name declared twice")); Expect(18 /* "=" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif Set(s); - if (s->Elements() == 0) SemErr(STRL("character set must not be empty")); + if (s->Elements() == 0) SemErr(_SC("character set must not be empty")); tab->NewCharClass(name, s); coco_string_delete(name); @@ -372,7 +372,7 @@ void 
Parser::TokenDecl(int typ) { #endif Sym(name, kind); sym = tab->FindSym(name); - if (sym != NULL) SemErr(STRL("name declared twice")); + if (sym != NULL) SemErr(_SC("name declared twice")); else { sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; @@ -391,13 +391,13 @@ void Parser::TokenDecl(int typ) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - if (kind == str) SemErr(STRL("a literal must not be declared with a structure")); + if (kind == str) SemErr(_SC("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if (tab->literals[tokenString] != NULL) - SemErr(STRL("token string declared twice")); + SemErr(_SC("token string declared twice")); tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } @@ -410,7 +410,7 @@ void Parser::TokenDecl(int typ) { } else SynErr(45); if (la->kind == 40 /* "(." 
*/) { SemText(sym->semPos); - if (typ == Node::t) errors.Warning(STRL("Warning semantic action on token declarations require a custom Scanner")); + if (typ == Node::t) errors.Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -481,7 +481,7 @@ void Parser::AttrDecl(Symbol *sym) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("bad string in attributes")); + SemErr(_SC("bad string in attributes")); } } Expect(26 /* ">" */); @@ -504,7 +504,7 @@ void Parser::AttrDecl(Symbol *sym) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("bad string in attributes")); + SemErr(_SC("bad string in attributes")); } } Expect(28 /* ".>" */); @@ -536,13 +536,13 @@ void Parser::SemText(Position* &pos) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("bad string in semantic action")); + SemErr(_SC("bad string in semantic action")); } else { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("missing end of previous semantic action")); + SemErr(_SC("missing end of previous semantic action")); } } Expect(41 /* ".)" */); @@ -585,7 +585,7 @@ void Parser::SimSet(CharSet* &s) { AstAddTerminal(); #endif CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(STRL("undefined name")); else s->Or(c->set); + if (c == NULL) SemErr(_SC("undefined name")); else s->Or(c->set); } else if (la->kind == _string) { Get(); @@ -600,7 +600,7 @@ void Parser::SimSet(CharSet* &s) { for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { - if ((CHL('A') <= ch) && (ch <= CHL('Z'))) ch = ch - (CHL('A') - CHL('a')); // ch.ToLower() + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) ch = ch - (_SC('A') - _SC('a')); // ch.ToLower() } s->Set(ch); } @@ -644,7 +644,7 @@ void Parser::Char(int &n) { // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(STRL("unacceptable 
character value")); + else SemErr(_SC("unacceptable character value")); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; @@ -657,7 +657,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, "Sym", la->line); #endif - name = coco_string_create(STRL("???")); kind = id; + name = coco_string_create(_SC("???")); kind = id; if (la->kind == _ident) { Get(); #ifdef PARSER_WITH_AST @@ -678,9 +678,9 @@ void Parser::Sym(wchar_t* &name, int &kind) { #endif wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); - name = coco_string_create_append(STRL("\""), subName); + name = coco_string_create_append(_SC("\""), subName); coco_string_delete(subName); - coco_string_merge(name, STRL("\"")); + coco_string_merge(name, _SC("\"")); } kind = str; @@ -690,7 +690,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) - SemErr(STRL("literal tokens must not contain blanks")); + SemErr(_SC("literal tokens must not contain blanks")); } else SynErr(48); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -773,29 +773,29 @@ void Parser::Factor(Graph* &g) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(STRL("undefined string in production")); + SemErr(_SC("undefined string in production")); sym = tab->eofSy; // dummy } } coco_string_delete(name); int typ = sym->typ; if (typ != Node::t && typ != Node::nt) - SemErr(STRL("this symbol kind is not allowed in a production")); + SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { if (typ == Node::t) typ = Node::wt; - else SemErr(STRL("only terminals may be weak")); + else SemErr(_SC("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new 
Graph(p); if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { Attribs(p); - if (kind != id) SemErr(STRL("a literal must not have attributes")); + if (kind != id) SemErr(_SC("a literal must not have attributes")); } if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(STRL("attribute mismatch between declaration and use of this symbol")); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); break; } @@ -893,7 +893,7 @@ void Parser::Attribs(Node *p) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("bad string in attributes")); + SemErr(_SC("bad string in attributes")); } } Expect(26 /* ">" */); @@ -915,7 +915,7 @@ void Parser::Attribs(Node *p) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SemErr(STRL("bad string in attributes")); + SemErr(_SC("bad string in attributes")); } } Expect(28 /* ".>" */); @@ -996,7 +996,7 @@ void Parser::TokenFactor(Graph* &g) { if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(STRL("undefined name")); + SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; @@ -1146,7 +1146,7 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(STRL("Dummy Token")); + la->val = coco_string_create(_SC("Dummy Token")); Get(); Coco(); Expect(0); @@ -1218,97 +1218,97 @@ void Errors::SynErr(int line, int col, int n) { const size_t format_size = 20; wchar_t format[format_size]; switch (n) { - case 0: s = STRL("EOF expected"); break; - case 1: s = STRL("ident expected"); break; - case 2: s = STRL("number expected"); break; - case 3: s = STRL("string expected"); break; - case 4: s = STRL("badString expected"); break; - case 5: s = STRL("char expected"); break; - case 6: s = STRL("\"COMPILER\" expected"); break; - case 7: s = STRL("\"IGNORECASE\" expected"); 
break; - case 8: s = STRL("\"TERMINALS\" expected"); break; - case 9: s = STRL("\"CHARACTERS\" expected"); break; - case 10: s = STRL("\"TOKENS\" expected"); break; - case 11: s = STRL("\"PRAGMAS\" expected"); break; - case 12: s = STRL("\"COMMENTS\" expected"); break; - case 13: s = STRL("\"FROM\" expected"); break; - case 14: s = STRL("\"TO\" expected"); break; - case 15: s = STRL("\"NESTED\" expected"); break; - case 16: s = STRL("\"IGNORE\" expected"); break; - case 17: s = STRL("\"PRODUCTIONS\" expected"); break; - case 18: s = STRL("\"=\" expected"); break; - case 19: s = STRL("\".\" expected"); break; - case 20: s = STRL("\"END\" expected"); break; - case 21: s = STRL("\"+\" expected"); break; - case 22: s = STRL("\"-\" expected"); break; - case 23: s = STRL("\"..\" expected"); break; - case 24: s = STRL("\"ANY\" expected"); break; - case 25: s = STRL("\"<\" expected"); break; - case 26: s = STRL("\">\" expected"); break; - case 27: s = STRL("\"<.\" expected"); break; - case 28: s = STRL("\".>\" expected"); break; - case 29: s = STRL("\"|\" expected"); break; - case 30: s = STRL("\"WEAK\" expected"); break; - case 31: s = STRL("\"(\" expected"); break; - case 32: s = STRL("\")\" expected"); break; - case 33: s = STRL("\"[\" expected"); break; - case 34: s = STRL("\"]\" expected"); break; - case 35: s = STRL("\"{\" expected"); break; - case 36: s = STRL("\"}\" expected"); break; - case 37: s = STRL("\"SYNC\" expected"); break; - case 38: s = STRL("\"IF\" expected"); break; - case 39: s = STRL("\"CONTEXT\" expected"); break; - case 40: s = STRL("\"(.\" expected"); break; - case 41: s = STRL("\".)\" expected"); break; - case 42: s = STRL("??? 
expected"); break; - case 43: s = STRL("this symbol not expected in Coco"); break; - case 44: s = STRL("this symbol not expected in TokenDecl"); break; - case 45: s = STRL("invalid TokenDecl"); break; - case 46: s = STRL("invalid AttrDecl"); break; - case 47: s = STRL("invalid SimSet"); break; - case 48: s = STRL("invalid Sym"); break; - case 49: s = STRL("invalid Term"); break; - case 50: s = STRL("invalid Factor"); break; - case 51: s = STRL("invalid Attribs"); break; - case 52: s = STRL("invalid TokenFactor"); break; + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("number expected"); break; + case 3: s = _SC("string expected"); break; + case 4: s = _SC("badString expected"); break; + case 5: s = _SC("char expected"); break; + case 6: s = _SC("\"COMPILER\" expected"); break; + case 7: s = _SC("\"IGNORECASE\" expected"); break; + case 8: s = _SC("\"TERMINALS\" expected"); break; + case 9: s = _SC("\"CHARACTERS\" expected"); break; + case 10: s = _SC("\"TOKENS\" expected"); break; + case 11: s = _SC("\"PRAGMAS\" expected"); break; + case 12: s = _SC("\"COMMENTS\" expected"); break; + case 13: s = _SC("\"FROM\" expected"); break; + case 14: s = _SC("\"TO\" expected"); break; + case 15: s = _SC("\"NESTED\" expected"); break; + case 16: s = _SC("\"IGNORE\" expected"); break; + case 17: s = _SC("\"PRODUCTIONS\" expected"); break; + case 18: s = _SC("\"=\" expected"); break; + case 19: s = _SC("\".\" expected"); break; + case 20: s = _SC("\"END\" expected"); break; + case 21: s = _SC("\"+\" expected"); break; + case 22: s = _SC("\"-\" expected"); break; + case 23: s = _SC("\"..\" expected"); break; + case 24: s = _SC("\"ANY\" expected"); break; + case 25: s = _SC("\"<\" expected"); break; + case 26: s = _SC("\">\" expected"); break; + case 27: s = _SC("\"<.\" expected"); break; + case 28: s = _SC("\".>\" expected"); break; + case 29: s = _SC("\"|\" expected"); break; + case 30: s = _SC("\"WEAK\" expected"); break; 
+ case 31: s = _SC("\"(\" expected"); break; + case 32: s = _SC("\")\" expected"); break; + case 33: s = _SC("\"[\" expected"); break; + case 34: s = _SC("\"]\" expected"); break; + case 35: s = _SC("\"{\" expected"); break; + case 36: s = _SC("\"}\" expected"); break; + case 37: s = _SC("\"SYNC\" expected"); break; + case 38: s = _SC("\"IF\" expected"); break; + case 39: s = _SC("\"CONTEXT\" expected"); break; + case 40: s = _SC("\"(.\" expected"); break; + case 41: s = _SC("\".)\" expected"); break; + case 42: s = _SC("??? expected"); break; + case 43: s = _SC("this symbol not expected in Coco"); break; + case 44: s = _SC("this symbol not expected in TokenDecl"); break; + case 45: s = _SC("invalid TokenDecl"); break; + case 46: s = _SC("invalid AttrDecl"); break; + case 47: s = _SC("invalid SimSet"); break; + case 48: s = _SC("invalid Sym"); break; + case 49: s = _SC("invalid Term"); break; + case 50: s = _SC("invalid Factor"); break; + case 51: s = _SC("invalid Attribs"); break; + case 52: s = _SC("invalid TokenFactor"); break; default: { - coco_swprintf(format, format_size, STRL("error %d"), n); + coco_swprintf(format, format_size, _SC("error %d"), n); s = format; } break; } - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(STRL("%ls\n"), s); + wprintf(_SC("%ls\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(STRL("%ls"), s); + wprintf(_SC("%ls"), s); exit(1); } #ifdef PARSER_WITH_AST static void printIndent(int n) { - for(int i=0; i < n; ++i) wprintf(STRL(" 
")); + for(int i=0; i < n; ++i) wprintf(_SC(" ")); } SynTree::~SynTree() { - //wprintf(STRL("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(_SC("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -1332,18 +1332,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/Parser.frame b/src/Parser.frame index 068a42e..38133bf 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -300,7 +300,7 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(STRL("Dummy Token")); + la->val = coco_string_create(_SC("Dummy Token")); Get(); -->parseRoot } @@ -349,41 +349,41 @@ void Errors::SynErr(int line, int col, int n) { -->errors default: { - coco_swprintf(format, format_size, STRL("error %d"), n); + coco_swprintf(format, format_size, _SC("error %d"), n); s = format; } break; } - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(STRL("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(STRL("%ls\n"), s); + wprintf(_SC("%ls\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(STRL("%ls"), s); + wprintf(_SC("%ls"), s); exit(1); } #ifdef PARSER_WITH_AST static void printIndent(int n) { - for(int i=0; i < n; ++i) wprintf(STRL(" ")); + for(int i=0; i < n; ++i) wprintf(_SC(" ")); } 
SynTree::~SynTree() { - //wprintf(STRL("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(_SC("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -407,18 +407,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(STRL("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(STRL("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/Parser.h b/src/Parser.h index 02515d8..7dc8899 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -152,7 +152,7 @@ int id; id = 0; str = 1; tokenString = NULL; - noString = coco_string_create(STRL("-none-")); + noString = coco_string_create(_SC("-none-")); ignoreGammarErrors = false; } diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 46badf9..f848755 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -37,7 +37,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { void ParserGen::Indent (int n) { - for (int i = 1; i <= n; i++) fputws(STRL("\t"), gen); + for (int i = 1; i <= n; i++) fputws(_SC("\t"), gen); } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning @@ -71,7 +71,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, STRL("namespace %ls {\n"), curNs); + fwprintf(gen, _SC("namespace %ls {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -84,7 +84,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { void ParserGen::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fputws(STRL("} // namespace\n"), gen); + fputws(_SC("} // namespace\n"), gen); } } @@ -94,12 +94,12 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { if (pos != NULL) { buffer->SetPos(pos->beg); ch = buffer->Read(); if (tab->emitLines && pos->line) { - fwprintf(gen, STRL("\n#line %d \"%ls\"\n"), pos->line, tab->srcName); + fwprintf(gen, _SC("\n#line %d \"%ls\"\n"), pos->line, tab->srcName); } Indent(indent); while (buffer->GetPos() <= pos->end) { while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF - fputws(STRL("\n"), gen); Indent(indent); + fputws(_SC("\n"), gen); Indent(indent); if (ch == CR) { ch = buffer->Read(); } // skip CR if (ch == LF) { ch = buffer->Read(); } // skip LF for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) { @@ -108,11 +108,11 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { } if (buffer->GetPos() > pos->end) goto done; } - fwprintf(gen, STRL("%lc"), ch); + fwprintf(gen, _SC("%lc"), ch); ch = buffer->Read(); } done: - if (indent > 0) fputws(STRL("\n"), gen); + if (indent > 
0) fputws(_SC("\n"), gen); } } @@ -120,26 +120,26 @@ void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { errorNr++; const int formatLen = 1000; wchar_t format[formatLen]; - coco_swprintf(format, formatLen, STRL("\t\t\tcase %d: s = STRL(\""), errorNr); + coco_swprintf(format, formatLen, _SC("\t\t\tcase %d: s = _SC(\""), errorNr); coco_string_merge(err, format); if (errTyp == tErr) { - if (sym->name[0] == CHL('"')) { + if (sym->name[0] == _SC('"')) { wchar_t *se = tab->Escape(sym->name); - coco_swprintf(format, formatLen, STRL("%ls expected"), se); + coco_swprintf(format, formatLen, _SC("%ls expected"), se); coco_string_merge(err, format); coco_string_delete(se); } else { - coco_swprintf(format, formatLen, STRL("%ls expected"), sym->name); + coco_swprintf(format, formatLen, _SC("%ls expected"), sym->name); coco_string_merge(err, format); } } else if (errTyp == altErr) { - coco_swprintf(format, formatLen, STRL("invalid %ls"), sym->name); + coco_swprintf(format, formatLen, _SC("invalid %ls"), sym->name); coco_string_merge(err, format); } else if (errTyp == syncErr) { - coco_swprintf(format, formatLen, STRL("this symbol not expected in %ls"), sym->name); + coco_swprintf(format, formatLen, _SC("this symbol not expected in %ls"), sym->name); coco_string_merge(err, format); } - coco_swprintf(format, formatLen, STRL("\"); break;\n")); + coco_swprintf(format, formatLen, _SC("\"); break;\n")); coco_string_merge(err, format); } @@ -154,20 +154,20 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); - if (n == 0) fputws(STRL("false"), gen); // happens if an ANY set matches no symbol + if (n == 0) fputws(_SC("false"), gen); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; for (int i=0; iterminals.Count; i++) { sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { - fputws(STRL("la->kind == "), gen); + fputws(_SC("la->kind == "), gen); 
WriteSymbolOrCode(gen, sym); --n; - if (n > 0) fputws(STRL(" || "), gen); + if (n > 0) fputws(_SC(" || "), gen); } } } else - fwprintf(gen, STRL("StartOf(%d /* %s */)"), NewCondSet(s), (tab->nTyp[p->typ])); + fwprintf(gen, _SC("StartOf(%d /* %s */)"), NewCondSet(s), (tab->nTyp[p->typ])); } } @@ -176,9 +176,9 @@ void ParserGen::PutCaseLabels (const BitArray *s) { for (int i=0; iterminals.Count; i++) { sym = tab->terminals[i]; if ((*s)[sym->n]) { - fputws(STRL("case "), gen); + fputws(_SC("case "), gen); WriteSymbolOrCode(gen, sym); - fputws(STRL(": "), gen); + fputws(_SC(": "), gen); } } } @@ -189,43 +189,43 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { while (p != NULL) { if (p->typ == Node::nt) { Indent(indent); - fwprintf(gen, STRL("%ls("), p->sym->name); + fwprintf(gen, _SC("%ls("), p->sym->name); CopySourcePart(p->pos, 0); - fputws(STRL(");\n"), gen); + fputws(_SC(");\n"), gen); } else if (p->typ == Node::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) { - fputws(STRL("Get();\n"), gen); + fputws(_SC("Get();\n"), gen); //copy and pasted bellow - fputws(STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); + fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } else { - fputws(STRL("Expect("), gen); + fputws(_SC("Expect("), gen); WriteSymbolOrCode(gen, p->sym); - fputws(STRL(");\n"), gen); + fputws(_SC(");\n"), gen); //copy and pasted from above - fputws(STRL("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); + fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } } if (p->typ == Node::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); - fputws(STRL("ExpectWeak("), gen); + fputws(_SC("ExpectWeak("), gen); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, STRL(", %d);\n"), NewCondSet(s1)); + fwprintf(gen, _SC(", %d);\n"), NewCondSet(s1)); delete 
s1; } if (p->typ == Node::any) { Indent(indent); int acc = Sets::Elements(p->set); if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here - fputws(STRL("Get();\n"), gen); + fputws(_SC("Get();\n"), gen); } else { GenErrorMsg(altErr, curSy); if (acc > 0) { - fputws(STRL("if ("), gen); GenCond(p->set, p); fwprintf(gen, STRL(") Get(); else SynErr(%d);\n"), errorNr); - } else fwprintf(gen, STRL("SynErr(%d); // ANY node that matches no symbol\n"), errorNr); + fputws(_SC("if ("), gen); GenCond(p->set, p); fwprintf(gen, _SC(") Get(); else SynErr(%d);\n"), errorNr); + } else fwprintf(gen, _SC("SynErr(%d); // ANY node that matches no symbol\n"), errorNr); } } if (p->typ == Node::eps) { // nothing } if (p->typ == Node::rslv) { // nothing @@ -235,57 +235,57 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); - fputws(STRL("while (!("), gen); GenCond(s1, p); fputws(STRL(")) {"), gen); - fwprintf(gen, STRL("SynErr(%d); Get();"), errorNr); fputws(STRL("}\n"), gen); + fputws(_SC("while (!("), gen); GenCond(s1, p); fputws(_SC(")) {"), gen); + fwprintf(gen, _SC("SynErr(%d); Get();"), errorNr); fputws(_SC("}\n"), gen); delete s1; } if (p->typ == Node::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); delete s1; bool useSwitch = UseSwitch(p); - if (useSwitch) { Indent(indent); fputws(STRL("switch (la->kind) {\n"), gen); } + if (useSwitch) { Indent(indent); fputws(_SC("switch (la->kind) {\n"), gen); } p2 = p; while (p2 != NULL) { s1 = tab->Expected(p2->sub, curSy); Indent(indent); if (useSwitch) { - PutCaseLabels(s1); fputws(STRL("{\n"), gen); + PutCaseLabels(s1); fputws(_SC("{\n"), gen); } else if (p2 == p) { - fputws(STRL("if ("), gen); GenCond(s1, p2->sub); fputws(STRL(") {\n"), gen); - } else if (p2->down == NULL && equal) { fputws(STRL("} else 
{\n"), gen); + fputws(_SC("if ("), gen); GenCond(s1, p2->sub); fputws(_SC(") {\n"), gen); + } else if (p2->down == NULL && equal) { fputws(_SC("} else {\n"), gen); } else { - fputws(STRL("} else if ("), gen); GenCond(s1, p2->sub); fputws(STRL(") {\n"), gen); + fputws(_SC("} else if ("), gen); GenCond(s1, p2->sub); fputws(_SC(") {\n"), gen); } GenCode(p2->sub, indent + 1, s1); if (useSwitch) { - Indent(indent); fputws(STRL("\tbreak;\n"), gen); - Indent(indent); fputws(STRL("}\n"), gen); + Indent(indent); fputws(_SC("\tbreak;\n"), gen); + Indent(indent); fputws(_SC("}\n"), gen); } p2 = p2->down; delete s1; } Indent(indent); if (equal) { - fputws(STRL("}\n"), gen); + fputws(_SC("}\n"), gen); } else { GenErrorMsg(altErr, curSy); if (useSwitch) { - fwprintf(gen, STRL("default: SynErr(%d); break;\n"), errorNr); - Indent(indent); fputws(STRL("}\n"), gen); + fwprintf(gen, _SC("default: SynErr(%d); break;\n"), errorNr); + Indent(indent); fputws(_SC("}\n"), gen); } else { - fputws(STRL("} "), gen); fwprintf(gen, STRL("else SynErr(%d);\n"), errorNr); + fputws(_SC("} "), gen); fwprintf(gen, _SC("else SynErr(%d);\n"), errorNr); } } } if (p->typ == Node::iter) { Indent(indent); p2 = p->sub; - fputws(STRL("while ("), gen); + fputws(_SC("while ("), gen); if (p2->typ == Node::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); - fputws(STRL("WeakSeparator("), gen); + fputws(_SC("WeakSeparator("), gen); WriteSymbolOrCode(gen, p2->sym); - fwprintf(gen, STRL(",%d,%d) "), NewCondSet(s1), NewCondSet(s2)); + fwprintf(gen, _SC(",%d,%d) "), NewCondSet(s1), NewCondSet(s2)); delete s1; delete s2; s1 = new BitArray(tab->terminals.Count); // for inner structure @@ -294,16 +294,16 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { s1 = tab->First(p2); GenCond(s1, p2); } - fputws(STRL(") {\n"), gen); + fputws(_SC(") {\n"), gen); GenCode(p2, indent + 1, s1); - Indent(indent); fputws(STRL("}\n"), gen); + Indent(indent); fputws(_SC("}\n"), gen); 
delete s1; } if (p->typ == Node::opt) { s1 = tab->First(p->sub); Indent(indent); - fputws(STRL("if ("), gen); GenCond(s1, p->sub); fputws(STRL(") {\n"), gen); + fputws(_SC("if ("), gen); GenCond(s1, p->sub); fputws(_SC(") {\n"), gen); GenCode(p->sub, indent + 1, s1); - Indent(indent); fputws(STRL("}\n"), gen); + Indent(indent); fputws(_SC("}\n"), gen); delete s1; } if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) @@ -319,7 +319,7 @@ void ParserGen::GenTokensHeader() { int i; bool isFirst = true; - fputws(STRL("\tenum {\n"), gen); + fputws(_SC("\tenum {\n"), gen); // tokens for (i=0; iterminals.Count; i++) { @@ -327,33 +327,33 @@ void ParserGen::GenTokensHeader() { if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } - else { fputws(STRL(",\n"), gen); } + else { fputws(_SC(",\n"), gen); } - fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); } // pragmas for (i=0; ipragmas.Count; i++) { if (isFirst) { isFirst = false; } - else { fputws(STRL(",\n"), gen); } + else { fputws(_SC(",\n"), gen); } sym = tab->pragmas[i]; - fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); } - fputws(STRL("\n\t};\n"), gen); + fputws(_SC("\n\t};\n"), gen); // nonterminals - fputws(STRL("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"), gen); + fputws(_SC("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"), gen); isFirst = true; for (i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; if (isFirst) { isFirst = false; } - else { fputws(STRL(",\n"), gen); } + else { fputws(_SC(",\n"), gen); } - fwprintf(gen , STRL("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); } - fputws(STRL("\n\t};\n#endif\n"), gen); + fputws(_SC("\n\t};\n#endif\n"), gen); } @@ -361,19 +361,19 @@ void ParserGen::GenCodePragmas() { Symbol *sym; for (int i=0; ipragmas.Count; i++) { sym = tab->pragmas[i]; - 
fputws(STRL("\t\tif (la->kind == "), gen); + fputws(_SC("\t\tif (la->kind == "), gen); WriteSymbolOrCode(gen, sym); - fputws(STRL(") {\n"), gen); + fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 4); - fputws(STRL("\t\t}\n"), gen); + fputws(_SC("\t\t}\n"), gen); } } void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { if (!isalpha(sym->name[0])) { - fwprintf(gen, STRL("%d /* %ls */"), sym->n, sym->name); + fwprintf(gen, _SC("%d /* %ls */"), sym->n, sym->name); } else { - fwprintf(gen, STRL("_%ls"), sym->name); + fwprintf(gen, _SC("_%ls"), sym->name); } } @@ -382,9 +382,9 @@ void ParserGen::GenProductionsHeader() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, STRL("\tvoid %ls("), sym->name); + fwprintf(gen, _SC("\tvoid %ls("), sym->name); CopySourcePart(sym->attrPos, 0); - fputws(STRL(");\n"), gen); + fputws(_SC(");\n"), gen); } } @@ -394,42 +394,42 @@ void ParserGen::GenProductions() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, STRL("void Parser::%ls("), sym->name); + fwprintf(gen, _SC("void Parser::%ls("), sym->name); CopySourcePart(sym->attrPos, 0); - fputws(STRL(") {\n"), gen); + fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 2); - fputws(STRL("#ifdef PARSER_WITH_AST\n"), gen); - if(i == 0) fwprintf(gen, STRL("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); + fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); + if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); else { - fwprintf(gen, STRL("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%ls, 
\"%ls\", la->line);\n"), sym->name, sym->name); + fwprintf(gen, _SC("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n"), sym->name, sym->name); } - fputws(STRL("#endif\n"), gen); + fputws(_SC("#endif\n"), gen); ba.SetAll(false); GenCode(sym->graph, 2, &ba); - fputws(STRL("#ifdef PARSER_WITH_AST\n"), gen); - if(i == 0) fputws(STRL("\t\tAstPopNonTerminal();\n"), gen); - else fputws(STRL("\t\tif(ntAdded) AstPopNonTerminal();\n"), gen); - fputws(STRL("#endif\n}\n\n"), gen); + fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); + if(i == 0) fputws(_SC("\t\tAstPopNonTerminal();\n"), gen); + else fputws(_SC("\t\tif(ntAdded) AstPopNonTerminal();\n"), gen); + fputws(_SC("#endif\n}\n\n"), gen); } } void ParserGen::InitSets() { - fwprintf(gen, STRL("\tstatic bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); + fwprintf(gen, _SC("\tstatic bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); for (int i = 0; i < symSet.Count; i++) { BitArray *s = symSet[i]; - fputws(STRL("\t\t{"), gen); + fputws(_SC("\t\t{"), gen); int j = 0; Symbol *sym; for (int k=0; kterminals.Count; k++) { sym = tab->terminals[k]; - fputws(((*s)[sym->n]) ? STRL("T,") : STRL("x,"), gen); + fputws(((*s)[sym->n]) ? 
_SC("T,") : _SC("x,"), gen); ++j; - if (j%4 == 0) fputws(STRL(" "), gen); + if (j%4 == 0) fputws(_SC(" "), gen); } - if (i == symSet.Count-1) fputws(STRL("x}\n"), gen); else fputws(STRL("x},\n"), gen); + if (i == symSet.Count-1) fputws(_SC("x}\n"), gen); else fputws(_SC("x},\n"), gen); } - fputws(STRL("\t};\n\n"), gen); + fputws(_SC("\t};\n\n"), gen); } void ParserGen::WriteParser () { @@ -437,8 +437,8 @@ void ParserGen::WriteParser () { int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart symSet.Add(tab->allSyncSets); - fram = g.OpenFrame(STRL("Parser.frame")); - gen = g.OpenGen(STRL("Parser.h")); + fram = g.OpenFrame(_SC("Parser.frame")); + gen = g.OpenGen(_SC("Parser.h")); Symbol *sym; for (int i=0; iterminals.Count; i++) { @@ -447,47 +447,47 @@ void ParserGen::WriteParser () { } g.GenCopyright(); - g.SkipFramePart(STRL("-->begin")); + g.SkipFramePart(_SC("-->begin")); - g.CopyFramePart(STRL("-->prefix")); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(STRL("-->prefix")); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(STRL("-->headerdef")); + g.CopyFramePart(_SC("-->headerdef")); - if (usingPos != NULL) {CopySourcePart(usingPos, 0); fputws(STRL("\n"), gen);} - g.CopyFramePart(STRL("-->namespace_open")); + if (usingPos != NULL) {CopySourcePart(usingPos, 0); fputws(_SC("\n"), gen);} + g.CopyFramePart(_SC("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(STRL("-->constantsheader")); + g.CopyFramePart(_SC("-->constantsheader")); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ - fputws(STRL("\tint maxT;\n"), gen); - g.CopyFramePart(STRL("-->declarations")); CopySourcePart(tab->semDeclPos, 0); - g.CopyFramePart(STRL("-->productionsheader")); GenProductionsHeader(); - g.CopyFramePart(STRL("-->namespace_close")); + fputws(_SC("\tint maxT;\n"), gen); + g.CopyFramePart(_SC("-->declarations")); CopySourcePart(tab->semDeclPos, 0); + 
g.CopyFramePart(_SC("-->productionsheader")); GenProductionsHeader(); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(STRL("-->implementation")); + g.CopyFramePart(_SC("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(STRL("Parser.cpp")); + gen = g.OpenGen(_SC("Parser.cpp")); g.GenCopyright(); - g.SkipFramePart(STRL("-->begin")); - g.CopyFramePart(STRL("-->namespace_open")); + g.SkipFramePart(_SC("-->begin")); + g.CopyFramePart(_SC("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(STRL("-->pragmas")); GenCodePragmas(); - g.CopyFramePart(STRL("-->productions")); GenProductions(); - g.CopyFramePart(STRL("-->parseRoot")); fwprintf(gen, STRL("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(STRL("\tExpect(0);"), gen); - g.CopyFramePart(STRL("-->constants")); - fwprintf(gen, STRL("\tmaxT = %d;\n"), tab->terminals.Count-1); - g.CopyFramePart(STRL("-->initialization")); InitSets(); - g.CopyFramePart(STRL("-->errors")); fwprintf(gen, STRL("%ls"), err); - g.CopyFramePart(STRL("-->namespace_close")); + g.CopyFramePart(_SC("-->pragmas")); GenCodePragmas(); + g.CopyFramePart(_SC("-->productions")); GenProductions(); + g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); + g.CopyFramePart(_SC("-->constants")); + fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count-1); + g.CopyFramePart(_SC("-->initialization")); InitSets(); + g.CopyFramePart(_SC("-->errors")); fwprintf(gen, _SC("%ls"), err); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); fclose(gen); @@ -496,11 +496,11 @@ void ParserGen::WriteParser () { void ParserGen::WriteStatistics () { - fwprintf(trace, STRL("\n%d terminals\n"), tab->terminals.Count); - fwprintf(trace, STRL("%d symbols\n"), tab->terminals.Count + tab->pragmas.Count + + fwprintf(trace, _SC("\n%d terminals\n"), 
tab->terminals.Count); + fwprintf(trace, _SC("%d symbols\n"), tab->terminals.Count + tab->pragmas.Count + tab->nonterminals.Count); - fwprintf(trace, STRL("%d nodes\n"), tab->nodes.Count); - fwprintf(trace, STRL("%d sets\n"), symSet.Count); + fwprintf(trace, _SC("%d nodes\n"), tab->nodes.Count); + fwprintf(trace, _SC("%d sets\n"), symSet.Count); } diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 32eda16..0bf9f43 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -75,13 +75,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((CHL('a') <= data[i]) && (data[i] <= CHL('z'))) { - newData[i] = data[i] + (CHL('A') - CHL('a')); + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = CHL('\0'); + newData[dataLen] = _SC('\0'); return newData; } @@ -98,12 +98,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((CHL('A') <= ch) && (ch <= CHL('Z'))) { - newData[i] = ch - (CHL('A') - CHL('a')); + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); } else { newData[i] = ch; } } - newData[dataLen] = CHL('\0'); + newData[dataLen] = _SC('\0'); return newData; } @@ -367,7 +367,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(STRL("--- buffer out of bounds access, position: %d\n"), value); + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -452,7 +452,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(STRL("--- Cannot open file %ls\n"), fileName); + wprintf(_SC("--- Cannot open file %ls\n"), fileName); exit(1); } 
coco_string_delete(chFileName); @@ -504,24 +504,24 @@ void Scanner::Init() { start.set(123, 27); start.set(125, 28); start.set(Buffer::EoF, -1); - keywords.set(STRL("COMPILER"), 6); - keywords.set(STRL("IGNORECASE"), 7); - keywords.set(STRL("TERMINALS"), 8); - keywords.set(STRL("CHARACTERS"), 9); - keywords.set(STRL("TOKENS"), 10); - keywords.set(STRL("PRAGMAS"), 11); - keywords.set(STRL("COMMENTS"), 12); - keywords.set(STRL("FROM"), 13); - keywords.set(STRL("TO"), 14); - keywords.set(STRL("NESTED"), 15); - keywords.set(STRL("IGNORE"), 16); - keywords.set(STRL("PRODUCTIONS"), 17); - keywords.set(STRL("END"), 20); - keywords.set(STRL("ANY"), 24); - keywords.set(STRL("WEAK"), 30); - keywords.set(STRL("SYNC"), 37); - keywords.set(STRL("IF"), 38); - keywords.set(STRL("CONTEXT"), 39); + keywords.set(_SC("COMPILER"), 6); + keywords.set(_SC("IGNORECASE"), 7); + keywords.set(_SC("TERMINALS"), 8); + keywords.set(_SC("CHARACTERS"), 9); + keywords.set(_SC("TOKENS"), 10); + keywords.set(_SC("PRAGMAS"), 11); + keywords.set(_SC("COMMENTS"), 12); + keywords.set(_SC("FROM"), 13); + keywords.set(_SC("TO"), 14); + keywords.set(_SC("NESTED"), 15); + keywords.set(_SC("IGNORE"), 16); + keywords.set(_SC("PRODUCTIONS"), 17); + keywords.set(_SC("END"), 20); + keywords.set(_SC("ANY"), 24); + keywords.set(_SC("WEAK"), 30); + keywords.set(_SC("SYNC"), 37); + keywords.set(_SC("IF"), 38); + keywords.set(_SC("CONTEXT"), 39); tvalLength = 128; @@ -534,7 +534,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - wprintf(STRL("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -545,7 +545,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(STRL("Illegal byte order mark at start of file")); + wprintf(_SC("Illegal byte order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -566,7 +566,7 @@ void 
Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == CHL('\r') && buffer->Peek() != CHL('\n')) ch = EOL; + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } @@ -590,7 +590,7 @@ void Scanner::AddCh() { bool Scanner::Comment0() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == CHL('/')) { + if (ch == _SC('/')) { NextCh(); for(;;) { if (ch == 10) { @@ -608,19 +608,19 @@ bool Scanner::Comment0() { bool Scanner::Comment1() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == CHL('*')) { + if (ch == _SC('*')) { NextCh(); for(;;) { - if (ch == CHL('*')) { + if (ch == _SC('*')) { NextCh(); - if (ch == CHL('/')) { + if (ch == _SC('/')) { level--; if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } NextCh(); } - } else if (ch == CHL('/')) { + } else if (ch == _SC('/')) { NextCh(); - if (ch == CHL('*')) { + if (ch == _SC('*')) { level++; NextCh(); } } else if (ch == buffer->EoF) return false; @@ -667,7 +667,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(STRL("--- Too long token value\n")); + wprintf(_SC("--- Too long token value\n")); exit(1); } CreateHeapBlock(); @@ -676,15 +676,15 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = CHL('\0'); + t->val[tlen] = _SC('\0'); } Token* Scanner::NextToken() { while(true) { - while (ch == CHL(' ') || + while (ch == _SC(' ') || (ch >= 9 && ch <= 10) || ch == 13 ) NextCh(); - if ((ch == CHL('/') && Comment0()) || (ch == CHL('/') && Comment1())) continue; + if ((ch == _SC('/') && Comment0()) || (ch == _SC('/') && Comment1())) continue; break; } @@ 
-710,12 +710,12 @@ Token* Scanner::NextToken() { case 1: case_1: recEnd = pos; recKind = 1 /* ident */; - if ((ch >= CHL('0') && ch <= CHL('9')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_1;} + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} case 2: case_2: recEnd = pos; recKind = 2 /* number */; - if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_2;} + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_2;} else {t->kind = 2 /* number */; loopState = false; break;} case 3: case_3: @@ -724,7 +724,7 @@ Token* Scanner::NextToken() { case_4: {t->kind = 4 /* badString */; loopState = false; break;} case 5: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= CHL('&')) || (ch >= CHL('(') && ch <= CHL('[')) || (ch >= CHL(']') && ch <= 65535)) {AddCh(); goto case_6;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('&')) || (ch >= _SC('(') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 65535)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} else {goto case_0;} case 6: @@ -733,11 +733,11 @@ Token* Scanner::NextToken() { else {goto case_0;} case 7: case_7: - if ((ch >= CHL(' ') && ch <= CHL('~'))) {AddCh(); goto case_8;} + if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_8;} else {goto case_0;} case 8: case_8: - if ((ch >= CHL('0') && ch <= CHL('9')) || (ch >= CHL('a') && ch <= CHL('f'))) {AddCh(); goto case_8;} + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('a') && ch <= _SC('f'))) {AddCh(); goto case_8;} else if (ch == 39) {AddCh(); goto case_9;} else {goto case_0;} case 9: @@ -746,35 +746,35 @@ Token* Scanner::NextToken() { case 10: case_10: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= CHL('0') 
&& ch <= CHL('9')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_10;} + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_10;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 11: case_11: recEnd = pos; recKind = 44 /* optionSym */; - if ((ch >= CHL('-') && ch <= CHL('.')) || (ch >= CHL('0') && ch <= CHL(':')) || (ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_11;} + if ((ch >= _SC('-') && ch <= _SC('.')) || (ch >= _SC('0') && ch <= _SC(':')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_11;} else {t->kind = 44 /* optionSym */; loopState = false; break;} case 12: case_12: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= CHL('!')) || (ch >= CHL('#') && ch <= CHL('[')) || (ch >= CHL(']') && ch <= 65535)) {AddCh(); goto case_12;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 65535)) {AddCh(); goto case_12;} else if (ch == 10 || ch == 13) {AddCh(); goto case_4;} - else if (ch == CHL('"')) {AddCh(); goto case_3;} + else if (ch == _SC('"')) {AddCh(); goto case_3;} else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_10;} - else if ((ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_15;} + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 14: case_14: - if ((ch >= 
CHL(' ') && ch <= CHL('~'))) {AddCh(); goto case_12;} + if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: recEnd = pos; recKind = 43 /* ddtSym */; - if ((ch >= CHL('0') && ch <= CHL('9'))) {AddCh(); goto case_10;} - else if ((ch >= CHL('A') && ch <= CHL('Z')) || ch == CHL('_') || (ch >= CHL('a') && ch <= CHL('z'))) {AddCh(); goto case_15;} - else if (ch == CHL('=')) {AddCh(); goto case_11;} + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} + else if (ch == _SC('=')) {AddCh(); goto case_11;} else {t->kind = 43 /* ddtSym */; loopState = false; break;} case 16: {t->kind = 18 /* "=" */; loopState = false; break;} @@ -813,17 +813,17 @@ Token* Scanner::NextToken() { {t->kind = 41 /* ".)" */; loopState = false; break;} case 31: recEnd = pos; recKind = 19 /* "." */; - if (ch == CHL('.')) {AddCh(); goto case_19;} - else if (ch == CHL('>')) {AddCh(); goto case_22;} - else if (ch == CHL(')')) {AddCh(); goto case_30;} + if (ch == _SC('.')) {AddCh(); goto case_19;} + else if (ch == _SC('>')) {AddCh(); goto case_22;} + else if (ch == _SC(')')) {AddCh(); goto case_30;} else {t->kind = 19 /* "." 
*/; loopState = false; break;} case 32: recEnd = pos; recKind = 25 /* "<" */; - if (ch == CHL('.')) {AddCh(); goto case_21;} + if (ch == _SC('.')) {AddCh(); goto case_21;} else {t->kind = 25 /* "<" */; loopState = false; break;} case 33: recEnd = pos; recKind = 31 /* "(" */; - if (ch == CHL('.')) {AddCh(); goto case_29;} + if (ch == _SC('.')) {AddCh(); goto case_29;} else {t->kind = 31 /* "(" */; loopState = false; break;} } diff --git a/src/Scanner.frame b/src/Scanner.frame index 8a63358..5da2fa9 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -57,8 +57,7 @@ Scanner.h Specification #define SFMT_LSTR "%s" #define SFMT_SLSTR "%s" #define SFMT_LS "s" -#define STRL(s) s -#define CHL(s) s +#define _SC(s) s #define fputws fputs #define wprintf printf #define swprintf snprintf @@ -78,8 +77,7 @@ Scanner.h Specification #define SFMT_LS "ls" #define SFMT_LCHR "%lc" #define SFMT_SLCHR L"%lc" -#define STRL(s) L##s -#define CHL(s) L##s +#define _SC(s) L##s #endif #if _MSC_VER >= 1400 @@ -97,7 +95,7 @@ Scanner.h Specification #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR CHL(':') +#define COCO_CPP_NAMESPACE_SEPARATOR _SC(':') -->namespace_open @@ -466,13 +464,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((CHL('a') <= data[i]) && (data[i] <= CHL('z'))) { - newData[i] = data[i] + (CHL('A') - CHL('a')); + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = CHL('\0'); + newData[dataLen] = _SC('\0'); return newData; } @@ -489,12 +487,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((CHL('A') <= ch) && (ch <= CHL('Z'))) { - 
newData[i] = ch - (CHL('A') - CHL('a')); + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); } else { newData[i] = ch; } } - newData[dataLen] = CHL('\0'); + newData[dataLen] = _SC('\0'); return newData; } @@ -758,7 +756,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(STRL("--- buffer out of bounds access, position: %d\n"), value); + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -843,7 +841,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(STRL("--- Cannot open file %ls\n"), fileName); + wprintf(_SC("--- Cannot open file %ls\n"), fileName); exit(1); } coco_string_delete(chFileName); @@ -883,7 +881,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - wprintf(STRL("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -894,7 +892,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(STRL("Illegal byte order mark at start of file")); + wprintf(_SC("Illegal byte order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -915,7 +913,7 @@ void Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == CHL('\r') && buffer->Peek() != CHL('\n')) ch = EOL; + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } -->casing1 @@ -972,7 +970,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(STRL("--- Too long token value\n")); + wprintf(_SC("--- Too long token 
value\n")); exit(1); } CreateHeapBlock(); @@ -981,12 +979,12 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = CHL('\0'); + t->val[tlen] = _SC('\0'); } Token* Scanner::NextToken() { while(true) { - while (ch == CHL(' ') || + while (ch == _SC(' ') || -->scan1 ) NextCh(); -->scan2 diff --git a/src/Scanner.h b/src/Scanner.h index 985f1ce..f9eae99 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -52,8 +52,7 @@ Coco/R itself) does not fall under the GNU General Public License. #define SFMT_LSTR "%s" #define SFMT_SLSTR "%s" #define SFMT_LS "s" -#define STRL(s) s -#define CHL(s) s +#define _SC(s) s #define fputws fputs #define wprintf printf #define swprintf snprintf @@ -73,8 +72,7 @@ Coco/R itself) does not fall under the GNU General Public License. #define SFMT_LS "ls" #define SFMT_LCHR "%lc" #define SFMT_SLCHR L"%lc" -#define STRL(s) L##s -#define CHL(s) L##s +#define _SC(s) L##s #endif #if _MSC_VER >= 1400 @@ -92,7 +90,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
#define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR CHL(':') +#define COCO_CPP_NAMESPACE_SEPARATOR _SC(':') namespace Coco { diff --git a/src/StringBuilder.cpp b/src/StringBuilder.cpp index 203f69d..441500b 100644 --- a/src/StringBuilder.cpp +++ b/src/StringBuilder.cpp @@ -70,7 +70,7 @@ void StringBuilder::Append(const wchar_t value) { data[length] = value; length++; - data[length] = CHL('\0'); + data[length] = _SC('\0'); } void StringBuilder::Append(const wchar_t *value) { diff --git a/src/Tab.cpp b/src/Tab.cpp index 221f4c3..223eb86 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -50,7 +50,7 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; errors = &parser->errors; - eofSy = NewSym(Node::t, STRL("EOF"), 0, 0); + eofSy = NewSym(Node::t, _SC("EOF"), 0, 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); checkEOF = true; visited = allSyncSets = NULL; @@ -79,8 +79,8 @@ Tab::~Tab() { Symbol* Tab::NewSym(int typ, const wchar_t* name, int line, int col) { if (coco_string_length(name) == 2 && name[0] == '"') { - parser->SemErr(STRL("empty token not allowed")); - name = coco_string_create(STRL("???")); + parser->SemErr(_SC("empty token not allowed")); + name = coco_string_create(_SC("???")); } Symbol *sym = new Symbol(typ, name, line, col); @@ -115,21 +115,21 @@ int Tab::Num(const Node *p) { void Tab::PrintSym(const Symbol *sym) { wchar_t *paddedName = Name(sym->name); - fwprintf(trace, STRL("%3d %14s %s"), sym->n, paddedName, nTyp[sym->typ]); + fwprintf(trace, _SC("%3d %14s %s"), sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); - if (sym->attrPos==NULL) fputws(STRL(" false "), trace); else fputws(STRL(" true "), trace); + if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); if (sym->typ == Node::nt) { - fwprintf(trace, STRL("%5d"), Num(sym->graph)); - if 
(sym->deletable) fputws(STRL(" true "), trace); else fputws(STRL(" false "), trace); + fwprintf(trace, _SC("%5d"), Num(sym->graph)); + if (sym->deletable) fputws(_SC(" true "), trace); else fputws(_SC(" false "), trace); } else - fputws(STRL(" "), trace); + fputws(_SC(" "), trace); - fwprintf(trace, STRL("%5d %s\n"), sym->line, tKind[sym->tokenKind]); + fwprintf(trace, _SC("%5d %s\n"), sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "Symbol Table:\n" "------------\n\n" " nr name typ hasAt graph del line tokenKind\n"); @@ -150,17 +150,17 @@ void Tab::PrintSymbolTable() { } - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "\nLiteral Tokens:\n" "--------------\n"); Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - fwprintf(trace, STRL("_%ls = %ls.\n"), ((Symbol*) (e->val))->name, e->key); + fwprintf(trace, _SC("_%ls = %ls.\n"), ((Symbol*) (e->val))->name, e->key); } delete iter; - fputws(STRL("\n"), trace); + fputws(_SC("\n"), trace); } void Tab::PrintSet(const BitArray *s, int indent) { @@ -172,15 +172,15 @@ void Tab::PrintSet(const BitArray *s, int indent) { if ((*s)[sym->n]) { len = coco_string_length(sym->name); if (col + len >= 80) { - fputws(STRL("\n"), trace); - for (col = 1; col < indent; col++) fputws(STRL(" "), trace); + fputws(_SC("\n"), trace); + for (col = 1; col < indent; col++) fputws(_SC(" "), trace); } - fwprintf(trace, STRL("%ls "), sym->name); + fwprintf(trace, _SC("%ls "), sym->name); col += len + 1; } } - if (col == indent) fputws(STRL("-- empty set --"), trace); - fputws(STRL("\n"), trace); + if (col == indent) fputws(_SC("-- empty set --"), trace); + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- @@ -275,7 +275,7 @@ Graph* Tab::StrToGraph(const wchar_t* str) { wchar_t *subStr = coco_string_create(str, 1, coco_string_length(str)-2); wchar_t *s = 
Unescape(subStr); coco_string_delete(subStr); - if (coco_string_length(s) == 0) parser->SemErr(STRL("empty token not allowed")); + if (coco_string_length(s) == 0) parser->SemErr(_SC("empty token not allowed")); Graph *g = new Graph(); g->r = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { @@ -337,15 +337,15 @@ typedef wchar_t wchar_t_10[10]; static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { - coco_swprintf(format, 10, STRL(" ")); + coco_swprintf(format, 10, _SC(" ")); } else { - coco_swprintf(format, 10, STRL("%5d"), pos->beg); + coco_swprintf(format, 10, _SC("%5d"), pos->beg); } return format; } wchar_t* Tab::Name(const wchar_t *name) { - wchar_t *name2 = coco_string_create_append(name, STRL(" ")); + wchar_t *name2 = coco_string_create_append(name, _SC(" ")); wchar_t *subName2 = coco_string_create(name2, 0, 12); coco_string_delete(name2); return subName2; @@ -354,7 +354,7 @@ wchar_t* Tab::Name(const wchar_t *name) { } void Tab::PrintNodes() { - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "Graph nodes:\n" "----------------------------------------------------\n" " n type name next down sub pos line\n" @@ -365,35 +365,35 @@ void Tab::PrintNodes() { wchar_t_10 format; for (int i=0; in, (nTyp[p->typ])); + fwprintf(trace, _SC("%4d %s "), p->n, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); - fwprintf(trace, STRL("%12s "), paddedName); + fwprintf(trace, _SC("%12s "), paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { CharClass *c = classes[p->val]; wchar_t *paddedName = Name(c->name); - fwprintf(trace, STRL("%12s "), paddedName); + fwprintf(trace, _SC("%12s "), paddedName); coco_string_delete(paddedName); - } else fputws(STRL(" "), trace); - fwprintf(trace, STRL("%5d "), Ptr(p->next, p->up)); + } else fputws(_SC(" "), trace); + fwprintf(trace, _SC("%5d "), Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { - 
fwprintf(trace, STRL(" %5s"), TabPos(p->pos, format)); + fwprintf(trace, _SC(" %5s"), TabPos(p->pos, format)); } if (p->typ == Node::chr) { - fwprintf(trace, STRL("%5d %5d "), p->val, p->code); + fwprintf(trace, _SC("%5d %5d "), p->val, p->code); } if (p->typ == Node::clas) { - fwprintf(trace, STRL(" %5d "), p->code); + fwprintf(trace, _SC(" %5d "), p->code); } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { - fwprintf(trace, STRL("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); + fwprintf(trace, _SC("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); } if (p->typ == Node::sem) { - fwprintf(trace, STRL(" %5s"), TabPos(p->pos, format)); + fwprintf(trace, _SC(" %5s"), TabPos(p->pos, format)); } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { - fwprintf(trace, STRL(" ")); + fwprintf(trace, _SC(" ")); } - fwprintf(trace, STRL("%5d\n"), p->line); + fwprintf(trace, _SC("%5d\n"), p->line); } - fputws(STRL("\n"), trace); + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- @@ -403,7 +403,7 @@ void Tab::PrintNodes() { CharClass* Tab::NewCharClass(const wchar_t* name, CharSet *s) { CharClass *c; - if (coco_string_equal(name, STRL("#"))) { + if (coco_string_equal(name, _SC("#"))) { wchar_t* temp = coco_string_create_append(name, (wchar_t) dummyName++); c = new CharClass(temp, s); coco_string_delete(temp); @@ -440,11 +440,11 @@ CharSet* Tab::CharClassSet(int i) { //----------- character class printing wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { - if (ch < CHL(' ') || ch >= 127 || ch == CHL('\'') || ch == CHL('\\')) { - coco_swprintf(format, 10, STRL("%d"), ch); + if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) { + coco_swprintf(format, 10, _SC("%d"), ch); return format; } else { - coco_swprintf(format, 10, STRL("'%lc'"), ch); + coco_swprintf(format, 10, _SC("'%lc'"), ch); return format; } } @@ -455,11 +455,11 @@ void 
Tab::WriteCharSet(const CharSet *s) { if (r->from < r->to) { wchar_t *from = TabCh(r->from, fmt1); wchar_t *to = TabCh(r->to, fmt2); - fwprintf(trace, STRL("%ls .. %ls "), from, to); + fwprintf(trace, _SC("%ls .. %ls "), from, to); } else { wchar_t *from = TabCh(r->from, fmt1); - fwprintf(trace, STRL("%ls "), from); + fwprintf(trace, _SC("%ls "), from); } } } @@ -469,17 +469,17 @@ void Tab::WriteCharClasses () { for (int i=0; iname, STRL(" ")); + wchar_t* format2 = coco_string_create_append(c->name, _SC(" ")); wchar_t* format = coco_string_create(format2, 0, 10); - coco_string_merge(format, STRL(": ")); + coco_string_merge(format, _SC(": ")); fputws(format, trace); WriteCharSet(c->set); - fputws(STRL("\n"), trace); + fputws(_SC("\n"), trace); coco_string_delete(format); coco_string_delete(format2); } - fputws(STRL("\n"), trace); + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- @@ -530,9 +530,9 @@ BitArray* Tab::First(const Node *p) { BitArray mark(nodes.Count); BitArray *fs = First0(p, &mark); if (ddt[3]) { - fputws(STRL("\n"), trace); - if (p != NULL) fwprintf(trace, STRL("First: node = %d\n"), p->n ); - else fputws(STRL("First: node = null\n"), trace); + fputws(_SC("\n"), trace); + if (p != NULL) fwprintf(trace, _SC("First: node = %d\n"), p->n ); + else fputws(_SC("First: node = null\n"), trace); PrintSet(fs, 0); } return fs; @@ -756,7 +756,7 @@ void Tab::CompDeletableSymbols() { for (i=0; ideletable) - wprintf(STRL(" %ls deletable\n"), sym->name); + wprintf(_SC(" %ls deletable\n"), sym->name); } } @@ -776,7 +776,7 @@ void Tab::CompSymbolSets() { CompFollowSets(); CompSyncSets(); if (ddt[1]) { - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "\n" "First & follow symbols:\n" "----------------------\n\n"); @@ -784,14 +784,14 @@ void Tab::CompSymbolSets() { Symbol *sym; for (int i=0; iname); - fputws(STRL("first: "), trace); PrintSet(sym->first, 10); - fputws(STRL("follow: "), trace); 
PrintSet(sym->follow, 10); - fputws(STRL("\n"), trace); + fwprintf(trace, _SC("%ls\n"), sym->name); + fputws(_SC("first: "), trace); PrintSet(sym->first, 10); + fputws(_SC("follow: "), trace); PrintSet(sym->follow, 10); + fputws(_SC("\n"), trace); } } if (ddt[4]) { - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "\n" "ANY and SYNC sets:\n" "-----------------\n"); @@ -800,7 +800,7 @@ void Tab::CompSymbolSets() { for (int i=0; ityp == Node::any || p->typ == Node::sync) { - fwprintf(trace, STRL("%4d %4s "), p->n, nTyp[p->typ]); + fwprintf(trace, _SC("%4d %4s "), p->n, nTyp[p->typ]); PrintSet(p->set, 11); } } @@ -818,16 +818,16 @@ wchar_t Tab::Hex2Char(const wchar_t* s, int len) { if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0'); else if ('a' <= ch && ch <= 'f') val = 16 * val + (10 + ch - 'a'); else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A'); - else parser->SemErr(STRL("bad escape sequence in string or character")); + else parser->SemErr(_SC("bad escape sequence in string or character")); } if (val >= COCO_WCHAR_MAX) {/* pdt */ - parser->SemErr(STRL("bad escape sequence in string or character")); + parser->SemErr(_SC("bad escape sequence in string or character")); } return (wchar_t) val; } static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { - coco_swprintf(format, 10, STRL("\\0x%04x"), ch); + coco_swprintf(format, 10, _SC("\\0x%04x"), ch); return format; } @@ -837,28 +837,28 @@ wchar_t* Tab::Unescape (const wchar_t* s) { int i = 0; int len = coco_string_length(s); while (i < len) { - if (s[i] == CHL('\\')) { + if (s[i] == _SC('\\')) { switch (s[i+1]) { - case CHL('\\'): buf.Append(CHL('\\')); i += 2; break; - case CHL('\''): buf.Append(CHL('\'')); i += 2; break; - case CHL('\"'): buf.Append(CHL('\"')); i += 2; break; - case CHL('r'): buf.Append(CHL('\r')); i += 2; break; - case CHL('n'): buf.Append(CHL('\n')); i += 2; break; - case CHL('t'): buf.Append(CHL('\t')); i += 2; break; - case CHL('0'): 
buf.Append(CHL('\0')); i += 2; break; - case CHL('a'): buf.Append(CHL('\a')); i += 2; break; - case CHL('b'): buf.Append(CHL('\b')); i += 2; break; - case CHL('f'): buf.Append(CHL('\f')); i += 2; break; - case CHL('v'): buf.Append(CHL('\v')); i += 2; break; - case CHL('u'): case CHL('x'): + case _SC('\\'): buf.Append(_SC('\\')); i += 2; break; + case _SC('\''): buf.Append(_SC('\'')); i += 2; break; + case _SC('\"'): buf.Append(_SC('\"')); i += 2; break; + case _SC('r'): buf.Append(_SC('\r')); i += 2; break; + case _SC('n'): buf.Append(_SC('\n')); i += 2; break; + case _SC('t'): buf.Append(_SC('\t')); i += 2; break; + case _SC('0'): buf.Append(_SC('\0')); i += 2; break; + case _SC('a'): buf.Append(_SC('\a')); i += 2; break; + case _SC('b'): buf.Append(_SC('\b')); i += 2; break; + case _SC('f'): buf.Append(_SC('\f')); i += 2; break; + case _SC('v'): buf.Append(_SC('\v')); i += 2; break; + case _SC('u'): case _SC('x'): if (i + 6 <= coco_string_length(s)) { buf.Append(Hex2Char(s +i+2, 4)); i += 6; break; } else { - parser->SemErr(STRL("bad escape sequence in string or character")); + parser->SemErr(_SC("bad escape sequence in string or character")); i = coco_string_length(s); break; } default: - parser->SemErr(STRL("bad escape sequence in string or character")); + parser->SemErr(_SC("bad escape sequence in string or character")); i += 2; break; } } else { @@ -879,14 +879,14 @@ wchar_t* Tab::Escape (const wchar_t* s) { for (int i=0; i < len; i++) { ch = s[i]; switch(ch) { - case CHL('\\'): buf.Append(STRL("\\\\")); break; - case CHL('\''): buf.Append(STRL("\\'")); break; - case CHL('\"'): buf.Append(STRL("\\\"")); break; - case CHL('\t'): buf.Append(STRL("\\t")); break; - case CHL('\r'): buf.Append(STRL("\\r")); break; - case CHL('\n'): buf.Append(STRL("\\n")); break; + case _SC('\\'): buf.Append(_SC("\\\\")); break; + case _SC('\''): buf.Append(_SC("\\'")); break; + case _SC('\"'): buf.Append(_SC("\\\"")); break; + case _SC('\t'): buf.Append(_SC("\\t")); break; + case 
_SC('\r'): buf.Append(_SC("\\r")); break; + case _SC('\n'): buf.Append(_SC("\\n")); break; default: - if ((ch < CHL(' ')) || (ch > 0x7f)) { + if ((ch < _SC(' ')) || (ch > 0x7f)) { wchar_t* res = TabChar2Hex(ch, fmt); buf.Append(res); } else @@ -977,7 +977,7 @@ bool Tab::NoCircularProductions() { for (i=0; icount++; - wprintf(STRL(" %ls --> %ls"), n->left->name, n->right->name); + wprintf(_SC(" %ls --> %ls"), n->left->name, n->right->name); } for(int i=0; iname, curSy->line, curSy->col); - if (sym != NULL) wprintf(STRL("%ls is "), sym->name); + wprintf(_SC(" LL1 warning in %ls:%d:%d: "), curSy->name, curSy->line, curSy->col); + if (sym != NULL) wprintf(_SC("%ls is "), sym->name); switch (cond) { - case 1: wprintf(STRL("%s"), "start of several alternatives\n"); break; - case 2: wprintf(STRL("%s"), "start & successor of deletable structure\n"); break; - case 3: wprintf(STRL("%s"), "an ANY node that matches no symbol\n"); break; - case 4: wprintf(STRL("%s"), "contents of [...] or {...} must not be deletable\n"); break; + case 1: wprintf(_SC("%s"), "start of several alternatives\n"); break; + case 2: wprintf(_SC("%s"), "start & successor of deletable structure\n"); break; + case 3: wprintf(_SC("%s"), "an ANY node that matches no symbol\n"); break; + case 4: wprintf(_SC("%s"), "contents of [...] 
or {...} must not be deletable\n"); break; } } @@ -1014,16 +1014,16 @@ int Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { /* print the path for first set that contains token tok for the graph rooted at p */ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { while (p != NULL) { - //if(p->sym) wprintf(STRL("%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); + //if(p->sym) wprintf(_SC("%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); switch (p->typ) { case Node::nt: { if (p->sym->firstReady) { if(p->sym->first->Get(tok)) { if(coco_string_length(indent) == 1) - wprintf(STRL("%ls=> %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); - wprintf(STRL("%ls-> %ls:%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); + wprintf(_SC("%ls=> %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(_SC("%ls-> %ls:%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); if(p->sym->graph) { - wchar_t *new_indent = coco_string_create_append(indent, STRL(" ")); + wchar_t *new_indent = coco_string_create_append(indent, _SC(" ")); PrintFirstPath(p->sym->graph, tok, new_indent); coco_string_delete(new_indent); } @@ -1034,7 +1034,7 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { } case Node::t: case Node::wt: { if(p->sym->n == tok) - wprintf(STRL("%ls= %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(_SC("%ls= %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); break; } case Node::any: { @@ -1143,9 +1143,9 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { if (q->sub->typ == Node::rslv) { BitArray *fs = Expected(q->sub->next, curSy); if (Sets::Intersect(fs, &soFar)) - ResErr(q->sub, STRL("Warning: Resolver will never be evaluated. Place it at previous conflicting alternative.")); + ResErr(q->sub, _SC("Warning: Resolver will never be evaluated. 
Place it at previous conflicting alternative.")); if (!Sets::Intersect(fs, &expected)) - ResErr(q->sub, STRL("Warning: Misplaced resolver: no LL(1) conflict.")); + ResErr(q->sub, _SC("Warning: Misplaced resolver: no LL(1) conflict.")); delete fs; } else { BitArray *ba = Expected(q->sub, curSy); @@ -1161,12 +1161,12 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { bool bsi = Sets::Intersect(fs, fsNext); delete fs; delete fsNext; if (!bsi) - ResErr(p->sub, STRL("Warning: Misplaced resolver: no LL(1) conflict.")); + ResErr(p->sub, _SC("Warning: Misplaced resolver: no LL(1) conflict.")); } CheckRes(p->sub, true); } else if (p->typ == Node::rslv) { if (!rslvAllowed) - ResErr(p, STRL("Warning: Misplaced resolver: no alternative.")); + ResErr(p, _SC("Warning: Misplaced resolver: no alternative.")); } if (p->up) break; @@ -1192,7 +1192,7 @@ bool Tab::NtsComplete() { sym = nonterminals[i]; if (sym->graph == NULL) { complete = false; errors->count++; - wprintf(STRL(" No production for %ls\n"), sym->name); + wprintf(_SC(" No production for %ls\n"), sym->name); } } return complete; @@ -1225,7 +1225,7 @@ bool Tab::AllNtReached() { sym = nonterminals[i]; if (!((*visited)[sym->n])) { ok = false; errors->count++; - wprintf(STRL(" %ls cannot be reached\n"), sym->name); + wprintf(_SC(" %ls cannot be reached\n"), sym->name); } } return ok; @@ -1265,7 +1265,7 @@ bool Tab::AllNtToTerm() { sym = nonterminals[i]; if (!mark[sym->n]) { ok = false; errors->count++; - wprintf(STRL(" %ls cannot be derived to terminals\n"), sym->name); + wprintf(_SC(" %ls cannot be derived to terminals\n"), sym->name); } } return ok; @@ -1297,7 +1297,7 @@ void Tab::XRef() { } } // print cross reference list - fwprintf(trace, STRL("%s"), + fwprintf(trace, _SC("%s"), "\n" "Cross reference list:\n" "--------------------\n\n"); @@ -1305,7 +1305,7 @@ void Tab::XRef() { for (i=0; iname); - fwprintf(trace, STRL(" %12ls"), paddedName); + fwprintf(trace, _SC(" %12ls"), paddedName); 
coco_string_delete(paddedName); TArrayList *list = (TArrayList*)(xref.Get(sym)); int col = 14; @@ -1313,14 +1313,14 @@ void Tab::XRef() { for (j=0; jCount; j++) { line = (*list)[j]; if (col + 5 > 80) { - fputws(STRL("\n"), trace); - for (col = 1; col <= 14; col++) fputws(STRL(" "), trace); + fputws(_SC("\n"), trace); + for (col = 1; col <= 14; col++) fputws(_SC(" "), trace); } - fwprintf(trace, STRL("%5d"), line); col += 5; + fwprintf(trace, _SC("%5d"), line); col += 5; } - fputws(STRL("\n"), trace); + fputws(_SC("\n"), trace); } - fputws(STRL("\n\n"), trace); + fputws(_SC("\n\n"), trace); for(int i=0; i < xref.Count; ++i) { SortedEntry *se = xref[i]; /* @@ -1340,16 +1340,16 @@ void Tab::SetDDT(const wchar_t* s) { int len = coco_string_length(st); for (int i = 0; i < len; i++) { ch = st[i]; - if (CHL('0') <= ch && ch <= CHL('9')) ddt[ch - CHL('0')] = true; + if (_SC('0') <= ch && ch <= _SC('9')) ddt[ch - _SC('0')] = true; else switch (ch) { - case CHL('A') : ddt[0] = true; break; // trace automaton - case CHL('F') : ddt[1] = true; break; // list first/follow sets - case CHL('G') : ddt[2] = true; break; // print syntax graph - case CHL('I') : ddt[3] = true; break; // trace computation of first sets - case CHL('J') : ddt[4] = true; break; // print ANY and SYNC sets - case CHL('P') : ddt[8] = true; break; // print statistics - case CHL('S') : ddt[6] = true; break; // list symbol table - case CHL('X') : ddt[7] = true; break; // list cross reference table + case _SC('A') : ddt[0] = true; break; // trace automaton + case _SC('F') : ddt[1] = true; break; // list first/follow sets + case _SC('G') : ddt[2] = true; break; // print syntax graph + case _SC('I') : ddt[3] = true; break; // trace computation of first sets + case _SC('J') : ddt[4] = true; break; // print ANY and SYNC sets + case _SC('P') : ddt[8] = true; break; // print statistics + case _SC('S') : ddt[6] = true; break; // list symbol table + case _SC('X') : ddt[7] = true; break; // list cross reference table 
default : break; } } @@ -1368,10 +1368,10 @@ void Tab::SetOption(const wchar_t* s) { wchar_t *name = coco_string_create(s, 0, nameLenght); wchar_t *value = coco_string_create(s, valueIndex); - if (coco_string_equal(STRL("$namespace"), name)) { + if (coco_string_equal(_SC("$namespace"), name)) { if (nsName == NULL) nsName = coco_string_create(value); - } else if (coco_string_equal(STRL("$checkEOF"), name)) { - checkEOF = coco_string_equal(STRL("true"), value); + } else if (coco_string_equal(_SC("$checkEOF"), name)) { + checkEOF = coco_string_equal(_SC("true"), value); } delete [] name; From 079cfa1bdea8732335bb7c573d844dca85df7d26 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 13:52:59 +0200 Subject: [PATCH 40/95] Close to achieve build with and without wchar --- src/CharClass.cpp | 1 - src/CharSet.cpp | 2 -- src/CharSet.h | 1 + src/Comment.cpp | 1 - src/Comment.h | 2 +- src/DFA.cpp | 44 +++++++++++++++++------------------ src/DFA.h | 2 +- src/Generator.cpp | 13 +++++------ src/HashTable.cpp | 1 - src/HashTable.h | 2 +- src/Node.h | 2 +- src/Parser.cpp | 25 ++++++++++---------- src/Parser.frame | 25 ++++++++++---------- src/ParserGen.cpp | 39 +++++++++++++++---------------- src/ParserGen.h | 1 - src/Scanner.cpp | 15 ++++++------ src/Scanner.frame | 54 ++++++++++++++++++++++++++----------------- src/Scanner.h | 39 ++++++++++++++++++++----------- src/StringBuilder.cpp | 35 +++++++++++++++------------- src/StringBuilder.h | 5 ++-- src/Symbol.cpp | 1 - src/Symbol.h | 2 +- src/Tab.cpp | 48 ++++++++++++++++++-------------------- src/Tab.h | 4 ++-- 24 files changed, 189 insertions(+), 175 deletions(-) diff --git a/src/CharClass.cpp b/src/CharClass.cpp index eaf11f0..80fae96 100644 --- a/src/CharClass.cpp +++ b/src/CharClass.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include "CharClass.h" -#include "Scanner.h" namespace Coco { diff --git a/src/CharSet.cpp b/src/CharSet.cpp index f9f8f0c..def632e 100644 --- a/src/CharSet.cpp +++ b/src/CharSet.cpp @@ -29,9 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include -#include #include "CharSet.h" -#include "Scanner.h" namespace Coco { diff --git a/src/CharSet.h b/src/CharSet.h index d549fea..0d54f10 100644 --- a/src/CharSet.h +++ b/src/CharSet.h @@ -30,6 +30,7 @@ Coco/R itself) does not fall under the GNU General Public License. #define COCO_CHARSET_H__ #include +#include "Scanner.h" namespace Coco { diff --git a/src/Comment.cpp b/src/Comment.cpp index 88a5865..d425b1b 100644 --- a/src/Comment.cpp +++ b/src/Comment.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Comment.h" -#include "Scanner.h" namespace Coco { diff --git a/src/Comment.h b/src/Comment.h index d3ce323..66dc3f2 100644 --- a/src/Comment.h +++ b/src/Comment.h @@ -29,7 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_COMMENT_H__) #define COCO_COMMENT_H__ -#include +#include "Scanner.h" namespace Coco { diff --git a/src/DFA.cpp b/src/DFA.cpp index 0c31117..0d4f19d 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -28,12 +28,10 @@ Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include -#include #include "DFA.h" #include "Tab.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" #include "Generator.h" namespace Coco { @@ -46,14 +44,14 @@ static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) coco_swprintf(format, 10, _SC("%d\0"), (int) ch); else - coco_swprintf(format, 10, _SC("_SC('%lc')\0"), (int) ch); + coco_swprintf(format, 10, _SC("_SC('%") _CHFMT _SC("')\0"), (int) ch); return format; } static wchar_t* DFAChCond(wchar_t ch, wchar_t_20 &format) { wchar_t_10 fmt; wchar_t* res = DFACh(ch, fmt); - coco_swprintf(format, 20, _SC("ch == %ls\0"), res); + coco_swprintf(format, 20, _SC("ch == %") _SFMT _SC("\0"), res); return format; } @@ -62,14 +60,14 @@ void DFA::PutRange(CharSet *s) { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { wchar_t *from = DFACh((wchar_t) r->from, fmt1); - fwprintf(gen, _SC("ch == %ls"), from); + fwprintf(gen, _SC("ch == %") _SFMT, from); } else if (r->from == 0) { wchar_t *to = DFACh((wchar_t) r->to, fmt1); - fwprintf(gen, _SC("ch <= %ls"), to); + fwprintf(gen, _SC("ch <= %") _SFMT, to); } else { wchar_t *from = DFACh((wchar_t) r->from, fmt1); wchar_t *to = DFACh((wchar_t) r->to, fmt2); - fwprintf(gen, _SC("(ch >= %ls && ch <= %ls)"), from, to); + fwprintf(gen, _SC("(ch >= %") _SFMT _SC(" && ch <= %") _SFMT _SC(")"), from, to); } if (r->next != NULL) fputws(_SC(" || "), gen); } @@ -269,7 +267,7 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off wchar_t format[200]; - coco_swprintf(format, 200, _SC("tokens %ls and %ls cannot be distinguished"), sym->name, matchedSym->name); + coco_swprintf(format, 200, _SC("tokens %") _SFMT 
_SC(" and %") _SFMT _SC(" cannot be distinguished"), sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken matchedSym->tokenKind = Symbol::classLitToken; @@ -380,7 +378,7 @@ void DFA::PrintStates() { if (state->endOf == NULL) fputws(_SC(" "), trace); else { wchar_t *paddedName = tab->Name(state->endOf->name); - fwprintf(trace, _SC("E(%12s)"), paddedName); + fwprintf(trace, _SC("E(%12") _SFMT _SC(")"), paddedName); coco_string_delete(paddedName); } fwprintf(trace, _SC("%3d:"), state->nr); @@ -388,8 +386,8 @@ void DFA::PrintStates() { for (Action *action = state->firstAction; action != NULL; action = action->next) { if (first) {fputws(_SC(" "), trace); first = false;} else fputws(_SC(" "), trace); - if (action->typ == Node::clas) fwprintf(trace, _SC("%ls"), tab->classes[action->sym]->name); - else fwprintf(trace, _SC("%3s"), DFACh((wchar_t)action->sym, fmt)); + if (action->typ == Node::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); + else fwprintf(trace, _SC("%3") _SFMT, DFACh((wchar_t)action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, _SC("%3d"), targ->state->nr); } @@ -426,7 +424,7 @@ void DFA::GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, b endOf = t->state->endOf; } else { - wprintf(_SC("Tokens %ls and %ls cannot be distinguished\n"), endOf->name, t->state->endOf->name); + wprintf(_SC("Tokens %") _SFMT _SC(" and %") _SFMT _SC(" cannot be distinguished\n"), endOf->name, t->state->endOf->name); errors->count++; } } @@ -517,7 +515,7 @@ void DFA::GenComBody(const Comment *com) { wchar_t_20 fmt; wchar_t* res = DFAChCond(com->stop[0], fmt); GenCommentIndented(imax, _SC("\t\t\tif (")); - fwprintf(gen, _SC("%ls) {\n"), res); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), res); if (imaxStop == 0) { fwprintf(gen, _SC("%s"), @@ -530,7 +528,7 @@ void DFA::GenComBody(const Comment *com) { currIndent = indent + sidx; 
GenCommentIndented(currIndent, _SC("\t\t\t\tNextCh();\n")); GenCommentIndented(currIndent, _SC("\t\t\t\tif (")); - fwprintf(gen, _SC("%ls) {\n"), DFAChCond(com->stop[sidx], fmt)); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->stop[sidx], fmt)); } currIndent = indent + imax; GenCommentIndented(currIndent, _SC("\t\t\tlevel--;\n")); @@ -543,7 +541,7 @@ void DFA::GenComBody(const Comment *com) { if (com->nested) { GenCommentIndented(imax, _SC("\t\t\t}")); wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, _SC(" else if (%ls) {\n"), res); + fwprintf(gen, _SC(" else if (%") _SFMT _SC(") {\n"), res); if (imaxStop == 0) fputws(_SC("\t\t\tlevel++; NextCh();\n"), gen); else { @@ -552,7 +550,7 @@ void DFA::GenComBody(const Comment *com) { int loopIndent = indent + sidx; GenCommentIndented(loopIndent, _SC("\t\t\t\tNextCh();\n")); GenCommentIndented(loopIndent, _SC("\t\t\t\tif (")); - fwprintf(gen, _SC("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->start[sidx], fmt)); } GenCommentIndented(indent + imax, _SC("\t\t\t\t\tlevel++; NextCh();\n")); for(int sidx = imax; sidx > 0; --sidx) { @@ -581,7 +579,7 @@ void DFA::GenComment(const Comment *com, int i) { } else { for(int sidx = 1; sidx <= imax; ++sidx) { GenCommentIndented(sidx, _SC("\tif (")); - fwprintf(gen, _SC("%ls) {\n"), DFAChCond(com->start[sidx], fmt)); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->start[sidx], fmt)); GenCommentIndented(sidx, _SC("\t\tNextCh();\n")); } GenComBody(com); @@ -632,7 +630,7 @@ void DFA::GenLiterals () { // write keyword, escape non printable characters for (int k = 0; name[k] != _SC('\0'); k++) { wchar_t c = name[k]; - fwprintf(gen, (c >= 32 && c <= 127) ? _SC("%lc") : _SC("\\x%04x"), c); + fwprintf(gen, (c >= 32 && c <= 127) ? 
_SC("%") _CHFMT : _SC("\\x%04x"), c); } fwprintf(gen, _SC("), %d);\n"), sym->n); @@ -653,7 +651,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("namespace %ls {\n"), curNs); + fwprintf(gen, _SC("namespace %") _SFMT _SC(" {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -704,7 +702,7 @@ void DFA::WriteState(const State *state) { fwprintf(gen, _SC("\t\t\tcase_%d:\n"), state->nr); if (endOf != NULL && state->firstAction != NULL) { - fwprintf(gen, _SC("\t\t\trecEnd = pos; recKind = %d /* %ls */;\n"), endOf->n, endOf->name); + fwprintf(gen, _SC("\t\t\trecEnd = pos; recKind = %d /* %") _SFMT _SC(" */;\n"), endOf->n, endOf->name); } bool ctxEnd = state->ctx; @@ -714,7 +712,7 @@ void DFA::WriteState(const State *state) { else fputws(_SC("\t\t\telse if ("), gen); if (action->typ == Node::chr) { wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); - fwprintf(gen, _SC("%ls"), res); + fwprintf(gen, _SC("%") _SFMT, res); } else PutRange(tab->CharClassSet(action->sym)); fputws(_SC(") {"), gen); @@ -740,7 +738,7 @@ void DFA::WriteState(const State *state) { if (endOf == NULL) { fputws(_SC("goto case_0;}\n"), gen); } else { - fwprintf(gen, _SC("t->kind = %d /* %ls */; "), endOf->n, endOf->name); + fwprintf(gen, _SC("t->kind = %d /* %") _SFMT _SC(" */; "), endOf->n, endOf->name); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); @@ -854,7 +852,7 @@ void DFA::WriteScanner() { wchar_t_20 fmt; while (com != NULL) { wchar_t* res = DFAChCond(com->start[0], fmt); - fwprintf(gen, _SC("(%ls && Comment%d())"), res, cmdIdx); + fwprintf(gen, 
_SC("(%") _SFMT _SC(" && Comment%d())"), res, cmdIdx); if (com->next != NULL) { fputws(_SC(" || "), gen); } diff --git a/src/DFA.h b/src/DFA.h index a4a9646..e736528 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -30,7 +30,7 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_DFA_H__) #define COCO_DFA_H__ -#include +#include "Scanner.h" #include "Action.h" #include "Comment.h" #include "State.h" diff --git a/src/Generator.cpp b/src/Generator.cpp index 86652cd..4b9a3d7 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #include "Generator.h" -#include "Scanner.h" namespace Coco { @@ -134,7 +133,7 @@ namespace Coco { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("%ls_"), curNs); + fwprintf(gen, _SC("%") _SFMT _SC("_"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; } while (startPos < len); @@ -158,23 +157,23 @@ namespace Coco { endOfStopString = coco_string_length(stop)-1; } - fwscanf(fram, _SC("%lc"), &ch); // fram.ReadByte(); + fwscanf(fram, _SC("%") _CHFMT, &ch); // fram.ReadByte(); while (!feof(fram)) { // ch != EOF if (stop != NULL && ch == startCh) { int i = 0; do { if (i == endOfStopString) return; // stop[0..i] found - fwscanf(fram, _SC("%lc"), &ch); i++; + fwscanf(fram, _SC("%") _CHFMT, &ch); i++; } while (ch == stop[i]); // stop[0..i-1] found; continue with last read character if (generateOutput) { wchar_t *subStop = coco_string_create(stop, 0, i); - fwprintf(gen, _SC("%ls"), subStop); + fwprintf(gen, _SC("%") _SFMT, subStop); coco_string_delete(subStop); } } else { - if (generateOutput) { fwprintf(gen, _SC("%lc"), ch); } - fwscanf(fram, _SC("%lc"), &ch); + if (generateOutput) { 
fwprintf(gen, _SC("%") _CHFMT, ch); } + fwscanf(fram, _SC("%") _CHFMT, &ch); } } if (stop != NULL) { diff --git a/src/HashTable.cpp b/src/HashTable.cpp index 012cf4a..f49f046 100644 --- a/src/HashTable.cpp +++ b/src/HashTable.cpp @@ -29,7 +29,6 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include "HashTable.h" -#include "Scanner.h" namespace Coco { diff --git a/src/HashTable.h b/src/HashTable.h index 2d44cf0..454c70e 100644 --- a/src/HashTable.h +++ b/src/HashTable.h @@ -29,7 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_HASHTABLE_H__) #define COCO_HASHTABLE_H__ -#include +#include "Scanner.h" namespace Coco { diff --git a/src/Node.h b/src/Node.h index 6367b3c..34cc11a 100644 --- a/src/Node.h +++ b/src/Node.h @@ -31,8 +31,8 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include "Position.h" -#include "State.h" #include "Scanner.h" +#include "State.h" namespace Coco { diff --git a/src/Parser.cpp b/src/Parser.cpp index aa75d75..69cb47f 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -27,9 +27,8 @@ Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/ -#include -#include "Parser.h" #include "Scanner.h" +#include "Parser.h" namespace Coco { @@ -1279,25 +1278,25 @@ void Errors::SynErr(int line, int col, int n) { } break; } - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(_SC("%ls\n"), s); + wprintf(_SC("%") _SFMT _SC("\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(_SC("%ls"), s); + wprintf(_SC("%") _SFMT _SC(""), s); exit(1); } @@ -1308,7 +1307,7 @@ static void printIndent(int n) { } SynTree::~SynTree() { - //wprintf(_SC("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -1332,18 +1331,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/Parser.frame b/src/Parser.frame index 38133bf..5f71d2b 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -121,9 +121,8 @@ Parser.cpp Specification -->begin -#include -#include "Parser.h" #include "Scanner.h" +#include "Parser.h" -->namespace_open @@ -354,25 +353,25 @@ void Errors::SynErr(int line, int col, int n) { } break; } - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); count++; } 
void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %ls\n"), line, col, s); + wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(_SC("%ls\n"), s); + wprintf(_SC("%") _SFMT _SC("\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(_SC("%ls"), s); + wprintf(_SC("%") _SFMT _SC(""), s); exit(1); } @@ -383,7 +382,7 @@ static void printIndent(int n) { } SynTree::~SynTree() { - //wprintf(_SC("Token %ls : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); delete tok; for(int i=0; icol) { printIndent(indent); - wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); @@ -407,18 +406,18 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { int last_idx = children.Count; if(tok->col) { printIndent(indent); - wprintf(_SC("%s\t%d\t%d\t%d\t%ls\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { if(((SynTree*)children[0])->tok->kind < maxT) { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } } else { printIndent(indent); - wprintf(_SC("%d\t%d\t%d\t%ls\n"), children.Count, tok->line, tok->kind, tok->val); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } } if(last_idx) { diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index f848755..b47de34 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -31,7 +31,6 @@ Coco/R itself) does not fall under the GNU General Public License. #include "ParserGen.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" #include "Generator.h" namespace Coco { @@ -71,7 +70,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("namespace %ls {\n"), curNs); + fwprintf(gen, _SC("namespace %") _SFMT _SC(" {\n"), curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { @@ -94,7 +93,7 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { if (pos != NULL) { buffer->SetPos(pos->beg); ch = buffer->Read(); if (tab->emitLines && pos->line) { - fwprintf(gen, _SC("\n#line %d \"%ls\"\n"), pos->line, tab->srcName); + fwprintf(gen, _SC("\n#line %d \"%") _SFMT _SC("\"\n"), pos->line, tab->srcName); } Indent(indent); while (buffer->GetPos() <= pos->end) { @@ -108,7 +107,7 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { } if (buffer->GetPos() > pos->end) goto done; } - fwprintf(gen, _SC("%lc"), ch); + 
fwprintf(gen, _SC("%") _CHFMT, ch); ch = buffer->Read(); } done: @@ -125,18 +124,18 @@ void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { if (errTyp == tErr) { if (sym->name[0] == _SC('"')) { wchar_t *se = tab->Escape(sym->name); - coco_swprintf(format, formatLen, _SC("%ls expected"), se); + coco_swprintf(format, formatLen, _SC("%") _SFMT _SC(" expected"), se); coco_string_merge(err, format); coco_string_delete(se); } else { - coco_swprintf(format, formatLen, _SC("%ls expected"), sym->name); + coco_swprintf(format, formatLen, _SC("%") _SFMT _SC(" expected"), sym->name); coco_string_merge(err, format); } } else if (errTyp == altErr) { - coco_swprintf(format, formatLen, _SC("invalid %ls"), sym->name); + coco_swprintf(format, formatLen, _SC("invalid %") _SFMT, sym->name); coco_string_merge(err, format); } else if (errTyp == syncErr) { - coco_swprintf(format, formatLen, _SC("this symbol not expected in %ls"), sym->name); + coco_swprintf(format, formatLen, _SC("this symbol not expected in %") _SFMT, sym->name); coco_string_merge(err, format); } coco_swprintf(format, formatLen, _SC("\"); break;\n")); @@ -189,7 +188,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { while (p != NULL) { if (p->typ == Node::nt) { Indent(indent); - fwprintf(gen, _SC("%ls("), p->sym->name); + fwprintf(gen, _SC("%") _SFMT _SC("("), p->sym->name); CopySourcePart(p->pos, 0); fputws(_SC(");\n"), gen); } else if (p->typ == Node::t) { @@ -329,7 +328,7 @@ void ParserGen::GenTokensHeader() { if (isFirst) { isFirst = false; } else { fputws(_SC(",\n"), gen); } - fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); } // pragmas @@ -338,7 +337,7 @@ void ParserGen::GenTokensHeader() { else { fputws(_SC(",\n"), gen); } sym = tab->pragmas[i]; - fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); } fputws(_SC("\n\t};\n"), gen); 
@@ -351,7 +350,7 @@ void ParserGen::GenTokensHeader() { if (isFirst) { isFirst = false; } else { fputws(_SC(",\n"), gen); } - fwprintf(gen , _SC("\t\t_%ls=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); } fputws(_SC("\n\t};\n#endif\n"), gen); @@ -371,9 +370,9 @@ void ParserGen::GenCodePragmas() { void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { if (!isalpha(sym->name[0])) { - fwprintf(gen, _SC("%d /* %ls */"), sym->n, sym->name); + fwprintf(gen, _SC("%d /* %") _SFMT _SC(" */"), sym->n, sym->name); } else { - fwprintf(gen, _SC("_%ls"), sym->name); + fwprintf(gen, _SC("_%") _SFMT, sym->name); } } @@ -382,7 +381,7 @@ void ParserGen::GenProductionsHeader() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, _SC("\tvoid %ls("), sym->name); + fwprintf(gen, _SC("\tvoid %") _SFMT _SC("("), sym->name); CopySourcePart(sym->attrPos, 0); fputws(_SC(");\n"), gen); } @@ -394,14 +393,14 @@ void ParserGen::GenProductions() { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, _SC("void Parser::%ls("), sym->name); + fwprintf(gen, _SC("void Parser::%") _SFMT _SC("("), sym->name); CopySourcePart(sym->attrPos, 0); fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 2); fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); - if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%ls; ntTok->line = 0; ntTok->val = coco_string_create(\"%ls\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); + if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%") _SFMT _SC("; ntTok->line = 0; ntTok->val = coco_string_create(\"%") _SFMT _SC("\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); else { - fwprintf(gen, _SC("\t\tbool ntAdded = 
AstAddNonTerminal(eNonTerminals::_%ls, \"%ls\", la->line);\n"), sym->name, sym->name); + fwprintf(gen, _SC("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%") _SFMT _SC(", \"%") _SFMT _SC("\", la->line);\n"), sym->name, sym->name); } fputws(_SC("#endif\n"), gen); ba.SetAll(false); @@ -482,11 +481,11 @@ void ParserGen::WriteParser () { g.CopyFramePart(_SC("-->pragmas")); GenCodePragmas(); g.CopyFramePart(_SC("-->productions")); GenProductions(); - g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%ls();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); + g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%") _SFMT _SC("();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); g.CopyFramePart(_SC("-->constants")); fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count-1); g.CopyFramePart(_SC("-->initialization")); InitSets(); - g.CopyFramePart(_SC("-->errors")); fwprintf(gen, _SC("%ls"), err); + g.CopyFramePart(_SC("-->errors")); fwprintf(gen, _SC("%") _SFMT, err); g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); diff --git a/src/ParserGen.h b/src/ParserGen.h index 24c7514..e1e82ab 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -33,7 +33,6 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include "Position.h" #include "Tab.h" #include "Symbol.h" -#include "Scanner.h" #include "DFA.h" namespace Coco { diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 0bf9f43..f500e21 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -210,6 +210,7 @@ unsigned int coco_string_hash(const wchar_t *data, size_t size) { return h; } +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char* value) { @@ -221,6 +222,12 @@ wchar_t* coco_string_create(const char* value) { return data; } +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + char* coco_string_create_char(const wchar_t *value) { int len = coco_string_length(value); char *res = new char[len + 1]; @@ -229,12 +236,6 @@ char* coco_string_create_char(const wchar_t *value) { return res; } -void coco_string_delete(char* &data) { - delete [] data; - data = NULL; -} - - Token::Token() { kind = 0; pos = 0; @@ -452,7 +453,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(_SC("--- Cannot open file %ls\n"), fileName); + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName); exit(1); } coco_string_delete(chFileName); diff --git a/src/Scanner.frame b/src/Scanner.frame index 5da2fa9..cd9a95d 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -40,7 +40,6 @@ Scanner.h Specification #include #include #include -#include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 @@ -52,33 +51,41 @@ Scanner.h Specification #ifdef WITHOUT_WCHAR #define wchar_t char -#define SFMT_LCHR "%c" -#define SFMT_SLCHR "%c" -#define SFMT_LSTR "%s" -#define SFMT_SLSTR "%s" -#define SFMT_LS "s" +#define _CHFMT "c" +#define _SFMT "s" #define _SC(s) s #define fputws fputs #define wprintf printf #define swprintf snprintf #define fwprintf fprintf #define fwscanf fscanf -#define swscanf scanf 
+#define swscanf sscanf #define wcslen strlen #define wcscpy strcpy #define wcsncpy strncpy #define wcscmp strcmp +#define wcsncmp strncmp #define wcschr strchr #define wcsrchr strrchr +#define wcscasecmp strcasecmp +#define wcsncasecmp strncasecmp + +#if _MSC_VER >= 1400 +#define coco_swprintf printf_s +#elif _MSC_VER >= 1300 +#define coco_swprintf _snprintf +#elif defined __MINGW32__ +#define coco_swprintf _snprintf +#else +// assume every other compiler knows sprintf +#define coco_swprintf snprintf +#endif + #else #include -#define SFMT_LSTR "%ls" -#define SFMT_SLSTR L"%ls" -#define SFMT_LS "ls" -#define SFMT_LCHR "%lc" -#define SFMT_SLCHR L"%lc" +#define _CHFMT L"lc" +#define _SFMT L"ls" #define _SC(s) L##s -#endif #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s @@ -91,6 +98,8 @@ Scanner.h Specification #define coco_swprintf swprintf #endif +#endif + #define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) @@ -122,10 +131,12 @@ int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); unsigned int coco_string_hash(const wchar_t* data, size_t size); +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char *value); -char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); +#endif +char* coco_string_create_char(const wchar_t *value); template class TArrayList @@ -599,6 +610,7 @@ unsigned int coco_string_hash(const wchar_t *data, size_t size) { return h; } +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char* value) { @@ -610,6 +622,12 @@ wchar_t* coco_string_create(const char* value) { return data; } +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + char* coco_string_create_char(const wchar_t *value) { int len = coco_string_length(value); char *res = new char[len + 1]; @@ 
-618,12 +636,6 @@ char* coco_string_create_char(const wchar_t *value) { return res; } -void coco_string_delete(char* &data) { - delete [] data; - data = NULL; -} - - Token::Token() { kind = 0; pos = 0; @@ -841,7 +853,7 @@ Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(_SC("--- Cannot open file %ls\n"), fileName); + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName); exit(1); } coco_string_delete(chFileName); diff --git a/src/Scanner.h b/src/Scanner.h index f9eae99..a764b56 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -35,7 +35,6 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include -#include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 @@ -47,33 +46,41 @@ Coco/R itself) does not fall under the GNU General Public License. #ifdef WITHOUT_WCHAR #define wchar_t char -#define SFMT_LCHR "%c" -#define SFMT_SLCHR "%c" -#define SFMT_LSTR "%s" -#define SFMT_SLSTR "%s" -#define SFMT_LS "s" +#define _CHFMT "c" +#define _SFMT "s" #define _SC(s) s #define fputws fputs #define wprintf printf #define swprintf snprintf #define fwprintf fprintf #define fwscanf fscanf -#define swscanf scanf +#define swscanf sscanf #define wcslen strlen #define wcscpy strcpy #define wcsncpy strncpy #define wcscmp strcmp +#define wcsncmp strncmp #define wcschr strchr #define wcsrchr strrchr +#define wcscasecmp strcasecmp +#define wcsncasecmp strncasecmp + +#if _MSC_VER >= 1400 +#define coco_swprintf printf_s +#elif _MSC_VER >= 1300 +#define coco_swprintf _snprintf +#elif defined __MINGW32__ +#define coco_swprintf _snprintf +#else +// assume every other compiler knows sprintf +#define coco_swprintf snprintf +#endif + #else #include -#define SFMT_LSTR "%ls" -#define SFMT_SLSTR L"%ls" -#define SFMT_LS "ls" -#define SFMT_LCHR "%lc" -#define SFMT_SLCHR L"%lc" +#define _CHFMT 
L"lc" +#define _SFMT L"ls" #define _SC(s) L##s -#endif #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s @@ -86,6 +93,8 @@ Coco/R itself) does not fall under the GNU General Public License. #define coco_swprintf swprintf #endif +#endif + #define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) @@ -118,10 +127,12 @@ int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); unsigned int coco_string_hash(const wchar_t* data, size_t size); +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char *value); -char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); +#endif +char* coco_string_create_char(const wchar_t *value); template class TArrayList diff --git a/src/StringBuilder.cpp b/src/StringBuilder.cpp index 441500b..ea6b985 100644 --- a/src/StringBuilder.cpp +++ b/src/StringBuilder.cpp @@ -28,13 +28,12 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include #include "StringBuilder.h" -#include "Scanner.h" namespace Coco { void StringBuilder::Init(int capacity) { length = 0; - this->capacity = capacity; + this->_capacity = capacity; data = new wchar_t[capacity + 1]; data[0] = 0; } @@ -44,8 +43,7 @@ StringBuilder::StringBuilder(int capacity) { } StringBuilder::StringBuilder(const wchar_t *val) { - capacity = length = wcslen(val); - Init(capacity); + Init(wcslen(val)); wcscpy(data, val); } @@ -54,18 +52,21 @@ StringBuilder::~StringBuilder() { delete [] data; data = NULL; length = 0; - capacity = 0; + _capacity = 0; } } +void StringBuilder::capacity(int new_capacity) { + wchar_t *nData = new wchar_t[new_capacity + 1]; + memcpy(nData, data, _capacity * sizeof(wchar_t)); + delete [] data; + data = nData; + _capacity = new_capacity; +} + void StringBuilder::Append(const wchar_t value) { - if (length == capacity) { - int oldCap = capacity; - capacity = capacity * 2; - wchar_t *nData = new wchar_t[capacity + 1]; - memcpy(nData, data, oldCap * sizeof(int)); - delete [] data; - data = nData; + if (length == _capacity) { + capacity(_capacity * 2); } data[length] = value; @@ -74,10 +75,12 @@ void StringBuilder::Append(const wchar_t value) { } void StringBuilder::Append(const wchar_t *value) { - if (length + (int)wcslen(value) < capacity) { - wcscpy(data + length, value); - length += wcslen(value); - } + int slen = (int)wcslen(value); + if (length + slen >= _capacity) { + capacity(length + slen + 1); + } + wcscpy(data + length, value); + length += slen; } diff --git a/src/StringBuilder.h b/src/StringBuilder.h index 35c8cd4..8928e3c 100644 --- a/src/StringBuilder.h +++ b/src/StringBuilder.h @@ -1,7 +1,7 @@ #if !defined(COCO_STRINGBUILDER_H__) #define COCO_STRINGBUILDER_H__ -#include +#include "Scanner.h" namespace Coco { @@ -14,13 +14,14 @@ class StringBuilder virtual ~StringBuilder(); void Append(const wchar_t val); void Append(const wchar_t *val); + void capacity(int new_capacity); wchar_t* ToString(); int GetLength() { 
return length; }; private: void Init(int capacity); wchar_t *data; - int capacity; + int _capacity; int length; }; diff --git a/src/Symbol.cpp b/src/Symbol.cpp index ce3b682..9364ec7 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Symbol.h" -#include "Scanner.h" #include "BitArray.h" namespace Coco { diff --git a/src/Symbol.h b/src/Symbol.h index e6b5e16..cea13f8 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -29,8 +29,8 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_SYMBOL_H__) #define COCO_SYMBOL_H__ -#include "Position.h" #include "Scanner.h" +#include "Position.h" namespace Coco { diff --git a/src/Tab.cpp b/src/Tab.cpp index 223eb86..4ca84d3 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -27,11 +27,9 @@ If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ -#include #include "Tab.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" namespace Coco { @@ -115,7 +113,7 @@ int Tab::Num(const Node *p) { void Tab::PrintSym(const Symbol *sym) { wchar_t *paddedName = Name(sym->name); - fwprintf(trace, _SC("%3d %14s %s"), sym->n, paddedName, nTyp[sym->typ]); + fwprintf(trace, _SC("%3d %14") _SFMT _SC(" %s"), sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); @@ -157,7 +155,7 @@ void Tab::PrintSymbolTable() { Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - fwprintf(trace, _SC("_%ls = %ls.\n"), ((Symbol*) (e->val))->name, e->key); + fwprintf(trace, _SC("_%") _SFMT _SC(" = %") _SFMT _SC(".\n"), ((Symbol*) (e->val))->name, e->key); } delete iter; fputws(_SC("\n"), trace); @@ -175,7 +173,7 @@ void Tab::PrintSet(const BitArray *s, int indent) { fputws(_SC("\n"), trace); for (col = 1; col < indent; col++) fputws(_SC(" "), trace); } - fwprintf(trace, _SC("%ls "), sym->name); + fwprintf(trace, _SC("%") _SFMT _SC(" "), sym->name); col += len + 1; } } @@ -368,18 +366,18 @@ void Tab::PrintNodes() { fwprintf(trace, _SC("%4d %s "), p->n, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); - fwprintf(trace, _SC("%12s "), paddedName); + fwprintf(trace, _SC("%12") _SFMT _SC(" "), paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { CharClass *c = classes[p->val]; wchar_t *paddedName = Name(c->name); - fwprintf(trace, _SC("%12s "), paddedName); + fwprintf(trace, _SC("%12") _SFMT _SC(" "), paddedName); coco_string_delete(paddedName); } else fputws(_SC(" "), trace); fwprintf(trace, _SC("%5d "), Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { - fwprintf(trace, _SC(" %5s"), TabPos(p->pos, format)); + 
fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); } if (p->typ == Node::chr) { fwprintf(trace, _SC("%5d %5d "), p->val, p->code); } if (p->typ == Node::clas) { @@ -387,7 +385,7 @@ void Tab::PrintNodes() { } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { fwprintf(trace, _SC("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); } if (p->typ == Node::sem) { - fwprintf(trace, _SC(" %5s"), TabPos(p->pos, format)); + fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { fwprintf(trace, _SC(" ")); } @@ -444,7 +442,7 @@ wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { coco_swprintf(format, 10, _SC("%d"), ch); return format; } else { - coco_swprintf(format, 10, _SC("'%lc'"), ch); + coco_swprintf(format, 10, _SC("'%") _CHFMT _SC("'"), ch); return format; } } @@ -455,11 +453,11 @@ void Tab::WriteCharSet(const CharSet *s) { if (r->from < r->to) { wchar_t *from = TabCh(r->from, fmt1); wchar_t *to = TabCh(r->to, fmt2); - fwprintf(trace, _SC("%ls .. %ls "), from, to); + fwprintf(trace, _SC("%") _SFMT _SC(" .. 
%") _SFMT _SC(" "), from, to); } else { wchar_t *from = TabCh(r->from, fmt1); - fwprintf(trace, _SC("%ls "), from); + fwprintf(trace, _SC("%") _SFMT _SC(" "), from); } } } @@ -756,7 +754,7 @@ void Tab::CompDeletableSymbols() { for (i=0; ideletable) - wprintf(_SC(" %ls deletable\n"), sym->name); + wprintf(_SC(" %") _SFMT _SC(" deletable\n"), sym->name); } } @@ -784,7 +782,7 @@ void Tab::CompSymbolSets() { Symbol *sym; for (int i=0; iname); + fwprintf(trace, _SC("%") _SFMT _SC("\n"), sym->name); fputws(_SC("first: "), trace); PrintSet(sym->first, 10); fputws(_SC("follow: "), trace); PrintSet(sym->follow, 10); fputws(_SC("\n"), trace); @@ -977,7 +975,7 @@ bool Tab::NoCircularProductions() { for (i=0; icount++; - wprintf(_SC(" %ls --> %ls"), n->left->name, n->right->name); + wprintf(_SC(" %") _SFMT _SC(" --> %") _SFMT, n->left->name, n->right->name); } for(int i=0; iname, curSy->line, curSy->col); - if (sym != NULL) wprintf(_SC("%ls is "), sym->name); + wprintf(_SC(" LL1 warning in %") _SFMT _SC(":%d:%d: "), curSy->name, curSy->line, curSy->col); + if (sym != NULL) wprintf(_SC("%") _SFMT _SC(" is "), sym->name); switch (cond) { case 1: wprintf(_SC("%s"), "start of several alternatives\n"); break; case 2: wprintf(_SC("%s"), "start & successor of deletable structure\n"); break; @@ -1014,14 +1012,14 @@ int Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { /* print the path for first set that contains token tok for the graph rooted at p */ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { while (p != NULL) { - //if(p->sym) wprintf(_SC("%ls-> %ls:%d:\n", indent, p->sym->name, p->sym->line)); + //if(p->sym) wprintf(_SC("%") _SFMT _SC("-> %") _SFMT _SC(":%d:\n", indent, p->sym->name, p->sym->line)); switch (p->typ) { case Node::nt: { if (p->sym->firstReady) { if(p->sym->first->Get(tok)) { if(coco_string_length(indent) == 1) - wprintf(_SC("%ls=> %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); - wprintf(_SC("%ls-> 
%ls:%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); + wprintf(_SC("%") _SFMT _SC("=> %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(_SC("%") _SFMT _SC("-> %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); if(p->sym->graph) { wchar_t *new_indent = coco_string_create_append(indent, _SC(" ")); PrintFirstPath(p->sym->graph, tok, new_indent); @@ -1034,7 +1032,7 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { } case Node::t: case Node::wt: { if(p->sym->n == tok) - wprintf(_SC("%ls= %ls:%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(_SC("%") _SFMT _SC("= %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->line, p->col); break; } case Node::any: { @@ -1192,7 +1190,7 @@ bool Tab::NtsComplete() { sym = nonterminals[i]; if (sym->graph == NULL) { complete = false; errors->count++; - wprintf(_SC(" No production for %ls\n"), sym->name); + wprintf(_SC(" No production for %") _SFMT _SC("\n"), sym->name); } } return complete; @@ -1225,7 +1223,7 @@ bool Tab::AllNtReached() { sym = nonterminals[i]; if (!((*visited)[sym->n])) { ok = false; errors->count++; - wprintf(_SC(" %ls cannot be reached\n"), sym->name); + wprintf(_SC(" %") _SFMT _SC(" cannot be reached\n"), sym->name); } } return ok; @@ -1265,7 +1263,7 @@ bool Tab::AllNtToTerm() { sym = nonterminals[i]; if (!mark[sym->n]) { ok = false; errors->count++; - wprintf(_SC(" %ls cannot be derived to terminals\n"), sym->name); + wprintf(_SC(" %") _SFMT _SC(" cannot be derived to terminals\n"), sym->name); } } return ok; @@ -1305,7 +1303,7 @@ void Tab::XRef() { for (i=0; iname); - fwprintf(trace, _SC(" %12ls"), paddedName); + fwprintf(trace, _SC(" %12") _SFMT, paddedName); coco_string_delete(paddedName); TArrayList *list = (TArrayList*)(xref.Get(sym)); int col = 14; diff --git a/src/Tab.h b/src/Tab.h index 129fac7..b1caa84 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -30,10 +30,10 @@ Coco/R itself) does not fall under the 
GNU General Public License. #if !defined(COCO_TAB_H__) #define COCO_TAB_H__ +#include "Scanner.h" #include "HashTable.h" #include "StringBuilder.h" #include "SortedList.h" -#include "Scanner.h" #include "Position.h" #include "Symbol.h" #include "Node.h" @@ -204,7 +204,7 @@ class Tab { void LL1Error(int cond, const Symbol *sym); int CheckOverlap(const BitArray *s1, const BitArray *s2, int cond); - void PrintFirstPath(const Node *p, int tok, const wchar_t *indent=L"\t"); + void PrintFirstPath(const Node *p, int tok, const wchar_t *indent=_SC("\t")); int CheckAlts(Node *p); void CheckLL1(); From 5184aadf2942a3765b6c29993392fae2d02b5b35 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 15:35:24 +0200 Subject: [PATCH 41/95] Fix the scanner generation to work without wchar_t --- src/DFA.cpp | 16 ++++++++-------- src/Scanner.cpp | 4 ++-- src/Scanner.frame | 12 ++++++++---- src/Scanner.h | 12 ++++++++---- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 0d4f19d..823b2c8 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -40,7 +40,7 @@ typedef wchar_t wchar_t_10[10]; typedef wchar_t wchar_t_20[20]; //---------- Output primitives -static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { +static wchar_t* DFACh(int ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) coco_swprintf(format, 10, _SC("%d\0"), (int) ch); else @@ -48,7 +48,7 @@ static wchar_t* DFACh(wchar_t ch, wchar_t_10 &format) { return format; } -static wchar_t* DFAChCond(wchar_t ch, wchar_t_20 &format) { +static wchar_t* DFAChCond(int ch, wchar_t_20 &format) { wchar_t_10 fmt; wchar_t* res = DFACh(ch, fmt); coco_swprintf(format, 20, _SC("ch == %") _SFMT _SC("\0"), res); @@ -59,14 +59,14 @@ void DFA::PutRange(CharSet *s) { wchar_t_10 fmt1, fmt2; for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { - wchar_t *from = DFACh((wchar_t) r->from, fmt1); + wchar_t *from = DFACh(r->from, fmt1); 
fwprintf(gen, _SC("ch == %") _SFMT, from); } else if (r->from == 0) { - wchar_t *to = DFACh((wchar_t) r->to, fmt1); + wchar_t *to = DFACh(r->to, fmt1); fwprintf(gen, _SC("ch <= %") _SFMT, to); } else { - wchar_t *from = DFACh((wchar_t) r->from, fmt1); - wchar_t *to = DFACh((wchar_t) r->to, fmt2); + wchar_t *from = DFACh(r->from, fmt1); + wchar_t *to = DFACh(r->to, fmt2); fwprintf(gen, _SC("(ch >= %") _SFMT _SC(" && ch <= %") _SFMT _SC(")"), from, to); } if (r->next != NULL) fputws(_SC(" || "), gen); @@ -387,7 +387,7 @@ void DFA::PrintStates() { if (first) {fputws(_SC(" "), trace); first = false;} else fputws(_SC(" "), trace); if (action->typ == Node::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); - else fwprintf(trace, _SC("%3") _SFMT, DFACh((wchar_t)action->sym, fmt)); + else fwprintf(trace, _SC("%3") _SFMT, DFACh(action->sym, fmt)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, _SC("%3d"), targ->state->nr); } @@ -711,7 +711,7 @@ void DFA::WriteState(const State *state) { if (action == state->firstAction) fputws(_SC("\t\t\tif ("), gen); else fputws(_SC("\t\t\telse if ("), gen); if (action->typ == Node::chr) { - wchar_t* res = DFAChCond((wchar_t)action->sym, fmt); + wchar_t* res = DFAChCond(action->sym, fmt); fwprintf(gen, _SC("%") _SFMT, res); } else PutRange(tab->CharClassSet(action->sym)); fputws(_SC(") {"), gen); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index f500e21..4eb67a0 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -725,7 +725,7 @@ Token* Scanner::NextToken() { case_4: {t->kind = 4 /* badString */; loopState = false; break;} case 5: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('&')) || (ch >= _SC('(') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 65535)) {AddCh(); goto case_6;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('&')) || (ch >= _SC('(') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_6;} else if (ch 
== 92) {AddCh(); goto case_7;} else {goto case_0;} case 6: @@ -756,7 +756,7 @@ Token* Scanner::NextToken() { else {t->kind = 44 /* optionSym */; loopState = false; break;} case 12: case_12: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 65535)) {AddCh(); goto case_12;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_12;} else if (ch == 10 || ch == 13) {AddCh(); goto case_4;} else if (ch == _SC('"')) {AddCh(); goto case_3;} else if (ch == 92) {AddCh(); goto case_14;} diff --git a/src/Scanner.frame b/src/Scanner.frame index cd9a95d..a1fedf1 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -47,7 +47,7 @@ Scanner.h Specification #include #endif -//#define WITHOUT_WCHAR +#define WITHOUT_WCHAR #ifdef WITHOUT_WCHAR #define wchar_t char @@ -71,7 +71,7 @@ Scanner.h Specification #define wcsncasecmp strncasecmp #if _MSC_VER >= 1400 -#define coco_swprintf printf_s +#define coco_swprintf snprintf_s #elif _MSC_VER >= 1300 #define coco_swprintf _snprintf #elif defined __MINGW32__ @@ -81,6 +81,8 @@ Scanner.h Specification #define coco_swprintf snprintf #endif +#define COCO_WCHAR_MAX 255 + #else #include #define _CHFMT L"lc" @@ -98,9 +100,10 @@ Scanner.h Specification #define coco_swprintf swprintf #endif +#define COCO_WCHAR_MAX 65535 + #endif -#define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) @@ -211,7 +214,8 @@ public: T operator[](tsize_t index) { if (0<=index && index #endif -//#define WITHOUT_WCHAR +#define WITHOUT_WCHAR #ifdef WITHOUT_WCHAR #define wchar_t char @@ -66,7 +66,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
#define wcsncasecmp strncasecmp #if _MSC_VER >= 1400 -#define coco_swprintf printf_s +#define coco_swprintf snprintf_s #elif _MSC_VER >= 1300 #define coco_swprintf _snprintf #elif defined __MINGW32__ @@ -76,6 +76,8 @@ Coco/R itself) does not fall under the GNU General Public License. #define coco_swprintf snprintf #endif +#define COCO_WCHAR_MAX 255 + #else #include #define _CHFMT L"lc" @@ -93,9 +95,10 @@ Coco/R itself) does not fall under the GNU General Public License. #define coco_swprintf swprintf #endif +#define COCO_WCHAR_MAX 65535 + #endif -#define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) @@ -207,7 +210,8 @@ class TArrayList T operator[](tsize_t index) { if (0<=index && index Date: Sat, 5 Jun 2021 16:04:19 +0200 Subject: [PATCH 42/95] Fix other places that can cause trouble when compiling without char_t --- src/DFA.cpp | 2 +- src/DFA.h | 2 +- src/Scanner.cpp | 6 +++--- src/Scanner.frame | 12 ++++++------ src/Scanner.h | 6 +++--- src/StringBuilder.cpp | 2 +- src/StringBuilder.h | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 823b2c8..b5d984d 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -400,7 +400,7 @@ void DFA::PrintStates() { //---------------------------- actions -------------------------------- -Action* DFA::FindAction(const State *state, wchar_t ch) { +Action* DFA::FindAction(const State *state, int ch) { for (Action *a = state->firstAction; a != NULL; a = a->next) if (a->typ == Node::chr && ch == a->sym) return a; else if (a->typ == Node::clas) { diff --git a/src/DFA.h b/src/DFA.h index e736528..20864fa 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -97,7 +97,7 @@ class DFA void CheckLabels(); //---------------------------- actions -------------------------------- - Action* FindAction(const State *state, wchar_t ch); + Action* FindAction(const State *state, int ch); void GetTargetStates(const 
Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); //------------------------- melted states ------------------------------ diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 4eb67a0..89296ed 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -125,7 +125,7 @@ wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { return data; } -wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { int targetLen = coco_string_length(target); wchar_t* data = new wchar_t[targetLen + 2]; wcsncpy(data, target, targetLen); @@ -150,14 +150,14 @@ bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); } -int coco_string_indexof(const wchar_t* data, const wchar_t value) { +int coco_string_indexof(const wchar_t* data, const int value) { const wchar_t* chr = wcschr(data, value); if (chr) { return (chr-data); } return -1; } -int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { +int coco_string_lastindexof(const wchar_t* data, const int value) { const wchar_t* chr = wcsrchr(data, value); if (chr) { return (chr-data); } diff --git a/src/Scanner.frame b/src/Scanner.frame index a1fedf1..5caccc9 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -119,12 +119,12 @@ wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); -wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); +wchar_t* coco_string_create_append(const wchar_t* data, const int value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t 
*value); -int coco_string_indexof(const wchar_t* data, const wchar_t value); -int coco_string_lastindexof(const wchar_t* data, const wchar_t value); +int coco_string_indexof(const wchar_t* data, const int value); +int coco_string_lastindexof(const wchar_t* data, const int value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); @@ -529,7 +529,7 @@ wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { return data; } -wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { int targetLen = coco_string_length(target); wchar_t* data = new wchar_t[targetLen + 2]; wcsncpy(data, target, targetLen); @@ -554,14 +554,14 @@ bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); } -int coco_string_indexof(const wchar_t* data, const wchar_t value) { +int coco_string_indexof(const wchar_t* data, const int value) { const wchar_t* chr = wcschr(data, value); if (chr) { return (chr-data); } return -1; } -int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { +int coco_string_lastindexof(const wchar_t* data, const int value) { const wchar_t* chr = wcsrchr(data, value); if (chr) { return (chr-data); } diff --git a/src/Scanner.h b/src/Scanner.h index 90a3881..7c96306 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -115,12 +115,12 @@ wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); -wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); +wchar_t* 
coco_string_create_append(const wchar_t* data, const int value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t *value); -int coco_string_indexof(const wchar_t* data, const wchar_t value); -int coco_string_lastindexof(const wchar_t* data, const wchar_t value); +int coco_string_indexof(const wchar_t* data, const int value); +int coco_string_lastindexof(const wchar_t* data, const int value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); diff --git a/src/StringBuilder.cpp b/src/StringBuilder.cpp index ea6b985..038519f 100644 --- a/src/StringBuilder.cpp +++ b/src/StringBuilder.cpp @@ -64,7 +64,7 @@ void StringBuilder::capacity(int new_capacity) { _capacity = new_capacity; } -void StringBuilder::Append(const wchar_t value) { +void StringBuilder::Append(const int value) { if (length == _capacity) { capacity(_capacity * 2); } diff --git a/src/StringBuilder.h b/src/StringBuilder.h index 8928e3c..6367f1a 100644 --- a/src/StringBuilder.h +++ b/src/StringBuilder.h @@ -12,7 +12,7 @@ class StringBuilder StringBuilder(const wchar_t *val); virtual ~StringBuilder(); - void Append(const wchar_t val); + void Append(const int val); void Append(const wchar_t *val); void capacity(int new_capacity); wchar_t* ToString(); From a262593ae6079903cf5ccf15927cb76ec18c3356 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 16:57:15 +0200 Subject: [PATCH 43/95] Fix AST generation to work with and without wchar_t --- src/Parser.cpp | 38 +++++++++++++++++++------------------- src/Parser.frame | 4 ++-- src/Parser.h | 2 +- src/ParserGen.cpp | 4 ++-- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/Parser.cpp b/src/Parser.cpp index 69cb47f..c2487ec 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -41,7 +41,7 @@ 
void Parser::AstAddTerminal() { ast_stack.Top()->children.Add(st_t); } -bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { Token *ntTok = new Token(); ntTok->kind = kind; ntTok->line = line; @@ -121,7 +121,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { void Parser::Coco() { Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; #ifdef PARSER_WITH_AST - Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create("Coco");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Coco"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif int beg = la->pos; int line = la->line; while (StartOf(1 /* any */)) { @@ -336,7 +336,7 @@ void Parser::Coco() { void Parser::SetDecl() { CharSet *s; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_SetDecl, "SetDecl", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SetDecl, _SC("SetDecl"), la->line); #endif Expect(_ident); #ifdef PARSER_WITH_AST @@ -367,7 +367,7 @@ void Parser::SetDecl() { void Parser::TokenDecl(int typ) { wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, "TokenDecl", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, _SC("TokenDecl"), la->line); #endif Sym(name, kind); sym = tab->FindSym(name); @@ -419,7 +419,7 @@ void Parser::TokenDecl(int typ) { void Parser::TokenExpr(Graph* &g) { Graph *g2; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenExpr, "TokenExpr", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenExpr, _SC("TokenExpr"), la->line); #endif 
TokenTerm(g); bool first = true; @@ -437,7 +437,7 @@ void Parser::TokenExpr(Graph* &g) { void Parser::Set(CharSet* &s) { CharSet *s2; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, "Set", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, _SC("Set"), la->line); #endif SimSet(s); while (la->kind == 21 /* "+" */ || la->kind == 22 /* "-" */) { @@ -464,7 +464,7 @@ void Parser::Set(CharSet* &s) { void Parser::AttrDecl(Symbol *sym) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, "AttrDecl", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, _SC("AttrDecl"), la->line); #endif if (la->kind == 25 /* "<" */) { Get(); @@ -520,7 +520,7 @@ void Parser::AttrDecl(Symbol *sym) { void Parser::SemText(Position* &pos) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, "SemText", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, _SC("SemText"), la->line); #endif Expect(40 /* "(." 
*/); #ifdef PARSER_WITH_AST @@ -557,7 +557,7 @@ void Parser::SemText(Position* &pos) { void Parser::Expression(Graph* &g) { Graph *g2; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Expression, "Expression", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Expression, _SC("Expression"), la->line); #endif Term(g); bool first = true; @@ -575,7 +575,7 @@ void Parser::Expression(Graph* &g) { void Parser::SimSet(CharSet* &s) { int n1, n2; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_SimSet, "SimSet", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SimSet, _SC("SimSet"), la->line); #endif s = new CharSet(); if (la->kind == _ident) { @@ -630,7 +630,7 @@ void Parser::SimSet(CharSet* &s) { void Parser::Char(int &n) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Char, "Char", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Char, _SC("Char"), la->line); #endif Expect(_char); #ifdef PARSER_WITH_AST @@ -654,7 +654,7 @@ void Parser::Char(int &n) { void Parser::Sym(wchar_t* &name, int &kind) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, "Sym", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, _SC("Sym"), la->line); #endif name = coco_string_create(_SC("???")); kind = id; if (la->kind == _ident) { @@ -699,7 +699,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { void Parser::Term(Graph* &g) { Graph *g2; Node *rslv = NULL; g = NULL; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, "Term", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, _SC("Term"), la->line); #endif if (StartOf(17 /* opt */)) { if (la->kind == 38 /* "IF" */) { @@ -726,7 +726,7 @@ void Parser::Term(Graph* &g) { void Parser::Resolver(Position* &pos) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Resolver, "Resolver", la->line); + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_Resolver, _SC("Resolver"), la->line); #endif Expect(38 /* "IF" */); #ifdef PARSER_WITH_AST @@ -749,7 +749,7 @@ void Parser::Factor(Graph* &g) { g = NULL; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Factor, "Factor", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Factor, _SC("Factor"), la->line); #endif switch (la->kind) { case _ident: case _string: case _char: case 30 /* "WEAK" */: { @@ -876,7 +876,7 @@ void Parser::Factor(Graph* &g) { void Parser::Attribs(Node *p) { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, "Attribs", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, _SC("Attribs"), la->line); #endif if (la->kind == 25 /* "<" */) { Get(); @@ -930,7 +930,7 @@ void Parser::Attribs(Node *p) { void Parser::Condition() { #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, "Condition", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, _SC("Condition"), la->line); #endif while (StartOf(20 /* alt */)) { if (la->kind == 31 /* "(" */) { @@ -955,7 +955,7 @@ void Parser::Condition() { void Parser::TokenTerm(Graph* &g) { Graph *g2; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, "TokenTerm", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, _SC("TokenTerm"), la->line); #endif TokenFactor(g); while (StartOf(8 /* nt */)) { @@ -987,7 +987,7 @@ void Parser::TokenTerm(Graph* &g) { void Parser::TokenFactor(Graph* &g) { wchar_t* name = NULL; int kind; #ifdef PARSER_WITH_AST - bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, "TokenFactor", la->line); + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, _SC("TokenFactor"), la->line); #endif g = NULL; if (la->kind == _ident || la->kind == _string || la->kind == _char) { diff --git a/src/Parser.frame b/src/Parser.frame index 5f71d2b..13fe599 100644 --- 
a/src/Parser.frame +++ b/src/Parser.frame @@ -94,7 +94,7 @@ public: SynTree *ast_root; TArrayList ast_stack; void AstAddTerminal(); - bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + bool AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line); void AstPopNonTerminal(); #endif @@ -134,7 +134,7 @@ void Parser::AstAddTerminal() { ast_stack.Top()->children.Add(st_t); } -bool Parser::AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line) { +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { Token *ntTok = new Token(); ntTok->kind = kind; ntTok->line = line; diff --git a/src/Parser.h b/src/Parser.h index 7dc8899..02b8b98 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -127,7 +127,7 @@ class Parser { SynTree *ast_root; TArrayList ast_stack; void AstAddTerminal(); - bool AstAddNonTerminal(eNonTerminals kind, const char *nt_name, int line); + bool AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line); void AstPopNonTerminal(); #endif diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index b47de34..6ff821d 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -398,9 +398,9 @@ void ParserGen::GenProductions() { fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 2); fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); - if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%") _SFMT _SC("; ntTok->line = 0; ntTok->val = coco_string_create(\"%") _SFMT _SC("\");ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); + if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%") _SFMT _SC("; ntTok->line = 0; ntTok->val = coco_string_create(_SC(\"%") _SFMT _SC("\"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); else { - fwprintf(gen, _SC("\t\tbool ntAdded = 
AstAddNonTerminal(eNonTerminals::_%") _SFMT _SC(", \"%") _SFMT _SC("\", la->line);\n"), sym->name, sym->name); + fwprintf(gen, _SC("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%") _SFMT _SC(", _SC(\"%") _SFMT _SC("\"), la->line);\n"), sym->name, sym->name); } fputws(_SC("#endif\n"), gen); ba.SetAll(false); From 1fa17ebf8fad297117bc8e477ef54f9d3ab8a3f1 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 17:00:00 +0200 Subject: [PATCH 44/95] Add the Taste example with memory leaks fixed --- src/Taste/CodeGenerator.h | 244 ++++++++++++++++++++++++++++++++++++++ src/Taste/Copyright.frame | 27 +++++ src/Taste/Makefile | 6 + src/Taste/SymbolTable.cpp | 77 ++++++++++++ src/Taste/SymbolTable.h | 78 ++++++++++++ src/Taste/Taste.IN | 1 + src/Taste/Taste.atg | 203 +++++++++++++++++++++++++++++++ src/Taste/Test.TAS | 31 +++++ src/Taste/build.sh | 4 + 9 files changed, 671 insertions(+) create mode 100644 src/Taste/CodeGenerator.h create mode 100644 src/Taste/Copyright.frame create mode 100644 src/Taste/Makefile create mode 100644 src/Taste/SymbolTable.cpp create mode 100644 src/Taste/SymbolTable.h create mode 100644 src/Taste/Taste.IN create mode 100644 src/Taste/Taste.atg create mode 100644 src/Taste/Test.TAS create mode 100755 src/Taste/build.sh diff --git a/src/Taste/CodeGenerator.h b/src/Taste/CodeGenerator.h new file mode 100644 index 0000000..0176338 --- /dev/null +++ b/src/Taste/CodeGenerator.h @@ -0,0 +1,244 @@ +#if !defined(TASTE_CODEGENERATOR_H__) +#define TASTE_CODEGENERATOR_H__ + +#include "Scanner.h" +#include +#include + +namespace Taste { + +class CodeGenerator +{ +public: + // opcodes + int + ADD, SUB, MUL, DIV, EQU, LSS, GTR, NEG, + LOAD, LOADG, STO, STOG, CONST, + CALL, RET, ENTER, LEAVE, + JMP, FJMP, READ, WRITE; + +#define OPCODE_SIZE 21 + wchar_t* opcode[OPCODE_SIZE]; + //memset(opcode, 0, OPCODE_SIZE * sizeof(wchar_t*)); + + int progStart; // address of first instruction of main program + int pc; // program counter + char *code; + + // 
data for Interpret + int *globals; + int *stack; + int top; // top of stack + int bp; // base pointer + + + CodeGenerator() { + // opcodes + ADD = 0; SUB = 1; MUL = 2; DIV = 3; EQU = 4; LSS = 5; GTR = 6; NEG = 7; + LOAD = 8; LOADG = 9; STO = 10; STOG = 11; CONST = 12; + CALL = 13; RET = 14; ENTER = 15; LEAVE = 16; + JMP = 17; FJMP = 18; READ = 19; WRITE = 20; + + opcode[ 0] = coco_string_create("ADD "); + opcode[ 1] = coco_string_create("SUB "); + opcode[ 2] = coco_string_create("MUL "); + opcode[ 3] = coco_string_create("DIV "); + opcode[ 4] = coco_string_create("EQU "); + opcode[ 5] = coco_string_create("LSS "); + opcode[ 6] = coco_string_create("GTR "); + opcode[ 7] = coco_string_create("NEG "); + opcode[ 8] = coco_string_create("LOAD "); + opcode[ 9] = coco_string_create("LOADG"); + opcode[10] = coco_string_create("STO "); + opcode[11] = coco_string_create("STOG "); + opcode[12] = coco_string_create("CONST"); + opcode[13] = coco_string_create("CALL "); + opcode[14] = coco_string_create("RET "); + opcode[15] = coco_string_create("ENTER"); + opcode[16] = coco_string_create("LEAVE"); + opcode[17] = coco_string_create("JMP "); + opcode[18] = coco_string_create("FJMP "); + opcode[19] = coco_string_create("READ "); + opcode[20] = coco_string_create("WRITE"); + +#define CODE_SIZE 3000 +#define GLOBALS_SIZE 100 + code = new char[CODE_SIZE]; + memset(code, 0, CODE_SIZE); + globals = new int[GLOBALS_SIZE]; + memset(globals, 0, GLOBALS_SIZE * sizeof(*globals)); + stack = new int[GLOBALS_SIZE]; + memset(stack, 0, GLOBALS_SIZE * sizeof(*stack)); + + progStart = 0; + + pc = 1; + } + + ~CodeGenerator() { + coco_string_delete(opcode[ 0]); + coco_string_delete(opcode[ 1]); + coco_string_delete(opcode[ 2]); + coco_string_delete(opcode[ 3]); + coco_string_delete(opcode[ 4]); + coco_string_delete(opcode[ 5]); + coco_string_delete(opcode[ 6]); + coco_string_delete(opcode[ 7]); + coco_string_delete(opcode[ 8]); + coco_string_delete(opcode[ 9]); + coco_string_delete(opcode[10]); + 
coco_string_delete(opcode[11]); + coco_string_delete(opcode[12]); + coco_string_delete(opcode[13]); + coco_string_delete(opcode[14]); + coco_string_delete(opcode[15]); + coco_string_delete(opcode[16]); + coco_string_delete(opcode[17]); + coco_string_delete(opcode[18]); + coco_string_delete(opcode[19]); + coco_string_delete(opcode[20]); + delete[] code; + delete[] globals; + delete[] stack; + } + + //----- code generation methods ----- + + void Emit (int op) { + //printf("Emit : %d\n", op); + code[pc++] = (char)op; + } + + void Emit (int op, int val) { + //printf("Emit : %d, %d\n", op, val); + Emit(op); Emit(val>>8); Emit(val); + } + + void Patch (int adr, int val) { + code[adr] = (char)(val>>8); code[adr+1] = (char)val; + } + + void Decode() { + int maxPc = pc; + pc = 1; + while (pc < maxPc) { + int code = Next(); + printf("%3d: %" _SFMT " ", pc-1, opcode[code]); + if (code == LOAD || code == LOADG || code == CONST || code == STO || code == STOG || + code == CALL || code == ENTER || code == JMP || code == FJMP) + printf("%d\n", Next2()); + else + if (code == ADD || code == SUB || code == MUL || code == DIV || code == NEG || + code == EQU || code == LSS || code == GTR || code == RET || code == LEAVE || + code == READ || code == WRITE) + printf("\n"); + } + } + + //----- interpreter methods ----- + + int Next () { + return code[pc++]; + } + + int Next2 () { + int x,y; + x = code[pc++]; y = code[pc++]; + return (x << 8) + y; + } + + int Int (bool b) { + if (b) return 1; else return 0; + } + + void Push (int val) { + //printf("Push : %d\n", top); + stack[top++] = val; + } + + int Pop() { + //printf("Pop : %d\n", top); + return stack[--top]; + } + + int ReadInt(FILE* s) { + int sign; + char ch; + do {fscanf(s, "%c", &ch);} while (!((ch >= '0' && ch <= '9') || ch == '-')); + + if (ch == '-') {sign = -1; fscanf(s, "%c", &ch);} else sign = 1; + int n = 0; + while (ch >= '0' && ch <= '9') { + n = 10 * n + (ch - '0'); + if (fscanf(s, "%c", &ch) <= 0) + break; + } + return n 
* sign; + } + + void Interpret (const char* data) { + int val; + FILE* s; + if ((s = fopen(data, "r")) == NULL) { + printf("--- Error accessing file %s\n", (char*)data); + exit(1); + } + printf("\n"); + pc = progStart; stack[0] = 0; top = 1; bp = 0; + for (;;) { + int nxt = Next(); + if (nxt == CONST) + Push(Next2()); + else if (nxt == LOAD) + Push(stack[bp+Next2()]); + else if (nxt == LOADG) + Push(globals[Next2()]); + else if (nxt == STO) + stack[bp+Next2()] = Pop(); + else if (nxt == STOG) + globals[Next2()] = Pop(); + else if (nxt == ADD) + Push(Pop()+Pop()); + else if (nxt == SUB) + Push(-Pop()+Pop()); + else if (nxt == DIV) + {val = Pop(); Push(Pop()/val);} + else if (nxt == MUL) + Push(Pop()*Pop()); + else if (nxt == NEG) + Push(-Pop()); + else if (nxt == EQU) + Push(Int(Pop()==Pop())); + else if (nxt == LSS) + Push(Int(Pop()>Pop())); + else if (nxt == GTR) + Push(Int(Pop()errors; + topScope = NULL; + curLevel = -1; + undefObj = new Obj(); + undefObj->name = coco_string_create("undef"); undefObj->type = undef; undefObj->kind = var; + undefObj->adr = 0; undefObj->level = 0; undefObj->next = NULL; +} + +SymbolTable::~SymbolTable() { + delete undefObj; + delete topScope; +} + +void SymbolTable::Err(const wchar_t* msg) { + errors->Error(0, 0, msg); +} + + +// open a new scope and make it the current scope (topScope) +void SymbolTable::OpenScope () { + Obj *scop = new Obj(); + scop->name = coco_string_create(""); scop->kind = scope; + scop->locals = NULL; scop->nextAdr = 0; + scop->next = topScope; topScope = scop; + curLevel++; +} + + +// close the current scope +void SymbolTable::CloseScope () { + Obj *scop = topScope; + topScope = topScope->next; curLevel--; + scop->next = NULL; + delete scop; +} + +// create a new object node in the current scope +Obj* SymbolTable::NewObj (const wchar_t* name, int kind, int type) { + Obj *p, *last, *obj = new Obj(); + obj->name = coco_string_create(name); obj->kind = kind; obj->type = type; + obj->level = curLevel; + p = 
topScope->locals; last = NULL; + while (p != NULL) { + if (coco_string_equal(p->name, name)) Err(_SC("name declared twice")); + last = p; p = p->next; + } + if (last == NULL) topScope->locals = obj; else last->next = obj; + if (kind == var) obj->adr = topScope->nextAdr++; + return obj; +} + + +// search the name in all open scopes and return its object node +Obj* SymbolTable::Find (const wchar_t* name) { + Obj *obj, *scope; + scope = topScope; + while (scope != NULL) { // for all open scopes + obj = scope->locals; + while (obj != NULL) { // for all objects in this scope + if (coco_string_equal(obj->name, name)) return obj; + obj = obj->next; + } + scope = scope->next; + } + wchar_t str[100]; + coco_swprintf(str, 100, _SC("%") _SFMT _SC(" is undeclared"), name); + Err(str); + return undefObj; +} + +}; // namespace diff --git a/src/Taste/SymbolTable.h b/src/Taste/SymbolTable.h new file mode 100644 index 0000000..b08d826 --- /dev/null +++ b/src/Taste/SymbolTable.h @@ -0,0 +1,78 @@ +#if !defined(TASTE_SYMBOLTABLE_H__) +#define TASTE_SYMBOLTABLE_H__ + +#include "Scanner.h" + +namespace Taste { + +class Parser; +class Errors; + +class Obj { // object describing a declared name +public: + wchar_t* name; // name of the object + int type; // type of the object (undef for proc) + Obj *next; // to next object in same scope + int kind; // var, proc, scope + int adr; // address in memory or start of proc + int level; // nesting level; 0=global, 1=local + Obj *locals; // scopes: to locally declared objects + int nextAdr; // scopes: next free address in this scope + + Obj() { + name = NULL; + type = 0; + next = NULL; + kind = 0; + adr = 0; + level = 0; + locals = NULL; + nextAdr = 0; + } + + ~Obj() { + coco_string_delete(name); + delete locals; + delete next; + } + + +}; + +class SymbolTable +{ +public: + const int // types + undef, integer, boolean; + + const int // object kinds + var, proc, scope; + + + int curLevel; // nesting level of current scope + Obj *undefObj; // object 
node for erroneous symbols + Obj *topScope; // topmost procedure scope + + Errors *errors; + + SymbolTable(Parser *parser); + ~SymbolTable(); + void Err(const wchar_t* msg); + + // open a new scope and make it the current scope (topScope) + void OpenScope (); + + // close the current scope + void CloseScope (); + + // create a new object node in the current scope + Obj* NewObj (const wchar_t* name, int kind, int type); + + // search the name in all open scopes and return its object node + Obj* Find (const wchar_t* name); + +}; + +}; // namespace + +#endif // !defined(TASTE_SYMBOLTABLE_H__) diff --git a/src/Taste/Taste.IN b/src/Taste/Taste.IN new file mode 100644 index 0000000..4ae0db3 --- /dev/null +++ b/src/Taste/Taste.IN @@ -0,0 +1 @@ +3 5 100 0 \ No newline at end of file diff --git a/src/Taste/Taste.atg b/src/Taste/Taste.atg new file mode 100644 index 0000000..9e14c20 --- /dev/null +++ b/src/Taste/Taste.atg @@ -0,0 +1,203 @@ +#include "SymbolTable.h" +#include "CodeGenerator.h" + +$namespace=Taste + +COMPILER Taste + + + int // operators + plus, minus, times, slash, equ, lss, gtr; + + int // types + undef, integer, boolean; + + int // object kinds + var, proc; + + int // opcodes + ADD, SUB, MUL, DIV, EQU, LSS, GTR, NEG, + LOAD, LOADG, STO, STOG, CONST, + CALL, RET, ENTER, LEAVE, + JMP, FJMP, READ, WRITE; + + SymbolTable *tab; + CodeGenerator *gen; + + void Err(const wchar_t* msg) { + errors.Error(la->line, la->col, msg); + } + + void InitDeclarations() { // it must exist + plus = 0; minus = 1; times = 2; slash = 3; equ = 4; lss = 5; gtr = 6; // operators + undef = 0; integer = 1; boolean = 2; // types + var = 0; proc = 1; // object kinds + + // opcodes + ADD = 0; SUB = 1; MUL = 2; DIV = 3; EQU = 4; LSS = 5; GTR = 6; NEG = 7; + LOAD = 8; LOADG = 9; STO = 10; STOG = 11; CONST = 12; + CALL = 13; RET = 14; ENTER = 15; LEAVE = 16; + JMP = 17; FJMP = 18; READ = 19; WRITE = 20; + } + + + +/*--------------------------------------------------------------------------*/ 
+CHARACTERS + letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz". + digit = "0123456789". + cr = '\r'. + lf = '\n'. + tab = '\t'. + +TOKENS + ident = letter {letter | digit}. + number = digit {digit}. + +COMMENTS FROM "/*" TO "*/" NESTED +COMMENTS FROM "//" TO lf + +IGNORE cr + lf + tab + + + +PRODUCTIONS +/*------------------------------------------------------------------------*/ +AddOp += (. op = -1; .) + ( '+' (. op = plus; .) + | '-' (. op = minus; .) + ). +/*------------------------------------------------------------------------*/ +Expr (. int type1, op; .) += SimExpr + [ RelOp + SimExpr (. if (type != type1) Err(_SC("incompatible types")); + gen->Emit(op); type = boolean; .) + ]. +/*------------------------------------------------------------------------*/ +Factor (. int n; Obj *obj; wchar_t* name; .) += (. type = undef; .) + ( Ident (. obj = tab->Find(name); coco_string_delete(name); type = obj->type; + if (obj->kind == var) { + if (obj->level == 0) gen->Emit(LOADG, obj->adr); + else gen->Emit(LOAD, obj->adr); + } else Err(_SC("variable expected")); .) + | number (. swscanf(t->val, _SC("%d"), &n); //n = Convert.ToInt32(t->val); + gen->Emit(CONST, n); type = integer; .) + | '-' + Factor (. if (type != integer) { + Err(_SC("integer type expected")); type = integer; + } + gen->Emit(NEG); .) + | "true" (. gen->Emit(CONST, 1); type = boolean; .) + | "false" (. gen->Emit(CONST, 0); type = boolean; .) + ). +/*------------------------------------------------------------------------*/ +Ident += ident (. name = coco_string_create(t->val); .). +/*------------------------------------------------------------------------*/ +MulOp += (. op = -1; .) + ( '*' (. op = times; .) + | '/' (. op = slash; .) + ). +/*------------------------------------------------------------------------*/ +ProcDecl (. wchar_t* name; Obj *obj; int adr; .) += "void" + Ident (. 
obj = tab->NewObj(name, proc, undef); obj->adr = gen->pc; + if (coco_string_equal(name, _SC("Main"))) gen->progStart = gen->pc; + tab->OpenScope(); coco_string_delete(name); .) + '(' ')' + '{' (. gen->Emit(ENTER, 0); adr = gen->pc - 2; .) + { VarDecl | Stat } + '}' (. gen->Emit(LEAVE); gen->Emit(RET); + gen->Patch(adr, tab->topScope->nextAdr); + tab->CloseScope(); .). +/*------------------------------------------------------------------------*/ +RelOp += (. op = -1; .) + ( "==" (. op = equ; .) + | '<' (. op = lss; .) + | '>' (. op = gtr; .) + ). +/*------------------------------------------------------------------------*/ +SimExpr (. int type1, op; .) += Term + { AddOp + Term (. if (type != integer || type1 != integer) + Err(_SC("integer type expected")); + gen->Emit(op); .) + }. +/*------------------------------------------------------------------------*/ +Stat (. int type; wchar_t* name; Obj *obj; + int adr, adr2, loopstart; .) += Ident (. obj = tab->Find(name); coco_string_delete(name); .) + ( '=' (. if (obj->kind != var) Err(_SC("cannot assign to procedure")); .) + Expr ';' + (. if (type != obj->type) Err(_SC("incompatible types")); + if (obj->level == 0) gen->Emit(STOG, obj->adr); + else gen->Emit(STO, obj->adr); .) + | '(' ')' ';' (. if (obj->kind != proc) Err(_SC("object is not a procedure")); + gen->Emit(CALL, obj->adr); .) + ) + +| "if" + '(' Expr ')' (. if (type != boolean) Err(_SC("boolean type expected")); + gen->Emit(FJMP, 0); adr = gen->pc - 2; .) + Stat + [ "else" (. gen->Emit(JMP, 0); adr2 = gen->pc - 2; + gen->Patch(adr, gen->pc); + adr = adr2; .) + Stat + ] (. gen->Patch(adr, gen->pc); .) + +| "while" (. loopstart = gen->pc; .) + '(' Expr ')' (. if (type != boolean) Err(_SC("boolean type expected")); + gen->Emit(FJMP, 0); adr = gen->pc - 2; .) + Stat (. gen->Emit(JMP, loopstart); gen->Patch(adr, gen->pc); .) + +| "read" + Ident ';' (. 
obj = tab->Find(name); coco_string_delete(name); + if (obj->type != integer) Err(_SC("integer type expected")); + gen->Emit(READ); + if (obj->level == 0) gen->Emit(STOG, obj->adr); + else gen->Emit(STO, obj->adr); .) + +| "write" + Expr ';' (. if (type != integer) Err(_SC("integer type expected")); + gen->Emit(WRITE); .) + +| '{' { Stat | VarDecl } '}' . +/*------------------------------------------------------------------------*/ +Taste (. wchar_t* name; + InitDeclarations(); .) += "program" + Ident (. coco_string_delete(name); tab->OpenScope(); .) + '{' + { VarDecl } + { ProcDecl } + '}' (. tab->CloseScope(); .). +/*------------------------------------------------------------------------*/ +Term (. int type1, op; .) += Factor + { MulOp + Factor (. if (type != integer || type1 != integer) + Err(_SC("integer type expected")); + gen->Emit(op); + .) + }. +/*------------------------------------------------------------------------*/ +Type += (. type = undef; .) + ( "int" (. type = integer; .) + | "bool" (. type = boolean; .) + ). +/*------------------------------------------------------------------------*/ +VarDecl (. wchar_t* name; int type; .) += Type + Ident (. tab->NewObj(name, var, type); coco_string_delete(name); .) + { ',' Ident (. tab->NewObj(name, var, type); coco_string_delete(name); .) + } ';'. + +END Taste. diff --git a/src/Taste/Test.TAS b/src/Taste/Test.TAS new file mode 100644 index 0000000..fcedcfd --- /dev/null +++ b/src/Taste/Test.TAS @@ -0,0 +1,31 @@ + +// This is a test program which can be compiled by the Taste-compiler. +// It reads a sequence of numbers and computes the sum of all integers +// up to these numbers. 
+ +program Test { + int i; + + void Foo() { + int a, b, max; + read a; read b; + if (a > b) max = a; else max = b; + write max; + } + + void SumUp() { + int sum; + sum = 0; + while (i > 0) { sum = sum + i; i = i - 1; } + write sum; + } + + void Main() { + read i; + while (i > 0) { + SumUp(); + read i; + } + } +} + diff --git a/src/Taste/build.sh b/src/Taste/build.sh new file mode 100755 index 0000000..8d7be51 --- /dev/null +++ b/src/Taste/build.sh @@ -0,0 +1,4 @@ +../Coco -frames .. Taste.atg +make +#myvalgrind --leak-check=full +./Taste Test.TAS From 268e32c071f30f57eb407bbff34d4d6438f31d55 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 17:51:20 +0200 Subject: [PATCH 45/95] Minor code layout fix --- src/DFA.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index b5d984d..ed2fb84 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -224,10 +224,10 @@ void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { void DFA::ConvertToStates(Node *p, Symbol *sym) { curGraph = p; curSy = sym; - if (tab->DelGraph(curGraph)) { - parser->SemErr(_SC("token might be empty")); - return; - } + if (tab->DelGraph(curGraph)) { + parser->SemErr(_SC("token might be empty")); + return; + } NumberNodes(curGraph, firstState, true); BitArray ba(tab->nodes.Count); FindTrans(curGraph, true, &ba); From 4ea34e7a83238edfdc8642805914fddae3916287 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 5 Jun 2021 20:48:33 +0200 Subject: [PATCH 46/95] Another memory leak fixed --- src/DFA.cpp | 28 +++++++++++++++++++++------- src/Tab.h | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index ed2fb84..055c21d 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -36,22 +36,26 @@ Coco/R itself) does not fall under the GNU General Public License. 
namespace Coco { -typedef wchar_t wchar_t_10[10]; -typedef wchar_t wchar_t_20[20]; +#define SZWC10 10 +#define SZWC20 20 +typedef wchar_t wchar_t_10[SZWC10+1]; +typedef wchar_t wchar_t_20[SZWC20+1]; //---------- Output primitives static wchar_t* DFACh(int ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) - coco_swprintf(format, 10, _SC("%d\0"), (int) ch); + coco_swprintf(format, SZWC10, _SC("%d"), (int) ch); else - coco_swprintf(format, 10, _SC("_SC('%") _CHFMT _SC("')\0"), (int) ch); + coco_swprintf(format, SZWC10, _SC("_SC('%") _CHFMT _SC("')"), (int) ch); + format[SZWC10] = _SC('\0'); return format; } static wchar_t* DFAChCond(int ch, wchar_t_20 &format) { wchar_t_10 fmt; wchar_t* res = DFACh(ch, fmt); - coco_swprintf(format, 20, _SC("ch == %") _SFMT _SC("\0"), res); + coco_swprintf(format, SZWC20, _SC("ch == %") _SFMT, res); + format[SZWC20] = _SC('\0'); return format; } @@ -116,6 +120,12 @@ void DFA::FindUsedStates(const State *state, BitArray *used) { FindUsedStates(a->target->state, used); } +static void deleteOnlyThisState(State **state) { + (*state)->next = NULL; + delete *state; + *state = NULL; +} + void DFA::DeleteRedundantStates() { //State *newState = new State[State::lastNr + 1]; State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1)); @@ -137,9 +147,13 @@ void DFA::DeleteRedundantStates() { a->target->state = newState[a->target->state->nr]; // delete unused states lastState = firstState; lastStateNr = 0; // firstState has number 0 - for (state = firstState->next; state != NULL; state = state->next) + State *state_to_delete = NULL; + for (state = firstState->next; state != NULL; state = state->next) { + if(state_to_delete) deleteOnlyThisState(&state_to_delete); if (used[state->nr]) {state->nr = ++lastStateNr; lastState = state;} - else lastState->next = state->next; + else { lastState->next = state->next; state_to_delete = state;} + } + if(state_to_delete) 
deleteOnlyThisState(&state_to_delete); free (newState); } diff --git a/src/Tab.h b/src/Tab.h index b1caa84..290643f 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -116,7 +116,7 @@ class Tab { void MakeSequence(Graph *g1, Graph *g2); void MakeIteration(Graph *g); void MakeOption(Graph *g); - void Finish(Graph *g); + void Finish(Graph *g); //set all 'next' from g->r to NULL void DeleteNodes(); Graph* StrToGraph(const wchar_t* str); void SetContextTrans(Node *p); // set transition code in the graph rooted at p From 31c62f3dffa8bad4eb8a89a3f372d12f24b3f8b3 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 09:06:38 +0200 Subject: [PATCH 47/95] Replace some magic numbers --- src/DFA.cpp | 5 +++-- src/ParserGen.cpp | 2 +- src/Tab.cpp | 13 +++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 055c21d..f8d5449 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -280,8 +280,9 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { state->endOf = sym; } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off - wchar_t format[200]; - coco_swprintf(format, 200, _SC("tokens %") _SFMT _SC(" and %") _SFMT _SC(" cannot be distinguished"), sym->name, matchedSym->name); + const size_t format_size = 200; + wchar_t format[format_size]; + coco_swprintf(format, format_size, _SC("tokens %") _SFMT _SC(" and %") _SFMT _SC(" cannot be distinguished"), sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken matchedSym->tokenKind = Symbol::classLitToken; diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 6ff821d..be931f9 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -117,7 +117,7 @@ void ParserGen::CopySourcePart (const Position *pos, int indent) { void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { errorNr++; - const int 
formatLen = 1000; + const size_t formatLen = 1000; wchar_t format[formatLen]; coco_swprintf(format, formatLen, _SC("\t\t\tcase %d: s = _SC(\""), errorNr); coco_string_merge(err, format); diff --git a/src/Tab.cpp b/src/Tab.cpp index 4ca84d3..9dea00c 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -331,13 +331,14 @@ int Tab::Ptr(const Node *p, bool up) { else return p->n; } -typedef wchar_t wchar_t_10[10]; +static const size_t wchar_t_10_sz = 10; +typedef wchar_t wchar_t_10[wchar_t_10_sz]; static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { - coco_swprintf(format, 10, _SC(" ")); + coco_swprintf(format, wchar_t_10_sz, _SC(" ")); } else { - coco_swprintf(format, 10, _SC("%5d"), pos->beg); + coco_swprintf(format, wchar_t_10_sz, _SC("%5d"), pos->beg); } return format; } @@ -439,10 +440,10 @@ CharSet* Tab::CharClassSet(int i) { wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) { - coco_swprintf(format, 10, _SC("%d"), ch); + coco_swprintf(format, wchar_t_10_sz, _SC("%d"), ch); return format; } else { - coco_swprintf(format, 10, _SC("'%") _CHFMT _SC("'"), ch); + coco_swprintf(format, wchar_t_10_sz, _SC("'%") _CHFMT _SC("'"), ch); return format; } } @@ -825,7 +826,7 @@ wchar_t Tab::Hex2Char(const wchar_t* s, int len) { } static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { - coco_swprintf(format, 10, _SC("\\0x%04x"), ch); + coco_swprintf(format, wchar_t_10_sz, _SC("\\0x%04x"), ch); return format; } From b9359ffc9086d3149cf0e99e7484397501fd663a Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 09:29:58 +0200 Subject: [PATCH 48/95] Remove unnecessary function and it's usages --- src/DFA.cpp | 4 +--- src/Tab.cpp | 34 +++++----------------------------- src/Tab.h | 1 - 3 files changed, 6 insertions(+), 33 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index f8d5449..a8c8e47 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -392,9 +392,7 @@ void 
DFA::PrintStates() { bool first = true; if (state->endOf == NULL) fputws(_SC(" "), trace); else { - wchar_t *paddedName = tab->Name(state->endOf->name); - fwprintf(trace, _SC("E(%12") _SFMT _SC(")"), paddedName); - coco_string_delete(paddedName); + fwprintf(trace, _SC("E(%-12.12") _SFMT _SC(")"), state->endOf->name); } fwprintf(trace, _SC("%3d:"), state->nr); if (state->firstAction == NULL) fputws(_SC("\n"), trace); diff --git a/src/Tab.cpp b/src/Tab.cpp index 9dea00c..076ef86 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -112,9 +112,7 @@ int Tab::Num(const Node *p) { } void Tab::PrintSym(const Symbol *sym) { - wchar_t *paddedName = Name(sym->name); - fwprintf(trace, _SC("%3d %14") _SFMT _SC(" %s"), sym->n, paddedName, nTyp[sym->typ]); - coco_string_delete(paddedName); + fwprintf(trace, _SC("%3d %-14.14") _SFMT _SC(" %s"), sym->n, sym->name, nTyp[sym->typ]); if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); if (sym->typ == Node::nt) { @@ -343,15 +341,6 @@ static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { return format; } -wchar_t* Tab::Name(const wchar_t *name) { - wchar_t *name2 = coco_string_create_append(name, _SC(" ")); - wchar_t *subName2 = coco_string_create(name2, 0, 12); - coco_string_delete(name2); - return subName2; - // found no simpler way to get the first 12 characters of the name - // padded with blanks on the right -} - void Tab::PrintNodes() { fwprintf(trace, _SC("%s"), "Graph nodes:\n" @@ -366,14 +355,10 @@ void Tab::PrintNodes() { p = nodes[i]; fwprintf(trace, _SC("%4d %s "), p->n, (nTyp[p->typ])); if (p->sym != NULL) { - wchar_t *paddedName = Name(p->sym->name); - fwprintf(trace, _SC("%12") _SFMT _SC(" "), paddedName); - coco_string_delete(paddedName); + fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), p->sym->name); } else if (p->typ == Node::clas) { CharClass *c = classes[p->val]; - wchar_t *paddedName = Name(c->name); - fwprintf(trace, _SC("%12") _SFMT _SC(" "), paddedName); - 
coco_string_delete(paddedName); + fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), c->name); } else fputws(_SC(" "), trace); fwprintf(trace, _SC("%5d "), Ptr(p->next, p->up)); @@ -467,16 +452,9 @@ void Tab::WriteCharClasses () { CharClass *c; for (int i=0; iname, _SC(" ")); - wchar_t* format = coco_string_create(format2, 0, 10); - coco_string_merge(format, _SC(": ")); - fputws(format, trace); - + fwprintf(trace, _SC("%-10.10") _SFMT _SC(": "), c->name); WriteCharSet(c->set); fputws(_SC("\n"), trace); - coco_string_delete(format); - coco_string_delete(format2); } fputws(_SC("\n"), trace); } @@ -1303,9 +1281,7 @@ void Tab::XRef() { for (i=0; iname); - fwprintf(trace, _SC(" %12") _SFMT, paddedName); - coco_string_delete(paddedName); + fwprintf(trace, _SC(" %-12.12") _SFMT, sym->name); TArrayList *list = (TArrayList*)(xref.Get(sym)); int col = 14; int line; diff --git a/src/Tab.h b/src/Tab.h index 290643f..517fc8e 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -130,7 +130,6 @@ class Tab { //----------------- graph printing ---------------------- int Ptr(const Node *p, bool up); - wchar_t* Name(const wchar_t* name); void PrintNodes(); //--------------------------------------------------------------------- From ae044ac71ebf820273004d96c18d0b36b7e67963 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 09:41:52 +0200 Subject: [PATCH 49/95] Remove unnecessary string allocation/deallocation --- src/DFA.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index a8c8e47..9d9d349 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -631,12 +631,8 @@ void DFA::GenLiterals () { for (int j = 0; j < ts[i]->Count; j++) { sym = (Symbol*) ((*(ts[i]))[j]); if (sym->tokenKind == Symbol::litToken) { - wchar_t* name = coco_string_create(SymName(sym)); - if (ignoreCase) { - wchar_t *oldName = name; - name = coco_string_create_lower(name); - coco_string_delete(oldName); - } + const wchar_t* name = SymName(sym); + if (ignoreCase) name = 
coco_string_create_lower(name); // sym.name stores literals with quotes, e.g. "\"Literal\"" fputws(_SC("\tkeywords.set(_SC("), gen); @@ -647,7 +643,7 @@ void DFA::GenLiterals () { } fwprintf(gen, _SC("), %d);\n"), sym->n); - coco_string_delete(name); + if (ignoreCase) coco_string_delete((wchar_t*&)name); } } } From ef40822185b019c83cc6434b3c200699356f8931 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 09:45:38 +0200 Subject: [PATCH 50/95] Remove unnecessary string allocation/deallocation --- src/DFA.cpp | 4 +--- src/Generator.cpp | 8 ++------ src/ParserGen.cpp | 4 +--- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 9d9d349..87b12f0 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -659,9 +659,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("namespace %") _SFMT _SC(" {\n"), curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("namespace %.*") _SFMT _SC(" {\n"), curLen, nsName+startPos); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; diff --git a/src/Generator.cpp b/src/Generator.cpp index 4b9a3d7..f937bd1 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -132,9 +132,7 @@ namespace Coco { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("%") _SFMT _SC("_"), curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("%.*") _SFMT _SC("_"), curLen, nsName+startPos); startPos = startPos + curLen + 1; } while (startPos < len); } @@ -167,9 +165,7 @@ namespace Coco { } while (ch == stop[i]); // stop[0..i-1] found; continue with last read 
character if (generateOutput) { - wchar_t *subStop = coco_string_create(stop, 0, i); - fwprintf(gen, _SC("%") _SFMT, subStop); - coco_string_delete(subStop); + fwprintf(gen, _SC("%.*") _SFMT, i, stop); } } else { if (generateOutput) { fwprintf(gen, _SC("%") _CHFMT, ch); } diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index be931f9..b739f44 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -69,9 +69,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, _SC("namespace %") _SFMT _SC(" {\n"), curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("namespace %.*") _SFMT _SC(" {\n"), curLen, nsName+startPos); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; From 9dc7b76c221c21ce8d4fb3546e5bb6d6c61cdf5c Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 09:59:54 +0200 Subject: [PATCH 51/95] Remove unnecessary string allocation/deallocation --- src/Scanner.cpp | 5 ++--- src/Scanner.frame | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 89296ed..d84ff5f 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -349,9 +349,8 @@ wchar_t* Buffer::GetString(int beg, int end) { SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); - wchar_t *res = coco_string_create(buf, 0, len); - coco_string_delete(buf); - return res; + buf[len] = 0; + return buf; } int Buffer::GetPos() { diff --git a/src/Scanner.frame b/src/Scanner.frame index 5caccc9..4c28dca 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -753,9 +753,8 @@ wchar_t* Buffer::GetString(int beg, int end) { SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); - wchar_t *res = 
coco_string_create(buf, 0, len); - coco_string_delete(buf); - return res; + buf[len] = 0; + return buf; } int Buffer::GetPos() { From 0c739db007bebc348d92a0ef569937c1e18a95a8 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 10:11:41 +0200 Subject: [PATCH 52/95] Remove unnecessary string allocation/deallocation --- src/Tab.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 076ef86..d362590 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -1340,17 +1340,11 @@ void Tab::SetOption(const wchar_t* s) { int nameLenght = coco_string_indexof(s, '='); int valueIndex = nameLenght + 1; - wchar_t *name = coco_string_create(s, 0, nameLenght); - wchar_t *value = coco_string_create(s, valueIndex); - - if (coco_string_equal(_SC("$namespace"), name)) { - if (nsName == NULL) nsName = coco_string_create(value); - } else if (coco_string_equal(_SC("$checkEOF"), name)) { - checkEOF = coco_string_equal(_SC("true"), value); + if (coco_string_equal_n(_SC("$namespace"), s, nameLenght)) { + if (nsName == NULL) nsName = coco_string_create(s + valueIndex); + } else if (coco_string_equal_n(_SC("$checkEOF"), s, nameLenght)) { + checkEOF = coco_string_equal(_SC("true"), s + valueIndex); } - - delete [] name; - delete [] value; } From 17e2ab3b6deeb8ade2f00ff6f12a43650ff0298c Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 11:29:50 +0200 Subject: [PATCH 53/95] Refactor code removing unnecessary layer that could leak memory --- src/Action.cpp | 11 ++++++----- src/Action.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Action.cpp b/src/Action.cpp index b8c57e5..b76b7a0 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -44,21 +44,22 @@ Action::~Action() { delete this->next; } -void Action::AddTarget(Target *t) { // add t to the action.targets +bool Action::AddTarget(State *state) { // add t to the action.targets Target *last = NULL; Target *p = target; - while (p != NULL && 
t->state->nr >= p->state->nr) { - if (t->state == p->state) return; + while (p != NULL && state->nr >= p->state->nr) { + if (state == p->state) return false; last = p; p = p->next; } + Target *t = new Target(state); t->next = p; if (p == target) target = t; else last->next = t; + return true; } void Action::AddTargets(Action *a) {// add copy of a.targets to action.targets for (Target *p = a->target; p != NULL; p = p->next) { - Target *t = new Target(p->state); - AddTarget(t); + AddTarget(p->state); } if (a->tc == Node::contextTrans) tc = Node::contextTrans; } diff --git a/src/Action.h b/src/Action.h index 85ec4ca..f929fbd 100644 --- a/src/Action.h +++ b/src/Action.h @@ -48,7 +48,7 @@ class Action // action of finite automaton Action(int typ, int sym, int tc); ~Action(); - void AddTarget(Target *t); // add t to the action.targets + bool AddTarget(State *state); // add t to the action.targets void AddTargets(Action *a); // add copy of a.targets to action.targets CharSet* Symbols(Tab *tab); bool ShiftWith(CharSet *s, Tab *tab); //return true if it used the CharSet *s From a05edfe5303064e1e8e55d013495e75d54e04dbe Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 11:30:20 +0200 Subject: [PATCH 54/95] Fix memory leak --- src/DFA.cpp | 16 ++++++++++------ src/DFA.h | 2 +- src/State.cpp | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 87b12f0..f94b0fb 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -290,12 +290,13 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } } -void DFA::SplitActions(State *state, Action *a, Action *b) { +bool DFA::SplitActions(State *state, Action *a, Action *b) { + bool rc = false; Action *c; CharSet *seta, *setb, *setc; seta = a->Symbols(tab); setb = b->Symbols(tab); if (seta->Equals(setb)) { a->AddTargets(b); - state->DetachAction(b); + rc = state->DetachAction(b); } else if (seta->Includes(setb)) { setc = seta->Clone(); setc->Subtract(setb); b->AddTargets(a); @@ -315,9 +316,10 @@ 
void DFA::SplitActions(State *state, Action *a, Action *b) { c->AddTargets(b); if(!c->ShiftWith(setc, tab)) delete setc; state->AddAction(c); - return; //don't need to delete anything + return rc; //don't need to delete anything } delete seta; delete setb; + return rc; } bool DFA::Overlap(const Action *a, const Action *b) { @@ -335,11 +337,13 @@ bool DFA::Overlap(const Action *a, const Action *b) { bool DFA::MakeUnique(State *state) { // return true if actions were split bool changed = false; for (Action *a = state->firstAction; a != NULL; a = a->next) - for (Action *b = a->next; b != NULL; b = b->next) + for (Action *b = a->next; b != NULL;) if (Overlap(a, b)) { - SplitActions(state, a, b); + //because an action can be deleted in SplitActions we need two pointers + Action *c = b; b = b->next; + SplitActions(state, a, c); changed = true; - } + } else b = b->next; return changed; } diff --git a/src/DFA.h b/src/DFA.h index 20864fa..b59e3d4 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -87,7 +87,7 @@ class DFA void ConvertToStates(Node *p, Symbol *sym); // match string against current automaton; store it either as a fixedToken or as a litToken void MatchLiteral(wchar_t* s, Symbol *sym); - void SplitActions(State *state, Action *a, Action *b); + bool SplitActions(State *state, Action *a, Action *b); bool Overlap(const Action *a, const Action *b); bool MakeUnique(State *state); // return true if actions were split void MeltStates(State *state); diff --git a/src/State.cpp b/src/State.cpp index d80b610..09111f9 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -66,6 +66,7 @@ bool State::DetachAction(Action *act) { else { lasta->next = a->next; } + a->next = NULL; delete a; return true; } return false; From 92f46df2606b910370a27bf095fec9866b47d0ab Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 11:35:04 +0200 Subject: [PATCH 55/95] Fix memory leak --- src/Symbol.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Symbol.cpp b/src/Symbol.cpp index 
9364ec7..4869b57 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -61,6 +61,7 @@ Symbol::~Symbol() { delete this->follow; delete this->nts; delete this->semPos; + delete this->attrPos; } }; // namespace From 87aed462fa738a4645bc341a2e2f9d330f601df2 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 12:18:23 +0200 Subject: [PATCH 56/95] Add filename to error messages based on https://github.com/cviehb/CocoR-CPP/commit/ed2e153982bcaa4b673aa2930581a4270db2ea05 --- src/Coco.atg | 4 ++-- src/Coco.cpp | 4 ++-- src/DFA.cpp | 2 +- src/Parser.cpp | 19 +++++++++++-------- src/Parser.frame | 20 ++++++++++++-------- src/Parser.h | 5 +++-- src/ParserGen.cpp | 2 +- src/Scanner.cpp | 10 ++++++---- src/Scanner.frame | 15 +++++++++++---- src/Scanner.h | 5 +++++ src/Tab.cpp | 2 +- 11 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 31ad7a5..9f651f8 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -189,7 +189,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); - if (errors.count == 0) { + if (errors->count == 0) { wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); @@ -317,7 +317,7 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ == Node::t) errors.Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(_SC("semantic action not allowed here")); .) + [ SemText<.sym->semPos.> (. if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(_SC("semantic action not allowed here")); .) ] . 
diff --git a/src/Coco.cpp b/src/Coco.cpp index 794bc8c..5139e18 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -136,8 +136,8 @@ int main(int argc, char *argv_[]) { coco_string_delete(file); coco_string_delete(srcDir); - wprintf(_SC("%d errors detected\n"), parser.errors.count); - if (parser.errors.count != 0) { + wprintf(_SC("%d errors detected\n"), parser.errors->count); + if (parser.errors->count != 0) { exit(1); } diff --git a/src/DFA.cpp b/src/DFA.cpp index f94b0fb..cc4e035 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -892,7 +892,7 @@ void DFA::WriteScanner() { DFA::DFA(Parser *parser) { this->parser = parser; tab = parser->tab; - errors = &parser->errors; + errors = parser->errors; trace = parser->trace; firstState = NULL; lastState = NULL; lastStateNr = -1; firstState = NewState(); diff --git a/src/Parser.cpp b/src/Parser.cpp index c2487ec..3a54d80 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -59,12 +59,12 @@ void Parser::AstPopNonTerminal() { #endif void Parser::SynErr(int n) { - if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { - if (errDist >= minErrDist) errors.Error(t->line, t->col, msg); + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); errDist = 0; } @@ -300,7 +300,7 @@ void Parser::Coco() { tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); - if (errors.count == 0) { + if (errors->count == 0) { wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); @@ -409,7 +409,7 @@ void Parser::TokenDecl(int typ) { } else SynErr(45); if (la->kind == 40 /* "(." 
*/) { SemText(sym->semPos); - if (typ == Node::t) errors.Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); + if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -1160,6 +1160,7 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; errDist = minErrDist; this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); } bool Parser::StartOf(int s) { @@ -1198,6 +1199,7 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete dummyToken; + delete errors; #ifdef PARSER_WITH_AST delete ast_root; #endif @@ -1208,8 +1210,9 @@ Parser::~Parser() { #endif } -Errors::Errors() { +Errors::Errors(const char * FileName) { count = 0; + file = FileName; } void Errors::SynErr(int line, int col, int n) { @@ -1278,17 +1281,17 @@ void Errors::SynErr(int line, int col, int n) { } break; } - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); } void Errors::Warning(const wchar_t *s) { diff --git a/src/Parser.frame b/src/Parser.frame index 13fe599..6cb970f 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -59,8 +59,9 @@ struct SynTree { class Errors { public: int count; // number of errors detected + const char * file; - Errors(); + Errors(const char * FileName); void SynErr(int line, int col, int n); void Error(int line, int col, const 
wchar_t *s); void Warning(int line, int col, const wchar_t *s); @@ -85,7 +86,7 @@ private: public: Scanner *scanner; - Errors errors; + Errors *errors; Token *t; // last recognized token Token *la; // lookahead token @@ -152,12 +153,12 @@ void Parser::AstPopNonTerminal() { #endif void Parser::SynErr(int n) { - if (errDist >= minErrDist) errors.SynErr(la->line, la->col, n); + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { - if (errDist >= minErrDist) errors.Error(t->line, t->col, msg); + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); errDist = 0; } @@ -312,6 +313,7 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; errDist = minErrDist; this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); } bool Parser::StartOf(int s) { @@ -326,6 +328,7 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete dummyToken; + delete errors; #ifdef PARSER_WITH_AST delete ast_root; #endif @@ -336,8 +339,9 @@ Parser::~Parser() { #endif } -Errors::Errors() { +Errors::Errors(const char * FileName) { count = 0; + file = FileName; } void Errors::SynErr(int line, int col, int n) { @@ -353,17 +357,17 @@ void Errors::SynErr(int line, int col, int n) { } break; } - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(_SC("-- line %d col %d: %") _SFMT _SC("\n"), line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); } void Errors::Warning(const wchar_t *s) { diff --git a/src/Parser.h b/src/Parser.h index 
02b8b98..5d3ca7c 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -59,8 +59,9 @@ struct SynTree { class Errors { public: int count; // number of errors detected + const char * file; - Errors(); + Errors(const char * FileName); void SynErr(int line, int col, int n); void Error(int line, int col, const wchar_t *s); void Warning(int line, int col, const wchar_t *s); @@ -118,7 +119,7 @@ class Parser { public: Scanner *scanner; - Errors errors; + Errors *errors; Token *t; // last recognized token Token *la; // lookahead token diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index b739f44..ccb9f77 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -509,7 +509,7 @@ ParserGen::ParserGen (Parser *parser) { altErr = 1; syncErr = 2; tab = parser->tab; - errors = &parser->errors; + errors = parser->errors; trace = parser->trace; buffer = parser->scanner->buffer; errorNr = -1; diff --git a/src/Scanner.cpp b/src/Scanner.cpp index d84ff5f..b1e2cd5 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -445,23 +445,24 @@ int UTF8Buffer::Read() { Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); + parseFileName = NULL; Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; - char *chFileName = coco_string_create_char(fileName); - if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName); + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); exit(1); } - coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); + parseFileName = NULL; Init(); } @@ -475,6 +476,7 @@ Scanner::~Scanner() { } delete [] tval; delete buffer; + if(parseFileName) coco_string_delete(parseFileName); } void Scanner::Init() { diff --git a/src/Scanner.frame b/src/Scanner.frame index 4c28dca..487ed29 
100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -396,6 +396,8 @@ private: int col; // column number of current character int oldEols; // EOLs that appeared in a comment; + char *parseFileName; + void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); @@ -417,6 +419,9 @@ public: Token* Scan(); Token* Peek(); void ResetPeek(); + const char *GetParserFileName() { + return parseFileName ? parseFileName : "unknown"; + }; }; // end Scanner @@ -849,23 +854,24 @@ int UTF8Buffer::Read() { Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); + parseFileName = NULL; Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; - char *chFileName = coco_string_create_char(fileName); - if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName); + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); exit(1); } - coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); + parseFileName = NULL; Init(); } @@ -879,6 +885,7 @@ Scanner::~Scanner() { } delete [] tval; delete buffer; + if(parseFileName) coco_string_delete(parseFileName); } void Scanner::Init() { diff --git a/src/Scanner.h b/src/Scanner.h index 7c96306..1355f82 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -392,6 +392,8 @@ class Scanner { int col; // column number of current character int oldEols; // EOLs that appeared in a comment; + char *parseFileName; + void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); @@ -415,6 +417,9 @@ class Scanner { Token* Scan(); Token* Peek(); void ResetPeek(); + const char *GetParserFileName() { + return parseFileName ? 
parseFileName : "unknown"; + }; }; // end Scanner diff --git a/src/Tab.cpp b/src/Tab.cpp index d362590..3862878 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -47,7 +47,7 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; - errors = &parser->errors; + errors = parser->errors; eofSy = NewSym(Node::t, _SC("EOF"), 0, 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); checkEOF = true; From 987595cbca4fe1ec2bd30b3c7b0bd02189d03366 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 13:35:38 +0200 Subject: [PATCH 57/95] Put braces around token declaration demantic actions --- src/DFA.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index cc4e035..0de8c46 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -758,7 +758,11 @@ void DFA::WriteState(const State *state) { } } else { fputws(_SC("loopState = false;"), gen); - if(endOf->semPos && endOf->typ == Node::t) CopySourcePart(endOf->semPos, 0); + if(endOf->semPos && endOf->typ == Node::t) { + fputws(_SC(" {"), gen); + CopySourcePart(endOf->semPos, 0); + fputws(_SC("}"), gen); + } fputws(_SC(" break;}\n"), gen); } } From ea0ff029c00d09ee9852af87a4be26bd32ea6451 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 15:18:10 +0200 Subject: [PATCH 58/95] Add stub code to allow build CocoR parsers without dependency on libstdc++ --- src/Makefile | 2 +- src/Parser.cpp | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ src/Parser.frame | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index f70959d..e3f9f22 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ all: - g++ -g -Wall *.cpp -o Coco $(CFLAGS) + g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o Coco $(CFLAGS) clean: rm -f Coco diff --git a/src/Parser.cpp b/src/Parser.cpp index 3a54d80..3384af9 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1357,3 +1357,54 @@ void 
SynTree::dump2(int maxT, int indent, bool isLast) { } // namespace + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/Parser.frame b/src/Parser.frame index 6cb970f..72a5fe1 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -432,3 +432,54 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { #endif -->namespace_close + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if 
(p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB From 110c3903e22794c85d3ef255582644c0604b7395 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 15:37:47 +0200 Subject: [PATCH 59/95] Fix to cross compile on linux with mingw64 compiler --- src/Coco.cpp | 2 +- src/Makefile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Coco.cpp b/src/Coco.cpp index 5139e18..66bc4ca 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -49,7 +49,7 @@ Coco/R itself) does not fall under the GNU General Public License. using namespace Coco; -#ifdef _WIN32 +#if defined(_WIN32) && !defined(__MINGW32__) int wmain(int argc, wchar_t *argv[]) { #elif defined __GNUC__ int main(int argc, char *argv_[]) { diff --git a/src/Makefile b/src/Makefile index e3f9f22..ed41589 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,6 @@ all: g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o Coco $(CFLAGS) + #x86_64-w64-mingw32-g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o Coco.exe $(CFLAGS) clean: rm -f Coco From 9e5a93215a005bd7f2f1da6722b8e07b21d3149a Mon Sep 17 00:00:00 2001 From: mingodad Date: Sun, 6 Jun 2021 15:54:26 +0200 Subject: [PATCH 60/95] Start playing with compiling CocoR-CPP to wasm --- src/build-wasm.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/build-wasm.sh diff --git a/src/build-wasm.sh b/src/build-wasm.sh new file mode 100644 index 0000000..0981c68 --- /dev/null +++ b/src/build-wasm.sh @@ -0,0 +1,4 @@ +#!/bin/sh +#emsdk-env +em++ -Wall -O2 -m32 -fno-rtti -fno-exceptions *.cpp -o coco-release-emscripten-32.bc +[ -e coco-release-emscripten-32.bc ] && emcc coco-release-emscripten-32.bc -o coco-wasm.html From 72d60354045bbc960022920fd72018523dece77c Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 8 Jun 2021 12:46:42 +0200 Subject: [PATCH 61/95] Implement the generation of an EBNF grammar understood by https://www.bottlecaps.de/rr/ui to generate railroad diagrams 
--- src/Coco.atg | 3 ++ src/Coco.cpp | 5 ++- src/HashTable.cpp | 5 +++ src/HashTable.h | 2 ++ src/Parser.cpp | 3 ++ src/ParserGen.cpp | 91 +++++++++++++++++++++++++++++++++++++++++++++++ src/ParserGen.h | 2 ++ src/Tab.cpp | 5 +-- src/Tab.h | 27 +++++++------- 9 files changed, 127 insertions(+), 16 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 9f651f8..d358af3 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -199,6 +199,9 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra tab->GrammarCheckAll(); } else doGenCode = tab->GrammarOk(); + if(tab->genRREBNF && doGenCode) { + pgen->WriteRREBNF(); + } if (doGenCode) { wprintf(_SC("parser")); pgen->WriteParser(); diff --git a/src/Coco.cpp b/src/Coco.cpp index 66bc4ca..b8e5497 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -66,7 +66,7 @@ int main(int argc, char *argv_[]) { wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; char *chTrFileName = NULL; - bool emitLines = false, ignoreGammarErrors = false; + bool emitLines = false, ignoreGammarErrors = false, genRREBNF = false; for (int i = 1; i < argc; i++) { if (coco_string_equal(argv[i], _SC("-namespace")) && i < argc - 1) nsName = coco_string_create(argv[++i]); @@ -74,6 +74,7 @@ int main(int argc, char *argv_[]) { else if (coco_string_equal(argv[i], _SC("-trace")) && i < argc - 1) ddtString = coco_string_create(argv[++i]); else if (coco_string_equal(argv[i], _SC("-o")) && i < argc - 1) outDir = coco_string_create_append(argv[++i], _SC("/")); else if (coco_string_equal(argv[i], _SC("-lines"))) emitLines = true; + else if (coco_string_equal(argv[i], _SC("-genRREBNF"))) genRREBNF = true; else if (coco_string_equal(argv[i], _SC("-ignoreGammarErrors"))) ignoreGammarErrors = true; else srcName = coco_string_create(argv[i]); } @@ -109,6 +110,7 @@ int main(int argc, char *argv_[]) { tab.frameDir = coco_string_create(frameDir); tab.outDir = coco_string_create(outDir != NULL ? 
outDir : srcDir); tab.emitLines = emitLines; + tab.genRREBNF = genRREBNF; parser.ignoreGammarErrors = ignoreGammarErrors; if (ddtString != NULL) tab.SetDDT(ddtString); parser.tab = &tab; @@ -150,6 +152,7 @@ int main(int argc, char *argv_[]) { " -trace \n" " -o \n" " -lines\n" + " -genRREBNF\n" " -ignoreGammarErrors\n" "Valid characters in the trace string:\n" " A trace automaton\n" diff --git a/src/HashTable.cpp b/src/HashTable.cpp index f49f046..5e7b551 100644 --- a/src/HashTable.cpp +++ b/src/HashTable.cpp @@ -95,6 +95,11 @@ HashTable::Iter::Iter(HashTable *ht) { this->cur = NULL; } +void HashTable::Iter::Reset() { + this->pos = 0; + this->cur = NULL; +} + bool HashTable::Iter::HasNext() { while (cur == NULL && pos < ht->size) { cur = ht->data[pos]; diff --git a/src/HashTable.h b/src/HashTable.h index 454c70e..8d4e049 100644 --- a/src/HashTable.h +++ b/src/HashTable.h @@ -43,6 +43,7 @@ class Iterator { public: virtual ~Iterator() {}; virtual bool HasNext() = 0; + virtual void Reset() = 0; virtual DictionaryEntry* Next() = 0; }; @@ -72,6 +73,7 @@ class HashTable public: Iter(HashTable *ht); virtual bool HasNext(); + virtual void Reset(); virtual DictionaryEntry* Next(); }; diff --git a/src/Parser.cpp b/src/Parser.cpp index 3384af9..90ed8d6 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -310,6 +310,9 @@ void Parser::Coco() { tab->GrammarCheckAll(); } else doGenCode = tab->GrammarOk(); + if(tab->genRREBNF && doGenCode) { + pgen->WriteRREBNF(); + } if (doGenCode) { wprintf(_SC("parser")); pgen->WriteParser(); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index ccb9f77..9471868 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -429,6 +429,97 @@ void ParserGen::InitSets() { fputws(_SC("\t};\n\n"), gen); } +int ParserGen::GenCodeRREBNF (const Node *p) { + int rc = 0; + const Node *p2; + while (p != NULL) { + switch (p->typ) { + case Node::nt: + case Node::t: { + fputws(p->sym->name, gen); + fputws(_SC(" "), gen); + ++rc; + break; + } + case Node::wt: { 
+ break; + } + case Node::any: { + break; + } + case Node::eps: break; // nothing + case Node::rslv: break; // nothing + case Node::sem: { + break; + } + case Node::sync: { + break; + } + case Node::alt: { + fputws(_SC("( "), gen); + p2 = p; + while (p2 != NULL) { + rc += GenCodeRREBNF(p2->sub); + p2 = p2->down; + if(p2 != NULL) fputws(_SC("| "), gen); + } + fputws(_SC(") "), gen); + break; + } + case Node::iter: { + fputws(_SC("( "), gen); + rc += GenCodeRREBNF(p->sub); + fputws(_SC(")* "), gen); + break; + } + case Node::opt: + fputws(_SC("( "), gen); + rc += GenCodeRREBNF(p->sub); + fputws(_SC(")? "), gen); + break; + } + if (p->up) break; + p = p->next; + } + return rc; +} + +void ParserGen::WriteRREBNF () { + Symbol *sym; + Generator g(tab, errors); + gen = g.OpenGen("Parser.ebnf"); + + fwprintf(gen, _SC("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n")); + fwprintf(gen, _SC("\n//\n// productions\n//\n\n")); + for (int i=0; inonterminals.Count; i++) { + sym = tab->nonterminals[i]; + fwprintf(gen, _SC("%s ::= "), sym->name); + if(GenCodeRREBNF(sym->graph) == 0) { + fputws(_SC("\"\?\?()\?\?\""), gen); + } + fputws(_SC("\n"), gen); + } + fwprintf(gen, _SC("\n//\n// tokens\n//\n\n")); + Iterator *iter = tab->literals.GetIterator(); + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; + if (isalpha(sym->name[0])) { + iter->Reset(); + while (iter->HasNext()) { + DictionaryEntry *e = iter->Next(); + if (e->val == sym) { + fwprintf(gen, _SC("%s ::= %s\n"), sym->name, e->key); + break; + } + } + } else { + //fwprintf(gen, _SC("%d /* %s */"), sym->n, sym->name)); + } + } + delete iter; + fclose(gen); +} + void ParserGen::WriteParser () { Generator g(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart diff --git a/src/ParserGen.h b/src/ParserGen.h index e1e82ab..99d189c 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -86,6 +86,8 @@ class ParserGen void 
GenProductionsHeader(); void InitSets(); void OpenGen(const wchar_t* genName, bool backUp); + int GenCodeRREBNF(const Node *p); + void WriteRREBNF(); void WriteParser(); void WriteStatistics(); void WriteSymbolOrCode(FILE *gen, const Symbol *sym); diff --git a/src/Tab.cpp b/src/Tab.cpp index 3862878..ba72e2c 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -51,8 +51,9 @@ Tab::Tab(Parser *parser) { eofSy = NewSym(Node::t, _SC("EOF"), 0, 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); checkEOF = true; - visited = allSyncSets = NULL; - srcName = srcDir = nsName = frameDir = outDir = NULL; + visited = allSyncSets = NULL; + srcName = srcDir = nsName = frameDir = outDir = NULL; + genRREBNF = false; } Tab::~Tab() { diff --git a/src/Tab.h b/src/Tab.h index 517fc8e..8d6016f 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -51,26 +51,27 @@ class Tab { public: Position *semDeclPos; // position of global semantic declarations CharSet *ignored; // characters ignored by the scanner - bool ddt[10]; // debug and test switches + bool ddt[10]; // debug and test switches + bool genRREBNF; //generate EBNF for railroad diagram Symbol *gramSy; // root nonterminal; filled by ATG Symbol *eofSy; // end of file symbol Symbol *noSym; // used in case of an error BitArray *allSyncSets; // union of all synchronisation sets - HashTable literals; // symbols that are used as literals + HashTable literals; // symbols that are used as literals - wchar_t* srcName; // name of the atg file (including path) - wchar_t* srcDir; // directory path of the atg file - wchar_t* nsName; // namespace for generated files - wchar_t* frameDir; // directory containing the frame files - wchar_t* outDir; // directory for generated files - bool checkEOF; // should coco generate a check for EOF at - // the end of Parser.Parse(): - bool emitLines; // emit line directives in generated parser + wchar_t* srcName; // name of the atg file (including path) + wchar_t* srcDir; // directory path of the atg file + wchar_t* nsName; 
// namespace for generated files + wchar_t* frameDir; // directory containing the frame files + wchar_t* outDir; // directory for generated files + bool checkEOF; // should coco generate a check for EOF at + // the end of Parser.Parse(): + bool emitLines; // emit line directives in generated parser - BitArray *visited; // mark list for graph traversals - Symbol *curSy; // current symbol in computation of sets + BitArray *visited; // mark list for graph traversals + Symbol *curSy; // current symbol in computation of sets - Parser *parser; // other Coco objects + Parser *parser; // other Coco objects FILE* trace; Errors *errors; From b8c95fef0c31f8d522709cabf38d3226ff12906f Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 8 Jun 2021 12:47:31 +0200 Subject: [PATCH 62/95] Small code change without functionality change --- src/Scanner.cpp | 2 +- src/Scanner.frame | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Scanner.cpp b/src/Scanner.cpp index b1e2cd5..ec52161 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -682,7 +682,7 @@ void Scanner::AppendVal(Token *t) { } Token* Scanner::NextToken() { - while(true) { + for(;;) { while (ch == _SC(' ') || (ch >= 9 && ch <= 10) || ch == 13 ) NextCh(); diff --git a/src/Scanner.frame b/src/Scanner.frame index 487ed29..c1d2543 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -1005,7 +1005,7 @@ void Scanner::AppendVal(Token *t) { } Token* Scanner::NextToken() { - while(true) { + for(;;) { while (ch == _SC(' ') || -->scan1 ) NextCh(); From 672e3c2c041389c96cd2d2b000509a574c27fb29 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 08:30:18 +0200 Subject: [PATCH 63/95] Add missing Taste.cpp and fixes for latest changes --- src/Taste/Makefile | 5 ++++- src/Taste/SymbolTable.cpp | 2 +- src/Taste/Taste.atg | 2 +- src/Taste/Taste.cpp | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 src/Taste/Taste.cpp diff --git 
a/src/Taste/Makefile b/src/Taste/Makefile index ee13e77..9ef8f43 100644 --- a/src/Taste/Makefile +++ b/src/Taste/Makefile @@ -1,6 +1,9 @@ -all: +all: Parser.cpp g++ -g *.cpp -o Taste $(CFLAGS) +Parser.cpp: Taste.atg + ../Coco -frames .. Taste.atg + clean: rm -f Taste diff --git a/src/Taste/SymbolTable.cpp b/src/Taste/SymbolTable.cpp index e7ec054..ee5bca5 100644 --- a/src/Taste/SymbolTable.cpp +++ b/src/Taste/SymbolTable.cpp @@ -4,7 +4,7 @@ namespace Taste { SymbolTable::SymbolTable(Parser *parser): undef(0), integer(1), boolean(2), var(0), proc(1), scope(2) { - errors = &parser->errors; + errors = parser->errors; topScope = NULL; curLevel = -1; undefObj = new Obj(); diff --git a/src/Taste/Taste.atg b/src/Taste/Taste.atg index 9e14c20..2b671b8 100644 --- a/src/Taste/Taste.atg +++ b/src/Taste/Taste.atg @@ -25,7 +25,7 @@ COMPILER Taste CodeGenerator *gen; void Err(const wchar_t* msg) { - errors.Error(la->line, la->col, msg); + errors->Error(la->line, la->col, msg); } void InitDeclarations() { // it must exist diff --git a/src/Taste/Taste.cpp b/src/Taste/Taste.cpp new file mode 100644 index 0000000..5686bb5 --- /dev/null +++ b/src/Taste/Taste.cpp @@ -0,0 +1,35 @@ +#include + +#include "SymbolTable.h" + +#include "Scanner.h" +#include "Parser.h" +#include + +using namespace Taste; + +int main (int argc, char *argv[]) { + + if (argc == 2) { + wchar_t *fileName = coco_string_create(argv[1]); + Taste::Scanner *scanner = new Taste::Scanner(fileName); + Taste::Parser *parser = new Taste::Parser(scanner); + parser->tab = new Taste::SymbolTable(parser); + parser->gen = new Taste::CodeGenerator(); + parser->Parse(); + if (parser->errors->count == 0) { + parser->gen->Decode(); + parser->gen->Interpret("Taste.IN"); + } + + coco_string_delete(fileName); + delete parser->gen; + delete parser->tab; + delete parser; + delete scanner; + } else + printf("-- No source file specified\n"); + + return 0; + +} From f6cb7b2a212319bc8a6fd6c9a8be9676a6bea247 Mon Sep 17 00:00:00 2001 From: 
mingodad Date: Wed, 9 Jun 2021 09:43:20 +0200 Subject: [PATCH 64/95] Remove unnecessary 'while' loop because it's using 'goto' to loop inside the 'switch' statement --- src/DFA.cpp | 5 ++- src/Scanner.cpp | 77 +++++++++++++++++++++++------------------------ src/Scanner.frame | 23 ++++++-------- 3 files changed, 49 insertions(+), 56 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 0de8c46..af748c3 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -752,12 +752,11 @@ void DFA::WriteState(const State *state) { fwprintf(gen, _SC("t->kind = %d /* %") _SFMT _SC(" */; "), endOf->n, endOf->name); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); loopState = false; break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); break;}\n"); } else { - fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); break;}\n"); } } else { - fputws(_SC("loopState = false;"), gen); if(endOf->semPos && endOf->typ == Node::t) { fputws(_SC(" {"), gen); CopySourcePart(endOf->semPos, 0); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index ec52161..9b752d2 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -697,34 +697,32 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - bool loopState = true; - while(loopState) { - switch (state) { - case -1: { t->kind = eofSym; loopState = false; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; loopState = false; break; - } // NextCh already done + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } 
// NextCh already done case 1: case_1: recEnd = pos; recKind = 1 /* ident */; if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} - else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); loopState = false; break;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} case 2: case_2: recEnd = pos; recKind = 2 /* number */; if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_2;} - else {t->kind = 2 /* number */; loopState = false; break;} + else {t->kind = 2 /* number */; break;} case 3: case_3: - {t->kind = 3 /* string */; loopState = false; break;} + {t->kind = 3 /* string */; break;} case 4: case_4: - {t->kind = 4 /* badString */; loopState = false; break;} + {t->kind = 4 /* badString */; break;} case 5: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('&')) || (ch >= _SC('(') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} @@ -744,17 +742,17 @@ Token* Scanner::NextToken() { else {goto case_0;} case 9: case_9: - {t->kind = 5 /* char */; loopState = false; break;} + {t->kind = 5 /* char */; break;} case 10: case_10: recEnd = pos; recKind = 43 /* ddtSym */; if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_10;} - else {t->kind = 43 /* ddtSym */; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; break;} case 11: case_11: recEnd = pos; recKind = 44 /* optionSym */; if ((ch >= _SC('-') && ch <= _SC('.')) || (ch >= _SC('0') && ch <= _SC(':')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_11;} - else {t->kind = 44 /* optionSym */; loopState = false; break;} + else {t->kind = 44 /* optionSym */; break;} case 12: case_12: if (ch 
<= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_12;} @@ -766,7 +764,7 @@ Token* Scanner::NextToken() { recEnd = pos; recKind = 43 /* ddtSym */; if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} - else {t->kind = 43 /* ddtSym */; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; break;} case 14: case_14: if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_12;} @@ -777,58 +775,57 @@ Token* Scanner::NextToken() { if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} else if (ch == _SC('=')) {AddCh(); goto case_11;} - else {t->kind = 43 /* ddtSym */; loopState = false; break;} + else {t->kind = 43 /* ddtSym */; break;} case 16: - {t->kind = 18 /* "=" */; loopState = false; break;} + {t->kind = 18 /* "=" */; break;} case 17: - {t->kind = 21 /* "+" */; loopState = false; break;} + {t->kind = 21 /* "+" */; break;} case 18: - {t->kind = 22 /* "-" */; loopState = false; break;} + {t->kind = 22 /* "-" */; break;} case 19: case_19: - {t->kind = 23 /* ".." */; loopState = false; break;} + {t->kind = 23 /* ".." */; break;} case 20: - {t->kind = 26 /* ">" */; loopState = false; break;} + {t->kind = 26 /* ">" */; break;} case 21: case_21: - {t->kind = 27 /* "<." */; loopState = false; break;} + {t->kind = 27 /* "<." 
*/; break;} case 22: case_22: - {t->kind = 28 /* ".>" */; loopState = false; break;} + {t->kind = 28 /* ".>" */; break;} case 23: - {t->kind = 29 /* "|" */; loopState = false; break;} + {t->kind = 29 /* "|" */; break;} case 24: - {t->kind = 32 /* ")" */; loopState = false; break;} + {t->kind = 32 /* ")" */; break;} case 25: - {t->kind = 33 /* "[" */; loopState = false; break;} + {t->kind = 33 /* "[" */; break;} case 26: - {t->kind = 34 /* "]" */; loopState = false; break;} + {t->kind = 34 /* "]" */; break;} case 27: - {t->kind = 35 /* "{" */; loopState = false; break;} + {t->kind = 35 /* "{" */; break;} case 28: - {t->kind = 36 /* "}" */; loopState = false; break;} + {t->kind = 36 /* "}" */; break;} case 29: case_29: - {t->kind = 40 /* "(." */; loopState = false; break;} + {t->kind = 40 /* "(." */; break;} case 30: case_30: - {t->kind = 41 /* ".)" */; loopState = false; break;} + {t->kind = 41 /* ".)" */; break;} case 31: recEnd = pos; recKind = 19 /* "." */; if (ch == _SC('.')) {AddCh(); goto case_19;} else if (ch == _SC('>')) {AddCh(); goto case_22;} else if (ch == _SC(')')) {AddCh(); goto case_30;} - else {t->kind = 19 /* "." */; loopState = false; break;} + else {t->kind = 19 /* "." 
*/; break;} case 32: recEnd = pos; recKind = 25 /* "<" */; if (ch == _SC('.')) {AddCh(); goto case_21;} - else {t->kind = 25 /* "<" */; loopState = false; break;} + else {t->kind = 25 /* "<" */; break;} case 33: recEnd = pos; recKind = 31 /* "(" */; if (ch == _SC('.')) {AddCh(); goto case_29;} - else {t->kind = 31 /* "(" */; loopState = false; break;} + else {t->kind = 31 /* "(" */; break;} - } } AppendVal(t); return t; diff --git a/src/Scanner.frame b/src/Scanner.frame index c1d2543..519d72e 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -1020,20 +1020,17 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - bool loopState = true; - while(loopState) { - switch (state) { - case -1: { t->kind = eofSym; loopState = false; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; loopState = false; break; - } // NextCh already done + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done -->scan3 - } } AppendVal(t); return t; From 25ec5368eab4b26892e2fde7850d0e8f24b4fbd2 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 16:26:39 +0200 Subject: [PATCH 65/95] Add 'ANY' when generating RREBNF --- src/ParserGen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 9471868..4777c57 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -445,6 +445,7 @@ int ParserGen::GenCodeRREBNF (const Node *p) { break; } case Node::any: { + fputws(_SC("ANY "), gen); break; } case Node::eps: break; // nothing From 60beabb2100f3eb6fca029fb377408a141a5b240 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 08:59:11 +0200 Subject: [PATCH 66/95] Reorganize the code removing duplication --- src/ParserGen.cpp | 5 
+---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 4777c57..4e4ef3a 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -194,16 +194,13 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) { fputws(_SC("Get();\n"), gen); - //copy and pasted bellow - fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } else { fputws(_SC("Expect("), gen); WriteSymbolOrCode(gen, p->sym); fputws(_SC(");\n"), gen); - //copy and pasted from above - fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } + fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); } if (p->typ == Node::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); From f16cbd178a05893d0d119a4a5cf6470b3e614914 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 08:59:26 +0200 Subject: [PATCH 67/95] Remove unused include --- src/Taste/Taste.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Taste/Taste.cpp b/src/Taste/Taste.cpp index 5686bb5..ff36370 100644 --- a/src/Taste/Taste.cpp +++ b/src/Taste/Taste.cpp @@ -4,7 +4,6 @@ #include "Scanner.h" #include "Parser.h" -#include using namespace Taste; From d0283150d368dc53873c0522d4e8178434041014 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 11:07:02 +0200 Subject: [PATCH 68/95] Finally the last known memory leak is fixed --- src/DFA.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index af748c3..963727e 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -362,7 +362,8 @@ void DFA::MeltStates(State *state) { do {changed = MakeUnique(s);} while (changed); melt = NewMelted(targets, s); } - else delete targets; + else delete targets; + delete action->target->next; action->target->next = NULL; action->target->state = melt->state; } From 
8e9f19f54063fe5618cf5f8f8790dde42981a5de Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 12:09:32 +0200 Subject: [PATCH 69/95] Fix narrow signed char conversion when 'wcahr_t' == 'char' --- src/Tab.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index ba72e2c..1b5e694 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -424,7 +424,7 @@ CharSet* Tab::CharClassSet(int i) { //----------- character class printing -wchar_t* TabCh(const wchar_t ch, wchar_t_10 &format) { +wchar_t* TabCh(const int ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) { coco_swprintf(format, wchar_t_10_sz, _SC("%d"), ch); return format; From 010462ed6682e8320675c23190ee0262457410fe Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 12:21:15 +0200 Subject: [PATCH 70/95] Add the TestSuite --- src/TestSuite/TestAlts.ATG | 21 + src/TestSuite/TestAlts_Output.txt | 5 + src/TestSuite/TestAlts_Parser.cpp | 460 +++++++++++ src/TestSuite/TestAlts_Scanner.cpp | 684 ++++++++++++++++ src/TestSuite/TestAlts_Trace.txt | 78 ++ src/TestSuite/TestAlts_output.txt | 5 + src/TestSuite/TestAny.ATG | 26 + src/TestSuite/TestAny1.ATG | 26 + src/TestSuite/TestAny1_Output.txt | 9 + src/TestSuite/TestAny1_Parser.cpp | 481 +++++++++++ src/TestSuite/TestAny1_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestAny1_Trace.txt | 117 +++ src/TestSuite/TestAny1_output.txt | 9 + src/TestSuite/TestAny_Output.txt | 5 + src/TestSuite/TestAny_Parser.cpp | 518 ++++++++++++ src/TestSuite/TestAny_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestAny_Trace.txt | 120 +++ src/TestSuite/TestAny_output.txt | 5 + src/TestSuite/TestCasing.ATG | 23 + src/TestSuite/TestCasing_Output.txt | 5 + src/TestSuite/TestCasing_Parser.cpp | 456 +++++++++++ src/TestSuite/TestCasing_Scanner.cpp | 695 ++++++++++++++++ src/TestSuite/TestCasing_Trace.txt | 63 ++ src/TestSuite/TestChars.ATG | 25 + src/TestSuite/TestChars_Output.txt | 5 + 
src/TestSuite/TestChars_Parser.cpp | 412 ++++++++++ src/TestSuite/TestChars_Scanner.cpp | 674 ++++++++++++++++ src/TestSuite/TestChars_Trace.txt | 44 ++ src/TestSuite/TestChars_output.txt | 5 + src/TestSuite/TestCircular.ATG | 26 + src/TestSuite/TestCircular_Output.txt | 5 + src/TestSuite/TestCircular_Parser.cpp | 298 +++++++ src/TestSuite/TestCircular_Scanner.cpp | 629 +++++++++++++++ src/TestSuite/TestCircular_Trace.txt | 86 ++ src/TestSuite/TestComments.ATG | 22 + src/TestSuite/TestComments_Output.txt | 5 + src/TestSuite/TestComments_Parser.cpp | 412 ++++++++++ src/TestSuite/TestComments_Scanner.cpp | 742 +++++++++++++++++ src/TestSuite/TestComments_Trace.txt | 41 + src/TestSuite/TestComplete.ATG | 25 + src/TestSuite/TestComplete_Output.txt | 5 + src/TestSuite/TestComplete_Parser.cpp | 298 +++++++ src/TestSuite/TestComplete_Scanner.cpp | 629 +++++++++++++++ src/TestSuite/TestComplete_Trace.txt | 78 ++ src/TestSuite/TestDel.ATG | 26 + src/TestSuite/TestDel_Output.txt | 9 + src/TestSuite/TestDel_Parser.cpp | 517 ++++++++++++ src/TestSuite/TestDel_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestDel_Trace.txt | 117 +++ src/TestSuite/TestEps.ATG | 20 + src/TestSuite/TestEps_Output.txt | 5 + src/TestSuite/TestEps_Parser.cpp | 445 +++++++++++ src/TestSuite/TestEps_Scanner.cpp | 684 ++++++++++++++++ src/TestSuite/TestEps_Trace.txt | 72 ++ src/TestSuite/TestEps_output.txt | 5 + src/TestSuite/TestIters.ATG | 22 + src/TestSuite/TestIters_Output.txt | 5 + src/TestSuite/TestIters_Parser.cpp | 468 +++++++++++ src/TestSuite/TestIters_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestIters_Trace.txt | 88 +++ src/TestSuite/TestIters_output.txt | 5 + src/TestSuite/TestLL1.ATG | 41 + src/TestSuite/TestLL1_Output.txt | 41 + src/TestSuite/TestLL1_Parser.cpp | 679 ++++++++++++++++ src/TestSuite/TestLL1_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestLL1_Trace.txt | 195 +++++ src/TestSuite/TestOpts.ATG | 20 + src/TestSuite/TestOpts1.ATG | 19 + 
src/TestSuite/TestOpts1_Output.txt | 7 + src/TestSuite/TestOpts1_Parser.cpp | 421 ++++++++++ src/TestSuite/TestOpts1_Scanner.cpp | 684 ++++++++++++++++ src/TestSuite/TestOpts1_Trace.txt | 61 ++ src/TestSuite/TestOpts1_output.txt | 7 + src/TestSuite/TestOpts_Output.txt | 8 + src/TestSuite/TestOpts_Parser.cpp | 473 +++++++++++ src/TestSuite/TestOpts_Scanner.cpp | 684 ++++++++++++++++ src/TestSuite/TestOpts_Trace.txt | 82 ++ src/TestSuite/TestOpts_output.txt | 8 + src/TestSuite/TestReached.ATG | 26 + src/TestSuite/TestReached_Output.txt | 5 + src/TestSuite/TestReached_Parser.cpp | 298 +++++++ src/TestSuite/TestReached_Scanner.cpp | 629 +++++++++++++++ src/TestSuite/TestReached_Trace.txt | 85 ++ src/TestSuite/TestResIllegal.ATG | 27 + src/TestSuite/TestResIllegal_Output.txt | 15 + src/TestSuite/TestResIllegal_Parser.cpp | 298 +++++++ src/TestSuite/TestResIllegal_Scanner.cpp | 617 +++++++++++++++ src/TestSuite/TestResIllegal_Trace.txt | 130 +++ src/TestSuite/TestResOK.ATG | 55 ++ src/TestSuite/TestResOK_Output.txt | 7 + src/TestSuite/TestResOK_Parser.cpp | 722 +++++++++++++++++ src/TestSuite/TestResOK_Scanner.cpp | 678 ++++++++++++++++ src/TestSuite/TestResOK_Trace.txt | 213 +++++ src/TestSuite/TestSem.ATG | 34 + src/TestSuite/TestSem_Output.txt | 6 + src/TestSuite/TestSem_Parser.cpp | 498 ++++++++++++ src/TestSuite/TestSem_Scanner.cpp | 700 ++++++++++++++++ src/TestSuite/TestSem_Trace.txt | 115 +++ src/TestSuite/TestSem_output.txt | 6 + src/TestSuite/TestSync.ATG | 23 + src/TestSuite/TestSync_Output.txt | 5 + src/TestSuite/TestSync_Parser.cpp | 466 +++++++++++ src/TestSuite/TestSync_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestSync_Trace.txt | 91 +++ src/TestSuite/TestSync_output.txt | 5 + src/TestSuite/TestTerminalizable.ATG | 26 + src/TestSuite/TestTerminalizable_Output.txt | 8 + src/TestSuite/TestTerminalizable_Parser.cpp | 298 +++++++ src/TestSuite/TestTerminalizable_Scanner.cpp | 629 +++++++++++++++ src/TestSuite/TestTerminalizable_Trace.txt | 80 ++ 
src/TestSuite/TestTokens.ATG | 28 + src/TestSuite/TestTokens1.ATG | 28 + src/TestSuite/TestTokens1_Output.txt | 9 + src/TestSuite/TestTokens1_Parser.cpp | 259 ++++++ src/TestSuite/TestTokens1_Scanner.cpp | 724 +++++++++++++++++ src/TestSuite/TestTokens1_Trace.txt | 36 + src/TestSuite/TestTokens_Output.txt | 5 + src/TestSuite/TestTokens_Parser.cpp | 454 +++++++++++ src/TestSuite/TestTokens_Scanner.cpp | 788 +++++++++++++++++++ src/TestSuite/TestTokens_Trace.txt | 107 +++ src/TestSuite/TestTokens_output.txt | 5 + src/TestSuite/TestWeak.ATG | 25 + src/TestSuite/TestWeak_Output.txt | 5 + src/TestSuite/TestWeak_Parser.cpp | 483 ++++++++++++ src/TestSuite/TestWeak_Scanner.cpp | 693 ++++++++++++++++ src/TestSuite/TestWeak_Trace.txt | 100 +++ src/TestSuite/TestWeak_output.txt | 5 + src/TestSuite/check.bat | 5 + src/TestSuite/check.sh | 25 + src/TestSuite/checkall.bat | 24 + src/TestSuite/checkall.sh | 24 + src/TestSuite/checkerr.bat | 3 + src/TestSuite/checkerr.sh | 13 + src/TestSuite/compile.bat | 4 + src/TestSuite/compile.sh | 5 + src/TestSuite/compileall.bat | 24 + src/TestSuite/compileall.sh | 24 + src/TestSuite/readme.txt | 36 + src/TestSuite/zipall.bat | 1 + src/TestSuite/zipall.sh | 1 + 140 files changed, 29091 insertions(+) create mode 100644 src/TestSuite/TestAlts.ATG create mode 100644 src/TestSuite/TestAlts_Output.txt create mode 100644 src/TestSuite/TestAlts_Parser.cpp create mode 100644 src/TestSuite/TestAlts_Scanner.cpp create mode 100644 src/TestSuite/TestAlts_Trace.txt create mode 100644 src/TestSuite/TestAlts_output.txt create mode 100644 src/TestSuite/TestAny.ATG create mode 100644 src/TestSuite/TestAny1.ATG create mode 100644 src/TestSuite/TestAny1_Output.txt create mode 100644 src/TestSuite/TestAny1_Parser.cpp create mode 100644 src/TestSuite/TestAny1_Scanner.cpp create mode 100644 src/TestSuite/TestAny1_Trace.txt create mode 100644 src/TestSuite/TestAny1_output.txt create mode 100644 src/TestSuite/TestAny_Output.txt create mode 100644 
src/TestSuite/TestAny_Parser.cpp create mode 100644 src/TestSuite/TestAny_Scanner.cpp create mode 100644 src/TestSuite/TestAny_Trace.txt create mode 100644 src/TestSuite/TestAny_output.txt create mode 100644 src/TestSuite/TestCasing.ATG create mode 100644 src/TestSuite/TestCasing_Output.txt create mode 100644 src/TestSuite/TestCasing_Parser.cpp create mode 100644 src/TestSuite/TestCasing_Scanner.cpp create mode 100644 src/TestSuite/TestCasing_Trace.txt create mode 100644 src/TestSuite/TestChars.ATG create mode 100644 src/TestSuite/TestChars_Output.txt create mode 100644 src/TestSuite/TestChars_Parser.cpp create mode 100644 src/TestSuite/TestChars_Scanner.cpp create mode 100644 src/TestSuite/TestChars_Trace.txt create mode 100644 src/TestSuite/TestChars_output.txt create mode 100644 src/TestSuite/TestCircular.ATG create mode 100644 src/TestSuite/TestCircular_Output.txt create mode 100644 src/TestSuite/TestCircular_Parser.cpp create mode 100644 src/TestSuite/TestCircular_Scanner.cpp create mode 100644 src/TestSuite/TestCircular_Trace.txt create mode 100644 src/TestSuite/TestComments.ATG create mode 100644 src/TestSuite/TestComments_Output.txt create mode 100644 src/TestSuite/TestComments_Parser.cpp create mode 100644 src/TestSuite/TestComments_Scanner.cpp create mode 100644 src/TestSuite/TestComments_Trace.txt create mode 100644 src/TestSuite/TestComplete.ATG create mode 100644 src/TestSuite/TestComplete_Output.txt create mode 100644 src/TestSuite/TestComplete_Parser.cpp create mode 100644 src/TestSuite/TestComplete_Scanner.cpp create mode 100644 src/TestSuite/TestComplete_Trace.txt create mode 100644 src/TestSuite/TestDel.ATG create mode 100644 src/TestSuite/TestDel_Output.txt create mode 100644 src/TestSuite/TestDel_Parser.cpp create mode 100644 src/TestSuite/TestDel_Scanner.cpp create mode 100644 src/TestSuite/TestDel_Trace.txt create mode 100644 src/TestSuite/TestEps.ATG create mode 100644 src/TestSuite/TestEps_Output.txt create mode 100644 
src/TestSuite/TestEps_Parser.cpp create mode 100644 src/TestSuite/TestEps_Scanner.cpp create mode 100644 src/TestSuite/TestEps_Trace.txt create mode 100644 src/TestSuite/TestEps_output.txt create mode 100644 src/TestSuite/TestIters.ATG create mode 100644 src/TestSuite/TestIters_Output.txt create mode 100644 src/TestSuite/TestIters_Parser.cpp create mode 100644 src/TestSuite/TestIters_Scanner.cpp create mode 100644 src/TestSuite/TestIters_Trace.txt create mode 100644 src/TestSuite/TestIters_output.txt create mode 100644 src/TestSuite/TestLL1.ATG create mode 100644 src/TestSuite/TestLL1_Output.txt create mode 100644 src/TestSuite/TestLL1_Parser.cpp create mode 100644 src/TestSuite/TestLL1_Scanner.cpp create mode 100644 src/TestSuite/TestLL1_Trace.txt create mode 100644 src/TestSuite/TestOpts.ATG create mode 100644 src/TestSuite/TestOpts1.ATG create mode 100644 src/TestSuite/TestOpts1_Output.txt create mode 100644 src/TestSuite/TestOpts1_Parser.cpp create mode 100644 src/TestSuite/TestOpts1_Scanner.cpp create mode 100644 src/TestSuite/TestOpts1_Trace.txt create mode 100644 src/TestSuite/TestOpts1_output.txt create mode 100644 src/TestSuite/TestOpts_Output.txt create mode 100644 src/TestSuite/TestOpts_Parser.cpp create mode 100644 src/TestSuite/TestOpts_Scanner.cpp create mode 100644 src/TestSuite/TestOpts_Trace.txt create mode 100644 src/TestSuite/TestOpts_output.txt create mode 100644 src/TestSuite/TestReached.ATG create mode 100644 src/TestSuite/TestReached_Output.txt create mode 100644 src/TestSuite/TestReached_Parser.cpp create mode 100644 src/TestSuite/TestReached_Scanner.cpp create mode 100644 src/TestSuite/TestReached_Trace.txt create mode 100644 src/TestSuite/TestResIllegal.ATG create mode 100644 src/TestSuite/TestResIllegal_Output.txt create mode 100644 src/TestSuite/TestResIllegal_Parser.cpp create mode 100644 src/TestSuite/TestResIllegal_Scanner.cpp create mode 100644 src/TestSuite/TestResIllegal_Trace.txt create mode 100644 src/TestSuite/TestResOK.ATG 
create mode 100644 src/TestSuite/TestResOK_Output.txt create mode 100644 src/TestSuite/TestResOK_Parser.cpp create mode 100644 src/TestSuite/TestResOK_Scanner.cpp create mode 100644 src/TestSuite/TestResOK_Trace.txt create mode 100644 src/TestSuite/TestSem.ATG create mode 100644 src/TestSuite/TestSem_Output.txt create mode 100644 src/TestSuite/TestSem_Parser.cpp create mode 100644 src/TestSuite/TestSem_Scanner.cpp create mode 100644 src/TestSuite/TestSem_Trace.txt create mode 100644 src/TestSuite/TestSem_output.txt create mode 100644 src/TestSuite/TestSync.ATG create mode 100644 src/TestSuite/TestSync_Output.txt create mode 100644 src/TestSuite/TestSync_Parser.cpp create mode 100644 src/TestSuite/TestSync_Scanner.cpp create mode 100644 src/TestSuite/TestSync_Trace.txt create mode 100644 src/TestSuite/TestSync_output.txt create mode 100644 src/TestSuite/TestTerminalizable.ATG create mode 100644 src/TestSuite/TestTerminalizable_Output.txt create mode 100644 src/TestSuite/TestTerminalizable_Parser.cpp create mode 100644 src/TestSuite/TestTerminalizable_Scanner.cpp create mode 100644 src/TestSuite/TestTerminalizable_Trace.txt create mode 100644 src/TestSuite/TestTokens.ATG create mode 100644 src/TestSuite/TestTokens1.ATG create mode 100644 src/TestSuite/TestTokens1_Output.txt create mode 100644 src/TestSuite/TestTokens1_Parser.cpp create mode 100644 src/TestSuite/TestTokens1_Scanner.cpp create mode 100644 src/TestSuite/TestTokens1_Trace.txt create mode 100644 src/TestSuite/TestTokens_Output.txt create mode 100644 src/TestSuite/TestTokens_Parser.cpp create mode 100644 src/TestSuite/TestTokens_Scanner.cpp create mode 100644 src/TestSuite/TestTokens_Trace.txt create mode 100644 src/TestSuite/TestTokens_output.txt create mode 100644 src/TestSuite/TestWeak.ATG create mode 100644 src/TestSuite/TestWeak_Output.txt create mode 100644 src/TestSuite/TestWeak_Parser.cpp create mode 100644 src/TestSuite/TestWeak_Scanner.cpp create mode 100644 src/TestSuite/TestWeak_Trace.txt 
create mode 100644 src/TestSuite/TestWeak_output.txt create mode 100644 src/TestSuite/check.bat create mode 100755 src/TestSuite/check.sh create mode 100644 src/TestSuite/checkall.bat create mode 100755 src/TestSuite/checkall.sh create mode 100644 src/TestSuite/checkerr.bat create mode 100755 src/TestSuite/checkerr.sh create mode 100644 src/TestSuite/compile.bat create mode 100644 src/TestSuite/compile.sh create mode 100644 src/TestSuite/compileall.bat create mode 100644 src/TestSuite/compileall.sh create mode 100644 src/TestSuite/readme.txt create mode 100644 src/TestSuite/zipall.bat create mode 100644 src/TestSuite/zipall.sh diff --git a/src/TestSuite/TestAlts.ATG b/src/TestSuite/TestAlts.ATG new file mode 100644 index 0000000..1b7f47c --- /dev/null +++ b/src/TestSuite/TestAlts.ATG @@ -0,0 +1,21 @@ +$01246 +/*------------------------------------------------------------------------- +Test of alternatives in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = a A b. +A = (a | (b|c|) | d) e. + + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestAlts_Output.txt b/src/TestSuite/TestAlts_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestAlts_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp new file mode 100644 index 0000000..0e887ae --- /dev/null +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -0,0 +1,460 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + A(); + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b || la->kind == _c || la->kind == _e) { + if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _c) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + } + } else if (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = 
_SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid A"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestAlts_Scanner.cpp b/src/TestSuite/TestAlts_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestAlts_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAlts_Trace.txt b/src/TestSuite/TestAlts_Trace.txt new file mode 100644 index 0000000..ff9d720 --- /dev/null +++ b/src/TestSuite/TestAlts_Trace.txt @@ -0,0 +1,78 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 2 17 + 2 nt A 3 17 + 3 t b 0 17 + 4 t a -15 18 + 5 t b -15 18 + 6 t c -15 18 + 7 alt -15 8 5 18 + 8 alt -15 10 6 18 + 9 eps -15 0 + 10 alt -15 0 9 0 + 11 alt 15 12 4 18 + 12 alt -15 14 7 18 + 13 t d -15 18 + 14 alt -15 0 13 18 + 15 t e 0 18 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a b c d e +follow: b + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t 
false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 1 false 17 fixedToken + 1 A nt false 11 false 18 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestAlts_output.txt b/src/TestSuite/TestAlts_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestAlts_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny.ATG b/src/TestSuite/TestAny.ATG new file mode 100644 index 0000000..a88f39f --- /dev/null +++ b/src/TestSuite/TestAny.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of ANY symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C D. +A = {a | ANY | b} c. +B = {ANY d} e. +C = [ANY f] g. +D = {ANY} h | i. + +END Test. diff --git a/src/TestSuite/TestAny1.ATG b/src/TestSuite/TestAny1.ATG new file mode 100644 index 0000000..43e3379 --- /dev/null +++ b/src/TestSuite/TestAny1.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of ANY symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C D. +A = (ANY |) ANY. +B = ANY | ANY. +C = {ANY} ANY. +D = [ANY] ANY. + +END Test. 
diff --git a/src/TestSuite/TestAny1_Output.txt b/src/TestSuite/TestAny1_Output.txt new file mode 100644 index 0000000..acd511c --- /dev/null +++ b/src/TestSuite/TestAny1_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +checking + LL1 warning in A:21:0: an ANY node that matches no symbol + LL1 warning in B:22:0: an ANY node that matches no symbol + LL1 warning in C:23:0: an ANY node that matches no symbol + LL1 warning in D:24:0: an ANY node that matches no symbol +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp new file mode 100644 index 0000000..c907bc3 --- /dev/null +++ b/src/TestSuite/TestAny1_Parser.cpp @@ -0,0 +1,481 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. 
+ +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { 
+#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + B(); + C(); + D(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (false) { + SynErr(11); // ANY node that matches no symbol + } else if (StartOf(1 /* eps */)) { + } else SynErr(12); + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + if (false) { + SynErr(13); // ANY node that matches no symbol + } else if (StartOf(1 /* any */)) { + Get(); + } else SynErr(14); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + while (false) { + SynErr(15); // ANY node that matches no symbol + } + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (false) { + SynErr(16); // ANY node that matches no symbol + } + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[2][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {x,T,T,T, T,T,T,T, T,T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = 
_SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid A"); break; + case 13: s = _SC("invalid B"); break; + case 14: s = _SC("invalid B"); break; + case 15: s = _SC("invalid C"); break; + case 16: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestAny1_Scanner.cpp b/src/TestSuite/TestAny1_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestAny1_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAny1_Trace.txt b/src/TestSuite/TestAny1_Trace.txt new file mode 100644 index 0000000..139ea8d --- /dev/null +++ b/src/TestSuite/TestAny1_Trace.txt @@ -0,0 +1,117 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 any -9 0 + 6 eps -9 0 + 7 alt 9 8 5 0 + 8 alt -9 0 6 0 + 9 any 0 0 + 10 any 0 0 + 11 any 0 0 + 12 alt 0 13 10 0 + 13 alt 0 0 11 0 + 14 any -15 0 + 15 iter 16 0 14 0 + 16 any 0 0 + 17 any -19 0 + 18 opt 19 0 17 0 + 19 any 0 0 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g h i ??? +follow: EOF + +A +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +B +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? 
+ +C +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +D +first: a b c d e f g h i ??? +follow: EOF + + +ANY and SYNC sets: +----------------- + 5 any -- empty set -- + 9 any a b c d e f g h i ??? + 10 any -- empty set -- + 11 any a b c d e f g h i ??? + 14 any -- empty set -- + 16 any a b c d e f g h i ??? + 17 any -- empty set -- + 19 any a b c d e f g h i ??? + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 7 false 21 fixedToken + 2 B nt false 12 false 22 fixedToken + 3 C nt false 15 false 23 fixedToken + 4 D nt false 18 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestAny1_output.txt b/src/TestSuite/TestAny1_output.txt new file mode 100644 index 0000000..55fd063 --- /dev/null +++ b/src/TestSuite/TestAny1_output.txt @@ -0,0 +1,9 @@ +Coco/R (Sep 6, 2007) +checking + LL1 warning in A: an ANY node that matches no symbol + LL1 warning in B: an ANY node that matches no symbol + LL1 warning in C: an ANY node that matches no symbol + LL1 warning in D: an ANY node that matches no symbol +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny_Output.txt b/src/TestSuite/TestAny_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestAny_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp new file mode 100644 index 0000000..22a1bca --- /dev/null +++ b/src/TestSuite/TestAny_Parser.cpp @@ -0,0 +1,518 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) 
Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + B(); + C(); + D(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + while (StartOf(1 /* alt */)) { + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (StartOf(2 /* any */)) { + Get(); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (StartOf(3 /* any */)) { + Get(); + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (StartOf(4 /* any */)) { + Get(); + Expect(_f); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D() { +#ifdef PARSER_WITH_AST + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (StartOf(5 /* iter */)) { + while (StartOf(6 /* any */)) { + Get(); + } + Expect(_h); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _i) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[7][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {x,T,T,x, T,T,T,T, T,T,T,x}, + {x,x,x,x, T,T,T,T, T,T,T,x}, + {x,T,T,T, T,x,T,T, T,T,T,x}, + {x,T,T,T, T,T,T,x, T,T,T,x}, + {x,T,T,T, T,T,T,T, T,x,T,x}, + {x,T,T,T, T,T,T,T, x,x,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t 
format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestAny_Scanner.cpp b/src/TestSuite/TestAny_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestAny_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAny_Trace.txt b/src/TestSuite/TestAny_Trace.txt new file mode 100644 index 0000000..47d6015 --- /dev/null +++ b/src/TestSuite/TestAny_Trace.txt @@ -0,0 +1,120 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 t a -11 21 + 6 any -11 0 + 7 alt -11 8 5 21 + 8 alt -11 10 6 0 + 9 t b -11 21 + 10 alt -11 0 9 21 + 11 iter 12 0 7 0 + 12 t c 0 21 + 13 any 14 0 + 14 t d -15 22 + 15 iter 16 0 13 0 + 16 t e 0 22 + 17 any 18 0 + 18 t f -20 23 + 19 opt 20 0 17 0 + 20 t g 0 23 + 21 any -22 0 + 22 iter 23 0 21 0 + 23 t h 0 24 + 24 t i 0 24 + 25 alt 0 26 22 0 + 26 alt 0 0 24 24 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g h i ??? 
+follow: EOF + +A +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +B +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +C +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +D +first: a b c d e f g h i ??? +follow: EOF + + +ANY and SYNC sets: +----------------- + 6 any d e f g h i ??? + 13 any a b c d f g h i ??? + 17 any a b c d e f h i ??? + 21 any a b c d e f g ??? + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 11 false 21 fixedToken + 2 B nt false 15 false 22 fixedToken + 3 C nt false 19 false 23 fixedToken + 4 D nt false 25 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestAny_output.txt b/src/TestSuite/TestAny_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestAny_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCasing.ATG b/src/TestSuite/TestCasing.ATG new file mode 100644 index 0000000..7d66196 --- /dev/null +++ b/src/TestSuite/TestCasing.ATG @@ -0,0 +1,23 @@ +$01246 +/*------------------------------------------------------------------------- +Test of case insensitive scanners. +----------------------------------------------------------------------------*/ +COMPILER Test +IGNORECASE + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + suffix = 'F'. + +TOKENS + ident = letter {letter}. + float1 = digit {digit} suffix. + float2 = digit {digit} 'E' digit {digit}. + A = "AAA". + +PRODUCTIONS + +Test = A "AAA" "AAa" "AaA" "Aaa" "aAA" "aAa" "aaA" "aaa" "BBb" "bbB". + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestCasing_Output.txt b/src/TestSuite/TestCasing_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestCasing_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp new file mode 100644 index 0000000..1d28add --- /dev/null +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -0,0 +1,456 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
+// If the user declared a method Init and a method Destroy they should
+// be called in the constructor and the destructor respectively.
+//
+// The following templates are used to recognize if the user declared
+// the methods Init and Destroy.
+// The following templates are used to call the Init and Destroy methods if they exist.
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 6; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][8] = { + {T,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("float1 expected"); break; + case 3: s = _SC("float2 expected"); break; + 
case 4: s = _SC("A expected"); break; + case 5: s = _SC("\"bbb\" expected"); break; + case 6: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestCasing_Scanner.cpp b/src/TestSuite/TestCasing_Scanner.cpp new file mode 100644 index 0000000..27dc416 --- /dev/null +++ b/src/TestSuite/TestCasing_Scanner.cpp @@ -0,0 +1,695 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
// Creates a new zero-terminated string holding `length` characters of
// `value`, starting at index `startIndex`.
// A NULL value or a non-positive length yields an empty string.
// The result is allocated with new[]; the caller releases it with delete [].
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
	int len = 0;
	if (value && length > 0) { len = length; }

	wchar_t* data = new wchar_t[len + 1];
	// Only touch value when there is something to copy: indexing a NULL
	// pointer is undefined even for a zero-length copy.
	if (len > 0) { wcsncpy(data, &(value[startIndex]), len); }
	data[len] = 0;

	return data;
}
// Tells whether the string `data` finishes with the string `end`.
// An `end` longer than `data` never matches.
bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
	const int textLen = wcslen(data);
	const int tailLen = wcslen(end);
	if (tailLen > textLen) { return false; }

	const wchar_t* tail = data + textLen - tailLen;
	return wcscmp(tail, end) == 0;
}
// Creates a new wide string from the narrow string `value`, widening each
// byte to one wchar_t. A NULL value yields an empty string.
// The result is allocated with new[]; the caller releases it with delete [].
wchar_t* coco_string_create(const char* value) {
	int len = 0;
	if (value) { len = strlen(value); }
	wchar_t* data = new wchar_t[len + 1];
	for (int i = 0; i < len; ++i) {
		// go through unsigned char so that bytes >= 0x80 are not
		// sign-extended where plain char is signed
		data[i] = (wchar_t) (unsigned char) value[i];
	}
	data[len] = 0;
	return data;
}
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 6; + noSym = 6; + int i; + for (i = 97; i <= 122; ++i) start.set(i, 1); + for (i = 48; i <= 57; ++i) start.set(i, 5); + start.set(Buffer::EoF, -1); + keywords.set(_SC("aaa"), 4); + keywords.set(_SC("bbb"), 5); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + valCh = ch; + if ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower() +} + +void 
Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = valCh; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // 
NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, true); break;} + case 2: + case_2: + {t->kind = 2 /* float1 */; break;} + case 3: + case_3: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else {goto case_0;} + case 4: + case_4: + recEnd = pos; recKind = 3 /* float2 */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else {t->kind = 3 /* float2 */; break;} + case 5: + case_5: + if (ch == _SC('f')) {AddCh(); goto case_2;} + else if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_5;} + else if (ch == _SC('e')) {AddCh(); goto case_3;} + else {goto case_0;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestCasing_Trace.txt b/src/TestSuite/TestCasing_Trace.txt new file mode 100644 index 0000000..4f9c2b7 --- /dev/null +++ b/src/TestSuite/TestCasing_Trace.txt @@ -0,0 +1,63 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t A 2 21 + 2 t A 3 21 + 3 t A 4 21 + 4 t A 5 21 + 5 t 
A 6 21 + 6 t A 7 21 + 7 t A 8 21 + 8 t A 9 21 + 9 t A 10 21 + 10 t "bbb" 11 21 + 11 t "bbb" 0 21 + + +First & follow symbols: +---------------------- + +Test +first: A +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 + digit 5 +E(ident ) 1: letter 1 +E(float1 ) 2: + 3: digit 4 +E(float2 ) 4: digit 4 + 5: suffix 2 + digit 5 + _SC('e') 3 + +---------- character classes ---------- +letter : 'a' .. 'z' +digit : '0' .. '9' +suffix : 'f' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 14 classLitToken + 2 float1 t false 15 classToken + 3 float2 t false 16 classToken + 4 A t false 17 litToken + 5 "bbb" t false 21 litToken + 6 ??? t false 0 fixedToken + 0 Test nt false 1 false 21 fixedToken + +Literal Tokens: +-------------- +_A = "aaa". + diff --git a/src/TestSuite/TestChars.ATG b/src/TestSuite/TestChars.ATG new file mode 100644 index 0000000..4d1634b --- /dev/null +++ b/src/TestSuite/TestChars.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test of CHARACTERS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + lower = "abcdefghijklmnopqrstuvwxyz". + upper = 'A' ..'Z'. + letter = lower + upper. + noquote = ANY - '\''. + idchar = letter + '0' .. '9'. + LF = '\n'. + +TOKENS + ident = letter {idchar}. + +IGNORE + '\r' + LF + '\u0009' + +PRODUCTIONS + +Test = ident. + +END Test. 
diff --git a/src/TestSuite/TestChars_Output.txt b/src/TestSuite/TestChars_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestChars_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp new file mode 100644 index 0000000..8bafa1e --- /dev/null +++ b/src/TestSuite/TestChars_Parser.cpp @@ -0,0 +1,412 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 2; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][4] = { + {T,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("??? 
expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestChars_Scanner.cpp b/src/TestSuite/TestChars_Scanner.cpp new file mode 100644 index 0000000..a2a6589 --- /dev/null +++ b/src/TestSuite/TestChars_Scanner.cpp @@ -0,0 +1,674 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 2; + noSym = 2; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 1); + for (i = 97; i <= 122; ++i) start.set(i, 1); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + (ch >= 9 && ch <= 10) || ch == 13 + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= 
_SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestChars_Trace.txt b/src/TestSuite/TestChars_Trace.txt new file mode 100644 index 0000000..c54d1ef --- /dev/null +++ b/src/TestSuite/TestChars_Trace.txt @@ -0,0 +1,44 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 23 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 +E(ident ) 1: idchar 1 + +---------- character classes ---------- +lower : 'a' .. 'z' +upper : 'A' .. 'Z' +letter : 'A' .. 'Z' 'a' .. 'z' +noquote : 0 .. '&' '(' .. 255 +idchar : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' +LF : 10 + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 16 classToken + 2 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 23 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestChars_output.txt b/src/TestSuite/TestChars_output.txt new file mode 100644 index 0000000..bca0a11 --- /dev/null +++ b/src/TestSuite/TestChars_output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCircular.ATG b/src/TestSuite/TestCircular.ATG new file mode 100644 index 0000000..2d768fd --- /dev/null +++ b/src/TestSuite/TestCircular.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals can be reached. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A. +A = B D | a. +B = [b] C { c C}. +C = D A [d]. +D = [e f]. + +END Test. 
diff --git a/src/TestSuite/TestCircular_Output.txt b/src/TestSuite/TestCircular_Output.txt new file mode 100644 index 0000000..170f918 --- /dev/null +++ b/src/TestSuite/TestCircular_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking + D deletable + A --> B B --> C C --> Atrace output is in trace.txt +3 errors detected diff --git a/src/TestSuite/TestCircular_Parser.cpp b/src/TestSuite/TestCircular_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestCircular_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) 
{ + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; + case 6: s = coco_string_create(L"f expected"); break; 
+ case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestCircular_Scanner.cpp b/src/TestSuite/TestCircular_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestCircular_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + 
if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && 
(wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); 
+ fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestCircular_Trace.txt b/src/TestSuite/TestCircular_Trace.txt new file mode 100644 index 0000000..a1d767d --- /dev/null +++ b/src/TestSuite/TestCircular_Trace.txt @@ -0,0 +1,86 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 0 20 + 2 nt B 3 21 + 3 nt D 0 21 + 4 t a 0 21 + 5 alt 0 6 2 21 + 6 alt 0 0 4 21 + 7 t b -9 22 + 8 opt 9 0 7 0 + 9 nt C 12 22 + 10 t c 11 22 + 11 nt C -12 22 + 12 iter 0 0 10 0 + 13 nt D 14 23 + 14 nt A 16 23 + 15 t d 0 23 + 16 opt 0 0 15 0 + 17 t e 18 24 + 18 t f 0 24 + 19 opt 0 0 17 0 + + +First & follow symbols: +---------------------- + +Test +first: a b e +follow: EOF + +A +first: a b e +follow: EOF c d e + +B +first: a b e +follow: EOF c d e + +D +first: e +follow: EOF a b c d e + +C +first: a b e +follow: EOF c d e + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 
c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 5 false 21 fixedToken + 2 B nt false 8 false 22 fixedToken + 3 D nt false 19 true 24 fixedToken + 4 C nt false 13 false 23 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestComments.ATG b/src/TestSuite/TestComments.ATG new file mode 100644 index 0000000..83a0440 --- /dev/null +++ b/src/TestSuite/TestComments.ATG @@ -0,0 +1,22 @@ +$01246 +/*------------------------------------------------------------------------- +Test of COMMENTS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + +TOKENS + ident = letter {letter | digit}. + +COMMENTS FROM "//" TO "\r\n" +COMMENTS FROM "/*" TO "*/" +COMMENTS FROM "(*" TO "*)" NESTED + +PRODUCTIONS + +Test = ident. + +END Test. diff --git a/src/TestSuite/TestComments_Output.txt b/src/TestSuite/TestComments_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestComments_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp new file mode 100644 index 0000000..8bafa1e --- /dev/null +++ b/src/TestSuite/TestComments_Parser.cpp @@ -0,0 +1,412 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 2; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][4] = { + {T,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("??? 
expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestComments_Scanner.cpp b/src/TestSuite/TestComments_Scanner.cpp new file mode 100644 index 0000000..2664361 --- /dev/null +++ b/src/TestSuite/TestComments_Scanner.cpp @@ -0,0 +1,742 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 2; + noSym = 2; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 1); + for (i = 97; i <= 122; ++i) start.set(i, 1); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + +bool Scanner::Comment0() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('*')) { + NextCh(); + for(;;) { + if (ch == _SC('*')) { + NextCh(); + if (ch == _SC(')')) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == _SC('(')) { + NextCh(); + if (ch == _SC('*')) { + level++; NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + +bool Scanner::Comment1() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('*')) { + NextCh(); + for(;;) { + if (ch == _SC('*')) { + NextCh(); + if (ch == _SC('/')) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + +bool Scanner::Comment2() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('/')) { + NextCh(); + for(;;) { + if (ch == 13) { + NextCh(); + if (ch == 10) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte 
heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + if ((ch == _SC('(') && Comment0()) || (ch == _SC('/') && Comment1()) || (ch == _SC('/') && Comment2())) continue; + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; 
i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestComments_Trace.txt b/src/TestSuite/TestComments_Trace.txt new file mode 100644 index 0000000..d7a3f65 --- /dev/null +++ b/src/TestSuite/TestComments_Trace.txt @@ -0,0 +1,41 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 20 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 +E(ident ) 1: #A 1 + +---------- character classes ---------- +letter : 'A' .. 'Z' 'a' .. 'z' +digit : '0' .. '9' +#A : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 12 classToken + 2 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestComplete.ATG b/src/TestSuite/TestComplete.ATG new file mode 100644 index 0000000..e608af1 --- /dev/null +++ b/src/TestSuite/TestComplete.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals have productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. 
+ b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A C D. +A = a B a. +C = c D c. +D = A d. + +END Test. diff --git a/src/TestSuite/TestComplete_Output.txt b/src/TestSuite/TestComplete_Output.txt new file mode 100644 index 0000000..92d7998 --- /dev/null +++ b/src/TestSuite/TestComplete_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking + No production for B +trace output is in trace.txt +1 errors detected diff --git a/src/TestSuite/TestComplete_Parser.cpp b/src/TestSuite/TestComplete_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestComplete_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + 
+void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing 
templates are used to call the Init and Destroy methods if they exist. + +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e 
expected"); break; + case 6: s = coco_string_create(L"f expected"); break; + case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestComplete_Scanner.cpp b/src/TestSuite/TestComplete_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestComplete_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + 
return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = 
wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = 
isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestComplete_Trace.txt b/src/TestSuite/TestComplete_Trace.txt new file mode 100644 index 0000000..f7057ea --- /dev/null +++ b/src/TestSuite/TestComplete_Trace.txt @@ -0,0 +1,78 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt C 3 20 + 3 nt D 0 20 + 4 t a 5 21 + 5 nt B 6 21 + 6 t a 0 21 + 7 t c 8 22 + 8 nt D 9 22 + 9 t c 0 22 + 10 nt A 11 23 + 11 t d 0 23 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: c d + +C +first: c +follow: a + +D +first: a +follow: EOF c + +B +first: -- empty set -- +follow: a + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t 
false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 4 false 21 fixedToken + 2 C nt false 7 false 22 fixedToken + 3 D nt false 10 false 23 fixedToken + 4 B nt false 0 false 0 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestDel.ATG b/src/TestSuite/TestDel.ATG new file mode 100644 index 0000000..6c242d8 --- /dev/null +++ b/src/TestSuite/TestDel.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of deletable symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A B g C g D. +A = a | {e}[f]. +B = {b}[c](d|). +C = A B. +D = C | h. + +END Test. diff --git a/src/TestSuite/TestDel_Output.txt b/src/TestSuite/TestDel_Output.txt new file mode 100644 index 0000000..76ca672 --- /dev/null +++ b/src/TestSuite/TestDel_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +checking + A deletable + B deletable + C deletable + D deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp new file mode 100644 index 0000000..5cc35c3 --- /dev/null +++ b/src/TestSuite/TestDel_Parser.cpp @@ -0,0 +1,517 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + B(); + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + C(); + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + D(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (StartOf(1 /* iter */)) { + while (la->kind == _e) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _f) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _c) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + A(); + B(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (StartOf(2 /* nt */)) { + C(); + } else if (la->kind == _h) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(13); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method 
Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b 
expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid B"); break; + case 13: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestDel_Scanner.cpp b/src/TestSuite/TestDel_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestDel_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestDel_Trace.txt b/src/TestSuite/TestDel_Trace.txt new file mode 100644 index 0000000..78c61d7 --- /dev/null +++ b/src/TestSuite/TestDel_Trace.txt @@ -0,0 +1,117 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 t g 4 20 + 4 nt C 5 20 + 5 t g 6 20 + 6 nt D 0 20 + 7 t a 0 21 + 8 t e -9 21 + 9 iter 11 0 8 0 + 10 t f 0 21 + 11 opt 0 0 10 0 + 12 alt 0 13 7 21 + 13 alt 0 0 9 0 + 14 t b -15 22 + 15 iter 17 0 14 0 + 16 t c -20 22 + 17 opt 20 0 16 0 + 18 t d 0 22 + 19 eps 0 0 + 20 alt 0 21 18 22 + 21 alt 0 0 19 0 + 22 nt A 23 23 + 23 nt B 0 23 + 24 nt C 0 24 + 25 t h 0 24 + 26 alt 0 27 24 24 + 27 alt 0 0 25 24 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g +follow: EOF 
+ +A +first: a e f +follow: EOF b c d g + +B +first: b c d +follow: EOF g + +C +first: a b c d e f +follow: EOF g + +D +first: a b c d e f h +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 12 true 21 fixedToken + 2 B nt false 15 true 22 fixedToken + 3 C nt false 22 true 23 fixedToken + 4 D nt false 26 true 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestEps.ATG b/src/TestSuite/TestEps.ATG new file mode 100644 index 0000000..28bac33 --- /dev/null +++ b/src/TestSuite/TestEps.ATG @@ -0,0 +1,20 @@ +$01246 +/*------------------------------------------------------------------------- +Test of eps symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = (a|) b + | (c | (. sem .) | d) e. + +END Test. 
\ No newline at end of file diff --git a/src/TestSuite/TestEps_Output.txt b/src/TestSuite/TestEps_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestEps_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp new file mode 100644 index 0000000..393e9c6 --- /dev/null +++ b/src/TestSuite/TestEps_Parser.cpp @@ -0,0 +1,445 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (la->kind == _a || la->kind == _b) { + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + } + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _c || la->kind == _d || la->kind == _e) { + if (la->kind == _c) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _e) { + sem + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == 
sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. + +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: 
s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestEps_Scanner.cpp b/src/TestSuite/TestEps_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestEps_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestEps_Trace.txt b/src/TestSuite/TestEps_Trace.txt new file mode 100644 index 0000000..ad7edac --- /dev/null +++ b/src/TestSuite/TestEps_Trace.txt @@ -0,0 +1,72 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a -5 17 + 2 eps -5 0 + 3 alt 5 4 1 17 + 4 alt -5 0 2 0 + 5 t b 0 17 + 6 t c -12 18 + 7 sem -12 330 0 + 8 alt 12 9 6 18 + 9 alt -12 11 7 0 + 10 t d -12 18 + 11 alt -12 0 10 18 + 12 t e 0 18 + 13 alt 0 14 3 17 + 14 alt 0 0 8 18 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken 
+ 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 13 false 17 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestEps_output.txt b/src/TestSuite/TestEps_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestEps_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestIters.ATG b/src/TestSuite/TestIters.ATG new file mode 100644 index 0000000..39bd7b5 --- /dev/null +++ b/src/TestSuite/TestIters.ATG @@ -0,0 +1,22 @@ +$01246 +/*------------------------------------------------------------------------- +Test of iterations in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = (a | {b} c | {{d} e} | {f {g}} h) i. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestIters_Output.txt b/src/TestSuite/TestIters_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestIters_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp new file mode 100644 index 0000000..2666079 --- /dev/null +++ b/src/TestSuite/TestIters_Parser.cpp @@ -0,0 +1,468 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b || la->kind == _c) { + while (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _d || la->kind == _e || la->kind == _i) { + while (la->kind == _d || la->kind == _e) { + while (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else if (la->kind == _f || la->kind == _h) { + while (la->kind == _f) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (la->kind == _g) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_h); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + Expect(_i); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestIters_Scanner.cpp b/src/TestSuite/TestIters_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestIters_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestIters_Trace.txt b/src/TestSuite/TestIters_Trace.txt new file mode 100644 index 0000000..f700c0a --- /dev/null +++ b/src/TestSuite/TestIters_Trace.txt @@ -0,0 +1,88 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a -18 20 + 2 t b -3 20 + 3 iter 4 0 2 0 + 4 t c -18 20 + 5 alt 18 6 1 20 + 6 alt -18 11 3 0 + 7 t d -8 20 + 8 iter 9 0 7 0 + 9 t e -10 20 + 10 iter -18 0 8 0 + 11 alt -18 17 10 0 + 12 t f 14 20 + 13 t g -14 20 + 14 iter -15 0 13 0 + 15 iter 16 0 12 0 + 16 t h -18 20 + 17 alt -18 0 15 0 + 18 t i 0 20 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f h i +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 
+ _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 5 false 20 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestIters_output.txt b/src/TestSuite/TestIters_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestIters_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestLL1.ATG b/src/TestSuite/TestLL1.ATG new file mode 100644 index 0000000..a174223 --- /dev/null +++ b/src/TestSuite/TestLL1.ATG @@ -0,0 +1,41 @@ +$01246 +/*------------------------------------------------------------------------- +Test if LL(1) warnings are reported. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A E C G H I J. +A = (a | B). +B = {b}(c|) a. +C = {a}[D] B. +D = d [b]. +E = (F|) e. +F = [f]. +G = [ ( IF (eee) (a | b) + | b + ) + ] a. +H = [a] [IF (hhh) a] [IF (hhh) a] a. +I = {a} ( IF (iii) (a | b) + | b + ). +J = + { IF (aaa) "a"} + { IF (eee) ("a" | "b") + | "b" + } "a". + +END Test. 
diff --git a/src/TestSuite/TestLL1_Output.txt b/src/TestSuite/TestLL1_Output.txt new file mode 100644 index 0000000..12c94e5 --- /dev/null +++ b/src/TestSuite/TestLL1_Output.txt @@ -0,0 +1,41 @@ +Coco/R (Dec 01, 2018) +checking + F deletable + LL1 warning in A:21:0: a is start of several alternatives + = a:21:6: + => B:21:10: + -> B:22:0: + = a:22:13: + = a:22:13: + = a:22:13: + LL1 warning in E:25:0: e is start of several alternatives + = e:25:10: + = e:25:10: + = e:25:10: + = e:25:10: + LL1 warning in C:23:0: a is start & successor of deletable structure + = a:23:6: + => B:23:12: + -> B:22:0: + = a:22:13: + = a:22:13: + = a:22:13: + LL1 warning in G:27:0: a is start & successor of deletable structure + = a:27:19: + = a:30:5: + LL1 warning in H:31:0: a is start & successor of deletable structure + = a:31:6: + = a:31:19: + = a:31:32: + = a:31:35: + LL1 warning in I:32:0: a is start & successor of deletable structure + = a:32:6: + = a:32:21: + LL1 warning in J:35:0: a is start & successor of deletable structure + = a:37:14: + = a:39:4: + LL1 warning in D:24:0: b is start & successor of deletable structure + = b:24:8: +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp new file mode 100644 index 0000000..5dba9a5 --- /dev/null +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -0,0 +1,679 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + E(); + C(); + G(); + H(); + I(); + J(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _a || la->kind == _b || la->kind == _c) { + B(); + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::E() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); +#endif + if (la->kind == _e || la->kind == _f) { + F(); + } else if (la->kind == _e) { + } else SynErr(12); + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + while (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _d) { + D(); + } + B(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::G() 
{ +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); +#endif + if (la->kind == _a || la->kind == _b) { + if (eee) { + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(13); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::H() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (hhh) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (hhh) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::I() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); +#endif + while (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (iii) { + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(14); + } else if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(15); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::J() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_J, _SC("J"), la->line); +#endif + while (aaa) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (la->kind == _a || la->kind == _b) { + if (eee) { + if 
(la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(16); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _c) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _a) { + } else SynErr(17); + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::F() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); +#endif + if (la->kind == _f) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid E"); break; + case 13: s = _SC("invalid G"); break; + case 14: s = _SC("invalid I"); break; + case 15: s = _SC("invalid I"); break; + case 16: s = _SC("invalid J"); break; + case 17: s = _SC("invalid B"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestLL1_Scanner.cpp b/src/TestSuite/TestLL1_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestLL1_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; isUserStream = false; // memory buffer has no stream; Close() and Buffer(Buffer*) read this flag, so it must not stay uninitialized +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg + 1]; // one extra slot for the terminating 0 written below (len can reach end - beg) + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); // restore the read position that was current on entry + buf[len] = 0; + return buf; // allocated with new[] -- presumably released by the caller with delete [] (confirm at call sites) +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestLL1_Trace.txt b/src/TestSuite/TestLL1_Trace.txt new file mode 100644 index 0000000..0a76722 --- /dev/null +++ b/src/TestSuite/TestLL1_Trace.txt @@ -0,0 +1,195 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt E 3 20 + 3 nt C 4 20 + 4 nt G 5 20 + 5 nt H 6 20 + 6 nt I 7 20 + 7 nt J 0 20 + 8 t a 0 21 + 9 nt B 0 21 + 10 alt 0 11 8 21 + 11 alt 0 0 9 21 + 12 t b -13 22 + 13 iter 16 0 12 0 + 14 t c -18 22 + 15 eps -18 0 + 16 alt 18 17 14 22 + 17 alt -18 0 15 0 + 18 t a 0 22 + 19 t a -20 23 + 20 iter 22 0 19 0 + 21 nt D -23 23 + 22 opt 23 0 21 0 + 23 nt B 0 23 + 24 t d 26 24 + 25 t b 0 24 + 26 opt 0 0 25 0 + 27 nt F -31 25 + 28 eps -31 0 + 29 alt 31 30 27 25 + 30 alt -31 0 28 0 + 31 t e 0 25 + 32 t f 0 26 + 33 
opt 0 0 32 0 + 34 rslv 37 27 + 35 t a -43 27 + 36 t b -43 27 + 37 alt -43 38 35 27 + 38 alt -43 0 36 27 + 39 t b -43 28 + 40 alt -43 41 34 27 + 41 alt -43 0 39 28 + 42 opt 43 0 40 0 + 43 t a 0 30 + 44 t a -48 31 + 45 opt 48 0 44 0 + 46 rslv 47 31 + 47 t a -51 31 + 48 opt 51 0 46 0 + 49 rslv 50 31 + 50 t a -52 31 + 51 opt 52 0 49 0 + 52 t a 0 31 + 53 t a -54 32 + 54 iter 61 0 53 0 + 55 rslv 58 32 + 56 t a 0 32 + 57 t b 0 32 + 58 alt 0 59 56 32 + 59 alt 0 0 57 32 + 60 t b 0 33 + 61 alt 0 62 55 32 + 62 alt 0 0 60 33 + 63 rslv 64 36 + 64 t a -65 36 + 65 iter 74 0 63 0 + 66 rslv 69 37 + 67 t a -74 37 + 68 t b -74 37 + 69 alt -74 70 67 37 + 70 alt -74 0 68 37 + 71 t b -74 38 + 72 alt -74 73 66 37 + 73 alt -74 0 71 38 + 74 iter 75 0 72 0 + 75 t a 0 39 + + +First & follow symbols: +---------------------- + +Test +first: a b c +follow: EOF + +A +first: a b c +follow: e f + +E +first: e f +follow: a b c d + +C +first: a b c d +follow: a b + +G +first: a b +follow: a + +H +first: a +follow: a b + +I +first: a b +follow: a b + +J +first: a b +follow: EOF + +B +first: a b c +follow: a b e f + +D +first: d +follow: a b c + +F +first: f +follow: e + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 10 false 21 fixedToken + 2 E nt false 29 false 25 fixedToken + 3 C nt false 20 false 23 fixedToken + 4 G nt false 42 false 27 fixedToken + 5 H nt false 45 false 31 fixedToken + 6 I nt false 54 false 32 fixedToken + 7 J nt false 65 false 35 fixedToken + 8 B nt false 13 false 22 fixedToken + 9 D nt false 24 false 24 fixedToken + 10 F nt false 33 true 26 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts.ATG b/src/TestSuite/TestOpts.ATG new file mode 100644 index 0000000..867f323 --- /dev/null +++ b/src/TestSuite/TestOpts.ATG @@ -0,0 +1,20 @@ +$01246 +/*------------------------------------------------------------------------- +Test of options in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = a | [b] c | [Del] | d [[d][e]f]. +Del = [e]. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestOpts1.ATG b/src/TestSuite/TestOpts1.ATG new file mode 100644 index 0000000..cc47d7d --- /dev/null +++ b/src/TestSuite/TestOpts1.ATG @@ -0,0 +1,19 @@ +$01246 +/*------------------------------------------------------------------------- +Test of options in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = [[a]]. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestOpts1_Output.txt b/src/TestSuite/TestOpts1_Output.txt new file mode 100644 index 0000000..6aef8af --- /dev/null +++ b/src/TestSuite/TestOpts1_Output.txt @@ -0,0 +1,7 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + LL1 warning in Test:17:1: contents of [...] 
or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp new file mode 100644 index 0000000..99e1329 --- /dev/null +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -0,0 +1,421 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (la->kind == _a) { + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = 
_SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestOpts1_Scanner.cpp b/src/TestSuite/TestOpts1_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestOpts1_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { // takes over b's byte array and stream; b is left holding NULL for both + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { // copies len bytes from buf into a private array + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; isUserStream = false; // no stream here; give the flag Close() tests a defined value +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { // closes the stream unless it is a user stream + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { // reads one character, then restores the previous position + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte (result is a new[]-allocated, 0-terminated string) +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg + 1]; // one extra slot: the loop can store end - beg characters and the terminator follows them + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); // leave the read position where the caller had it + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestOpts1_Trace.txt b/src/TestSuite/TestOpts1_Trace.txt new file mode 100644 index 0000000..80553fd --- /dev/null +++ b/src/TestSuite/TestOpts1_Trace.txt @@ -0,0 +1,61 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 0 17 + 2 opt 0 0 1 0 + 3 opt 0 0 2 0 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? 
t false 0 fixedToken + 0 Test nt false 3 true 17 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts1_output.txt b/src/TestSuite/TestOpts1_output.txt new file mode 100644 index 0000000..a8107e9 --- /dev/null +++ b/src/TestSuite/TestOpts1_output.txt @@ -0,0 +1,7 @@ +Coco/R (Sep 6, 2007) +checking + Test deletable + LL1 warning in Test: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts_Output.txt b/src/TestSuite/TestOpts_Output.txt new file mode 100644 index 0000000..7829c89 --- /dev/null +++ b/src/TestSuite/TestOpts_Output.txt @@ -0,0 +1,8 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + Del deletable + LL1 warning in Test:17:1: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp new file mode 100644 index 0000000..5a934ee --- /dev/null +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -0,0 +1,473 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) 
Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b || la->kind == _c) { + if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _EOF || la->kind == _e) { + if (la->kind == _e) { + Del(); + } + } else if (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + if (la->kind == _d || la->kind == _e || la->kind == _f) { + if (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (la->kind == _e) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_f); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(8); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::Del() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Del, _SC("Del"), la->line); +#endif + if (la->kind == _e) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a method Destroy they should +// be called in the constructor and the destructor respectively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exists only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exists only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The following templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = 
_SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestOpts_Scanner.cpp b/src/TestSuite/TestOpts_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestOpts_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestOpts_Trace.txt b/src/TestSuite/TestOpts_Trace.txt new file mode 100644 index 0000000..c48310e --- /dev/null +++ b/src/TestSuite/TestOpts_Trace.txt @@ -0,0 +1,82 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 0 17 + 2 t b -4 17 + 3 opt 4 0 2 0 + 4 t c 0 17 + 5 alt 0 6 1 17 + 6 alt 0 9 3 0 + 7 nt Del 0 17 + 8 opt 0 0 7 0 + 9 alt 0 17 8 0 + 10 t d 16 17 + 11 t d -14 17 + 12 opt 14 0 11 0 + 13 t e -15 17 + 14 opt 15 0 13 0 + 15 t f 0 17 + 16 opt 0 0 12 0 + 17 alt 0 0 10 17 + 18 t e 0 18 + 19 opt 0 0 18 0 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e +follow: EOF + +Del +first: e +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: 
+------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 5 true 17 fixedToken + 1 Del nt false 19 true 18 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts_output.txt b/src/TestSuite/TestOpts_output.txt new file mode 100644 index 0000000..22c022e --- /dev/null +++ b/src/TestSuite/TestOpts_output.txt @@ -0,0 +1,8 @@ +Coco/R (Sep 6, 2007) +checking + Test deletable + Del deletable + LL1 warning in Test: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestReached.ATG b/src/TestSuite/TestReached.ATG new file mode 100644 index 0000000..685f13e --- /dev/null +++ b/src/TestSuite/TestReached.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals can be reached. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A C D. +A = a C a | b. +B = b. +C = c D c | b. +D = A d. + +END Test. 
diff --git a/src/TestSuite/TestReached_Output.txt b/src/TestSuite/TestReached_Output.txt new file mode 100644 index 0000000..89a33c9 --- /dev/null +++ b/src/TestSuite/TestReached_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking + B cannot be reached +trace output is in trace.txt +1 errors detected diff --git a/src/TestSuite/TestReached_Parser.cpp b/src/TestSuite/TestReached_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestReached_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else 
if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; + case 6: s = coco_string_create(L"f expected"); break; 
+ case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestReached_Scanner.cpp b/src/TestSuite/TestReached_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestReached_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if 
(!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && 
(wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); 
+ fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestReached_Trace.txt b/src/TestSuite/TestReached_Trace.txt new file mode 100644 index 0000000..a478c03 --- /dev/null +++ b/src/TestSuite/TestReached_Trace.txt @@ -0,0 +1,85 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt C 3 20 + 3 nt D 0 20 + 4 t a 5 21 + 5 nt C 6 21 + 6 t a 0 21 + 7 t b 0 21 + 8 alt 0 9 4 21 + 9 alt 0 0 7 21 + 10 t b 0 22 + 11 t c 12 23 + 12 nt D 13 23 + 13 t c 0 23 + 14 t b 0 23 + 15 alt 0 16 11 23 + 16 alt 0 0 14 23 + 17 nt A 18 24 + 18 t d 0 24 + + +First & follow symbols: +---------------------- + +Test +first: a b +follow: EOF + +A +first: a b +follow: b c d + +C +first: b c +follow: a b + +D +first: a b +follow: EOF c + +B +first: b +follow: -- empty set -- + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 
fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 8 false 21 fixedToken + 2 C nt false 15 false 23 fixedToken + 3 D nt false 17 false 24 fixedToken + 4 B nt false 10 false 22 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestResIllegal.ATG b/src/TestSuite/TestResIllegal.ATG new file mode 100644 index 0000000..ca51b2b --- /dev/null +++ b/src/TestSuite/TestResIllegal.ATG @@ -0,0 +1,27 @@ +$AFGJSX +COMPILER Test +PRODUCTIONS + Test = A B C D E. + + A = + "a" + ( "b" (IF (aaa) "c") "d" /* misplaced resolver */ + | IF (bbb) "b" "c" /* resolver not evaluated */ + | IF (ccc) "c" /* misplaced resolver */ + ). + + B = IF (ddd) "d". /* misplaced resolver */ + + C = {IF (eee) "d"} "e". /* misplaced resolver */ + + D = + "d" + { "d" + | IF (fff) "a" "b" /* misplaced resolver */ + } "a". + + E = + IF(ggg) "a" /* misplaced resolver */ + | ANY. + +END Test. diff --git a/src/TestSuite/TestResIllegal_Output.txt b/src/TestSuite/TestResIllegal_Output.txt new file mode 100644 index 0000000..00793ae --- /dev/null +++ b/src/TestSuite/TestResIllegal_Output.txt @@ -0,0 +1,15 @@ +Coco/R (Dec 01, 2018) +checking +TestResIllegal.ATG -- line 8 col 14: Warning: Misplaced resolver: no alternative. +TestResIllegal.ATG -- line 9 col 9: Warning: Resolver will never be evaluated. Place it at previous conflicting alternative. +TestResIllegal.ATG -- line 10 col 9: Warning: Misplaced resolver: no LL(1) conflict. +TestResIllegal.ATG -- line 13 col 11: Warning: Misplaced resolver: no alternative. +TestResIllegal.ATG -- line 15 col 12: Warning: Misplaced resolver: no LL(1) conflict. +TestResIllegal.ATG -- line 20 col 9: Warning: Misplaced resolver: no LL(1) conflict. 
+TestResIllegal.ATG -- line 24 col 8: Warning: Misplaced resolver: no LL(1) conflict. + LL1 warning in D:17:0: "a" is start & successor of deletable structure + = "a":20:14: + = "a":21:5: +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestResIllegal_Parser.cpp b/src/TestSuite/TestResIllegal_Parser.cpp new file mode 100644 index 0000000..56481ba --- /dev/null +++ b/src/TestSuite/TestResIllegal_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + C(); + D(); + E(); +} + +void Parser::A() { + Expect(1 /* "a" */,__FUNCTION__); + if (la->kind == 2 /* "b" */) { + Get(); + Expect(3 /* "c" */,__FUNCTION__); + Expect(4 
/* "d" */,__FUNCTION__); + } else if (bbb) { + Expect(2 /* "b" */,__FUNCTION__); + Expect(3 /* "c" */,__FUNCTION__); + } else if (ccc) { + Expect(3 /* "c" */,__FUNCTION__); + } else SynErr(7,__FUNCTION__); +} + +void Parser::B() { + Expect(4 /* "d" */,__FUNCTION__); +} + +void Parser::C() { + while (eee) { + Expect(4 /* "d" */,__FUNCTION__); + } + Expect(5 /* "e" */,__FUNCTION__); +} + +void Parser::D() { + Expect(4 /* "d" */,__FUNCTION__); + while (la->kind == 1 /* "a" */ || la->kind == 4 /* "d" */) { + if (la->kind == 4 /* "d" */) { + Get(); + } else { + Expect(1 /* "a" */,__FUNCTION__); + Expect(2 /* "b" */,__FUNCTION__); + } + } + Expect(1 /* "a" */,__FUNCTION__); +} + +void Parser::E() { + if (ggg) { + Expect(1 /* "a" */,__FUNCTION__); + } else if (StartOf(1)) { + Get(); + } else SynErr(8,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 6; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[2][8] = { + {T,x,x,x, x,x,x,x}, + {x,x,T,T, T,T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"\"a\" expected"); break; + case 2: s = coco_string_create(L"\"b\" expected"); break; + case 3: s = coco_string_create(L"\"c\" expected"); break; + case 4: s = coco_string_create(L"\"d\" expected"); break; + case 5: s = coco_string_create(L"\"e\" expected"); break; + case 6: s = coco_string_create(L"??? 
expected"); break; + case 7: s = coco_string_create(L"invalid A"); break; + case 8: s = coco_string_create(L"invalid E"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestResIllegal_Scanner.cpp b/src/TestSuite/TestResIllegal_Scanner.cpp new file mode 100644 index 0000000..768de1a --- /dev/null +++ b/src/TestSuite/TestResIllegal_Scanner.cpp @@ -0,0 +1,617 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - 
L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* 
data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 6; + noSym = 6; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + 
+void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = 
t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestResIllegal_Trace.txt b/src/TestSuite/TestResIllegal_Trace.txt new file mode 100644 index 0000000..aac55e3 --- /dev/null +++ b/src/TestSuite/TestResIllegal_Trace.txt @@ -0,0 +1,130 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 4 + 2 nt B 3 4 + 3 nt C 4 4 + 4 nt D 5 4 + 5 nt E 0 4 + 6 t "a" 14 7 + 7 t "b" 8 8 + 8 rslv 9 8 + 9 t "c" 10 8 + 10 t "d" 0 8 + 11 rslv 12 9 + 12 t "b" 13 9 + 13 t "c" 0 9 + 14 alt 0 15 7 8 + 15 alt 0 18 11 9 + 16 rslv 17 10 + 17 t "c" 0 10 + 18 alt 0 0 16 10 + 19 rslv 20 13 + 20 t "d" 0 13 + 21 rslv 22 15 + 22 t "d" -23 15 + 23 iter 24 0 21 0 + 24 t "e" 0 15 + 25 t "d" 32 18 + 26 t "d" -32 19 + 27 rslv 28 20 + 28 t "a" 29 20 + 29 t "b" -32 20 + 30 alt -32 31 26 19 + 31 alt -32 0 27 20 + 32 iter 33 0 30 0 + 33 t "a" 0 21 + 34 rslv 35 24 + 35 t "a" 0 24 + 36 any 0 0 + 37 alt 0 38 34 24 + 38 alt 0 0 36 0 + + +First & follow symbols: +---------------------- + +Test +first: "a" +follow: EOF + +A +first: "a" +follow: "d" + +B +first: "d" +follow: "d" "e" + +C +first: "d" "e" +follow: "d" + +D +first: "d" +follow: "a" "b" "c" "d" "e" ??? + +E +first: "a" "b" "c" "d" "e" ??? 
+follow: EOF + + +ANY and SYNC sets: +----------------- + 36 any "b" "c" "d" "e" ??? + +Cross reference list: +-------------------- + + "a" 7 20 21 24 + "b" 8 9 20 + "c" 8 9 10 + "d" 8 13 15 18 19 + "e" 15 + A -6 4 + B -13 4 + C -15 4 + D -17 4 + E -23 4 + Test -4 + + + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 +E("a" ) 1: +E("b" ) 2: +E("c" ) 3: +E("d" ) 4: +E("e" ) 5: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 "a" t false 7 fixedToken + 2 "b" t false 8 fixedToken + 3 "c" t false 8 fixedToken + 4 "d" t false 8 fixedToken + 5 "e" t false 15 fixedToken + 6 ??? t false 0 fixedToken + 0 Test nt false 1 false 4 fixedToken + 1 A nt false 6 false 6 fixedToken + 2 B nt false 19 false 13 fixedToken + 3 C nt false 23 false 15 fixedToken + 4 D nt false 25 false 17 fixedToken + 5 E nt false 37 false 23 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestResOK.ATG b/src/TestSuite/TestResOK.ATG new file mode 100644 index 0000000..7503297 --- /dev/null +++ b/src/TestSuite/TestResOK.ATG @@ -0,0 +1,55 @@ +$AFGJSX +COMPILER Test +PRODUCTIONS + Test = A B C D E F G H + | I. + + A = "a" {[IF (true) "b" "c"] "b"} "c". + + B = + ( "a" + | IF (eee) "b" + | + ) "b". + + C = (IF(true) "a" "b" | "a"). + + D = { + IF (true) "a" + | "a" "b" + } "c". + + E = + ( "a" + | [ IF(true) "c" + | "c" "b" + ] + | "b" + ) "d". + + F = + { IF(true) ["a"] "b" + | ANY + | "a" + } "c". + + G = + { IF (aaa) "a"} + { IF (bbb) + (IF (eee) ("a" | "b") + | "b" + ) + } "a". + + H = + { IF (aaa) "a"} + { IF (eee) ("a" | "b") + | "b" + } "c". + + I = // both alternatives can be selected with EOF as the next input symbol + ( IF (aaa) ["b"] + | {"c"} + ). + +END Test. 
diff --git a/src/TestSuite/TestResOK_Output.txt b/src/TestSuite/TestResOK_Output.txt new file mode 100644 index 0000000..2d5fda3 --- /dev/null +++ b/src/TestSuite/TestResOK_Output.txt @@ -0,0 +1,7 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + I deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp new file mode 100644 index 0000000..4ea4454 --- /dev/null +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -0,0 +1,722 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (la->kind == 1 /* "a" */) { + A(); + B(); + C(); + D(); + E(); + F(); + G(); + H(); + } else if (la->kind == _EOF || la->kind == 2 /* "b" */ || la->kind == 3 /* "c" */) { + I(); + } else SynErr(6); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (la->kind == 2 /* "b" */) { + if (true) { + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (eee) { + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 2 /* "b" */) { + } else SynErr(7); + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (true) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); +#ifdef PARSER_WITH_AST + if(ntAdded) 
AstPopNonTerminal(); +#endif +} + +void Parser::D() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + while (la->kind == 1 /* "a" */) { + if (true) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::E() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); +#endif + if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 3 /* "c" */ || la->kind == 4 /* "d" */) { + if (la->kind == 3 /* "c" */) { + if (true) { + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + } else if (la->kind == 2 /* "b" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(9); + Expect(4 /* "d" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::F() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); +#endif + while (StartOf(1 /* alt */)) { + if (true) { + if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 4 /* "d" */ || la->kind == 5 /* ??? 
*/) { + Get(); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::G() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); +#endif + while (aaa) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (bbb) { + if (eee) { + if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 2 /* "b" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(10); + } else if (la->kind == 2 /* "b" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + } + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::H() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); +#endif + while (aaa) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (la->kind == 1 /* "a" */ || la->kind == 2 /* "b" */) { + if (eee) { + if (la->kind == 1 /* "a" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == 2 /* "b" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(12); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::I() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); +#endif + if (aaa) { + if (la->kind == 2 /* "b" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else if (la->kind == _EOF || 
la->kind == 3 /* "c" */) { + while (la->kind == 3 /* "c" */) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(13); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 5; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[2][7] = { + {T,x,x,x, x,x,x}, + {x,T,T,x, T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("\"a\" expected"); break; + case 2: s = _SC("\"b\" expected"); break; + case 3: s = _SC("\"c\" 
expected"); break; + case 4: s = _SC("\"d\" expected"); break; + case 5: s = _SC("??? expected"); break; + case 6: s = _SC("invalid Test"); break; + case 7: s = _SC("invalid B"); break; + case 8: s = _SC("invalid C"); break; + case 9: s = _SC("invalid E"); break; + case 10: s = _SC("invalid G"); break; + case 11: s = _SC("invalid G"); break; + case 12: s = _SC("invalid H"); break; + case 13: s = _SC("invalid I"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestResOK_Scanner.cpp b/src/TestSuite/TestResOK_Scanner.cpp new file mode 100644 index 0000000..77d7ca6 --- /dev/null +++ b/src/TestSuite/TestResOK_Scanner.cpp @@ -0,0 +1,678 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 5; + noSym = 5; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + 
tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* "a" */; break;} + case 2: + {t->kind = 2 /* "b" */; break;} + case 3: + {t->kind = 3 /* "c" */; break;} + case 4: + {t->kind = 4 /* "d" 
*/; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestResOK_Trace.txt b/src/TestSuite/TestResOK_Trace.txt new file mode 100644 index 0000000..fa189d4 --- /dev/null +++ b/src/TestSuite/TestResOK_Trace.txt @@ -0,0 +1,213 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 4 + 2 nt B 3 4 + 3 nt C 4 4 + 4 nt D 5 4 + 5 nt E 6 4 + 6 nt F 7 4 + 7 nt G 8 4 + 8 nt H 0 4 + 9 nt I 0 5 + 10 alt 0 11 1 4 + 11 alt 0 0 9 5 + 12 t "a" 18 7 + 13 rslv 14 7 + 14 t "b" 15 7 + 15 t "c" -17 7 + 16 opt 17 0 13 0 + 17 t "b" -18 7 + 18 iter 19 0 16 0 + 19 t "c" 0 7 + 20 t "a" -27 10 + 21 rslv 22 11 + 22 t "b" -27 11 + 23 alt 27 24 20 10 + 24 alt -27 26 21 11 + 25 eps -27 0 + 26 alt -27 0 25 0 + 27 t "b" 0 13 + 28 rslv 29 15 + 29 t "a" 30 15 + 30 t "b" 0 15 + 31 t "a" 0 15 + 32 alt 0 33 28 15 + 33 alt 0 0 31 15 + 34 rslv 35 18 + 35 t "a" -40 18 + 36 t "a" 37 19 + 37 t "b" -40 19 + 38 alt -40 39 34 18 + 39 alt -40 0 36 19 + 40 iter 41 0 38 0 + 41 t "c" 0 20 + 42 t "a" -54 23 + 43 rslv 44 24 + 44 t "c" -54 24 + 45 t "c" 46 25 + 46 t "b" -54 25 + 47 alt -54 48 43 24 + 48 alt -54 0 45 25 + 49 opt -54 0 47 
0 + 50 alt 54 51 42 23 + 51 alt -54 53 49 0 + 52 t "b" -54 27 + 53 alt -54 0 52 27 + 54 t "d" 0 28 + 55 rslv 57 31 + 56 t "a" -58 31 + 57 opt 58 0 56 0 + 58 t "b" -64 31 + 59 any -64 0 + 60 alt -64 61 55 31 + 61 alt -64 63 59 0 + 62 t "a" -64 33 + 63 alt -64 0 62 33 + 64 iter 65 0 60 0 + 65 t "c" 0 34 + 66 rslv 67 37 + 67 t "a" -68 37 + 68 iter 78 0 66 0 + 69 rslv 76 38 + 70 rslv 73 39 + 71 t "a" -78 39 + 72 t "b" -78 39 + 73 alt -78 74 71 39 + 74 alt -78 0 72 39 + 75 t "b" -78 40 + 76 alt -78 77 70 39 + 77 alt -78 0 75 40 + 78 iter 79 0 69 0 + 79 t "a" 0 42 + 80 rslv 81 45 + 81 t "a" -82 45 + 82 iter 91 0 80 0 + 83 rslv 86 46 + 84 t "a" -91 46 + 85 t "b" -91 46 + 86 alt -91 87 84 46 + 87 alt -91 0 85 46 + 88 t "b" -91 47 + 89 alt -91 90 83 46 + 90 alt -91 0 88 47 + 91 iter 92 0 89 0 + 92 t "c" 0 48 + 93 rslv 95 51 + 94 t "b" 0 51 + 95 opt 0 0 94 0 + 96 t "c" -97 52 + 97 iter 0 0 96 0 + 98 alt 0 99 93 51 + 99 alt 0 0 97 0 + + +First & follow symbols: +---------------------- + +Test +first: "a" "b" "c" +follow: EOF + +A +first: "a" +follow: "a" "b" + +B +first: "a" "b" +follow: "a" + +C +first: "a" +follow: "a" "c" + +D +first: "a" "c" +follow: "a" "b" "c" "d" + +E +first: "a" "b" "c" "d" +follow: "a" "b" "c" "d" ??? + +F +first: "a" "b" "c" "d" ??? +follow: "a" "b" + +G +first: "a" "b" +follow: "a" "b" "c" + +H +first: "a" "b" "c" +follow: EOF + +I +first: "b" "c" +follow: EOF + + +ANY and SYNC sets: +----------------- + 59 any "d" ??? 
+ +Cross reference list: +-------------------- + + "a" 7 10 15 15 18 19 23 31 33 37 39 42 45 + 46 + "b" 7 7 11 13 15 19 25 27 31 39 40 46 47 + 51 + "c" 7 7 20 24 25 34 48 52 + "d" 28 + A -7 4 + B -9 4 + C -15 4 + D -17 4 + E -22 4 + F -30 4 + G -36 4 + H -44 4 + I -50 5 + Test -4 + + + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 +E("a" ) 1: +E("b" ) 2: +E("c" ) 3: +E("d" ) 4: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 "a" t false 7 fixedToken + 2 "b" t false 7 fixedToken + 3 "c" t false 7 fixedToken + 4 "d" t false 28 fixedToken + 5 ??? t false 0 fixedToken + 0 Test nt false 10 true 4 fixedToken + 1 A nt false 12 false 7 fixedToken + 2 B nt false 23 false 9 fixedToken + 3 C nt false 32 false 15 fixedToken + 4 D nt false 40 false 17 fixedToken + 5 E nt false 50 false 22 fixedToken + 6 F nt false 64 false 30 fixedToken + 7 G nt false 68 false 36 fixedToken + 8 H nt false 82 false 44 fixedToken + 9 I nt false 98 true 50 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestSem.ATG b/src/TestSuite/TestSem.ATG new file mode 100644 index 0000000..b8333f9 --- /dev/null +++ b/src/TestSuite/TestSem.ATG @@ -0,0 +1,34 @@ +$01246 +/*------------------------------------------------------------------------- +Test of semantic actions +----------------------------------------------------------------------------*/ +using System.Collections; + +COMPILER Test + + static void Foo() { + Console.WriteLine("foo"); + } + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRAGMAS + option = '$' ('a' | 'b'). (. Console.WriteLine("pragma"); .) + +PRODUCTIONS + +Test (. decl .) = A B C. +A = ( (. aaa .) c | (. bbb .) | d) (. ccc .). +B = (. ddd .) { a (. eee .)} (. fff .) b (. !$%&/()=?`+*#'-_.:,;<>^{[]}| .). +C = (a | b) (. ggg .) c (..). + +END Test. 
diff --git a/src/TestSuite/TestSem_Output.txt b/src/TestSuite/TestSem_Output.txt new file mode 100644 index 0000000..ca245c9 --- /dev/null +++ b/src/TestSuite/TestSem_Output.txt @@ -0,0 +1,6 @@ +Coco/R (Dec 01, 2018) +checking + A deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSem_Parser.cpp b/src/TestSuite/TestSem_Parser.cpp new file mode 100644 index 0000000..f9ff002 --- /dev/null +++ b/src/TestSuite/TestSem_Parser.cpp @@ -0,0 +1,498 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + if (la->kind == _option) { + Console.WriteLine("pragma"); + } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + decl +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = 
eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + B(); + C(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (la->kind == _c) { + aaa + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _a || la->kind == _b) { + bbb + } else if (la->kind == _d) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + ccc +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + ddd + while (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + eee + } + fff + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + !$%&/()=?`+*#'-_.:,;<>^{[]}| +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (la->kind == _a) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(12); + ggg + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid C"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestSem_Scanner.cpp b/src/TestSuite/TestSem_Scanner.cpp new file mode 100644 index 0000000..d6732ae --- /dev/null +++ b/src/TestSuite/TestSem_Scanner.cpp @@ -0,0 +1,700 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(36, 10); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + 
tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; 
break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + case 10: + if ((ch >= _SC('a') && ch <= _SC('b'))) {AddCh(); goto case_11;} + else {goto case_0;} + case 11: + case_11: + {t->kind = 11 /* option */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestSem_Trace.txt b/src/TestSuite/TestSem_Trace.txt new file mode 100644 index 0000000..baad7f7 --- /dev/null +++ b/src/TestSuite/TestSem_Trace.txt @@ -0,0 +1,115 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 29 + 2 nt B 3 29 + 3 nt C 0 29 + 4 sem 5 543 0 + 5 t c -11 30 + 6 sem -11 557 0 + 7 alt 11 8 4 0 + 8 alt -11 10 6 0 + 9 t d -11 30 + 10 alt -11 0 9 30 + 11 sem 0 572 0 + 12 sem 15 588 0 + 13 t a 14 31 + 14 sem -15 602 0 + 15 iter 16 0 13 0 + 16 sem 17 613 0 + 17 t b 18 31 + 18 sem 0 625 0 + 19 t a -23 32 + 20 t b -23 32 + 21 alt 23 
22 19 32 + 22 alt -23 0 20 32 + 23 sem 24 674 0 + 24 t c 25 32 + 25 sem 0 685 0 + + +First & follow symbols: +---------------------- + +Test +first: a b c d +follow: EOF + +A +first: c d +follow: a b + +B +first: a b +follow: a b + +C +first: a b +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 + _SC('$') 10 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + 10: #A 11 +E(option ) 11: + +---------- character classes ---------- +#A : 'a' .. 'b' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 14 fixedToken + 2 b t false 15 fixedToken + 3 c t false 16 fixedToken + 4 d t false 17 fixedToken + 5 e t false 18 fixedToken + 6 f t false 19 fixedToken + 7 g t false 20 fixedToken + 8 h t false 21 fixedToken + 9 i t false 22 fixedToken + 10 ??? t false 0 fixedToken + 11 option pr false 25 fixedToken + 0 Test nt false 1 false 29 fixedToken + 1 A nt false 7 true 30 fixedToken + 2 B nt false 12 false 31 fixedToken + 3 C nt false 21 false 32 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestSem_output.txt b/src/TestSuite/TestSem_output.txt new file mode 100644 index 0000000..f60cea1 --- /dev/null +++ b/src/TestSuite/TestSem_output.txt @@ -0,0 +1,6 @@ +Coco/R (Sep 6, 2007) +checking + A deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSync.ATG b/src/TestSuite/TestSync.ATG new file mode 100644 index 0000000..cc3dfd5 --- /dev/null +++ b/src/TestSuite/TestSync.ATG @@ -0,0 +1,23 @@ +$01246 +/*------------------------------------------------------------------------- +Test of SYNC symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = a SYNC {b | c} d A. +A = SYNC [e f] g. + +END Test. diff --git a/src/TestSuite/TestSync_Output.txt b/src/TestSuite/TestSync_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestSync_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp new file mode 100644 index 0000000..f46d2a5 --- /dev/null +++ b/src/TestSuite/TestSync_Parser.cpp @@ -0,0 +1,466 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (!(StartOf(1 /* sync */))) {SynErr(11); Get();} + while (la->kind == _b || la->kind == _c) { + if (la->kind == _b) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + A(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + while (!(la->kind == _EOF || la->kind == _e || la->kind == _g)) {SynErr(12); Get();} + if (la->kind == _e) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_f); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. 
+// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[2][12] = { + {T,x,T,T, T,T,x,T, x,x,x,x}, + {T,x,T,T, T,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = 
_SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("this symbol not expected in Test"); break; + case 12: s = _SC("this symbol not expected in A"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestSync_Scanner.cpp b/src/TestSuite/TestSync_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestSync_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestSync_Trace.txt b/src/TestSuite/TestSync_Trace.txt new file mode 100644 index 0000000..2f001ac --- /dev/null +++ b/src/TestSuite/TestSync_Trace.txt @@ -0,0 +1,91 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 2 20 + 2 sync 7 0 + 3 t b -7 20 + 4 t c -7 20 + 5 alt -7 6 3 20 + 6 alt -7 0 4 20 + 7 iter 8 0 5 0 + 8 t d 9 20 + 9 nt A 0 20 + 10 sync 13 0 + 11 t e 12 21 + 12 t f -14 21 + 13 opt 14 0 11 0 + 14 t g 0 21 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: e g +follow: EOF + + +ANY and SYNC sets: +----------------- + 2 sync EOF b c d + 10 sync EOF e g + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + 
_SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 10 false 21 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestSync_output.txt b/src/TestSuite/TestSync_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestSync_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestTerminalizable.ATG b/src/TestSuite/TestTerminalizable.ATG new file mode 100644 index 0000000..42d01aa --- /dev/null +++ b/src/TestSuite/TestTerminalizable.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if nonterminals are terminalizable. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A B C D. +A = a C a. +B = b. +C = c D c. +D = A d. + +END Test. 
diff --git a/src/TestSuite/TestTerminalizable_Output.txt b/src/TestSuite/TestTerminalizable_Output.txt new file mode 100644 index 0000000..e7e14af --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Output.txt @@ -0,0 +1,8 @@ +Coco/R (Dec 01, 2018) +checking + Test cannot be derived to terminals + A cannot be derived to terminals + C cannot be derived to terminals + D cannot be derived to terminals +trace output is in trace.txt +4 errors detected diff --git a/src/TestSuite/TestTerminalizable_Parser.cpp b/src/TestSuite/TestTerminalizable_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void 
Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are 
used to call the Init and Destroy methods if they exist. + +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; 
+ case 6: s = coco_string_create(L"f expected"); break; + case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestTerminalizable_Scanner.cpp b/src/TestSuite/TestTerminalizable_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + 
return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = 
wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = 
isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTerminalizable_Trace.txt b/src/TestSuite/TestTerminalizable_Trace.txt new file mode 100644 index 0000000..77afd20 --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Trace.txt @@ -0,0 +1,80 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 t a 6 21 + 6 nt C 7 21 + 7 t a 0 21 + 8 t b 0 22 + 9 t c 10 23 + 10 nt D 11 23 + 11 t c 0 23 + 12 nt A 13 24 + 13 t d 0 24 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: b d + +B +first: b +follow: c + +C +first: c +follow: a + +D +first: a +follow: EOF c + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 
h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 5 false 21 fixedToken + 2 B nt false 8 false 22 fixedToken + 3 C nt false 9 false 23 fixedToken + 4 D nt false 12 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestTokens.ATG b/src/TestSuite/TestTokens.ATG new file mode 100644 index 0000000..5fc9766 --- /dev/null +++ b/src/TestSuite/TestTokens.ATG @@ -0,0 +1,28 @@ +$01246 +/*------------------------------------------------------------------------- +Test of TOKENS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + plus = '+'. + +TOKENS + ident = letter {letter | digit}. + ident1 = letter {'_'} '*'. + ident2 = letter CONTEXT ({'_'} '+'). + number = digit {digit} + | digit {digit} '.' {digit} ['E' ['+'|'-'] digit {digit}] + | digit {digit} CONTEXT (".."). +// nul = '\0'. + nul = "nul". +// hasNul = "ab\0c". + hasNul = "ab0c". + +PRODUCTIONS + +Test = ident "abc" "abc+" "a" "a_" "a__**" nul hasNul "nul". + +END Test. diff --git a/src/TestSuite/TestTokens1.ATG b/src/TestSuite/TestTokens1.ATG new file mode 100644 index 0000000..1e5253b --- /dev/null +++ b/src/TestSuite/TestTokens1.ATG @@ -0,0 +1,28 @@ +$01246 +/*------------------------------------------------------------------------- +Test of TOKENS definition (error case) +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + +TOKENS + ident = letter {letter | digit}. + ident1 = letter {digit} letter. /* tokens ident and ident1 cannot be distinguished */ + number = digit {digit} | digit {digit} CONTEXT(".."). + B1 = '\a'. + B2 = '\u0007'. 
/* tokens B1 and B2 cannot be distinguished */ + C1 = "\u0008". + C2 = "\b". /* tokens C1 and C2 cannot be distinguished */ + S1 = "+" "+". + S2 = "++". /* tokens S1 and S2 cannot be distinguished */ + S3 = '+' '+'. /* tokens S1 and S3 cannot be distinguished */ + X1 = "123..". /* tokens number and X1 cannot be distinguished */ + +PRODUCTIONS + +Test = ident. + +END Test. diff --git a/src/TestSuite/TestTokens1_Output.txt b/src/TestSuite/TestTokens1_Output.txt new file mode 100644 index 0000000..1aa1a3b --- /dev/null +++ b/src/TestSuite/TestTokens1_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +TestTokens1.ATG -- line 16 col 15: tokens B2 and B1 cannot be distinguished +TestTokens1.ATG -- line 18 col 11: tokens C2 and C1 cannot be distinguished +TestTokens1.ATG -- line 20 col 11: tokens S2 and S1 cannot be distinguished +Tokens ident and ident1 cannot be distinguished +Tokens S1 and S3 cannot be distinguished +Tokens number and X1 cannot be distinguished +trace output is in trace.txt +6 errors detected diff --git a/src/TestSuite/TestTokens1_Parser.cpp b/src/TestSuite/TestTokens1_Parser.cpp new file mode 100644 index 0000000..485fb90 --- /dev/null +++ b/src/TestSuite/TestTokens1_Parser.cpp @@ -0,0 +1,259 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void 
Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + Expect(_ident,__FUNCTION__); + Expect(7 /* "abc" */,__FUNCTION__); + Expect(8 /* "abc+" */,__FUNCTION__); + Expect(9 /* "a" */,__FUNCTION__); + Expect(10 /* "a_" */,__FUNCTION__); + Expect(11 /* "a__**" */,__FUNCTION__); + Expect(_nul,__FUNCTION__); + Expect(_hasNul,__FUNCTION__); + Expect(_nul,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 12; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][14] = { + {T,x,x,x, x,x,x,x, x,x,x,x, x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"ident expected"); break; + case 2: s = coco_string_create(L"ident1 expected"); break; + case 3: s = coco_string_create(L"ident2 expected"); break; + case 4: s = coco_string_create(L"number expected"); break; + case 5: s = coco_string_create(L"nul expected"); break; + case 6: s = coco_string_create(L"hasNul expected"); break; + case 7: s = 
coco_string_create(L"\"abc\" expected"); break; + case 8: s = coco_string_create(L"\"abc+\" expected"); break; + case 9: s = coco_string_create(L"\"a\" expected"); break; + case 10: s = coco_string_create(L"\"a_\" expected"); break; + case 11: s = coco_string_create(L"\"a__**\" expected"); break; + case 12: s = coco_string_create(L"??? expected"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestTokens1_Scanner.cpp b/src/TestSuite/TestTokens1_Scanner.cpp new file mode 100644 index 0000000..ffae751 --- /dev/null +++ b/src/TestSuite/TestTokens1_Scanner.cpp @@ -0,0 +1,724 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int 
dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 
0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = 
(fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 12; + noSym = 12; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 9); + for (i = 98; i <= 122; ++i) start.set(i, 9); + for (i = 48; i <= 57; ++i) start.set(i, 10); + start.set(97, 15); + start.set(Buffer::EoF, -1); + keywords.set(L"nul", 5); + keywords.set(L"ab0c", 6); + keywords.set(L"abc", 7); + keywords.set(L"a", 9); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new 
wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int apx = 0; + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 
'Z') || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_1;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 2: + case_2: + {t->kind = 2; break;} + case 3: + case_3: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 3; break;} + case 4: + case_4: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_4;} + else if (ch == 'E') {AddCh(); goto case_5;} + else {t->kind = 4; break;} + case 5: + case_5: + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else if (ch == '+' || ch == '-') {AddCh(); goto case_6;} + else {goto case_0;} + case 6: + case_6: + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else {goto case_0;} + case 7: + case_7: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else {t->kind = 4; break;} + case 8: + case_8: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 4; break;} + case 9: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {apx = 0; AddCh(); goto case_1;} + else if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 10: + case_10: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_10;} + else if (ch == '.') {apx++; AddCh(); goto case_12;} + else {t->kind = 4; break;} + case 11: + case_11: + if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto 
case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 12: + case_12: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {apx = 0; AddCh(); goto case_4;} + else if (ch == 'E') {apx = 0; AddCh(); goto case_5;} + else if (ch == '.') {apx++; AddCh(); goto case_8;} + else {t->kind = 4; break;} + case 13: + case_13: + {t->kind = 8; break;} + case 14: + case_14: + {t->kind = 11; break;} + case 15: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == 'a' || (ch >= 'c' && ch <= 'z')) {apx = 0; AddCh(); goto case_1;} + else if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_16;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else if (ch == 'b') {apx = 0; AddCh(); goto case_17;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 16: + case_16: + recEnd = pos; recKind = 10; + if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_18;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {t->kind = 10; break;} + case 17: + case_17: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'b') || (ch >= 'd' && ch <= 'z')) {AddCh(); goto case_1;} + else if (ch == 'c') {AddCh(); goto case_19;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 18: + case_18: + if (ch == '*') {apx = 0; AddCh(); goto case_20;} + else if (ch == '_') {apx++; AddCh(); goto case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 19: + case_19: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_1;} + else if (ch == '+') 
{AddCh(); goto case_13;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 20: + case_20: + recEnd = pos; recKind = 2; + if (ch == '*') {AddCh(); goto case_14;} + else {t->kind = 2; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTokens1_Trace.txt b/src/TestSuite/TestTokens1_Trace.txt new file mode 100644 index 0000000..32aea93 --- /dev/null +++ b/src/TestSuite/TestTokens1_Trace.txt @@ -0,0 +1,36 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 26 + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 12 classToken + 2 ident1 t false 13 classToken + 3 number t false 14 classToken + 4 B1 t false 15 fixedToken + 5 B2 t false 16 fixedToken + 6 C1 t false 17 fixedToken + 7 C2 t false 18 fixedToken + 8 S1 t false 19 fixedToken + 9 S2 t false 20 fixedToken + 10 S3 t false 21 fixedToken + 11 X1 t false 22 fixedToken + 12 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 26 fixedToken + +Literal Tokens: +-------------- +_C2 = "\b". +_B1 = "\a". +_X1 = "123..". +_B2 = "\u0007". +_S2 = "++". +_C1 = "\u0008". + diff --git a/src/TestSuite/TestTokens_Output.txt b/src/TestSuite/TestTokens_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestTokens_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp new file mode 100644 index 0000000..b7be964 --- /dev/null +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -0,0 +1,454 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = 
coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(7 /* "abc" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(8 /* "abc+" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(9 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(10 /* "a_" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(11 /* "a__**" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_nul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_hasNul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_nul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 12; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][14] = { + {T,x,x,x, x,x,x,x, x,x,x,x, x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("ident1 expected"); break; + case 3: s = _SC("ident2 
expected"); break; + case 4: s = _SC("number expected"); break; + case 5: s = _SC("nul expected"); break; + case 6: s = _SC("hasNul expected"); break; + case 7: s = _SC("\"abc\" expected"); break; + case 8: s = _SC("\"abc+\" expected"); break; + case 9: s = _SC("\"a\" expected"); break; + case 10: s = _SC("\"a_\" expected"); break; + case 11: s = _SC("\"a__**\" expected"); break; + case 12: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestTokens_Scanner.cpp b/src/TestSuite/TestTokens_Scanner.cpp new file mode 100644 index 0000000..100a6c2 --- /dev/null +++ b/src/TestSuite/TestTokens_Scanner.cpp @@ -0,0 +1,788 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 12; + noSym = 12; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 9); + for (i = 98; i <= 122; ++i) start.set(i, 9); + for (i = 48; i <= 57; ++i) start.set(i, 10); + start.set(97, 15); + start.set(Buffer::EoF, -1); + keywords.set(_SC("nul"), 5); + keywords.set(_SC("ab0c"), 6); + keywords.set(_SC("abc"), 7); + keywords.set(_SC("a"), 9); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) 
{ line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int apx = 0; + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + 
SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 2: + case_2: + {t->kind = 2 /* ident1 */; break;} + case 3: + case_3: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 3 /* ident2 */; break;} + case 4: + case_4: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else if (ch == _SC('E')) {AddCh(); goto case_5;} + else {t->kind = 4 /* number */; break;} + case 5: + case_5: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else if (ch == _SC('+') || ch == _SC('-')) {AddCh(); goto case_6;} + else {goto case_0;} + case 6: + case_6: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else {goto case_0;} + case 7: + case_7: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else {t->kind = 4 /* number */; break;} + case 8: + case_8: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 4 /* number */; break;} + case 9: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {apx = 0; AddCh(); goto case_1;} + else if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 
10: + case_10: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if (ch == _SC('.')) {apx++; AddCh(); goto case_12;} + else {t->kind = 4 /* number */; break;} + case 11: + case_11: + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 12: + case_12: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {apx = 0; AddCh(); goto case_4;} + else if (ch == _SC('E')) {apx = 0; AddCh(); goto case_5;} + else if (ch == _SC('.')) {apx++; AddCh(); goto case_8;} + else {t->kind = 4 /* number */; break;} + case 13: + case_13: + {t->kind = 8 /* "abc+" */; break;} + case 14: + case_14: + {t->kind = 11 /* "a__**" */; break;} + case 15: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('a') || (ch >= _SC('c') && ch <= _SC('z'))) {apx = 0; AddCh(); goto case_1;} + else if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_16;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else if (ch == _SC('b')) {apx = 0; AddCh(); goto case_17;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 16: + case_16: + recEnd = pos; recKind = 10 /* "a_" */; + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_18;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {t->kind = 10 /* "a_" */; break;} + case 17: + case_17: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('b')) || (ch >= _SC('d') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else if (ch == _SC('c')) {AddCh(); goto case_19;} + else {t->kind 
= 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 18: + case_18: + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_20;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 19: + case_19: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else if (ch == _SC('+')) {AddCh(); goto case_13;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 20: + case_20: + recEnd = pos; recKind = 2 /* ident1 */; + if (ch == _SC('*')) {AddCh(); goto case_14;} + else {t->kind = 2 /* ident1 */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTokens_Trace.txt b/src/TestSuite/TestTokens_Trace.txt new file mode 100644 index 0000000..b47feaa --- /dev/null +++ b/src/TestSuite/TestTokens_Trace.txt @@ -0,0 +1,107 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 2 26 + 2 t "abc" 3 26 + 3 t 
"abc+" 4 26 + 4 t "a" 5 26 + 5 t "a_" 6 26 + 6 t "a__**" 7 26 + 7 t nul 8 26 + 8 t hasNul 9 26 + 9 t nul 0 26 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: #C 9 + digit 10 + _SC('a') 15 +E(ident ) 1: #A 1 +E(ident1 ) 2: +E(ident2 ) 3: +E(number ) 4: digit 4 + _SC('E') 5 + 5: digit 7 + #B 6 + 6: digit 7 +E(number ) 7: digit 7 +E(number ) 8: +E(ident ) 9: #A 1 + _SC('*') 2 + _SC('_') 11 context + _SC('+') 3 context +E(number ) 10: digit 10 + _SC('.') 12 context + 11: _SC('*') 2 + _SC('_') 11 context + _SC('+') 3 context +E(number ) 12: digit 4 + _SC('E') 5 + _SC('.') 8 context +E("abc+" ) 13: +E("a__**" ) 14: +E(ident ) 15: #D 1 + _SC('*') 2 + _SC('_') 16 context + _SC('+') 3 context + _SC('b') 17 +E("a_" ) 16: _SC('*') 2 + _SC('_') 18 context + _SC('+') 3 context +E(ident ) 17: #E 1 + _SC('c') 19 + 18: _SC('*') 20 + _SC('_') 11 context + _SC('+') 3 context +E(ident ) 19: #A 1 + _SC('+') 13 +E(ident1 ) 20: _SC('*') 14 + +---------- character classes ---------- +letter : 'A' .. 'Z' 'a' .. 'z' +digit : '0' .. '9' +plus : '+' +#A : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' +#B : '+' '-' +#C : 'A' .. 'Z' 'b' .. 'z' +#D : '0' .. '9' 'A' .. 'Z' 'a' 'c' .. 'z' +#E : '0' .. '9' 'A' .. 'Z' 'a' .. 'b' 'd' .. 'z' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 13 classLitToken + 2 ident1 t false 14 classToken + 3 ident2 t false 15 classToken + 4 number t false 16 classToken + 5 nul t false 20 litToken + 6 hasNul t false 22 litToken + 7 "abc" t false 26 litToken + 8 "abc+" t false 26 fixedToken + 9 "a" t false 26 litToken + 10 "a_" t false 26 fixedToken + 11 "a__**" t false 26 fixedToken + 12 ??? t false 0 fixedToken + 0 Test nt false 1 false 26 fixedToken + +Literal Tokens: +-------------- +_hasNul = "ab0c". +_nul = "nul". 
+ diff --git a/src/TestSuite/TestTokens_output.txt b/src/TestSuite/TestTokens_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestTokens_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestWeak.ATG b/src/TestSuite/TestWeak.ATG new file mode 100644 index 0000000..bbfe26b --- /dev/null +++ b/src/TestSuite/TestWeak.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test of WEAK symbol +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C. +A = a WEAK b c. +B = a {WEAK b c} d. +C = a {WEAK b} c. + +END Test. diff --git a/src/TestSuite/TestWeak_Output.txt b/src/TestSuite/TestWeak_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestWeak_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp new file mode 100644 index 0000000..183f0f4 --- /dev/null +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -0,0 +1,483 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A(); + B(); + C(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + ExpectWeak(_b, 1); + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (WeakSeparator(_b,3,2) ) { + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (WeakSeparator(_b,4,3) ) { + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) 
AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[5][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,x,T, x,x,x,x, x,x,x,x}, + {x,x,x,x, T,x,x,x, x,x,x,x}, + {x,x,x,T, x,x,x,x, x,x,x,x}, + {x,x,T,T, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + 
case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + } +} + +void SynTree::dump2(int maxT, int indent, bool isLast) { + int last_idx = children.Count; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->tok->kind < maxT) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB 
diff --git a/src/TestSuite/TestWeak_Scanner.cpp b/src/TestSuite/TestWeak_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestWeak_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestWeak_Trace.txt b/src/TestSuite/TestWeak_Trace.txt new file mode 100644 index 0000000..78a3e9b --- /dev/null +++ b/src/TestSuite/TestWeak_Trace.txt @@ -0,0 +1,100 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 0 20 + 4 t a 5 21 + 5 wt b 6 21 + 6 t c 0 21 + 7 t a 10 22 + 8 wt b 9 22 + 9 t c -10 22 + 10 iter 11 0 8 0 + 11 t d 0 22 + 12 t a 14 23 + 13 wt b -14 23 + 14 iter 15 0 13 0 + 15 t c 0 23 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: a + +B +first: a +follow: a + +C +first: a +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + 
_SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 4 false 21 fixedToken + 2 B nt false 7 false 22 fixedToken + 3 C nt false 12 false 23 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestWeak_output.txt b/src/TestSuite/TestWeak_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestWeak_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/check.bat b/src/TestSuite/check.bat new file mode 100644 index 0000000..15a11ef --- /dev/null +++ b/src/TestSuite/check.bat @@ -0,0 +1,5 @@ +@..\Coco -frames .. %1.ATG > output.txt +@Compare trace.txt %1_Trace.txt %1_Trace +@Compare output.txt %1_Output.txt %1_Compilation 22 +@Compare Parser.cs %1_Parser.cs %1_Parser +@Compare Scanner.cs %1_Scanner.cs %1_Scanner diff --git a/src/TestSuite/check.sh b/src/TestSuite/check.sh new file mode 100755 index 0000000..20e487b --- /dev/null +++ b/src/TestSuite/check.sh @@ -0,0 +1,25 @@ +#!/bin/sh +#myvalgrind --leak-check=full +../Coco -frames .. 
$1.ATG > output.txt + +if cmp trace.txt $1_Trace.txt +then + echo $1_Trace passed +fi + +if cmp output.txt $1_Output.txt +then + echo $1_Compilation passed +fi + +#cp Parser.cpp $1_Parser.cpp +if cmp Parser.cpp $1_Parser.cpp +then + echo $1_Parser passed +fi + +#cp Scanner.cpp $1_Scanner.cpp +if cmp Scanner.cpp $1_Scanner.cpp +then + echo $1_Scanner passed +fi diff --git a/src/TestSuite/checkall.bat b/src/TestSuite/checkall.bat new file mode 100644 index 0000000..447e6f5 --- /dev/null +++ b/src/TestSuite/checkall.bat @@ -0,0 +1,24 @@ +echo off +call check TestAlts +call check TestOpts +call check TestOpts1 +call check TestIters +call check TestEps +call check TestAny +call check TestAny1 +call check TestSync +call check TestSem +call check TestWeak +call check TestChars +call check TestTokens +call checkerr TestTokens1 +call check TestComments +call check TestDel +call checkerr TestTerminalizable +call checkerr TestComplete +call checkerr TestReached +call checkerr TestCircular +call check TestLL1 +call check TestResOK +call checkerr TestResIllegal +call check TestCasing diff --git a/src/TestSuite/checkall.sh b/src/TestSuite/checkall.sh new file mode 100755 index 0000000..834b9f8 --- /dev/null +++ b/src/TestSuite/checkall.sh @@ -0,0 +1,24 @@ +#!/bin/sh +./check.sh TestAlts +./check.sh TestAny +./check.sh TestAny1 +./check.sh TestCasing +./check.sh TestChars +./check.sh TestComments +./check.sh TestDel +./check.sh TestEps +./check.sh TestIters +./check.sh TestLL1 +./check.sh TestOpts +./check.sh TestOpts1 +./check.sh TestResOK +./check.sh TestSem +./check.sh TestSync +./check.sh TestTokens +./check.sh TestWeak +./checkerr.sh TestCircular +./checkerr.sh TestComplete +./checkerr.sh TestReached +./checkerr.sh TestResIllegal +./checkerr.sh TestTerminalizable +./checkerr.sh TestTokens1 diff --git a/src/TestSuite/checkerr.bat b/src/TestSuite/checkerr.bat new file mode 100644 index 0000000..5675692 --- /dev/null +++ b/src/TestSuite/checkerr.bat @@ -0,0 +1,3 @@ +..\Coco 
-frames .. %1.ATG > output.txt +Compare trace.txt %1_Trace.txt %1_Trace +Compare output.txt %1_Output.txt %1_Compilation 22 diff --git a/src/TestSuite/checkerr.sh b/src/TestSuite/checkerr.sh new file mode 100755 index 0000000..37dc613 --- /dev/null +++ b/src/TestSuite/checkerr.sh @@ -0,0 +1,13 @@ +#!/bin/sh +#myvalgrind --leak-check=full +../Coco -frames .. $1.ATG > output.txt + +if cmp trace.txt $1_Trace.txt +then + echo $1_Trace passed +fi + +if cmp output.txt $1_Output.txt +then + echo $1_Compilation passed +fi diff --git a/src/TestSuite/compile.bat b/src/TestSuite/compile.bat new file mode 100644 index 0000000..9f07508 --- /dev/null +++ b/src/TestSuite/compile.bat @@ -0,0 +1,4 @@ +..\Coco -frames .. %1.ATG > %1_Output.txt +copy trace.txt %1_Trace.txt +copy Parser.cs %1_Parser.cs +copy Scanner.cs %1_Scanner.cs diff --git a/src/TestSuite/compile.sh b/src/TestSuite/compile.sh new file mode 100644 index 0000000..74176a2 --- /dev/null +++ b/src/TestSuite/compile.sh @@ -0,0 +1,5 @@ +#!/bin/sh +../Coco -frames .. 
$1.ATG > $1_Output.txt +cp trace.txt $1_Trace.txt +cp Parser.cpp $1_Parser.cpp +cp Scanner.cpp $1_Scanner.cpp diff --git a/src/TestSuite/compileall.bat b/src/TestSuite/compileall.bat new file mode 100644 index 0000000..c6bdd48 --- /dev/null +++ b/src/TestSuite/compileall.bat @@ -0,0 +1,24 @@ +echo off +call compile TestAlts +call compile TestOpts +call compile TestOpts1 +call compile TestIters +call compile TestEps +call compile TestAny +call compile TestAny1 +call compile TestSync +call compile TestSem +call compile TestWeak +call compile TestChars +call compile TestTokens +call compile TestTokens1 +call compile TestComments +call compile TestDel +call compile TestTerminalizable +call compile TestComplete +call compile TestReached +call compile TestCircular +call compile TestLL1 +call compile TestResOK +call compile TestResIllegal +call compile TestCasing diff --git a/src/TestSuite/compileall.sh b/src/TestSuite/compileall.sh new file mode 100644 index 0000000..7693afd --- /dev/null +++ b/src/TestSuite/compileall.sh @@ -0,0 +1,24 @@ +#!/bin/sh +./compile.sh TestAlts +./compile.sh TestOpts +./compile.sh TestOpts1 +./compile.sh TestIters +./compile.sh TestEps +./compile.sh TestAny +./compile.sh TestAny1 +./compile.sh TestSync +./compile.sh TestSem +./compile.sh TestWeak +./compile.sh TestChars +./compile.sh TestTokens +./compile.sh TestTokens1 +./compile.sh TestComments +./compile.sh TestDel +./compile.sh TestTerminalizable +./compile.sh TestComplete +./compile.sh TestReached +./compile.sh TestCircular +./compile.sh TestLL1 +./compile.sh TestResOK +./compile.sh TestResIllegal +./compile.sh TestCasing diff --git a/src/TestSuite/readme.txt b/src/TestSuite/readme.txt new file mode 100644 index 0000000..937ff5d --- /dev/null +++ b/src/TestSuite/readme.txt @@ -0,0 +1,36 @@ + Test Suite for Coco/R + Hanspeter Mössenböck + +This directory contains a simple test suite, which checks if changes to Coco/R still +lead to the same results. 
It works as follows: + +compile.bat Sample +runs Coco/R for the grammar Sample.ATG, which has all trace switches enabled so that +Coco/R dumps its internal data structures to the file trace.txt. It then copies the +following files: + Scanner.cs => TestSample_Scanner.cs + Parser.cs => TestSample_Parser.cs + console output => TestSample_Output.txt + trace.txt => TestSample_Trace.txt + + +compileall.bat +runs compile.bat for all ATG files in this directory. You should run this command +once before you start making changes to Coco/R. + +check.bat Sample +runs Coco/R again on the file Sample.ATG and compares the generated scanner, parser, +trace.txt and console output to the previously saved files. If the files are identical the +command prints the message + ++ passed TestSample_Compilation + +otherwise it prints the message + -- failed TestSample_Compilation + + +checkall.bat +runs check.bat for all ATG files in this directory. You should run this command after +every modification to Coco/R. The generated ++ passed or -- failed messages +indicate if the modification led to any differences in the output or in the internal data +structures of Coco/R. 
+ \ No newline at end of file diff --git a/src/TestSuite/zipall.bat b/src/TestSuite/zipall.bat new file mode 100644 index 0000000..0e5b914 --- /dev/null +++ b/src/TestSuite/zipall.bat @@ -0,0 +1 @@ +jar -cfM TestSuite.zip readme.txt *.frame *.ATG *.cs *.txt *.bat \ No newline at end of file diff --git a/src/TestSuite/zipall.sh b/src/TestSuite/zipall.sh new file mode 100644 index 0000000..3ccc9dd --- /dev/null +++ b/src/TestSuite/zipall.sh @@ -0,0 +1 @@ +jar -cfM TestSuite.zip readme.txt *.frame *.ATG *.cpp *.txt *.bat *.sh From 5f1d5d32e4c22b439e92b8b719d9d52e7efad25c Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 16:45:01 +0200 Subject: [PATCH 71/95] Add an overview of my main changes --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index 3451280..07de1fd 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,26 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. 
http://ssw.jku.at/coco/ + +And this are my main modifications to the original: + +- Fix all known memory leaks + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow till 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to crate railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-geAST` to generate code to generate `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals + +- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) + +- Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro + +- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) + From 07c3244ff202baa47719413cf6e6ca6a5e582b2b Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 18:28:38 +0200 Subject: [PATCH 72/95] Fix for possible narrowing conversion when wchar_t == char --- src/Tab.cpp | 4 ++-- src/Tab.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 1b5e694..1d6d81a 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -789,7 +789,7 @@ void Tab::CompSymbolSets() { // String handling //--------------------------------------------------------------------- -wchar_t Tab::Hex2Char(const wchar_t* s, int len) { +int Tab::Hex2Char(const wchar_t* s, int len) { int val = 0; for (int i = 0; i < len; i++) { wchar_t ch = s[i]; @@ -801,7 +801,7 @@ wchar_t Tab::Hex2Char(const wchar_t* s, int len) { if (val >= COCO_WCHAR_MAX) {/* pdt */ parser->SemErr(_SC("bad escape sequence in string or character")); } - return (wchar_t) val; + 
return val; } static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { diff --git a/src/Tab.h b/src/Tab.h index 8d6016f..9886f5c 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -175,7 +175,7 @@ class Tab { // String handling //--------------------------------------------------------------------- - wchar_t Hex2Char(const wchar_t* s, int len); + int Hex2Char(const wchar_t* s, int len); wchar_t* Unescape(const wchar_t* s); wchar_t* Escape(const wchar_t* s); From c9e56bfc84adc6307bae99162e5eade2869714ca Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 18:41:55 +0200 Subject: [PATCH 73/95] Fix my mistake of forgetting to wrap a literal string used as wchar_t * --- src/ParserGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 4e4ef3a..ce4f891 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -485,7 +485,7 @@ int ParserGen::GenCodeRREBNF (const Node *p) { void ParserGen::WriteRREBNF () { Symbol *sym; Generator g(tab, errors); - gen = g.OpenGen("Parser.ebnf"); + gen = g.OpenGen(_SC("Parser.ebnf")); fwprintf(gen, _SC("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n")); fwprintf(gen, _SC("\n//\n// productions\n//\n\n")); From 0c151cbd9051d53afeee049a749b00f2dcebec1f Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 19:20:29 +0200 Subject: [PATCH 74/95] Add reference to the Java and CSharp versions --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 07de1fd..7898854 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,4 @@ And this are my main modifications to the original: - Generate between comments the correspondent representation of several magic numbers (mainly Tokens) +See also https://github.com/mingodad/CocoR-Java and https://github.com/mingodad/CocoR-CSharp From e6a2b2164b52c361d46fafa80aa7544f58b33514 Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte 
Date: Thu, 10 Jun 2021 19:20:51 +0200 Subject: [PATCH 75/95] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7898854..b2353d4 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ And this are my main modifications to the original: - Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals -- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) +- Add a `TERMINALS` section to generate user define tokens not managed by the Scanner (from cocoxml) - Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro From 5a04a9c66d274c3f6409fb3afc51a98d7c33dffa Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 11 Jun 2021 14:55:36 +0200 Subject: [PATCH 76/95] My last fix for left recursion detection didn't work for any depth; this now seems to work in all cases --- src/Tab.cpp | 16 ++++++++-------- src/Tab.h | 2 +- src/TestSuite/TestCircular_Output.txt | 5 ++++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 1d6d81a..0ab9fa4 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -893,7 +893,7 @@ bool Tab::GrammarCheckAll() { int errors = 0; if(!NtsComplete()) ++errors; if(!AllNtReached()) ++errors; - if(!NoCircularProductions()) ++errors; + if(!NoCircularProductions()) exit(1); if(!AllNtToTerm()) ++errors; CheckResolvers(); CheckLL1(); return errors == 0; @@ -901,17 +901,17 @@ bool Tab::GrammarCheckAll() { //--------------- check for circular productions ---------------------- -void Tab::GetSingles(const Node *p, TArrayList &singles, const Node *rule) { +void Tab::GetSingles(const Node *p, TArrayList &singles) { if (p == NULL) return; // end of graph if (p->typ == Node::nt) { - if (p->up || DelGraph(p->next) || p->sym->graph 
== rule) singles.Add(p->sym); + singles.Add(p->sym); } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { if (p->up || DelGraph(p->next)) { - GetSingles(p->sub, singles, rule); - if (p->typ == Node::alt) GetSingles(p->down, singles, rule); + GetSingles(p->sub, singles); + if (p->typ == Node::alt) GetSingles(p->down, singles); } } - if (!p->up && DelNode(p)) GetSingles(p->next, singles, rule); + if (!p->up && DelNode(p)) GetSingles(p->next, singles); } bool Tab::NoCircularProductions() { @@ -924,7 +924,7 @@ bool Tab::NoCircularProductions() { for (i=0; i singles; - GetSingles(sym->graph, singles, sym->graph); // get nonterminals s such that sym-->s + GetSingles(sym->graph, singles); // get nonterminals s such that sym-->s Symbol *s; for (int j=0; jcount++; - wprintf(_SC(" %") _SFMT _SC(" --> %") _SFMT, n->left->name, n->right->name); + wprintf(_SC(" %") _SFMT _SC(":%d --> %") _SFMT _SC(":%d\n"), n->left->name, n->left->line, n->right->name, n->right->line); } for(int i=0; i &singles, const Node *rule); + void GetSingles(const Node *p, TArrayList &singles); bool NoCircularProductions(); //--------------- check for LL(1) errors ---------------------- diff --git a/src/TestSuite/TestCircular_Output.txt b/src/TestSuite/TestCircular_Output.txt index 170f918..08079e1 100644 --- a/src/TestSuite/TestCircular_Output.txt +++ b/src/TestSuite/TestCircular_Output.txt @@ -1,5 +1,8 @@ Coco/R (Dec 01, 2018) checking D deletable - A --> B B --> C C --> Atrace output is in trace.txt + A:21 --> B:22 + B:22 --> C:23 + C:23 --> A:21 +trace output is in trace.txt 3 errors detected From 2f2beee271483198335d7f31fd1c5c0c8f6c1c7f Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 12 Jun 2021 12:54:47 +0200 Subject: [PATCH 77/95] Fix SynTree.dump2 that is supposed to show a pruned tree --- src/Parser.cpp | 8 +++++--- src/Parser.frame | 10 ++++++---- src/Parser.h | 2 +- src/TestSuite/TestAlts_Parser.cpp | 8 +++++--- src/TestSuite/TestAny1_Parser.cpp | 8 +++++--- 
src/TestSuite/TestAny_Parser.cpp | 8 +++++--- src/TestSuite/TestCasing_Parser.cpp | 8 +++++--- src/TestSuite/TestChars_Parser.cpp | 8 +++++--- src/TestSuite/TestComments_Parser.cpp | 8 +++++--- src/TestSuite/TestDel_Parser.cpp | 8 +++++--- src/TestSuite/TestEps_Parser.cpp | 8 +++++--- src/TestSuite/TestIters_Parser.cpp | 8 +++++--- src/TestSuite/TestLL1_Parser.cpp | 8 +++++--- src/TestSuite/TestOpts1_Parser.cpp | 8 +++++--- src/TestSuite/TestOpts_Parser.cpp | 8 +++++--- src/TestSuite/TestResOK_Parser.cpp | 8 +++++--- src/TestSuite/TestSem_Parser.cpp | 8 +++++--- src/TestSuite/TestSync_Parser.cpp | 8 +++++--- src/TestSuite/TestTokens_Parser.cpp | 8 +++++--- src/TestSuite/TestWeak_Parser.cpp | 8 +++++--- 20 files changed, 97 insertions(+), 59 deletions(-) diff --git a/src/Parser.cpp b/src/Parser.cpp index 90ed8d6..e1aa88a 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1333,18 +1333,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -1352,7 +1354,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/Parser.frame b/src/Parser.frame index 72a5fe1..952554b 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -51,7 +51,7 @@ struct SynTree { TArrayList children; void dump(int indent=0, bool isLast=false); - void dump2(int maxT, int indent=0, bool isLast=false); + void dump2(int indent=0, bool isLast=false); }; #endif @@ -406,18 +406,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -425,7 +427,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/Parser.h b/src/Parser.h index 5d3ca7c..4178ae7 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -51,7 +51,7 @@ struct SynTree { TArrayList children; void dump(int indent=0, bool isLast=false); - void dump2(int maxT, int indent=0, bool isLast=false); + void dump2(int indent=0, bool isLast=false); }; #endif diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp index 0e887ae..8938a54 100644 --- a/src/TestSuite/TestAlts_Parser.cpp +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -381,18 +381,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -400,7 +402,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp index c907bc3..c276806 100644 --- a/src/TestSuite/TestAny1_Parser.cpp +++ b/src/TestSuite/TestAny1_Parser.cpp @@ -402,18 +402,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -421,7 +423,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp index 22a1bca..a4bae57 100644 --- a/src/TestSuite/TestAny_Parser.cpp +++ b/src/TestSuite/TestAny_Parser.cpp @@ -439,18 +439,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -458,7 +460,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp index 1d28add..b571c95 100644 --- a/src/TestSuite/TestCasing_Parser.cpp +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -377,18 +377,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -396,7 +398,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp index 8bafa1e..8fa27c6 100644 --- a/src/TestSuite/TestChars_Parser.cpp +++ b/src/TestSuite/TestChars_Parser.cpp @@ -333,18 +333,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -352,7 +354,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp index 8bafa1e..8fa27c6 100644 --- a/src/TestSuite/TestComments_Parser.cpp +++ b/src/TestSuite/TestComments_Parser.cpp @@ -333,18 +333,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -352,7 +354,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp index 5cc35c3..661fad8 100644 --- a/src/TestSuite/TestDel_Parser.cpp +++ b/src/TestSuite/TestDel_Parser.cpp @@ -438,18 +438,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -457,7 +459,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp index 393e9c6..f883388 100644 --- a/src/TestSuite/TestEps_Parser.cpp +++ b/src/TestSuite/TestEps_Parser.cpp @@ -366,18 +366,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -385,7 +387,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp index 2666079..2630096 100644 --- a/src/TestSuite/TestIters_Parser.cpp +++ b/src/TestSuite/TestIters_Parser.cpp @@ -389,18 +389,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -408,7 +410,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp index 5dba9a5..f1f5ce0 100644 --- a/src/TestSuite/TestLL1_Parser.cpp +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -600,18 +600,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -619,7 +621,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp index 99e1329..4b326d9 100644 --- a/src/TestSuite/TestOpts1_Parser.cpp +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -342,18 +342,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -361,7 +363,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp index 5a934ee..6554d4d 100644 --- a/src/TestSuite/TestOpts_Parser.cpp +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -394,18 +394,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -413,7 +415,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp index 4ea4454..d1a30cb 100644 --- a/src/TestSuite/TestResOK_Parser.cpp +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -643,18 +643,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -662,7 +664,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestSem_Parser.cpp b/src/TestSuite/TestSem_Parser.cpp index f9ff002..a672510 100644 --- a/src/TestSuite/TestSem_Parser.cpp +++ b/src/TestSuite/TestSem_Parser.cpp @@ -419,18 +419,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -438,7 +440,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp index f46d2a5..bfc42f1 100644 --- a/src/TestSuite/TestSync_Parser.cpp +++ b/src/TestSuite/TestSync_Parser.cpp @@ -387,18 +387,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -406,7 +408,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp index b7be964..b2e4b06 100644 --- a/src/TestSuite/TestTokens_Parser.cpp +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -375,18 +375,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -394,7 +396,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp index 183f0f4..613b634 100644 --- a/src/TestSuite/TestWeak_Parser.cpp +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -404,18 +404,20 @@ void SynTree::dump(int indent, bool isLast) { } } -void SynTree::dump2(int maxT, int indent, bool isLast) { +void SynTree::dump2(int indent, bool isLast) { int last_idx = children.Count; + int indentPlus = 4; if(tok->col) { printIndent(indent); wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); } else { if(last_idx == 1) { - if(((SynTree*)children[0])->tok->kind < maxT) { + if(((SynTree*)children[0])->children.Count == 0) { printIndent(indent); wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } + else indentPlus = 0; } else { printIndent(indent); @@ -423,7 +425,7 @@ void SynTree::dump2(int maxT, int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); } } From 3ecb057e2f9581b36df461a28fba71d84f7a8234 Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 14 Jun 2021 14:08:06 +0200 Subject: [PATCH 78/95] Rename SynTree::dump to SynTree::dump_all and SynTree::dump to SynTree::dump_pruned --- src/Parser.cpp | 8 ++++---- src/Parser.frame | 12 ++++++------ src/Parser.h | 4 ++-- src/TestSuite/TestAlts_Parser.cpp | 8 ++++---- src/TestSuite/TestAny1_Parser.cpp | 8 ++++---- src/TestSuite/TestAny_Parser.cpp | 8 ++++---- src/TestSuite/TestCasing_Parser.cpp | 8 ++++---- src/TestSuite/TestChars_Parser.cpp | 8 ++++---- src/TestSuite/TestComments_Parser.cpp | 8 ++++---- src/TestSuite/TestDel_Parser.cpp | 8 ++++---- src/TestSuite/TestEps_Parser.cpp | 8 ++++---- src/TestSuite/TestIters_Parser.cpp | 8 ++++---- src/TestSuite/TestLL1_Parser.cpp | 8 ++++---- src/TestSuite/TestOpts1_Parser.cpp | 8 ++++---- src/TestSuite/TestOpts_Parser.cpp | 8 ++++---- src/TestSuite/TestResOK_Parser.cpp | 8 ++++---- src/TestSuite/TestSem_Parser.cpp | 8 ++++---- src/TestSuite/TestSync_Parser.cpp | 8 ++++---- src/TestSuite/TestTokens_Parser.cpp | 8 ++++---- src/TestSuite/TestWeak_Parser.cpp | 8 ++++---- 20 files changed, 80 insertions(+), 80 deletions(-) diff --git a/src/Parser.cpp b/src/Parser.cpp index e1aa88a..0e659c5 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1318,7 +1318,7 @@ 
SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -1329,11 +1329,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -1354,7 +1354,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/Parser.frame b/src/Parser.frame index 952554b..4163029 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -50,8 +50,8 @@ struct SynTree { Token *tok; TArrayList children; - void dump(int indent=0, bool isLast=false); - void dump2(int indent=0, bool isLast=false); + void dump_all(int indent=0, bool isLast=false); + void dump_pruned(int indent=0, bool isLast=false); }; #endif @@ -391,7 +391,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -402,11 +402,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -427,7 +427,7 @@ 
void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/Parser.h b/src/Parser.h index 4178ae7..d9b46f8 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -50,8 +50,8 @@ struct SynTree { Token *tok; TArrayList children; - void dump(int indent=0, bool isLast=false); - void dump2(int indent=0, bool isLast=false); + void dump_all(int indent=0, bool isLast=false); + void dump_pruned(int indent=0, bool isLast=false); }; #endif diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp index 8938a54..736418e 100644 --- a/src/TestSuite/TestAlts_Parser.cpp +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -366,7 +366,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -377,11 +377,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -402,7 +402,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp index c276806..0b22ae2 100644 --- a/src/TestSuite/TestAny1_Parser.cpp +++ 
b/src/TestSuite/TestAny1_Parser.cpp @@ -387,7 +387,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -398,11 +398,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -423,7 +423,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp index a4bae57..4f7f544 100644 --- a/src/TestSuite/TestAny_Parser.cpp +++ b/src/TestSuite/TestAny_Parser.cpp @@ -424,7 +424,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -435,11 +435,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -460,7 +460,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, 
idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp index b571c95..2883b71 100644 --- a/src/TestSuite/TestCasing_Parser.cpp +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -362,7 +362,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -373,11 +373,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -398,7 +398,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp index 8fa27c6..b441de6 100644 --- a/src/TestSuite/TestChars_Parser.cpp +++ b/src/TestSuite/TestChars_Parser.cpp @@ -318,7 +318,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -329,11 +329,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, 
bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -354,7 +354,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp index 8fa27c6..b441de6 100644 --- a/src/TestSuite/TestComments_Parser.cpp +++ b/src/TestSuite/TestComments_Parser.cpp @@ -318,7 +318,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -329,11 +329,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -354,7 +354,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp index 661fad8..b9f67f5 100644 --- a/src/TestSuite/TestDel_Parser.cpp +++ b/src/TestSuite/TestDel_Parser.cpp @@ -423,7 +423,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -434,11 +434,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT 
_SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -459,7 +459,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp index f883388..51a567e 100644 --- a/src/TestSuite/TestEps_Parser.cpp +++ b/src/TestSuite/TestEps_Parser.cpp @@ -351,7 +351,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -362,11 +362,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -387,7 +387,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp 
index 2630096..b243feb 100644 --- a/src/TestSuite/TestIters_Parser.cpp +++ b/src/TestSuite/TestIters_Parser.cpp @@ -374,7 +374,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -385,11 +385,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -410,7 +410,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp index f1f5ce0..6aad760 100644 --- a/src/TestSuite/TestLL1_Parser.cpp +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -585,7 +585,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -596,11 +596,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -621,7 +621,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; 
idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp index 4b326d9..f488af1 100644 --- a/src/TestSuite/TestOpts1_Parser.cpp +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -327,7 +327,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -338,11 +338,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -363,7 +363,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp index 6554d4d..bf815ca 100644 --- a/src/TestSuite/TestOpts_Parser.cpp +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -379,7 +379,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -390,11 +390,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) 
((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -415,7 +415,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp index d1a30cb..0c35b53 100644 --- a/src/TestSuite/TestResOK_Parser.cpp +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -628,7 +628,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -639,11 +639,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -664,7 +664,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestSem_Parser.cpp b/src/TestSuite/TestSem_Parser.cpp index a672510..a0d3231 100644 --- a/src/TestSuite/TestSem_Parser.cpp +++ b/src/TestSuite/TestSem_Parser.cpp @@ -404,7 +404,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -415,11 +415,11 @@ 
void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -440,7 +440,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp index bfc42f1..f3bd930 100644 --- a/src/TestSuite/TestSync_Parser.cpp +++ b/src/TestSuite/TestSync_Parser.cpp @@ -372,7 +372,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -383,11 +383,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -408,7 +408,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } 
diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp index b2e4b06..bacd956 100644 --- a/src/TestSuite/TestTokens_Parser.cpp +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -360,7 +360,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -371,11 +371,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -396,7 +396,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp index 613b634..7628add 100644 --- a/src/TestSuite/TestWeak_Parser.cpp +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -389,7 +389,7 @@ SynTree::~SynTree() { for(int i=0; icol) { printIndent(indent); @@ -400,11 +400,11 @@ void SynTree::dump(int indent, bool isLast) { wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); } } -void SynTree::dump2(int indent, bool isLast) { +void SynTree::dump_pruned(int indent, bool isLast) { int last_idx = children.Count; int indentPlus = 4; if(tok->col) { @@ -425,7 
+425,7 @@ void SynTree::dump2(int indent, bool isLast) { } } if(last_idx) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); } } From b80f2e078156df985c299a888c100cafd94c1866 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 1 Jul 2021 12:14:43 +0200 Subject: [PATCH 79/95] Fix to make it behave the same as the Java/CSharp version --- src/Tab.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index 0ab9fa4..b7ad3bc 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -798,7 +798,7 @@ int Tab::Hex2Char(const wchar_t* s, int len) { else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A'); else parser->SemErr(_SC("bad escape sequence in string or character")); } - if (val >= COCO_WCHAR_MAX) {/* pdt */ + if (val > COCO_WCHAR_MAX) {/* pdt */ parser->SemErr(_SC("bad escape sequence in string or character")); } return val; From ec65db3c020e37655eadb66e184cf55184547811 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 1 Jul 2021 12:16:00 +0200 Subject: [PATCH 80/95] Fix for endless loop with some ill grammars --- src/Tab.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index b7ad3bc..39ff8e2 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -1024,7 +1024,8 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { break; } case Node::iter: case Node::opt: { - PrintFirstPath(p->sub, tok, indent); + if (!DelNode(p->sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p->sub, tok, indent); break; } } From 530714ccf964449f92599d810756ad6b8b2ae2a8 Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 6 Jul 2021 18:16:01 +0200 Subject: [PATCH 81/95] Fix for when 'wchar_t' is 'char' --- src/DFA.cpp | 2 +- src/Tab.cpp | 2 +- src/TestSuite/TestLL1_Output.txt | 2 -- 3 files changed, 2 
insertions(+), 4 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index 963727e..b018596 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -643,7 +643,7 @@ void DFA::GenLiterals () { fputws(_SC("\tkeywords.set(_SC("), gen); // write keyword, escape non printable characters for (int k = 0; name[k] != _SC('\0'); k++) { - wchar_t c = name[k]; + int c = name[k]; fwprintf(gen, (c >= 32 && c <= 127) ? _SC("%") _CHFMT : _SC("\\x%04x"), c); } fwprintf(gen, _SC("), %d);\n"), sym->n); diff --git a/src/Tab.cpp b/src/Tab.cpp index 39ff8e2..4fc6ec8 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -851,7 +851,7 @@ wchar_t* Tab::Unescape (const wchar_t* s) { wchar_t* Tab::Escape (const wchar_t* s) { StringBuilder buf; - wchar_t ch; + int ch; int len = coco_string_length(s); wchar_t_10 fmt; for (int i=0; i < len; i++) { diff --git a/src/TestSuite/TestLL1_Output.txt b/src/TestSuite/TestLL1_Output.txt index 12c94e5..8b676cd 100644 --- a/src/TestSuite/TestLL1_Output.txt +++ b/src/TestSuite/TestLL1_Output.txt @@ -25,8 +25,6 @@ checking = a:30:5: LL1 warning in H:31:0: a is start & successor of deletable structure = a:31:6: - = a:31:19: - = a:31:32: = a:31:35: LL1 warning in I:32:0: a is start & successor of deletable structure = a:32:6: From 0efd1ec09ab9c734040bb731eab5372a52517823 Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 6 Jul 2021 18:16:21 +0200 Subject: [PATCH 82/95] Remove unused variable --- src/Scanner.frame | 1 - src/Scanner.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Scanner.frame b/src/Scanner.frame index 519d72e..104226c 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -376,7 +376,6 @@ private: int eofSym; int noSym; int maxT; - int charSetSize; StartStates start; KeywordMap keywords; diff --git a/src/Scanner.h b/src/Scanner.h index 1355f82..37cfece 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -372,7 +372,6 @@ class Scanner { int eofSym; int noSym; int maxT; - int charSetSize; StartStates start; KeywordMap keywords; From 
01b226ca0dc1ef983ea5b042227d9ae759ea3284 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 9 Jul 2021 09:49:18 +0200 Subject: [PATCH 83/95] Add examples folder and an initial bison grammar --- examples/bison.atg | 346 ++++++++++++++++++++++++++++++++++ examples/build-cocobison.sh | 4 + examples/cocobison.cpp | 28 +++ examples/readme-cocobison.txt | 5 + 4 files changed, 383 insertions(+) create mode 100644 examples/bison.atg create mode 100755 examples/build-cocobison.sh create mode 100644 examples/cocobison.cpp create mode 100644 examples/readme-cocobison.txt diff --git a/examples/bison.atg b/examples/bison.atg new file mode 100644 index 0000000..ffcf71e --- /dev/null +++ b/examples/bison.atg @@ -0,0 +1,346 @@ +$namespace=CocoBison + +COMPILER Bison + +TERMINALS + T_SYMBOL + +CHARACTERS + letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_". + digit = "0123456789". + cr = '\r'. + lf = '\n'. + tab = '\t'. + ff = '\f'. + stringCh = ANY - '"' - '\\' - cr - lf. + charCh = ANY - '\'' - '\\' - cr - lf. + printable = '\u0020' .. '\u007e'. + hex = "0123456789abcdef". + +TOKENS + ID = (letter | '.') { letter | digit | '.' | '-'}. + INT_LITERAL = digit { digit }. + STRING = '"' { stringCh | '\\' printable } '"'. + badString = '"' { stringCh | '\\' printable } (cr | lf). + CHAR_LITERAL = '\'' ( charCh | '\\' printable { hex } ) '\''. + + PERCENT_TOKEN = "%token". + PERCENT_NTERM = "%nterm". + + PERCENT_TYPE = "%type". + PERCENT_DESTRUCTOR = "%destructor". + PERCENT_PRINTER = "%printer". + + PERCENT_LEFT = "%left". + PERCENT_RIGHT = "%right". + PERCENT_NONASSOC = "%nonassoc". + PERCENT_PRECEDENCE = "%precedence". + + PERCENT_PREC = "%prec". + PERCENT_DPREC = "%dprec". + PERCENT_MERGE = "%merge". + + PERCENT_CODE = "%code". + PERCENT_DEFAULT_PREC = "%default-prec". + PERCENT_DEFINE = "%define". + PERCENT_DEFINES = "%defines". + PERCENT_ERROR_VERBOSE = "%error-verbose". + PERCENT_EXPECT = "%expect". + PERCENT_EXPECT_RR = "%expect-rr". + PERCENT_FLAG = "%". 
+ PERCENT_FILE_PREFIX = "%file-prefix". + PERCENT_GLR_PARSER = "%glr-parser". + PERCENT_INITIAL_ACTION = "%initial-action". + PERCENT_LANGUAGE = "%language". + PERCENT_NAME_PREFIX = "%name-prefix". + PERCENT_NO_DEFAULT_PREC = "%no-default-prec". + PERCENT_NO_LINES = "%no-lines". + PERCENT_NONDETERMINISTIC_PARSER = "%nondeterministic-parser". + PERCENT_OUTPUT = "%output". + PERCENT_PURE_PARSER = "%pure-parser". + PERCENT_REQUIRE = "%require". + PERCENT_SKELETON = "%skeleton". + PERCENT_START = "%start". + PERCENT_TOKEN_TABLE = "%token-table". + PERCENT_VERBOSE = "%verbose". + PERCENT_YACC = "%yacc". + + //BRACED_CODE = "{...}". + //BRACED_PREDICATE = "%?{...}". + //BRACKETED_ID = "[identifier]". + //CHAR_LITERAL = "character literal". + COLON = ":". + EPILOGUE = "epilogue". + EQUAL = "=". + //ID = "identifier". + //ID_COLON "identifier:". + PERCENT_PERCENT = "%%". + PIPE = "|". + PROLOGUE = "%{...%}". + SEMICOLON = ";". + //TAG = "". + //TAG_ANY = "<*>". + //TAG_NONE = "<>". + LEFT_BRACE = '{'. + RIGHT_BRACE = '}'. + LEFT_ANGLE_BRACK = '<'. + RIGHT_ANGLE_BRACK = '>'. + +PRAGMAS + +COMMENTS FROM "/*" TO "*/" NESTED +COMMENTS FROM "//" TO lf + +IGNORE cr + lf + tab + ff + +/*-------------------------------------------------------------------------*/ + +PRODUCTIONS + +Bison = + prologue_declarations "%%" grammar [epilogue] + EOF + . + +prologue_declarations = + prologue_declaration {prologue_declaration} + . 
+ +prologue_declaration = + grammar_declaration + | "%{" {ANY} "%}" + | "%<flag>" + | "%define" variable [value] + | "%defines" [STRING] + | "%error-verbose" + | "%expect" INT_LITERAL + | "%expect-rr" INT_LITERAL + | "%file-prefix" STRING + | "%glr-parser" + | "%pure_parser" + | "%initial-action" params + | "%language" STRING + | "%name" ID + | "%name-prefix" ['='] STRING + | "%no-lines" + | "%nondeterministic-parser" + | "%output" STRING + | ("%param" | "%lex-param" | "%parse-param") params + | "%pure-parser" + | "%require" STRING + | "%skeleton" STRING + | "%token-table" + | "%verbose" + | "%yacc" + //| "%include-enum" STRING ID + | "%debug" + | "%locations" + //| error ";" + | /*FIXME: Err? What is this horror doing here? */ ";" + //| "BISONPRE_VERSION" '(' ANY {ANY} ')' + . + +params = + '{' (. // manage nested braces + if(la->kind != _RIGHT_BRACE) { + //print("==", la->line, la->kind, la->val); + for (int nested = 1; nested > 0;) { + //print("==1", la->line, la->kind, la->val, nested); + //print("==", la->line, nested, la->kind, la->val); + if(la->kind == _LEFT_BRACE) ++nested; + Get(); + if(la->kind == _RIGHT_BRACE) --nested; + else if(la->kind == _EOF) break; + //print("==2", la->line, la->kind, la->val, nested); + } + } + .) + {ANY} '}' + . + +grammar_declaration = + symbol_declaration + | "%union" [union_name] params + | "%start" symbol + | code_props_type params generic_symlist + | "%default-prec" + | "%no-default-prec" + | "%code" [ID] params + . + +code_props_type = + "%destructor" + | "%printer" + . + +generic_symlist = + generic_symlist_item {generic_symlist_item} + . + +generic_symlist_item = + symbol + | tag + . + +union_name = + ID | tag + . + +symbol_declaration = + "%nterm" nterm_decls + | "%token" token_decls + | "%term" symbol_decls + | "%type" symbol_decls + | precedence_declarator token_decls_for_prec + . + +nterm_decls = + token_decls + . + +token_decls = + [tag] token_decl_1 {token_decl_1} + . + +token_decl_1 = + token_decl + . 
+ +token_decl = + id [int_opt] [alias] + . + +int_opt = + INT_LITERAL + . + +alias = + string_as_id + | "_(" STRING ')' //TSTRING + . + +symbol_decls = + [tag] symbol_decl_1 {symbol_decl_1} + . + +symbol_decl_1 = + symbol + . + +precedence_declarator = + "%left" + | "%right" + | "%nonassoc" + | "%precedence" + | "%binary" + . + +token_decls_for_prec = + [tag] token_decl_for_prec_1 {token_decl_for_prec_1} + . + +// One or more token declarations for precedence declaration. +token_decl_for_prec_1 = + token_decl_for_prec + . + +token_decl_for_prec = + id [int_opt] + | string_as_id + . + +grammar = + rules_or_grammar_declaration {rules_or_grammar_declaration} + . + +rules_or_grammar_declaration = + rules + | grammar_declaration ";" + //| error ";" + . + +rules = + id_colon (. printf("%s ::= ", t->val); .) + [named_ref_opt | tag ] ":" rhses_1 (. printf("\n"); .) + . + +rhses_1 = + rhs { + '|' (. printf("| "); .) + rhs + } ';' + . + +rhs = + /*empty*/ (. printf("/*empty*/ "); .) + | "%empty" [params] + | rhs_symbol {rhs_symbol} + . + +rhs_symbol = + symbol (. printf("%s ", t->val); .) [named_ref_opt | tag] + | params + //| [tag] params //named_ref_opt + | "%?{" {ANY} '}' + | "%prec" symbol + | "%dprec" INT_LITERAL + | "%merge" tag + | "%expect" INT_LITERAL + | "%expect-rr" INT_LITERAL + . + +named_ref_opt = + '[' ID ']' //BRACKETED_ID + . + +epilogue = + "%%" {ANY} + . + +variable = + ID + . + +value = + ID + | STRING + | params + | INT_LITERAL + . + +id = + ID + | CHAR_LITERAL + . + +id_colon = + ID //':' + . + + +symbol = + id + | string_as_id + . + +string_as_id = + STRING + . + +tag = + '<' (. // manage nested angle brackets + if(la->kind != _RIGHT_ANGLE_BRACK) { + for (int nested = 1; nested > 0;) { + //print("==", la->line, nested, la->kind, la->val); + if(la->kind == _LEFT_ANGLE_BRACK) ++nested; + Get(); + if(la->kind == _RIGHT_ANGLE_BRACK) --nested; + else if(la->kind == _EOF) break; + } + } + .) + {ANY} '>' + . + + +END Bison. 
diff --git a/examples/build-cocobison.sh b/examples/build-cocobison.sh new file mode 100755 index 0000000..360c632 --- /dev/null +++ b/examples/build-cocobison.sh @@ -0,0 +1,4 @@ +../src/Coco -frames ../src bison.atg +g++ -g -Wall -o cocobison Parser.cpp Scanner.cpp cocobison.cpp +#./cocobison "postgresql-13.3/src/backend/parser/gram.y" + diff --git a/examples/cocobison.cpp b/examples/cocobison.cpp new file mode 100644 index 0000000..b7533b2 --- /dev/null +++ b/examples/cocobison.cpp @@ -0,0 +1,28 @@ +#include "Scanner.h" +#include "Parser.h" + +using namespace CocoBison; + +int main (int argc, char *argv[]) { + + if (argc == 2) { + wchar_t *fileName = coco_string_create(argv[1]); + CocoBison::Scanner scanner(fileName); + CocoBison::Parser parser(&scanner); + parser.Parse(); + if(parser.errors->count == 0) { +#ifdef PARSER_WITH_AST + if(parser.ast_root) { + parser.ast_root->dump_all(); + //parser.ast_root->dump_pruned(); + } +#endif + } + + coco_string_delete(fileName); + } else + wprintf(_SC("-- No source file specified\n")); + + return 0; + +} diff --git a/examples/readme-cocobison.txt b/examples/readme-cocobison.txt new file mode 100644 index 0000000..6b79e16 --- /dev/null +++ b/examples/readme-cocobison.txt @@ -0,0 +1,5 @@ +This example uses a bison grammar to generate an EBNF output (understood by https://www.bottlecaps.de/rr/ui) from an input bison parser file description. 
+ +Example: + +./cocobison postgresql-13.3/src/backend/parser/gram.y From 223c07949ceb78c209132b2aad1173bf80a7689d Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 09:44:37 +0200 Subject: [PATCH 84/95] Add the suffix "_NT" to non terminal generated functions to minimize name collision --- src/Parser.cpp | 118 +++++++++++++------------- src/Parser.h | 36 ++++---- src/ParserGen.cpp | 8 +- src/TestSuite/TestAlts_Parser.cpp | 8 +- src/TestSuite/TestAny1_Parser.cpp | 20 ++--- src/TestSuite/TestAny_Parser.cpp | 20 ++--- src/TestSuite/TestCasing_Parser.cpp | 4 +- src/TestSuite/TestChars_Parser.cpp | 4 +- src/TestSuite/TestComments_Parser.cpp | 4 +- src/TestSuite/TestDel_Parser.cpp | 26 +++--- src/TestSuite/TestEps_Parser.cpp | 4 +- src/TestSuite/TestIters_Parser.cpp | 4 +- src/TestSuite/TestLL1_Parser.cpp | 46 +++++----- src/TestSuite/TestOpts1_Parser.cpp | 4 +- src/TestSuite/TestOpts_Parser.cpp | 8 +- src/TestSuite/TestResOK_Parser.cpp | 40 ++++----- src/TestSuite/TestSem_Parser.cpp | 16 ++-- src/TestSuite/TestSync_Parser.cpp | 8 +- src/TestSuite/TestTokens_Parser.cpp | 4 +- src/TestSuite/TestWeak_Parser.cpp | 16 ++-- 20 files changed, 199 insertions(+), 199 deletions(-) diff --git a/src/Parser.cpp b/src/Parser.cpp index 0e659c5..99c429a 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -118,7 +118,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Coco() { +void Parser::Coco_NT() { Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Coco"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); @@ -180,7 +180,7 @@ void Parser::Coco() { AstAddTerminal(); #endif while (la->kind == _ident) { - SetDecl(); + SetDecl_NT(); } } if (la->kind == 10 /* "TOKENS" */) { @@ -189,7 +189,7 @@ void Parser::Coco() { AstAddTerminal(); #endif while (la->kind == 
_ident || la->kind == _string || la->kind == _char) { - TokenDecl(Node::t); + TokenDecl_NT(Node::t); } } if (la->kind == 11 /* "PRAGMAS" */) { @@ -198,7 +198,7 @@ void Parser::Coco() { AstAddTerminal(); #endif while (la->kind == _ident || la->kind == _string || la->kind == _char) { - TokenDecl(Node::pr); + TokenDecl_NT(Node::pr); } } while (la->kind == 12 /* "COMMENTS" */) { @@ -211,12 +211,12 @@ void Parser::Coco() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g1); + TokenExpr_NT(g1); Expect(14 /* "TO" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g2); + TokenExpr_NT(g2); if (la->kind == 15 /* "NESTED" */) { Get(); #ifdef PARSER_WITH_AST @@ -231,7 +231,7 @@ void Parser::Coco() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Set(s); + Set_NT(s); tab->ignored->Or(s); delete s; } while (!(la->kind == _EOF || la->kind == 17 /* "PRODUCTIONS" */)) {SynErr(43); Get();} @@ -260,17 +260,17 @@ void Parser::Coco() { sym->attrPos = NULL; if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { - AttrDecl(sym); + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym->attrPos == NULL)) SemErr(_SC("attribute mismatch between declaration and use of this symbol")); if (la->kind == 40 /* "(." 
*/) { - SemText(sym->semPos); + SemText_NT(sym->semPos); } ExpectWeak(18 /* "=" */, 3); - Expression(g); + Expression_NT(g); sym->graph = g->l; tab->Finish(g); delete g; @@ -336,7 +336,7 @@ void Parser::Coco() { #endif } -void Parser::SetDecl() { +void Parser::SetDecl_NT() { CharSet *s; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_SetDecl, _SC("SetDecl"), la->line); @@ -353,7 +353,7 @@ void Parser::SetDecl() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Set(s); + Set_NT(s); if (s->Elements() == 0) SemErr(_SC("character set must not be empty")); tab->NewCharClass(name, s); coco_string_delete(name); @@ -367,12 +367,12 @@ void Parser::SetDecl() { #endif } -void Parser::TokenDecl(int typ) { +void Parser::TokenDecl_NT(int typ) { wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, _SC("TokenDecl"), la->line); #endif - Sym(name, kind); + Sym_NT(name, kind); sym = tab->FindSym(name); if (sym != NULL) SemErr(_SC("name declared twice")); else { @@ -388,7 +388,7 @@ void Parser::TokenDecl(int typ) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g); + TokenExpr_NT(g); Expect(19 /* "." */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -411,7 +411,7 @@ void Parser::TokenDecl(int typ) { } else SynErr(45); if (la->kind == 40 /* "(." 
*/) { - SemText(sym->semPos); + SemText_NT(sym->semPos); if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } #ifdef PARSER_WITH_AST @@ -419,15 +419,15 @@ void Parser::TokenDecl(int typ) { #endif } -void Parser::TokenExpr(Graph* &g) { +void Parser::TokenExpr_NT(Graph* &g) { Graph *g2; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenExpr, _SC("TokenExpr"), la->line); #endif - TokenTerm(g); + TokenTerm_NT(g); bool first = true; while (WeakSeparator(29 /* "|" */,8,7) ) { - TokenTerm(g2); + TokenTerm_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; @@ -437,26 +437,26 @@ void Parser::TokenExpr(Graph* &g) { #endif } -void Parser::Set(CharSet* &s) { +void Parser::Set_NT(CharSet* &s) { CharSet *s2; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, _SC("Set"), la->line); #endif - SimSet(s); + SimSet_NT(s); while (la->kind == 21 /* "+" */ || la->kind == 22 /* "-" */) { if (la->kind == 21 /* "+" */) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SimSet(s2); + SimSet_NT(s2); s->Or(s2); delete s2; } else { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - SimSet(s2); + SimSet_NT(s2); s->Subtract(s2); delete s2; } } @@ -465,7 +465,7 @@ void Parser::Set(CharSet* &s) { #endif } -void Parser::AttrDecl(Symbol *sym) { +void Parser::AttrDecl_NT(Symbol *sym) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, _SC("AttrDecl"), la->line); #endif @@ -521,7 +521,7 @@ void Parser::AttrDecl(Symbol *sym) { #endif } -void Parser::SemText(Position* &pos) { +void Parser::SemText_NT(Position* &pos) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, _SC("SemText"), la->line); #endif @@ -557,15 +557,15 @@ void Parser::SemText(Position* &pos) { #endif } -void Parser::Expression(Graph* &g) { +void Parser::Expression_NT(Graph* &g) { Graph *g2; #ifdef 
PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Expression, _SC("Expression"), la->line); #endif - Term(g); + Term_NT(g); bool first = true; while (WeakSeparator(29 /* "|" */,16,15) ) { - Term(g2); + Term_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; @@ -575,7 +575,7 @@ void Parser::Expression(Graph* &g) { #endif } -void Parser::SimSet(CharSet* &s) { +void Parser::SimSet_NT(CharSet* &s) { int n1, n2; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_SimSet, _SC("SimSet"), la->line); @@ -609,14 +609,14 @@ void Parser::SimSet(CharSet* &s) { coco_string_delete(name); } else if (la->kind == _char) { - Char(n1); + Char_NT(n1); s->Set(n1); if (la->kind == 23 /* ".." */) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Char(n2); + Char_NT(n2); for (int i = n1; i <= n2; i++) s->Set(i); } } else if (la->kind == 24 /* "ANY" */) { @@ -631,7 +631,7 @@ void Parser::SimSet(CharSet* &s) { #endif } -void Parser::Char(int &n) { +void Parser::Char_NT(int &n) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Char, _SC("Char"), la->line); #endif @@ -655,7 +655,7 @@ void Parser::Char(int &n) { #endif } -void Parser::Sym(wchar_t* &name, int &kind) { +void Parser::Sym_NT(wchar_t* &name, int &kind) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, _SC("Sym"), la->line); #endif @@ -699,7 +699,7 @@ void Parser::Sym(wchar_t* &name, int &kind) { #endif } -void Parser::Term(Graph* &g) { +void Parser::Term_NT(Graph* &g) { Graph *g2; Node *rslv = NULL; g = NULL; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, _SC("Term"), la->line); @@ -707,14 +707,14 @@ void Parser::Term(Graph* &g) { if (StartOf(17 /* opt */)) { if (la->kind == 38 /* "IF" */) { rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); - Resolver(rslv->pos); + Resolver_NT(rslv->pos); g = new Graph(rslv); } - Factor(g2); + Factor_NT(g2); 
if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; while (StartOf(18 /* nt */)) { - Factor(g2); + Factor_NT(g2); tab->MakeSequence(g, g2); delete g2; } } else if (StartOf(19 /* sem */)) { @@ -727,7 +727,7 @@ void Parser::Term(Graph* &g) { #endif } -void Parser::Resolver(Position* &pos) { +void Parser::Resolver_NT(Position* &pos) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Resolver, _SC("Resolver"), la->line); #endif @@ -740,14 +740,14 @@ void Parser::Resolver(Position* &pos) { AstAddTerminal(); #endif int beg = la->pos; int col = la->col; int line = la->line; - Condition(); + Condition_NT(); pos = new Position(beg, t->pos, col, line); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif } -void Parser::Factor(Graph* &g) { +void Parser::Factor_NT(Graph* &g) { wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; @@ -763,7 +763,7 @@ void Parser::Factor(Graph* &g) { #endif weak = true; } - Sym(name, kind); + Sym_NT(name, kind); Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) sym = (Symbol*)tab->literals[name]; @@ -791,7 +791,7 @@ void Parser::Factor(Graph* &g) { g = new Graph(p); if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." 
*/) { - Attribs(p); + Attribs_NT(p); if (kind != id) SemErr(_SC("a literal must not have attributes")); } if (undef) @@ -806,7 +806,7 @@ void Parser::Factor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Expression(g); + Expression_NT(g); Expect(32 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -818,7 +818,7 @@ void Parser::Factor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Expression(g); + Expression_NT(g); Expect(34 /* "]" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -831,7 +831,7 @@ void Parser::Factor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Expression(g); + Expression_NT(g); Expect(36 /* "}" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -840,7 +840,7 @@ void Parser::Factor(Graph* &g) { break; } case 40 /* "(." */: { - SemText(pos); + SemText_NT(pos); Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); p->pos = pos; g = new Graph(p); @@ -877,7 +877,7 @@ void Parser::Factor(Graph* &g) { #endif } -void Parser::Attribs(Node *p) { +void Parser::Attribs_NT(Node *p) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, _SC("Attribs"), la->line); #endif @@ -931,7 +931,7 @@ void Parser::Attribs(Node *p) { #endif } -void Parser::Condition() { +void Parser::Condition_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, _SC("Condition"), la->line); #endif @@ -941,7 +941,7 @@ void Parser::Condition() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Condition(); + Condition_NT(); } else { Get(); } @@ -955,14 +955,14 @@ void Parser::Condition() { #endif } -void Parser::TokenTerm(Graph* &g) { +void Parser::TokenTerm_NT(Graph* &g) { Graph *g2; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, _SC("TokenTerm"), la->line); #endif - TokenFactor(g); + TokenFactor_NT(g); while (StartOf(8 /* nt */)) { - TokenFactor(g2); + TokenFactor_NT(g2); tab->MakeSequence(g, g2); delete g2; } if (la->kind == 39 /* 
"CONTEXT" */) { @@ -974,7 +974,7 @@ void Parser::TokenTerm(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g2); + TokenExpr_NT(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; tab->MakeSequence(g, g2); delete g2; Expect(32 /* ")" */); @@ -987,14 +987,14 @@ void Parser::TokenTerm(Graph* &g) { #endif } -void Parser::TokenFactor(Graph* &g) { +void Parser::TokenFactor_NT(Graph* &g) { wchar_t* name = NULL; int kind; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, _SC("TokenFactor"), la->line); #endif g = NULL; if (la->kind == _ident || la->kind == _string || la->kind == _char) { - Sym(name, kind); + Sym_NT(name, kind); if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { @@ -1019,7 +1019,7 @@ void Parser::TokenFactor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g); + TokenExpr_NT(g); Expect(32 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -1029,7 +1029,7 @@ void Parser::TokenFactor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g); + TokenExpr_NT(g); Expect(34 /* "]" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -1040,7 +1040,7 @@ void Parser::TokenFactor(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - TokenExpr(g); + TokenExpr_NT(g); Expect(36 /* "}" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -1150,7 +1150,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Coco(); + Coco_NT(); Expect(0); } diff --git a/src/Parser.h b/src/Parser.h index d9b46f8..4e3a600 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -171,24 +171,24 @@ int id; ~Parser(); void SemErr(const wchar_t* msg); - void Coco(); - void SetDecl(); - void TokenDecl(int typ); - void TokenExpr(Graph* &g); - void Set(CharSet* &s); - void AttrDecl(Symbol *sym); - void SemText(Position* &pos); - void Expression(Graph* &g); - void SimSet(CharSet* &s); - void Char(int &n); - 
void Sym(wchar_t* &name, int &kind); - void Term(Graph* &g); - void Resolver(Position* &pos); - void Factor(Graph* &g); - void Attribs(Node *p); - void Condition(); - void TokenTerm(Graph* &g); - void TokenFactor(Graph* &g); + void Coco_NT(); + void SetDecl_NT(); + void TokenDecl_NT(int typ); + void TokenExpr_NT(Graph* &g); + void Set_NT(CharSet* &s); + void AttrDecl_NT(Symbol *sym); + void SemText_NT(Position* &pos); + void Expression_NT(Graph* &g); + void SimSet_NT(CharSet* &s); + void Char_NT(int &n); + void Sym_NT(wchar_t* &name, int &kind); + void Term_NT(Graph* &g); + void Resolver_NT(Position* &pos); + void Factor_NT(Graph* &g); + void Attribs_NT(Node *p); + void Condition_NT(); + void TokenTerm_NT(Graph* &g); + void TokenFactor_NT(Graph* &g); void Parse(); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index ce4f891..7e830b2 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -186,7 +186,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { while (p != NULL) { if (p->typ == Node::nt) { Indent(indent); - fwprintf(gen, _SC("%") _SFMT _SC("("), p->sym->name); + fwprintf(gen, _SC("%") _SFMT _SC("_NT("), p->sym->name); CopySourcePart(p->pos, 0); fputws(_SC(");\n"), gen); } else if (p->typ == Node::t) { @@ -376,7 +376,7 @@ void ParserGen::GenProductionsHeader() { for (int i=0; i<tab->nonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, _SC("\tvoid %") _SFMT _SC("("), sym->name); + fwprintf(gen, _SC("\tvoid %") _SFMT _SC("_NT("), sym->name); CopySourcePart(sym->attrPos, 0); fputws(_SC(");\n"), gen); } @@ -388,7 +388,7 @@ void ParserGen::GenProductions() { for (int i=0; i<tab->nonterminals.Count; i++) { sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, _SC("void Parser::%") _SFMT _SC("("), sym->name); + fwprintf(gen, _SC("void Parser::%") _SFMT _SC("_NT("), sym->name); CopySourcePart(sym->attrPos, 0); fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 2); @@ -568,7 +568,7 @@ void 
ParserGen::WriteParser () { g.CopyFramePart(_SC("-->pragmas")); GenCodePragmas(); g.CopyFramePart(_SC("-->productions")); GenProductions(); - g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%") _SFMT _SC("();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); + g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%") _SFMT _SC("_NT();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); g.CopyFramePart(_SC("-->constants")); fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count-1); g.CopyFramePart(_SC("-->initialization")); InitSets(); diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp index 736418e..b1d9e03 100644 --- a/src/TestSuite/TestAlts_Parser.cpp +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -119,7 +119,7 @@ void Parser::Test() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - A(); + A_NT(); Expect(_b); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -129,7 +129,7 @@ void Parser::Test() { #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -262,7 +262,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp index 0b22ae2..51f2a5e 100644 --- a/src/TestSuite/TestAny1_Parser.cpp +++ b/src/TestSuite/TestAny1_Parser.cpp @@ -111,20 +111,20 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { 
+void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - B(); - C(); - D(); + A_NT(); + B_NT(); + C_NT(); + D_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -138,7 +138,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -152,7 +152,7 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -165,7 +165,7 @@ void Parser::C() { #endif } -void Parser::D() { +void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif @@ -274,7 +274,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp index 4f7f544..7c052e9 100644 --- a/src/TestSuite/TestAny_Parser.cpp +++ b/src/TestSuite/TestAny_Parser.cpp @@ -111,20 +111,20 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - B(); - C(); - D(); + A_NT(); + B_NT(); + C_NT(); + D_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { 
#ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -152,7 +152,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -172,7 +172,7 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -192,7 +192,7 @@ void Parser::C() { #endif } -void Parser::D() { +void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif @@ -311,7 +311,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp index 2883b71..7517581 100644 --- a/src/TestSuite/TestCasing_Parser.cpp +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -260,7 +260,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp index b441de6..357840a 100644 --- a/src/TestSuite/TestChars_Parser.cpp +++ b/src/TestSuite/TestChars_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = 
eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -220,7 +220,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp index b441de6..357840a 100644 --- a/src/TestSuite/TestComments_Parser.cpp +++ b/src/TestSuite/TestComments_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -220,7 +220,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp index b9f67f5..4e2b63e 100644 --- a/src/TestSuite/TestDel_Parser.cpp +++ b/src/TestSuite/TestDel_Parser.cpp @@ -111,28 +111,28 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - B(); + A_NT(); + B_NT(); Expect(_g); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - C(); + C_NT(); Expect(_g); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - D(); + D_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, 
_SC("A"), la->line); #endif @@ -160,7 +160,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -188,23 +188,23 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif - A(); - B(); + A_NT(); + B_NT(); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif } -void Parser::D() { +void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif if (StartOf(2 /* nt */)) { - C(); + C_NT(); } else if (la->kind == _h) { Get(); #ifdef PARSER_WITH_AST @@ -312,7 +312,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp index 51a567e..bf44ae1 100644 --- a/src/TestSuite/TestEps_Parser.cpp +++ b/src/TestSuite/TestEps_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -247,7 +247,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp index b243feb..b1998b6 100644 --- a/src/TestSuite/TestIters_Parser.cpp +++ b/src/TestSuite/TestIters_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef 
PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -267,7 +267,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp index 6aad760..7b9898a 100644 --- a/src/TestSuite/TestLL1_Parser.cpp +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -111,23 +111,23 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - E(); - C(); - G(); - H(); - I(); - J(); + A_NT(); + E_NT(); + C_NT(); + G_NT(); + H_NT(); + I_NT(); + J_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -137,19 +137,19 @@ void Parser::A() { AstAddTerminal(); #endif } else if (la->kind == _a || la->kind == _b || la->kind == _c) { - B(); + B_NT(); } else SynErr(11); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif } -void Parser::E() { +void Parser::E_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); #endif if (la->kind == _e || la->kind == _f) { - F(); + F_NT(); } else if (la->kind == _e) { } else SynErr(12); Expect(_e); @@ -161,7 +161,7 @@ void Parser::E() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -172,15 +172,15 @@ void Parser::C() { 
#endif } if (la->kind == _d) { - D(); + D_NT(); } - B(); + B_NT(); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif } -void Parser::G() { +void Parser::G_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); #endif @@ -213,7 +213,7 @@ void Parser::G() { #endif } -void Parser::H() { +void Parser::H_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); #endif @@ -244,7 +244,7 @@ void Parser::H() { #endif } -void Parser::I() { +void Parser::I_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); #endif @@ -277,7 +277,7 @@ void Parser::I() { #endif } -void Parser::J() { +void Parser::J_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_J, _SC("J"), la->line); #endif @@ -316,7 +316,7 @@ void Parser::J() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -342,7 +342,7 @@ void Parser::B() { #endif } -void Parser::D() { +void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif @@ -361,7 +361,7 @@ void Parser::D() { #endif } -void Parser::F() { +void Parser::F_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); #endif @@ -472,7 +472,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp index f488af1..b4c80c4 100644 --- a/src/TestSuite/TestOpts1_Parser.cpp +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); 
ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -224,7 +224,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp index bf815ca..c00bfb5 100644 --- a/src/TestSuite/TestOpts_Parser.cpp +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -133,7 +133,7 @@ void Parser::Test() { #endif } else if (la->kind == _EOF || la->kind == _e) { if (la->kind == _e) { - Del(); + Del_NT(); } } else if (la->kind == _d) { Get(); @@ -164,7 +164,7 @@ void Parser::Test() { #endif } -void Parser::Del() { +void Parser::Del_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Del, _SC("Del"), la->line); #endif @@ -275,7 +275,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp index 0c35b53..2380683 100644 --- a/src/TestSuite/TestResOK_Parser.cpp +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -111,28 +111,28 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); 
ast_stack.Add(ast_root); #endif if (la->kind == 1 /* "a" */) { - A(); - B(); - C(); - D(); - E(); - F(); - G(); - H(); + A_NT(); + B_NT(); + C_NT(); + D_NT(); + E_NT(); + F_NT(); + G_NT(); + H_NT(); } else if (la->kind == _EOF || la->kind == 2 /* "b" */ || la->kind == 3 /* "c" */) { - I(); + I_NT(); } else SynErr(6); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -165,7 +165,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -190,7 +190,7 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -214,7 +214,7 @@ void Parser::C() { #endif } -void Parser::D() { +void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif @@ -244,7 +244,7 @@ void Parser::D() { #endif } -void Parser::E() { +void Parser::E_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); #endif @@ -286,7 +286,7 @@ void Parser::E() { #endif } -void Parser::F() { +void Parser::F_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); #endif @@ -320,7 +320,7 @@ void Parser::F() { #endif } -void Parser::G() { +void Parser::G_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); #endif @@ -359,7 +359,7 @@ void Parser::G() { #endif } -void Parser::H() { +void Parser::H_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); #endif @@ -398,7 +398,7 @@ void Parser::H() { #endif } -void Parser::I() { +void Parser::I_NT() { #ifdef PARSER_WITH_AST bool 
ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); #endif @@ -518,7 +518,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestSem_Parser.cpp b/src/TestSuite/TestSem_Parser.cpp index a0d3231..01d6a14 100644 --- a/src/TestSuite/TestSem_Parser.cpp +++ b/src/TestSuite/TestSem_Parser.cpp @@ -114,20 +114,20 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { decl #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - B(); - C(); + A_NT(); + B_NT(); + C_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -151,7 +151,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -174,7 +174,7 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -296,7 +296,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp index f3bd930..a632726 100644 --- a/src/TestSuite/TestSync_Parser.cpp +++ b/src/TestSuite/TestSync_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = 
eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -137,13 +137,13 @@ void Parser::Test() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - A(); + A_NT(); #ifdef PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -263,7 +263,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp index bacd956..39454fb 100644 --- a/src/TestSuite/TestTokens_Parser.cpp +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -111,7 +111,7 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif @@ -252,7 +252,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp index 7628add..896576a 100644 --- a/src/TestSuite/TestWeak_Parser.cpp +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -111,19 +111,19 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Test() { +void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - A(); - B(); - C(); + A_NT(); + B_NT(); + C_NT(); #ifdef 
PARSER_WITH_AST AstPopNonTerminal(); #endif } -void Parser::A() { +void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif @@ -141,7 +141,7 @@ void Parser::A() { #endif } -void Parser::B() { +void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif @@ -164,7 +164,7 @@ void Parser::B() { #endif } -void Parser::C() { +void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif @@ -279,7 +279,7 @@ void Parser::Parse() { la = dummyToken = new Token(); la->val = coco_string_create(_SC("Dummy Token")); Get(); - Test(); + Test_NT(); Expect(0); } From 8fd041a301a63fba9511b7f85fe7f54a276afdbc Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 11:29:34 +0200 Subject: [PATCH 85/95] Add token inheritance from https://github.com/Lercher/CocoR --- src/Coco.atg | 52 +++-- src/Parser.cpp | 297 ++++++++++++++------------ src/Parser.frame | 17 +- src/Parser.h | 5 +- src/ParserGen.cpp | 32 ++- src/ParserGen.h | 1 + src/Scanner.cpp | 95 ++++---- src/Symbol.cpp | 19 +- src/Symbol.h | 2 +- src/TestSuite/TestAlts_Parser.cpp | 31 ++- src/TestSuite/TestAny1_Parser.cpp | 21 +- src/TestSuite/TestAny_Parser.cpp | 25 ++- src/TestSuite/TestCasing_Parser.cpp | 21 +- src/TestSuite/TestChars_Parser.cpp | 21 +- src/TestSuite/TestComments_Parser.cpp | 21 +- src/TestSuite/TestDel_Parser.cpp | 37 ++-- src/TestSuite/TestEps_Parser.cpp | 31 ++- src/TestSuite/TestIters_Parser.cpp | 39 ++-- src/TestSuite/TestLL1_Parser.cpp | 65 +++--- src/TestSuite/TestOpts1_Parser.cpp | 25 ++- src/TestSuite/TestOpts_Parser.cpp | 41 ++-- src/TestSuite/TestResOK_Parser.cpp | 65 +++--- src/TestSuite/TestSem_Parser.cpp | 33 ++- src/TestSuite/TestSync_Parser.cpp | 29 ++- src/TestSuite/TestTokens_Parser.cpp | 21 +- src/TestSuite/TestWeak_Parser.cpp | 21 +- 26 files changed, 681 insertions(+), 386 deletions(-) diff 
--git a/src/Coco.atg b/src/Coco.atg index d358af3..924c235 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -6,24 +6,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -117,9 +117,9 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line); } .) - - "COMPILER" (. genScanner = true; - tab->ignored = new CharSet(); .) + + "COMPILER" (. genScanner = true; + tab->ignored = new CharSet(); .) ident (. gramName = coco_string_create(t->val); beg = la->pos; line = la->line; @@ -137,7 +137,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra [ "TOKENS" { TokenDecl }] [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) - "FROM" TokenExpr + "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) ] (. dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; .) @@ -145,7 +145,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra { "IGNORE" Set (. tab->ignored->Or(s); delete s; .) } - SYNC + SYNC "PRODUCTIONS" (. if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); .) @@ -292,7 +292,7 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .) +TokenDecl (. wchar_t* name = NULL; int kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; .) = Sym (. sym = tab->FindSym(name); if (sym != NULL) SemErr(_SC("name declared twice")); @@ -303,6 +303,14 @@ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; coco_string_delete(name); coco_string_delete(tokenString); .) + [ ':' Sym + (. inheritsSym = tab->FindSym(name); + if (inheritsSym == NULL) SemErr(_SC("token can't inherit from unddeclared name")); + else if (inheritsSym == sym) SemErr(_SC("token can not inherit from itself")); + else if (inheritsSym->typ != typ) SemErr(_SC("token can't inherit from different token type")); + else sym->inherits = inheritsSym; + .) + ] SYNC ( '=' TokenExpr '.' (. 
if (kind == str) SemErr(_SC("a literal must not be declared with a structure")); tab->Finish(g); @@ -345,7 +353,7 @@ AttrDecl /*------------------------------------------------------------------------------------*/ Expression (. Graph *g2; .) -= += Term (. bool first = true; .) { WEAK '|' @@ -373,7 +381,7 @@ Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) /*------------------------------------------------------------------------------------*/ -Factor (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false; +Factor (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; .) = @@ -386,7 +394,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; if (undef) { if (kind == id) sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt - else if (genScanner) { + else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production @@ -431,7 +439,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; /*------------------------------------------------------------------------------------*/ -Resolver +Resolver = "IF" "(" (. int beg = la->pos; int col = la->col; int line = la->line; .) Condition (. pos = new Position(beg, t->pos, col, line); .) @@ -528,12 +536,12 @@ Sym Attribs = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) - { ANY + { ANY | badString (. SemErr(_SC("bad string in attributes")); .) } '>' (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) - { ANY + { ANY | badString (. SemErr(_SC("bad string in attributes")); .) } ".>" (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) @@ -552,5 +560,5 @@ SemText . /*------------------------------------------------------------------------------------*/ - + END Coco. 
diff --git a/src/Parser.cpp b/src/Parser.cpp index 99c429a..e8c5a35 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -94,12 +94,27 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[44] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -107,7 +122,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -135,7 +150,7 @@ void Parser::Coco_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - genScanner = true; + genScanner = true; tab->ignored = new CharSet(); Expect(_ident); #ifdef PARSER_WITH_AST @@ -149,19 +164,19 @@ void Parser::Coco_NT() { Get(); } tab->semDeclPos = new Position(beg, la->pos, 0, line); - if (la->kind == 7 /* "IGNORECASE" */) { + if (IsKind(la, 7 /* "IGNORECASE" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif dfa->ignoreCase = true; } - if (la->kind == 8 /* "TERMINALS" */) { + if (IsKind(la, 8 /* "TERMINALS" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == _ident) { + while (IsKind(la, _ident)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -174,34 +189,34 @@ void Parser::Coco_NT() { } } } - if (la->kind == 9 /* "CHARACTERS" */) { + if (IsKind(la, 9 /* "CHARACTERS" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == _ident) { + while 
(IsKind(la, _ident)) { SetDecl_NT(); } } - if (la->kind == 10 /* "TOKENS" */) { + if (IsKind(la, 10 /* "TOKENS" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == _ident || la->kind == _string || la->kind == _char) { + while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { TokenDecl_NT(Node::t); } } - if (la->kind == 11 /* "PRAGMAS" */) { + if (IsKind(la, 11 /* "PRAGMAS" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == _ident || la->kind == _string || la->kind == _char) { + while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { TokenDecl_NT(Node::pr); } } - while (la->kind == 12 /* "COMMENTS" */) { + while (IsKind(la, 12 /* "COMMENTS" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -217,7 +232,7 @@ void Parser::Coco_NT() { AstAddTerminal(); #endif TokenExpr_NT(g2); - if (la->kind == 15 /* "NESTED" */) { + if (IsKind(la, 15 /* "NESTED" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -226,7 +241,7 @@ void Parser::Coco_NT() { } dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; } - while (la->kind == 16 /* "IGNORE" */) { + while (IsKind(la, 16 /* "IGNORE" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -234,7 +249,7 @@ void Parser::Coco_NT() { Set_NT(s); tab->ignored->Or(s); delete s; } - while (!(la->kind == _EOF || la->kind == 17 /* "PRODUCTIONS" */)) {SynErr(43); Get();} + while (!(IsKind(la, _EOF) || IsKind(la, 17 /* "PRODUCTIONS" */))) {SynErr(44); Get();} Expect(17 /* "PRODUCTIONS" */); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -242,7 +257,7 @@ void Parser::Coco_NT() { if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); - while (la->kind == _ident) { + while (IsKind(la, _ident)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -259,14 +274,14 @@ void Parser::Coco_NT() { bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; - if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." 
*/) { + if (IsKind(la, 26 /* "<" */) || IsKind(la, 28 /* "<." */)) { AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym->attrPos == NULL)) SemErr(_SC("attribute mismatch between declaration and use of this symbol")); - if (la->kind == 40 /* "(." */) { + if (IsKind(la, 41 /* "(." */)) { SemText_NT(sym->semPos); } ExpectWeak(18 /* "=" */, 3); @@ -368,7 +383,7 @@ void Parser::SetDecl_NT() { } void Parser::TokenDecl_NT(int typ) { - wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; + wchar_t* name = NULL; int kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, _SC("TokenDecl"), la->line); #endif @@ -382,8 +397,21 @@ void Parser::TokenDecl_NT(int typ) { coco_string_delete(name); coco_string_delete(tokenString); - while (!(StartOf(5 /* sync */))) {SynErr(44); Get();} - if (la->kind == 18 /* "=" */) { + if (IsKind(la, 25 /* ":" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Sym_NT(name, kindInherits); + inheritsSym = tab->FindSym(name); + if (inheritsSym == NULL) SemErr(_SC("token can't inherit from unddeclared name")); + else if (inheritsSym == sym) SemErr(_SC("token can not inherit from itself")); + else if (inheritsSym->typ != typ) SemErr(_SC("token can't inherit from different token type")); + else sym->inherits = inheritsSym; + + } + while (!(StartOf(5 /* sync */))) {SynErr(45); Get();} + if (IsKind(la, 18 /* "=" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -409,8 +437,8 @@ void Parser::TokenDecl_NT(int typ) { if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); - } else SynErr(45); - if (la->kind == 40 /* "(." */) { + } else SynErr(46); + if (IsKind(la, 41 /* "(." 
*/)) { SemText_NT(sym->semPos); if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } @@ -426,7 +454,7 @@ void Parser::TokenExpr_NT(Graph* &g) { #endif TokenTerm_NT(g); bool first = true; - while (WeakSeparator(29 /* "|" */,8,7) ) { + while (WeakSeparator(30 /* "|" */,8,7) ) { TokenTerm_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; @@ -443,8 +471,8 @@ void Parser::Set_NT(CharSet* &s) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, _SC("Set"), la->line); #endif SimSet_NT(s); - while (la->kind == 21 /* "+" */ || la->kind == 22 /* "-" */) { - if (la->kind == 21 /* "+" */) { + while (IsKind(la, 21 /* "+" */) || IsKind(la, 22 /* "-" */)) { + if (IsKind(la, 21 /* "+" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -469,7 +497,7 @@ void Parser::AttrDecl_NT(Symbol *sym) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, _SC("AttrDecl"), la->line); #endif - if (la->kind == 25 /* "<" */) { + if (IsKind(la, 26 /* "<" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -486,13 +514,13 @@ void Parser::AttrDecl_NT(Symbol *sym) { SemErr(_SC("bad string in attributes")); } } - Expect(26 /* ">" */); + Expect(27 /* ">" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else if (la->kind == 27 /* "<." */) { + } else if (IsKind(la, 28 /* "<." 
*/)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -509,13 +537,13 @@ void Parser::AttrDecl_NT(Symbol *sym) { SemErr(_SC("bad string in attributes")); } } - Expect(28 /* ".>" */); + Expect(29 /* ".>" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else SynErr(46); + } else SynErr(47); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -525,7 +553,7 @@ void Parser::SemText_NT(Position* &pos) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, _SC("SemText"), la->line); #endif - Expect(40 /* "(." */); + Expect(41 /* "(." */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif @@ -533,7 +561,7 @@ void Parser::SemText_NT(Position* &pos) { while (StartOf(13 /* alt */)) { if (StartOf(14 /* any */)) { Get(); - } else if (la->kind == _badString) { + } else if (IsKind(la, _badString)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -547,7 +575,7 @@ void Parser::SemText_NT(Position* &pos) { SemErr(_SC("missing end of previous semantic action")); } } - Expect(41 /* ".)" */); + Expect(42 /* ".)" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif @@ -564,7 +592,7 @@ void Parser::Expression_NT(Graph* &g) { #endif Term_NT(g); bool first = true; - while (WeakSeparator(29 /* "|" */,16,15) ) { + while (WeakSeparator(30 /* "|" */,16,15) ) { Term_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); delete g2; @@ -581,7 +609,7 @@ void Parser::SimSet_NT(CharSet* &s) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_SimSet, _SC("SimSet"), la->line); #endif s = new CharSet(); - if (la->kind == _ident) { + if (IsKind(la, _ident)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -589,7 +617,7 @@ void Parser::SimSet_NT(CharSet* &s) { CharClass *c = tab->FindCharClass(t->val); if (c == NULL) SemErr(_SC("undefined name")); else s->Or(c->set); - } else if (la->kind == _string) { + } else if (IsKind(la, _string)) { 
Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -608,10 +636,10 @@ void Parser::SimSet_NT(CharSet* &s) { } coco_string_delete(name); - } else if (la->kind == _char) { + } else if (IsKind(la, _char)) { Char_NT(n1); s->Set(n1); - if (la->kind == 23 /* ".." */) { + if (IsKind(la, 23 /* ".." */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -619,13 +647,13 @@ void Parser::SimSet_NT(CharSet* &s) { Char_NT(n2); for (int i = n1; i <= n2; i++) s->Set(i); } - } else if (la->kind == 24 /* "ANY" */) { + } else if (IsKind(la, 24 /* "ANY" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif delete s; s = new CharSet(); s->Fill(); - } else SynErr(47); + } else SynErr(48); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -660,14 +688,14 @@ void Parser::Sym_NT(wchar_t* &name, int &kind) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, _SC("Sym"), la->line); #endif name = coco_string_create(_SC("???")); kind = id; - if (la->kind == _ident) { + if (IsKind(la, _ident)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif kind = id; coco_string_delete(name); name = coco_string_create(t->val); - } else if (la->kind == _string || la->kind == _char) { - if (la->kind == _string) { + } else if (IsKind(la, _string) || IsKind(la, _char)) { + if (IsKind(la, _string)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -693,7 +721,7 @@ void Parser::Sym_NT(wchar_t* &name, int &kind) { } if (coco_string_indexof(name, ' ') >= 0) SemErr(_SC("literal tokens must not contain blanks")); - } else SynErr(48); + } else SynErr(49); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -705,7 +733,7 @@ void Parser::Term_NT(Graph* &g) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, _SC("Term"), la->line); #endif if (StartOf(17 /* opt */)) { - if (la->kind == 38 /* "IF" */) { + if (IsKind(la, 39 /* "IF" */)) { rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); Resolver_NT(rslv->pos); g = new Graph(rslv); @@ -719,7 +747,7 
@@ void Parser::Term_NT(Graph* &g) { } } else if (StartOf(19 /* sem */)) { g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); - } else SynErr(49); + } else SynErr(50); if (g == NULL) // invalid start of Term g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); #ifdef PARSER_WITH_AST @@ -731,11 +759,11 @@ void Parser::Resolver_NT(Position* &pos) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Resolver, _SC("Resolver"), la->line); #endif - Expect(38 /* "IF" */); + Expect(39 /* "IF" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Expect(31 /* "(" */); + Expect(32 /* "(" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif @@ -748,15 +776,15 @@ void Parser::Resolver_NT(Position* &pos) { } void Parser::Factor_NT(Graph* &g) { - wchar_t* name = NULL; int kind; Position *pos; bool weak = false; + wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Factor, _SC("Factor"), la->line); #endif switch (la->kind) { - case _ident: case _string: case _char: case 30 /* "WEAK" */: { - if (la->kind == 30 /* "WEAK" */) { + case _ident: case _string: case _char: case 31 /* "WEAK" */: { + if (IsKind(la, 31 /* "WEAK" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -771,7 +799,7 @@ void Parser::Factor_NT(Graph* &g) { if (undef) { if (kind == id) sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt - else if (genScanner) { + else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production @@ -790,7 +818,7 @@ void Parser::Factor_NT(Graph* &g) { Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); - if (la->kind == 25 /* "<" */ || la->kind == 27 /* "<." */) { + if (IsKind(la, 26 /* "<" */) || IsKind(la, 28 /* "<." 
*/)) { Attribs_NT(p); if (kind != id) SemErr(_SC("a literal must not have attributes")); } @@ -801,45 +829,45 @@ void Parser::Factor_NT(Graph* &g) { break; } - case 31 /* "(" */: { + case 32 /* "(" */: { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif Expression_NT(g); - Expect(32 /* ")" */); + Expect(33 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif break; } - case 33 /* "[" */: { + case 34 /* "[" */: { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif Expression_NT(g); - Expect(34 /* "]" */); + Expect(35 /* "]" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif tab->MakeOption(g); break; } - case 35 /* "{" */: { + case 36 /* "{" */: { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif Expression_NT(g); - Expect(36 /* "}" */); + Expect(37 /* "}" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif tab->MakeIteration(g); break; } - case 40 /* "(." */: { + case 41 /* "(." */: { SemText_NT(pos); Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); p->pos = pos; @@ -857,7 +885,7 @@ void Parser::Factor_NT(Graph* &g) { break; } - case 37 /* "SYNC" */: { + case 38 /* "SYNC" */: { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -867,7 +895,7 @@ void Parser::Factor_NT(Graph* &g) { break; } - default: SynErr(50); break; + default: SynErr(51); break; } if (g == NULL) // invalid start of Factor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); @@ -881,7 +909,7 @@ void Parser::Attribs_NT(Node *p) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, _SC("Attribs"), la->line); #endif - if (la->kind == 25 /* "<" */) { + if (IsKind(la, 26 /* "<" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -898,12 +926,12 @@ void Parser::Attribs_NT(Node *p) { SemErr(_SC("bad string in attributes")); } } - Expect(26 /* ">" */); + Expect(27 /* ">" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else if (la->kind == 27 /* "<." 
*/) { + } else if (IsKind(la, 28 /* "<." */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -920,12 +948,12 @@ void Parser::Attribs_NT(Node *p) { SemErr(_SC("bad string in attributes")); } } - Expect(28 /* ".>" */); + Expect(29 /* ".>" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else SynErr(51); + } else SynErr(52); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -936,7 +964,7 @@ void Parser::Condition_NT() { bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, _SC("Condition"), la->line); #endif while (StartOf(20 /* alt */)) { - if (la->kind == 31 /* "(" */) { + if (IsKind(la, 32 /* "(" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -946,7 +974,7 @@ void Parser::Condition_NT() { Get(); } } - Expect(32 /* ")" */); + Expect(33 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif @@ -965,19 +993,19 @@ void Parser::TokenTerm_NT(Graph* &g) { TokenFactor_NT(g2); tab->MakeSequence(g, g2); delete g2; } - if (la->kind == 39 /* "CONTEXT" */) { + if (IsKind(la, 40 /* "CONTEXT" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Expect(31 /* "(" */); + Expect(32 /* "(" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif TokenExpr_NT(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; tab->MakeSequence(g, g2); delete g2; - Expect(32 /* ")" */); + Expect(33 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif @@ -993,7 +1021,7 @@ void Parser::TokenFactor_NT(Graph* &g) { bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, _SC("TokenFactor"), la->line); #endif g = NULL; - if (la->kind == _ident || la->kind == _string || la->kind == _char) { + if (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { Sym_NT(name, kind); if (kind == id) { CharClass *c = tab->FindCharClass(name); @@ -1014,39 +1042,39 @@ void Parser::TokenFactor_NT(Graph* &g) { } coco_string_delete(name); - } else if (la->kind == 31 /* "(" */) 
{ + } else if (IsKind(la, 32 /* "(" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif TokenExpr_NT(g); - Expect(32 /* ")" */); + Expect(33 /* ")" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 33 /* "[" */) { + } else if (IsKind(la, 34 /* "[" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif TokenExpr_NT(g); - Expect(34 /* "]" */); + Expect(35 /* "]" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); - } else if (la->kind == 35 /* "{" */) { + } else if (IsKind(la, 36 /* "{" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif TokenExpr_NT(g); - Expect(36 /* "}" */); + Expect(37 /* "}" */); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); - } else SynErr(52); + } else SynErr(53); if (g == NULL) // invalid start of TokenFactor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); #ifdef PARSER_WITH_AST @@ -1155,7 +1183,7 @@ void Parser::Parse() { } Parser::Parser(Scanner *scanner) { - maxT = 42; + maxT = 43; ParserInitCaller::CallInit(this); dummyToken = NULL; @@ -1170,28 +1198,28 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[21][44] = { - {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, - {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {x,T,T,T, T,T,T,x, x,x,x,x, x,T,T,T, x,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,T, x,x,x,x, T,x,x,x, x,T,T,T, x,T,x,T, x,T,T,x, T,x,x,x}, - {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, - {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, - {x,T,x,T, 
x,T,x,x, x,x,x,T, T,x,x,x, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,x,T,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, T,x,x,x, x,T,T,T, T,T,T,T, T,T,T,x, T,x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,T,T, x,T,x,T, x,T,T,x, T,x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,T,T, x,T,x,T, x,T,x,x, T,x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,T,x,x, T,x,T,x, T,x,x,x, x,x,x,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,T,T, T,T,T,T, T,T,T,x} + static const bool set[21][45] = { + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,x, x,x,x,x, x,T,T,T, x,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,T, x,x,x,x, T,x,x,x, x,x,T,T, T,x,T,x, T,x,T,T, x,T,x,x, x}, + 
{T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,x,T, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, T,x,x,x, x,x,T,T, T,T,T,T, T,T,T,T, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,x,T, T,x,T,x, T,x,T,T, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,x,T, T,x,T,x, T,x,T,x, x,T,x,x, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,T,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, x} }; @@ -1248,34 +1276,35 @@ void Errors::SynErr(int line, int col, int n) { case 22: s = _SC("\"-\" expected"); break; case 23: s = _SC("\"..\" expected"); break; case 24: s = _SC("\"ANY\" expected"); 
break; - case 25: s = _SC("\"<\" expected"); break; - case 26: s = _SC("\">\" expected"); break; - case 27: s = _SC("\"<.\" expected"); break; - case 28: s = _SC("\".>\" expected"); break; - case 29: s = _SC("\"|\" expected"); break; - case 30: s = _SC("\"WEAK\" expected"); break; - case 31: s = _SC("\"(\" expected"); break; - case 32: s = _SC("\")\" expected"); break; - case 33: s = _SC("\"[\" expected"); break; - case 34: s = _SC("\"]\" expected"); break; - case 35: s = _SC("\"{\" expected"); break; - case 36: s = _SC("\"}\" expected"); break; - case 37: s = _SC("\"SYNC\" expected"); break; - case 38: s = _SC("\"IF\" expected"); break; - case 39: s = _SC("\"CONTEXT\" expected"); break; - case 40: s = _SC("\"(.\" expected"); break; - case 41: s = _SC("\".)\" expected"); break; - case 42: s = _SC("??? expected"); break; - case 43: s = _SC("this symbol not expected in Coco"); break; - case 44: s = _SC("this symbol not expected in TokenDecl"); break; - case 45: s = _SC("invalid TokenDecl"); break; - case 46: s = _SC("invalid AttrDecl"); break; - case 47: s = _SC("invalid SimSet"); break; - case 48: s = _SC("invalid Sym"); break; - case 49: s = _SC("invalid Term"); break; - case 50: s = _SC("invalid Factor"); break; - case 51: s = _SC("invalid Attribs"); break; - case 52: s = _SC("invalid TokenFactor"); break; + case 25: s = _SC("\":\" expected"); break; + case 26: s = _SC("\"<\" expected"); break; + case 27: s = _SC("\">\" expected"); break; + case 28: s = _SC("\"<.\" expected"); break; + case 29: s = _SC("\".>\" expected"); break; + case 30: s = _SC("\"|\" expected"); break; + case 31: s = _SC("\"WEAK\" expected"); break; + case 32: s = _SC("\"(\" expected"); break; + case 33: s = _SC("\")\" expected"); break; + case 34: s = _SC("\"[\" expected"); break; + case 35: s = _SC("\"]\" expected"); break; + case 36: s = _SC("\"{\" expected"); break; + case 37: s = _SC("\"}\" expected"); break; + case 38: s = _SC("\"SYNC\" expected"); break; + case 39: s = _SC("\"IF\" 
expected"); break; + case 40: s = _SC("\"CONTEXT\" expected"); break; + case 41: s = _SC("\"(.\" expected"); break; + case 42: s = _SC("\".)\" expected"); break; + case 43: s = _SC("??? expected"); break; + case 44: s = _SC("this symbol not expected in Coco"); break; + case 45: s = _SC("this symbol not expected in TokenDecl"); break; + case 46: s = _SC("invalid TokenDecl"); break; + case 47: s = _SC("invalid AttrDecl"); break; + case 48: s = _SC("invalid SimSet"); break; + case 49: s = _SC("invalid Sym"); break; + case 50: s = _SC("invalid Term"); break; + case 51: s = _SC("invalid Factor"); break; + case 52: s = _SC("invalid Attribs"); break; + case 53: s = _SC("invalid TokenFactor"); break; default: { diff --git a/src/Parser.frame b/src/Parser.frame index 4163029..de13092 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -79,6 +79,7 @@ private: void SynErr(int n); void Get(); + bool IsKind(Token *t, int n); void Expect(int n); bool StartOf(int s); void ExpectWeak(int n, int follow); @@ -182,12 +183,22 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { +-->tbase + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -195,7 +206,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); diff --git a/src/Parser.h b/src/Parser.h index 4e3a600..dd236f4 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -79,8 +79,8 @@ class Parser { _string=3, _badString=4, _char=5, - _ddtSym=43, - _optionSym=44 + _ddtSym=44, + _optionSym=45, }; 
#ifdef PARSER_WITH_AST enum eNonTerminals{ @@ -112,6 +112,7 @@ class Parser { void SynErr(int n); void Get(); + bool IsKind(Token *t, int n); void Expect(int n); bool StartOf(int s); void ExpectWeak(int n, int follow); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 7e830b2..ce1237d 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -157,8 +157,9 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { for (int i=0; iterminals.Count; i++) { sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { - fputws(_SC("la->kind == "), gen); + fputws(_SC("IsKind(la, "), gen); WriteSymbolOrCode(gen, sym); + fputws(_SC(")"), gen); --n; if (n > 0) fputws(_SC(" || "), gen); } @@ -321,18 +322,21 @@ void ParserGen::GenTokensHeader() { if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } - else { fputws(_SC(",\n"), gen); } + else { fputws(_SC("\n"), gen); } - fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d,"), sym->name, sym->n); + if(sym->inherits) { + fwprintf(gen , _SC(" // INHERITS -> %") _SFMT, sym->inherits->name); + } } // pragmas for (i=0; ipragmas.Count; i++) { if (isFirst) { isFirst = false; } - else { fputws(_SC(",\n"), gen); } + else { fputws(_SC("\n"), gen); } sym = tab->pragmas[i]; - fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d,"), sym->name, sym->n); } fputws(_SC("\n\t};\n"), gen); @@ -363,6 +367,21 @@ void ParserGen::GenCodePragmas() { } } +void ParserGen::GenTokenBase() { + Symbol *sym; + fwprintf(gen, _SC("\tstatic const int tBase[%d] = {"), tab->terminals.Count); + + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; + if((i % 20) == 0) fputws(_SC("\n\t\t"), gen); + if (sym->inherits == NULL) + fputws(_SC("-1,"), gen); // not inherited + else + fwprintf(gen, _SC("%d,"), sym->inherits->n); + } + fputws(_SC("\n\t};\n"), gen); +} + void ParserGen::WriteSymbolOrCode(FILE *gen, 
const Symbol *sym) { if (!isalpha(sym->name[0])) { fwprintf(gen, _SC("%d /* %") _SFMT _SC(" */"), sym->n, sym->name); @@ -408,7 +427,7 @@ void ParserGen::GenProductions() { } void ParserGen::InitSets() { - fwprintf(gen, _SC("\tstatic bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); + fwprintf(gen, _SC("\tstatic const bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); for (int i = 0; i < symSet.Count; i++) { BitArray *s = symSet[i]; @@ -567,6 +586,7 @@ void ParserGen::WriteParser () { nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(_SC("-->pragmas")); GenCodePragmas(); + g.CopyFramePart(_SC("-->tbase")); GenTokenBase(); // write all tokens base types g.CopyFramePart(_SC("-->productions")); GenProductions(); g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%") _SFMT _SC("_NT();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); g.CopyFramePart(_SC("-->constants")); diff --git a/src/ParserGen.h b/src/ParserGen.h index 99d189c..55ee533 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -79,6 +79,7 @@ class ParserGen void GenCode(const Node *p, int indent, BitArray *isChecked); void GenTokens(); void GenTokensHeader(); + void GenTokenBase(); void GenPragmas(); void GenPragmasHeader(); void GenCodePragmas(); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 9b752d2..592cdfb 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -482,8 +482,8 @@ Scanner::~Scanner() { void Scanner::Init() { EOL = '\n'; eofSym = 0; - maxT = 42; - noSym = 42; + maxT = 43; + noSym = 43; int i; for (i = 65; i <= 90; ++i) start.set(i, 1); for (i = 95; i <= 95; ++i) start.set(i, 1); @@ -493,18 +493,19 @@ void Scanner::Init() { start.set(39, 5); start.set(36, 13); start.set(61, 16); - start.set(46, 31); + start.set(46, 32); start.set(43, 17); start.set(45, 18); - start.set(60, 32); - start.set(62, 20); - start.set(124, 23); - start.set(40, 33); - start.set(41, 24); - start.set(91, 25); - start.set(93, 26); - 
start.set(123, 27); - start.set(125, 28); + start.set(58, 20); + start.set(60, 33); + start.set(62, 21); + start.set(124, 24); + start.set(40, 34); + start.set(41, 25); + start.set(91, 26); + start.set(93, 27); + start.set(123, 28); + start.set(125, 29); start.set(Buffer::EoF, -1); keywords.set(_SC("COMPILER"), 6); keywords.set(_SC("IGNORECASE"), 7); @@ -520,10 +521,10 @@ void Scanner::Init() { keywords.set(_SC("PRODUCTIONS"), 17); keywords.set(_SC("END"), 20); keywords.set(_SC("ANY"), 24); - keywords.set(_SC("WEAK"), 30); - keywords.set(_SC("SYNC"), 37); - keywords.set(_SC("IF"), 38); - keywords.set(_SC("CONTEXT"), 39); + keywords.set(_SC("WEAK"), 31); + keywords.set(_SC("SYNC"), 38); + keywords.set(_SC("IF"), 39); + keywords.set(_SC("CONTEXT"), 40); tvalLength = 128; @@ -745,14 +746,14 @@ Token* Scanner::NextToken() { {t->kind = 5 /* char */; break;} case 10: case_10: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* ddtSym */; if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_10;} - else {t->kind = 43 /* ddtSym */; break;} + else {t->kind = 44 /* ddtSym */; break;} case 11: case_11: - recEnd = pos; recKind = 44 /* optionSym */; + recEnd = pos; recKind = 45 /* optionSym */; if ((ch >= _SC('-') && ch <= _SC('.')) || (ch >= _SC('0') && ch <= _SC(':')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_11;} - else {t->kind = 44 /* optionSym */; break;} + else {t->kind = 45 /* optionSym */; break;} case 12: case_12: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_12;} @@ -761,21 +762,21 @@ Token* Scanner::NextToken() { else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* 
ddtSym */; if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} - else {t->kind = 43 /* ddtSym */; break;} + else {t->kind = 44 /* ddtSym */; break;} case 14: case_14: if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* ddtSym */; if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} else if (ch == _SC('=')) {AddCh(); goto case_11;} - else {t->kind = 43 /* ddtSym */; break;} + else {t->kind = 44 /* ddtSym */; break;} case 16: {t->kind = 18 /* "=" */; break;} case 17: @@ -786,45 +787,47 @@ Token* Scanner::NextToken() { case_19: {t->kind = 23 /* ".." */; break;} case 20: - {t->kind = 26 /* ">" */; break;} + {t->kind = 25 /* ":" */; break;} case 21: - case_21: - {t->kind = 27 /* "<." */; break;} + {t->kind = 27 /* ">" */; break;} case 22: case_22: - {t->kind = 28 /* ".>" */; break;} + {t->kind = 28 /* "<." */; break;} case 23: - {t->kind = 29 /* "|" */; break;} + case_23: + {t->kind = 29 /* ".>" */; break;} case 24: - {t->kind = 32 /* ")" */; break;} + {t->kind = 30 /* "|" */; break;} case 25: - {t->kind = 33 /* "[" */; break;} + {t->kind = 33 /* ")" */; break;} case 26: - {t->kind = 34 /* "]" */; break;} + {t->kind = 34 /* "[" */; break;} case 27: - {t->kind = 35 /* "{" */; break;} + {t->kind = 35 /* "]" */; break;} case 28: - {t->kind = 36 /* "}" */; break;} + {t->kind = 36 /* "{" */; break;} case 29: - case_29: - {t->kind = 40 /* "(." */; break;} + {t->kind = 37 /* "}" */; break;} case 30: case_30: - {t->kind = 41 /* ".)" */; break;} + {t->kind = 41 /* "(." */; break;} case 31: + case_31: + {t->kind = 42 /* ".)" */; break;} + case 32: recEnd = pos; recKind = 19 /* "." 
*/; if (ch == _SC('.')) {AddCh(); goto case_19;} - else if (ch == _SC('>')) {AddCh(); goto case_22;} - else if (ch == _SC(')')) {AddCh(); goto case_30;} + else if (ch == _SC('>')) {AddCh(); goto case_23;} + else if (ch == _SC(')')) {AddCh(); goto case_31;} else {t->kind = 19 /* "." */; break;} - case 32: - recEnd = pos; recKind = 25 /* "<" */; - if (ch == _SC('.')) {AddCh(); goto case_21;} - else {t->kind = 25 /* "<" */; break;} case 33: - recEnd = pos; recKind = 31 /* "(" */; - if (ch == _SC('.')) {AddCh(); goto case_29;} - else {t->kind = 31 /* "(" */; break;} + recEnd = pos; recKind = 26 /* "<" */; + if (ch == _SC('.')) {AddCh(); goto case_22;} + else {t->kind = 26 /* "<" */; break;} + case 34: + recEnd = pos; recKind = 32 /* "(" */; + if (ch == _SC('.')) {AddCh(); goto case_30;} + else {t->kind = 32 /* "(" */; break;} } AppendVal(t); diff --git a/src/Symbol.cpp b/src/Symbol.cpp index 4869b57..b6a54c8 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -48,6 +48,7 @@ Symbol::Symbol(int typ, const wchar_t* name, int line, int col) { nts = NULL; attrPos = NULL; semPos = NULL; + inherits = NULL; this->typ = typ; this->name = coco_string_create(name); diff --git a/src/Symbol.h b/src/Symbol.h index cea13f8..e8543f5 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -60,7 +60,7 @@ class Symbol { Position *attrPos; // nt: position of attributes in source text (or null) Position *semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) - + Symbol *inherits; // optional, token from which this token derives Symbol(int typ, const wchar_t* name, int line, int col); virtual ~Symbol(); diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp index b1d9e03..c9a89c2 100644 --- a/src/TestSuite/TestAlts_Parser.cpp +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) 
Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -133,25 +146,25 @@ void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b || la->kind == _c || la->kind == _e) { - if (la->kind == _b) { + } else if (IsKind(la, _b) || IsKind(la, _c) || IsKind(la, _e)) { + if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _c) { + } else if (IsKind(la, _c)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } else { } - } else if (la->kind == _d) { + } else if (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -282,7 +295,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][9] = { + static const bool set[1][9] = { {T,x,x,x, x,x,x,x, x} }; diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp index 51f2a5e..e739ede 100644 --- a/src/TestSuite/TestAny1_Parser.cpp +++ b/src/TestSuite/TestAny1_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void 
Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -294,7 +307,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[2][12] = { + static const bool set[2][12] = { {T,x,x,x, x,x,x,x, x,x,x,x}, {x,T,T,T, T,T,T,T, T,T,T,x} }; diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp index 7c052e9..8c63422 100644 --- a/src/TestSuite/TestAny_Parser.cpp +++ b/src/TestSuite/TestAny_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -129,7 +142,7 @@ void Parser::A_NT() { bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif while (StartOf(1 /* alt */)) { - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -204,7 +217,7 @@ void Parser::D_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind 
== _i) { + } else if (IsKind(la, _i)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -331,7 +344,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[7][12] = { + static const bool set[7][12] = { {T,x,x,x, x,x,x,x, x,x,x,x}, {x,T,T,x, T,T,T,T, T,T,T,x}, {x,x,x,x, T,T,T,T, T,T,T,x}, diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp index 7517581..b92c10f 100644 --- a/src/TestSuite/TestCasing_Parser.cpp +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[7] = { + -1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -280,7 +293,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][8] = { + static const bool set[1][8] = { {T,x,x,x, x,x,x,x} }; diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp index 357840a..4a3fd3c 100644 --- a/src/TestSuite/TestChars_Parser.cpp +++ b/src/TestSuite/TestChars_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[3] = { + -1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) 
Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -240,7 +253,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][4] = { + static const bool set[1][4] = { {T,x,x,x} }; diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp index 357840a..4a3fd3c 100644 --- a/src/TestSuite/TestComments_Parser.cpp +++ b/src/TestSuite/TestComments_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[3] = { + -1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -240,7 +253,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][4] = { + static const bool set[1][4] = { {T,x,x,x} }; diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp index 4e2b63e..c276c11 100644 --- a/src/TestSuite/TestDel_Parser.cpp +++ 
b/src/TestSuite/TestDel_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -136,19 +149,19 @@ void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } else if (StartOf(1 /* iter */)) { - while (la->kind == _e) { + while (IsKind(la, _e)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _f) { + if (IsKind(la, _f)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -164,24 +177,24 @@ void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif - while (la->kind == _b) { + while (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _c) { + if (IsKind(la, _c)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _d) { + if (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _EOF || la->kind == _g) { + } else if (IsKind(la, _EOF) || IsKind(la, _g)) { } else SynErr(12); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -205,7 
+218,7 @@ void Parser::D_NT() { #endif if (StartOf(2 /* nt */)) { C_NT(); - } else if (la->kind == _h) { + } else if (IsKind(la, _h)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -332,7 +345,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[3][12] = { + static const bool set[3][12] = { {T,x,x,x, x,x,x,x, x,x,x,x}, {T,x,T,T, T,T,T,T, x,x,x,x}, {T,T,T,T, T,T,T,x, x,x,x,x} diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp index bf44ae1..0f816e1 100644 --- a/src/TestSuite/TestEps_Parser.cpp +++ b/src/TestSuite/TestEps_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -115,8 +128,8 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - if (la->kind == _a || la->kind == _b) { - if (la->kind == _a) { + if (IsKind(la, _a) || IsKind(la, _b)) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -127,13 +140,13 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 
_c || la->kind == _d || la->kind == _e) { - if (la->kind == _c) { + } else if (IsKind(la, _c) || IsKind(la, _d) || IsKind(la, _e)) { + if (IsKind(la, _c)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _e) { + } else if (IsKind(la, _e)) { sem } else { Get(); @@ -267,7 +280,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][9] = { + static const bool set[1][9] = { {T,x,x,x, x,x,x,x, x} }; diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp index b1998b6..60dc43e 100644 --- a/src/TestSuite/TestIters_Parser.cpp +++ b/src/TestSuite/TestIters_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -115,13 +128,13 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b || la->kind == _c) { - while (la->kind == _b) { + } else if (IsKind(la, _b) || IsKind(la, _c)) { + 
while (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -131,9 +144,9 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _d || la->kind == _e || la->kind == _i) { - while (la->kind == _d || la->kind == _e) { - while (la->kind == _d) { + } else if (IsKind(la, _d) || IsKind(la, _e) || IsKind(la, _i)) { + while (IsKind(la, _d) || IsKind(la, _e)) { + while (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -144,13 +157,13 @@ void Parser::Test_NT() { AstAddTerminal(); #endif } - } else if (la->kind == _f || la->kind == _h) { - while (la->kind == _f) { + } else if (IsKind(la, _f) || IsKind(la, _h)) { + while (IsKind(la, _f)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == _g) { + while (IsKind(la, _g)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -287,7 +300,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][12] = { + static const bool set[1][12] = { {T,x,x,x, x,x,x,x, x,x,x,x} }; diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp index 7b9898a..de3bed4 100644 --- a/src/TestSuite/TestLL1_Parser.cpp +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return 
true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -131,12 +144,12 @@ void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _a || la->kind == _b || la->kind == _c) { + } else if (IsKind(la, _a) || IsKind(la, _b) || IsKind(la, _c)) { B_NT(); } else SynErr(11); #ifdef PARSER_WITH_AST @@ -148,9 +161,9 @@ void Parser::E_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); #endif - if (la->kind == _e || la->kind == _f) { + if (IsKind(la, _e) || IsKind(la, _f)) { F_NT(); - } else if (la->kind == _e) { + } else if (IsKind(la, _e)) { } else SynErr(12); Expect(_e); #ifdef PARSER_WITH_AST @@ -165,13 +178,13 @@ void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif - while (la->kind == _a) { + while (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _d) { + if (IsKind(la, _d)) { D_NT(); } B_NT(); @@ -184,14 +197,14 @@ void Parser::G_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); #endif - if (la->kind == _a || la->kind == _b) { + if (IsKind(la, _a) || IsKind(la, _b)) { if (eee) { - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b) { + } else if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -217,7 +230,7 @@ void Parser::H_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -248,25 +261,25 @@ void Parser::I_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, 
_SC("I"), la->line); #endif - while (la->kind == _a) { + while (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } if (iii) { - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b) { + } else if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } else SynErr(14); - } else if (la->kind == _b) { + } else if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -287,14 +300,14 @@ void Parser::J_NT() { AstAddTerminal(); #endif } - while (la->kind == _a || la->kind == _b) { + while (IsKind(la, _a) || IsKind(la, _b)) { if (eee) { - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b) { + } else if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -320,18 +333,18 @@ void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif - while (la->kind == _b) { + while (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _c) { + if (IsKind(la, _c)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _a) { + } else if (IsKind(la, _a)) { } else SynErr(17); Expect(_a); #ifdef PARSER_WITH_AST @@ -350,7 +363,7 @@ void Parser::D_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - if (la->kind == _b) { + if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -365,7 +378,7 @@ void Parser::F_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); #endif - if (la->kind == _f) { + if (IsKind(la, _f)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -492,7 +505,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][12] = { + static const bool set[1][12] = { {T,x,x,x, x,x,x,x, x,x,x,x} }; diff --git 
a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp index b4c80c4..bbc1811 100644 --- a/src/TestSuite/TestOpts1_Parser.cpp +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -115,8 +128,8 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - if (la->kind == _a) { - if (la->kind == _a) { + if (IsKind(la, _a)) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -244,7 +257,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][9] = { + static const bool set[1][9] = { {T,x,x,x, x,x,x,x, x} }; diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp index c00bfb5..4a346f8 100644 --- a/src/TestSuite/TestOpts_Parser.cpp +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; 
+ k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -115,13 +128,13 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b || la->kind == _c) { - if (la->kind == _b) { + } else if (IsKind(la, _b) || IsKind(la, _c)) { + if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -131,23 +144,23 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _EOF || la->kind == _e) { - if (la->kind == _e) { + } else if (IsKind(la, _EOF) || IsKind(la, _e)) { + if (IsKind(la, _e)) { Del_NT(); } - } else if (la->kind == _d) { + } else if (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - if (la->kind == _d || la->kind == _e || la->kind == _f) { - if (la->kind == _d) { + if (IsKind(la, _d) || IsKind(la, _e) || IsKind(la, _f)) { + if (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - if (la->kind == _e) { + if (IsKind(la, _e)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -168,7 +181,7 @@ void Parser::Del_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Del, _SC("Del"), 
la->line); #endif - if (la->kind == _e) { + if (IsKind(la, _e)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -295,7 +308,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][9] = { + static const bool set[1][9] = { {T,x,x,x, x,x,x,x, x} }; diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp index 2380683..723aebb 100644 --- a/src/TestSuite/TestResOK_Parser.cpp +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[6] = { + -1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -115,7 +128,7 @@ void Parser::Test_NT() { #ifdef PARSER_WITH_AST Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); #endif - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 /* "a" */)) { A_NT(); B_NT(); C_NT(); @@ -124,7 +137,7 @@ void Parser::Test_NT() { F_NT(); G_NT(); H_NT(); - } else if (la->kind == _EOF || la->kind == 2 /* "b" */ || la->kind == 3 /* "c" */) { + } else if (IsKind(la, _EOF) || IsKind(la, 2 /* "b" */) || IsKind(la, 3 /* "c" */)) { I_NT(); } else SynErr(6); #ifdef PARSER_WITH_AST @@ -140,7 +153,7 @@ void Parser::A_NT() { #ifdef 
PARSER_WITH_AST AstAddTerminal(); #endif - while (la->kind == 2 /* "b" */) { + while (IsKind(la, 2 /* "b" */)) { if (true) { Expect(2 /* "b" */); #ifdef PARSER_WITH_AST @@ -169,7 +182,7 @@ void Parser::B_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 /* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -179,7 +192,7 @@ void Parser::B_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 2 /* "b" */) { + } else if (IsKind(la, 2 /* "b" */)) { } else SynErr(7); Expect(2 /* "b" */); #ifdef PARSER_WITH_AST @@ -203,7 +216,7 @@ void Parser::C_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 1 /* "a" */) { + } else if (IsKind(la, 1 /* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -218,7 +231,7 @@ void Parser::D_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); #endif - while (la->kind == 1 /* "a" */) { + while (IsKind(la, 1 /* "a" */)) { if (true) { Expect(1 /* "a" */); #ifdef PARSER_WITH_AST @@ -248,13 +261,13 @@ void Parser::E_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); #endif - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 /* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 3 /* "c" */ || la->kind == 4 /* "d" */) { - if (la->kind == 3 /* "c" */) { + } else if (IsKind(la, 3 /* "c" */) || IsKind(la, 4 /* "d" */)) { + if (IsKind(la, 3 /* "c" */)) { if (true) { Expect(3 /* "c" */); #ifdef PARSER_WITH_AST @@ -271,7 +284,7 @@ void Parser::E_NT() { #endif } } - } else if (la->kind == 2 /* "b" */) { + } else if (IsKind(la, 2 /* "b" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -292,7 +305,7 @@ void Parser::F_NT() { #endif while (StartOf(1 /* alt */)) { if (true) { - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 
/* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -302,7 +315,7 @@ void Parser::F_NT() { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 4 /* "d" */ || la->kind == 5 /* ??? */) { + } else if (IsKind(la, 4 /* "d" */) || IsKind(la, 5 /* ??? */)) { Get(); } else { Get(); @@ -332,18 +345,18 @@ void Parser::G_NT() { } while (bbb) { if (eee) { - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 /* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 2 /* "b" */) { + } else if (IsKind(la, 2 /* "b" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } else SynErr(10); - } else if (la->kind == 2 /* "b" */) { + } else if (IsKind(la, 2 /* "b" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -369,14 +382,14 @@ void Parser::H_NT() { AstAddTerminal(); #endif } - while (la->kind == 1 /* "a" */ || la->kind == 2 /* "b" */) { + while (IsKind(la, 1 /* "a" */) || IsKind(la, 2 /* "b" */)) { if (eee) { - if (la->kind == 1 /* "a" */) { + if (IsKind(la, 1 /* "a" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == 2 /* "b" */) { + } else if (IsKind(la, 2 /* "b" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -403,14 +416,14 @@ void Parser::I_NT() { bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); #endif if (aaa) { - if (la->kind == 2 /* "b" */) { + if (IsKind(la, 2 /* "b" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif } - } else if (la->kind == _EOF || la->kind == 3 /* "c" */) { - while (la->kind == 3 /* "c" */) { + } else if (IsKind(la, _EOF) || IsKind(la, 3 /* "c" */)) { + while (IsKind(la, 3 /* "c" */)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -538,7 +551,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[2][7] = { + static const bool set[2][7] = { {T,x,x,x, x,x,x}, {x,T,T,x, T,T,x} }; diff --git a/src/TestSuite/TestSem_Parser.cpp 
b/src/TestSuite/TestSem_Parser.cpp index 01d6a14..ca16ffd 100644 --- a/src/TestSuite/TestSem_Parser.cpp +++ b/src/TestSuite/TestSem_Parser.cpp @@ -90,12 +90,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -103,7 +116,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -131,15 +144,15 @@ void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif - if (la->kind == _c) { + if (IsKind(la, _c)) { aaa Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _a || la->kind == _b) { + } else if (IsKind(la, _a) || IsKind(la, _b)) { bbb - } else if (la->kind == _d) { + } else if (IsKind(la, _d)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -156,7 +169,7 @@ void Parser::B_NT() { bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); #endif ddd - while (la->kind == _a) { + while (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -178,12 +191,12 @@ void Parser::C_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); #endif - if (la->kind == _a) { + if (IsKind(la, _a)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - } else if (la->kind == _b) { + } else if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST 
AstAddTerminal(); @@ -316,7 +329,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][12] = { + static const bool set[1][12] = { {T,x,x,x, x,x,x,x, x,x,x,x} }; diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp index a632726..ef02a97 100644 --- a/src/TestSuite/TestSync_Parser.cpp +++ b/src/TestSuite/TestSync_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -120,8 +133,8 @@ void Parser::Test_NT() { AstAddTerminal(); #endif while (!(StartOf(1 /* sync */))) {SynErr(11); Get();} - while (la->kind == _b || la->kind == _c) { - if (la->kind == _b) { + while (IsKind(la, _b) || IsKind(la, _c)) { + if (IsKind(la, _b)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -147,8 +160,8 @@ void Parser::A_NT() { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); #endif - while (!(la->kind == _EOF || la->kind == _e || la->kind == _g)) {SynErr(12); Get();} - if (la->kind == _e) { + while (!(IsKind(la, _EOF) || IsKind(la, _e) || IsKind(la, _g))) {SynErr(12); Get();} + if (IsKind(la, _e)) { Get(); #ifdef PARSER_WITH_AST AstAddTerminal(); @@ -283,7 +296,7 @@ bool Parser::StartOf(int s) { const bool 
T = true; const bool x = false; - static bool set[2][12] = { + static const bool set[2][12] = { {T,x,T,T, T,T,x,T, x,x,x,x}, {T,x,T,T, T,x,x,x, x,x,x,x} }; diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp index 39454fb..e65023d 100644 --- a/src/TestSuite/TestTokens_Parser.cpp +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[13] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -272,7 +285,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[1][14] = { + static const bool set[1][14] = { {T,x,x,x, x,x,x,x, x,x,x,x, x,x} }; diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp index 896576a..95453e4 100644 --- a/src/TestSuite/TestWeak_Parser.cpp +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -87,12 +87,25 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { 
- if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -100,7 +113,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -299,7 +312,7 @@ bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[5][12] = { + static const bool set[5][12] = { {T,x,x,x, x,x,x,x, x,x,x,x}, {T,x,x,T, x,x,x,x, x,x,x,x}, {x,x,x,x, T,x,x,x, x,x,x,x}, From 1e5715c6e66481e577e8bc0774b939de3f40d09d Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Sat, 14 Aug 2021 11:40:57 +0200 Subject: [PATCH 86/95] Add the extra features description from last commits --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b2353d4..47e426e 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ And this are my main modifications to the original: - Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro -- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) +- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) +- Add the `_NT` suffix to non terminal functions to prevent name collision +- Add token inheritance from https://github.com/Lercher/CocoR See also https://github.com/mingodad/CocoR-Java and https://github.com/mingodad/CocoR-CSharp From 9182e47e4da63cfb7d579873b3570250bcb602f5 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 4 Sep 2021 10:55:18 +0200 Subject: [PATCH 87/95] Add column info to non terminals --- src/Coco.atg | 5 +++-- src/Parser.cpp | 1 + src/TestSuite/TestAny_Trace.txt | 2 +- src/TestSuite/TestCircular_Trace.txt | 8 ++++---- src/TestSuite/TestDel_Trace.txt | 10 ++++----- 
src/TestSuite/TestIters_Trace.txt | 16 +++++++-------- src/TestSuite/TestLL1_Trace.txt | 24 +++++++++++----------- src/TestSuite/TestOpts1_Trace.txt | 4 ++-- src/TestSuite/TestOpts_Trace.txt | 16 +++++++-------- src/TestSuite/TestResIllegal_Trace.txt | 4 ++-- src/TestSuite/TestResOK_Trace.txt | 28 +++++++++++++------------- src/TestSuite/TestSem_Trace.txt | 2 +- src/TestSuite/TestSync_Trace.txt | 4 ++-- src/TestSuite/TestWeak_Trace.txt | 4 ++-- src/TestSuite/check.sh | 1 + src/TestSuite/checkerr.sh | 1 + 16 files changed, 67 insertions(+), 63 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 924c235..98c93e1 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -155,8 +155,9 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra else { if (sym->typ == Node::nt) { if (sym->graph != NULL) SemErr(_SC("name declared twice")); - } else SemErr(_SC("this symbol kind not allowed on left side of production")); - sym->line = t->line; + } else SemErr(_SC("this symbol kind not allowed on left side of production")); + sym->line = t->line; + sym->col = t->col; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; diff --git a/src/Parser.cpp b/src/Parser.cpp index e8c5a35..b261db3 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -270,6 +270,7 @@ void Parser::Coco_NT() { if (sym->graph != NULL) SemErr(_SC("name declared twice")); } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; + sym->col = t->col; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; diff --git a/src/TestSuite/TestAny_Trace.txt b/src/TestSuite/TestAny_Trace.txt index 47d6015..bceba58 100644 --- a/src/TestSuite/TestAny_Trace.txt +++ b/src/TestSuite/TestAny_Trace.txt @@ -14,7 +14,7 @@ Graph nodes: 8 alt -11 10 6 0 9 t b -11 21 10 alt -11 0 9 21 - 11 iter 12 0 7 0 + 11 iter 12 0 7 21 12 t c 0 21 13 any 14 0 14 t d -15 22 diff --git a/src/TestSuite/TestCircular_Trace.txt b/src/TestSuite/TestCircular_Trace.txt index a1d767d..9d5f7b5 100644 --- 
a/src/TestSuite/TestCircular_Trace.txt +++ b/src/TestSuite/TestCircular_Trace.txt @@ -11,18 +11,18 @@ Graph nodes: 5 alt 0 6 2 21 6 alt 0 0 4 21 7 t b -9 22 - 8 opt 9 0 7 0 + 8 opt 9 0 7 22 9 nt C 12 22 10 t c 11 22 11 nt C -12 22 - 12 iter 0 0 10 0 + 12 iter 0 0 10 22 13 nt D 14 23 14 nt A 16 23 15 t d 0 23 - 16 opt 0 0 15 0 + 16 opt 0 0 15 23 17 t e 18 24 18 t f 0 24 - 19 opt 0 0 17 0 + 19 opt 0 0 17 24 First & follow symbols: diff --git a/src/TestSuite/TestDel_Trace.txt b/src/TestSuite/TestDel_Trace.txt index 78c61d7..9088d92 100644 --- a/src/TestSuite/TestDel_Trace.txt +++ b/src/TestSuite/TestDel_Trace.txt @@ -12,15 +12,15 @@ Graph nodes: 6 nt D 0 20 7 t a 0 21 8 t e -9 21 - 9 iter 11 0 8 0 + 9 iter 11 0 8 21 10 t f 0 21 - 11 opt 0 0 10 0 + 11 opt 0 0 10 21 12 alt 0 13 7 21 - 13 alt 0 0 9 0 + 13 alt 0 0 9 21 14 t b -15 22 - 15 iter 17 0 14 0 + 15 iter 17 0 14 22 16 t c -20 22 - 17 opt 20 0 16 0 + 17 opt 20 0 16 22 18 t d 0 22 19 eps 0 0 20 alt 0 21 18 22 diff --git a/src/TestSuite/TestIters_Trace.txt b/src/TestSuite/TestIters_Trace.txt index f700c0a..384965d 100644 --- a/src/TestSuite/TestIters_Trace.txt +++ b/src/TestSuite/TestIters_Trace.txt @@ -6,21 +6,21 @@ Graph nodes: 0 eps 0 0 1 t a -18 20 2 t b -3 20 - 3 iter 4 0 2 0 + 3 iter 4 0 2 20 4 t c -18 20 5 alt 18 6 1 20 - 6 alt -18 11 3 0 + 6 alt -18 11 3 20 7 t d -8 20 - 8 iter 9 0 7 0 + 8 iter 9 0 7 20 9 t e -10 20 - 10 iter -18 0 8 0 - 11 alt -18 17 10 0 + 10 iter -18 0 8 20 + 11 alt -18 17 10 20 12 t f 14 20 13 t g -14 20 - 14 iter -15 0 13 0 - 15 iter 16 0 12 0 + 14 iter -15 0 13 20 + 15 iter 16 0 12 20 16 t h -18 20 - 17 alt -18 0 15 0 + 17 alt -18 0 15 20 18 t i 0 20 diff --git a/src/TestSuite/TestLL1_Trace.txt b/src/TestSuite/TestLL1_Trace.txt index 0a76722..0e403dc 100644 --- a/src/TestSuite/TestLL1_Trace.txt +++ b/src/TestSuite/TestLL1_Trace.txt @@ -16,27 +16,27 @@ Graph nodes: 10 alt 0 11 8 21 11 alt 0 0 9 21 12 t b -13 22 - 13 iter 16 0 12 0 + 13 iter 16 0 12 22 14 t c -18 22 15 eps -18 0 16 alt 18 
17 14 22 17 alt -18 0 15 0 18 t a 0 22 19 t a -20 23 - 20 iter 22 0 19 0 + 20 iter 22 0 19 23 21 nt D -23 23 - 22 opt 23 0 21 0 + 22 opt 23 0 21 23 23 nt B 0 23 24 t d 26 24 25 t b 0 24 - 26 opt 0 0 25 0 + 26 opt 0 0 25 24 27 nt F -31 25 28 eps -31 0 29 alt 31 30 27 25 30 alt -31 0 28 0 31 t e 0 25 32 t f 0 26 - 33 opt 0 0 32 0 + 33 opt 0 0 32 26 34 rslv 37 27 35 t a -43 27 36 t b -43 27 @@ -45,19 +45,19 @@ Graph nodes: 39 t b -43 28 40 alt -43 41 34 27 41 alt -43 0 39 28 - 42 opt 43 0 40 0 + 42 opt 43 0 40 27 43 t a 0 30 44 t a -48 31 - 45 opt 48 0 44 0 + 45 opt 48 0 44 31 46 rslv 47 31 47 t a -51 31 - 48 opt 51 0 46 0 + 48 opt 51 0 46 31 49 rslv 50 31 50 t a -52 31 - 51 opt 52 0 49 0 + 51 opt 52 0 49 31 52 t a 0 31 53 t a -54 32 - 54 iter 61 0 53 0 + 54 iter 61 0 53 32 55 rslv 58 32 56 t a 0 32 57 t b 0 32 @@ -68,7 +68,7 @@ Graph nodes: 62 alt 0 0 60 33 63 rslv 64 36 64 t a -65 36 - 65 iter 74 0 63 0 + 65 iter 74 0 63 36 66 rslv 69 37 67 t a -74 37 68 t b -74 37 @@ -77,7 +77,7 @@ Graph nodes: 71 t b -74 38 72 alt -74 73 66 37 73 alt -74 0 71 38 - 74 iter 75 0 72 0 + 74 iter 75 0 72 37 75 t a 0 39 diff --git a/src/TestSuite/TestOpts1_Trace.txt b/src/TestSuite/TestOpts1_Trace.txt index 80553fd..5d62b0b 100644 --- a/src/TestSuite/TestOpts1_Trace.txt +++ b/src/TestSuite/TestOpts1_Trace.txt @@ -5,8 +5,8 @@ Graph nodes: ---------------------------------------------------- 0 eps 0 0 1 t a 0 17 - 2 opt 0 0 1 0 - 3 opt 0 0 2 0 + 2 opt 0 0 1 17 + 3 opt 0 0 2 17 First & follow symbols: diff --git a/src/TestSuite/TestOpts_Trace.txt b/src/TestSuite/TestOpts_Trace.txt index c48310e..eb33639 100644 --- a/src/TestSuite/TestOpts_Trace.txt +++ b/src/TestSuite/TestOpts_Trace.txt @@ -6,23 +6,23 @@ Graph nodes: 0 eps 0 0 1 t a 0 17 2 t b -4 17 - 3 opt 4 0 2 0 + 3 opt 4 0 2 17 4 t c 0 17 5 alt 0 6 1 17 - 6 alt 0 9 3 0 + 6 alt 0 9 3 17 7 nt Del 0 17 - 8 opt 0 0 7 0 - 9 alt 0 17 8 0 + 8 opt 0 0 7 17 + 9 alt 0 17 8 17 10 t d 16 17 11 t d -14 17 - 12 opt 14 0 11 0 + 12 opt 14 0 11 17 13 t 
e -15 17 - 14 opt 15 0 13 0 + 14 opt 15 0 13 17 15 t f 0 17 - 16 opt 0 0 12 0 + 16 opt 0 0 12 17 17 alt 0 0 10 17 18 t e 0 18 - 19 opt 0 0 18 0 + 19 opt 0 0 18 18 First & follow symbols: diff --git a/src/TestSuite/TestResIllegal_Trace.txt b/src/TestSuite/TestResIllegal_Trace.txt index aac55e3..6ac64dc 100644 --- a/src/TestSuite/TestResIllegal_Trace.txt +++ b/src/TestSuite/TestResIllegal_Trace.txt @@ -26,7 +26,7 @@ Graph nodes: 20 t "d" 0 13 21 rslv 22 15 22 t "d" -23 15 - 23 iter 24 0 21 0 + 23 iter 24 0 21 15 24 t "e" 0 15 25 t "d" 32 18 26 t "d" -32 19 @@ -35,7 +35,7 @@ Graph nodes: 29 t "b" -32 20 30 alt -32 31 26 19 31 alt -32 0 27 20 - 32 iter 33 0 30 0 + 32 iter 33 0 30 19 33 t "a" 0 21 34 rslv 35 24 35 t "a" 0 24 diff --git a/src/TestSuite/TestResOK_Trace.txt b/src/TestSuite/TestResOK_Trace.txt index fa189d4..c71f49e 100644 --- a/src/TestSuite/TestResOK_Trace.txt +++ b/src/TestSuite/TestResOK_Trace.txt @@ -19,9 +19,9 @@ Graph nodes: 13 rslv 14 7 14 t "b" 15 7 15 t "c" -17 7 - 16 opt 17 0 13 0 + 16 opt 17 0 13 7 17 t "b" -18 7 - 18 iter 19 0 16 0 + 18 iter 19 0 16 7 19 t "c" 0 7 20 t "a" -27 10 21 rslv 22 11 @@ -43,7 +43,7 @@ Graph nodes: 37 t "b" -40 19 38 alt -40 39 34 18 39 alt -40 0 36 19 - 40 iter 41 0 38 0 + 40 iter 41 0 38 18 41 t "c" 0 20 42 t "a" -54 23 43 rslv 44 24 @@ -52,26 +52,26 @@ Graph nodes: 46 t "b" -54 25 47 alt -54 48 43 24 48 alt -54 0 45 25 - 49 opt -54 0 47 0 + 49 opt -54 0 47 24 50 alt 54 51 42 23 - 51 alt -54 53 49 0 + 51 alt -54 53 49 24 52 t "b" -54 27 53 alt -54 0 52 27 54 t "d" 0 28 55 rslv 57 31 56 t "a" -58 31 - 57 opt 58 0 56 0 + 57 opt 58 0 56 31 58 t "b" -64 31 59 any -64 0 60 alt -64 61 55 31 61 alt -64 63 59 0 62 t "a" -64 33 63 alt -64 0 62 33 - 64 iter 65 0 60 0 + 64 iter 65 0 60 31 65 t "c" 0 34 66 rslv 67 37 67 t "a" -68 37 - 68 iter 78 0 66 0 + 68 iter 78 0 66 37 69 rslv 76 38 70 rslv 73 39 71 t "a" -78 39 @@ -81,11 +81,11 @@ Graph nodes: 75 t "b" -78 40 76 alt -78 77 70 39 77 alt -78 0 75 40 - 78 iter 79 0 69 0 + 78 
iter 79 0 69 38 79 t "a" 0 42 80 rslv 81 45 81 t "a" -82 45 - 82 iter 91 0 80 0 + 82 iter 91 0 80 45 83 rslv 86 46 84 t "a" -91 46 85 t "b" -91 46 @@ -94,15 +94,15 @@ Graph nodes: 88 t "b" -91 47 89 alt -91 90 83 46 90 alt -91 0 88 47 - 91 iter 92 0 89 0 + 91 iter 92 0 89 46 92 t "c" 0 48 93 rslv 95 51 94 t "b" 0 51 - 95 opt 0 0 94 0 + 95 opt 0 0 94 51 96 t "c" -97 52 - 97 iter 0 0 96 0 + 97 iter 0 0 96 52 98 alt 0 99 93 51 - 99 alt 0 0 97 0 + 99 alt 0 0 97 52 First & follow symbols: diff --git a/src/TestSuite/TestSem_Trace.txt b/src/TestSuite/TestSem_Trace.txt index baad7f7..66b8c15 100644 --- a/src/TestSuite/TestSem_Trace.txt +++ b/src/TestSuite/TestSem_Trace.txt @@ -18,7 +18,7 @@ Graph nodes: 12 sem 15 588 0 13 t a 14 31 14 sem -15 602 0 - 15 iter 16 0 13 0 + 15 iter 16 0 13 31 16 sem 17 613 0 17 t b 18 31 18 sem 0 625 0 diff --git a/src/TestSuite/TestSync_Trace.txt b/src/TestSuite/TestSync_Trace.txt index 2f001ac..13a77d9 100644 --- a/src/TestSuite/TestSync_Trace.txt +++ b/src/TestSuite/TestSync_Trace.txt @@ -10,13 +10,13 @@ Graph nodes: 4 t c -7 20 5 alt -7 6 3 20 6 alt -7 0 4 20 - 7 iter 8 0 5 0 + 7 iter 8 0 5 20 8 t d 9 20 9 nt A 0 20 10 sync 13 0 11 t e 12 21 12 t f -14 21 - 13 opt 14 0 11 0 + 13 opt 14 0 11 21 14 t g 0 21 diff --git a/src/TestSuite/TestWeak_Trace.txt b/src/TestSuite/TestWeak_Trace.txt index 78a3e9b..5601707 100644 --- a/src/TestSuite/TestWeak_Trace.txt +++ b/src/TestSuite/TestWeak_Trace.txt @@ -13,11 +13,11 @@ Graph nodes: 7 t a 10 22 8 wt b 9 22 9 t c -10 22 - 10 iter 11 0 8 0 + 10 iter 11 0 8 22 11 t d 0 22 12 t a 14 23 13 wt b -14 23 - 14 iter 15 0 13 0 + 14 iter 15 0 13 23 15 t c 0 23 diff --git a/src/TestSuite/check.sh b/src/TestSuite/check.sh index 20e487b..994457b 100755 --- a/src/TestSuite/check.sh +++ b/src/TestSuite/check.sh @@ -2,6 +2,7 @@ #myvalgrind --leak-check=full ../Coco -frames .. 
$1.ATG > output.txt +#cp trace.txt $1_Trace.txt if cmp trace.txt $1_Trace.txt then echo $1_Trace passed diff --git a/src/TestSuite/checkerr.sh b/src/TestSuite/checkerr.sh index 37dc613..e71ea9f 100755 --- a/src/TestSuite/checkerr.sh +++ b/src/TestSuite/checkerr.sh @@ -2,6 +2,7 @@ #myvalgrind --leak-check=full ../Coco -frames .. $1.ATG > output.txt +#cp trace.txt $1_Trace.txt if cmp trace.txt $1_Trace.txt then echo $1_Trace passed From f3b3e150d47c37f276873f6255e09d289d750f2e Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 4 Sep 2021 11:01:08 +0200 Subject: [PATCH 88/95] Add missing code for proper handling token inheritance --- src/ParserGen.cpp | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index ce1237d..fa4c4a0 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -169,8 +169,9 @@ void ParserGen::GenCond (const BitArray *s, const Node *p) { } } -void ParserGen::PutCaseLabels (const BitArray *s) { +void ParserGen::PutCaseLabels (const BitArray *s0) { Symbol *sym; + BitArray *s = DerivationsOf(s0); for (int i=0; iterminals.Count; i++) { sym = tab->terminals[i]; if ((*s)[sym->n]) { @@ -179,6 +180,28 @@ void ParserGen::PutCaseLabels (const BitArray *s) { fputws(_SC(": "), gen); } } + delete s; +} + +BitArray *ParserGen::DerivationsOf(const BitArray *s0) { + BitArray *s = s0->Clone(); + bool done = false; + while (!done) { + done = true; + for (int i=0; iterminals.Count; i++) { + Symbol *sym = tab->terminals[i]; + if ((*s)[sym->n]) { + for (int i=0; iterminals.Count; i++) { + Symbol *baseSym = tab->terminals[i]; + if (baseSym->inherits == sym && !(*s)[baseSym->n]) { + s->Set(baseSym->n, true); + done = false; + } + } + } + } + } + return s; } void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { @@ -430,7 +453,7 @@ void ParserGen::InitSets() { fwprintf(gen, _SC("\tstatic const bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); for (int i = 
0; i < symSet.Count; i++) { - BitArray *s = symSet[i]; + BitArray *s = DerivationsOf(symSet[i]); fputws(_SC("\t\t{"), gen); int j = 0; Symbol *sym; @@ -441,6 +464,7 @@ void ParserGen::InitSets() { if (j%4 == 0) fputws(_SC(" "), gen); } if (i == symSet.Count-1) fputws(_SC("x}\n"), gen); else fputws(_SC("x},\n"), gen); + delete s; } fputws(_SC("\t};\n\n"), gen); } From f3f29f516ed20fa4cb925edc209299bfd8719a0d Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 25 Dec 2021 11:52:39 +0100 Subject: [PATCH 89/95] Fix railroad EBNF generation and other fixes --- src/DFA.cpp | 18 +++++++++++------- src/Node.cpp | 19 ++++++++++--------- src/Node.h | 11 ++++++----- src/ParserGen.cpp | 37 ++++++++++++++++++++++--------------- src/ParserGen.h | 3 ++- src/State.cpp | 21 +++++++++++---------- src/State.h | 21 +++++++++++---------- src/Tab.cpp | 31 ++++++++++++++++++++++++++----- src/Tab.h | 3 +++ 9 files changed, 102 insertions(+), 62 deletions(-) diff --git a/src/DFA.cpp b/src/DFA.cpp index b018596..354fdd8 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -163,6 +163,10 @@ State* DFA::TheState(const Node *p) { else return p->state; } +static bool IsIterOpt(Node *p) { + return p->rmin == 0 && p->rmax == 1; +} + void DFA::Step(State *from, const Node *p, BitArray *stepped) { if (p == NULL) return; stepped->Set(p->n, true); @@ -536,7 +540,7 @@ void DFA::GenComBody(const Comment *com) { fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), res); if (imaxStop == 0) { - fwprintf(gen, _SC("%s"), + fwprintf(gen, _SC("%s"), "\t\t\t\tlevel--;\n" "\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n" "\t\t\t\tNextCh();\n"); @@ -751,6 +755,11 @@ void DFA::WriteState(const State *state) { fputws(_SC("goto case_0;}\n"), gen); } else { fwprintf(gen, _SC("t->kind = %d /* %") _SFMT _SC(" */; "), endOf->n, endOf->name); + if(endOf->semPos && endOf->typ == Node::t) { + fputws(_SC(" {"), gen); + CopySourcePart(endOf->semPos, 0); + fputws(_SC("}"), gen); + } if (endOf->tokenKind == 
Symbol::classLitToken) { if (ignoreCase) { fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); break;}\n"); @@ -758,11 +767,6 @@ void DFA::WriteState(const State *state) { fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); break;}\n"); } } else { - if(endOf->semPos && endOf->typ == Node::t) { - fputws(_SC(" {"), gen); - CopySourcePart(endOf->semPos, 0); - fputws(_SC("}"), gen); - } fputws(_SC(" break;}\n"), gen); } } @@ -797,7 +801,7 @@ void DFA::WriteScanner() { // Header g.GenCopyright(); g.SkipFramePart(_SC("-->begin")); - + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); diff --git a/src/Node.cpp b/src/Node.cpp index 7688cf0..d15fd36 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -43,6 +43,7 @@ Node::Node(int typ, Symbol *sym, int line, int col) { this->set = NULL; this->pos = NULL; this->state = NULL; + this->rmin = this->rmax = 0; this->typ = typ; this->sym = sym; this->line = line; this->col = col; } diff --git a/src/Node.h b/src/Node.h index 34cc11a..4453969 100644 --- a/src/Node.h +++ b/src/Node.h @@ -68,19 +68,20 @@ class Node { Node *next; // to successor node Node *down; // alt: to next alternative Node *sub; // alt, iter, opt: to first node of substructure - bool up; // true: "next" leads to successor in enclosing structure + bool up; // true: "next" leads to successor in enclosing structure Symbol *sym; // nt, t, wt: symbol represented by this node int val; // chr: ordinal character value - // clas: index of character class + // clas: index of character class int code; // chr, clas: transition code BitArray *set; // any, sync: the set represented by this node Position *pos; // nt, t, wt: pos of actual attributes - // sem: pos of semantic action in source text - // rslv: pos of resolver in source text + // sem: pos of semantic action in source text + // rslv: pos of resolver in source text int line; // source text line number of item in this node int col; // source text line column number of 
item in this node State *state; // DFA state corresponding to this node - // (only used in DFA.ConvertToStates) + // (only used in DFA.ConvertToStates) + int rmin, rmax; // repetition quantifiers Node(int typ, Symbol *sym, int line, int col); ~Node(); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index fa4c4a0..9fc9175 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -469,15 +469,15 @@ void ParserGen::InitSets() { fputws(_SC("\t};\n\n"), gen); } -int ParserGen::GenCodeRREBNF (const Node *p) { - int rc = 0; +int ParserGen::GenCodeRREBNF (const Node *p, int depth) { + int rc = 0, loop_count = 0; const Node *p2; while (p != NULL) { switch (p->typ) { case Node::nt: case Node::t: { - fputws(p->sym->name, gen); fputws(_SC(" "), gen); + fputws(p->sym->name, gen); ++rc; break; } @@ -485,7 +485,7 @@ int ParserGen::GenCodeRREBNF (const Node *p) { break; } case Node::any: { - fputws(_SC("ANY "), gen); + fputws(_SC(" ANY"), gen); break; } case Node::eps: break; // nothing @@ -497,30 +497,37 @@ int ParserGen::GenCodeRREBNF (const Node *p) { break; } case Node::alt: { - fputws(_SC("( "), gen); + bool need_close_alt = false; + if(depth > 0 || loop_count || p->next) { + fputws(" (", gen); + need_close_alt = true; + } p2 = p; while (p2 != NULL) { - rc += GenCodeRREBNF(p2->sub); + rc += GenCodeRREBNF(p2->sub, depth+1); p2 = p2->down; - if(p2 != NULL) fputws(_SC("| "), gen); + if(p2 != NULL) fputws(_SC(" |"), gen); } - fputws(_SC(") "), gen); + if(need_close_alt) fputws(_SC(" )"), gen); break; } case Node::iter: { - fputws(_SC("( "), gen); - rc += GenCodeRREBNF(p->sub); - fputws(_SC(")* "), gen); + if(p->sub->up == 0) fputws(_SC(" ("), gen); + rc += GenCodeRREBNF(p->sub, depth+1); + if(p->sub->up == 0) fputws(_SC(" )"), gen); + fputws(_SC("*"), gen); break; } case Node::opt: - fputws(_SC("( "), gen); - rc += GenCodeRREBNF(p->sub); - fputws(_SC(")? 
"), gen); + if(p->sub->up == 0) fputws(_SC(" ("), gen); + rc += GenCodeRREBNF(p->sub, depth+1); + if(p->sub->up == 0) fputws(_SC(" )"), gen); + fputws(_SC("?"), gen); break; } if (p->up) break; p = p->next; + ++loop_count; } return rc; } @@ -535,7 +542,7 @@ void ParserGen::WriteRREBNF () { for (int i=0; inonterminals.Count; i++) { sym = tab->nonterminals[i]; fwprintf(gen, _SC("%s ::= "), sym->name); - if(GenCodeRREBNF(sym->graph) == 0) { + if(GenCodeRREBNF(sym->graph, 0) == 0) { fputws(_SC("\"\?\?()\?\?\""), gen); } fputws(_SC("\n"), gen); diff --git a/src/ParserGen.h b/src/ParserGen.h index 55ee533..b196870 100644 --- a/src/ParserGen.h +++ b/src/ParserGen.h @@ -76,6 +76,7 @@ class ParserGen int NewCondSet(const BitArray *s); void GenCond(const BitArray *s, const Node *p); void PutCaseLabels(const BitArray *s); + BitArray *DerivationsOf(const BitArray *s); void GenCode(const Node *p, int indent, BitArray *isChecked); void GenTokens(); void GenTokensHeader(); @@ -87,7 +88,7 @@ class ParserGen void GenProductionsHeader(); void InitSets(); void OpenGen(const wchar_t* genName, bool backUp); - int GenCodeRREBNF(const Node *p); + int GenCodeRREBNF(const Node *p, int depth=0); void WriteRREBNF(); void WriteParser(); void WriteStatistics(); diff --git a/src/State.cpp b/src/State.cpp index 09111f9..42e43eb 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -36,6 +36,7 @@ State::State() { this->endOf = NULL; this->ctx = false; this->next = NULL; + this->rmin = this->rmax = 0; } State::~State() { @@ -54,7 +55,7 @@ void State::AddAction(Action *act) { else { lasta->next = act; } -} +} bool State::DetachAction(Action *act) { Action *lasta = NULL, *a = firstAction; diff --git a/src/State.h b/src/State.h index 23e994d..3073e22 100644 --- a/src/State.h +++ b/src/State.h @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ @@ -43,7 +43,8 @@ class State // state of finite automaton Symbol *endOf; // recognized token if state is final bool ctx; // true if state is reached via contextTrans State *next; - + int rmin, rmax; // repetition quantifiers + State(); ~State(); void AddAction(Action *act); diff --git a/src/Tab.cpp b/src/Tab.cpp index 4fc6ec8..1bd8667 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -236,9 +236,17 @@ void Tab::MakeSequence(Graph *g1, Graph *g2) { g1->r = g2->r; } -void Tab::MakeIteration(Graph *g) { - g->l = NewNode(Node::iter, g->l); +void Tab::MakeOptIter(Graph *g, int typ) { + int line = g->l->line; + int col = g->l->col; + g->l = NewNode(typ, g->l); + g->l->line = line; + g->l->col = col; g->r->up = true; +} + +void Tab::MakeIteration(Graph *g) { + MakeOptIter(g, Node::iter); Node *p = g->r; g->r = g->l; while (p != NULL) { @@ -248,12 +256,25 @@ void Tab::MakeIteration(Graph *g) { } void Tab::MakeOption(Graph *g) { - g->l = NewNode(Node::opt, g->l); - g->r->up = true; + MakeOptIter(g, Node::opt); g->l->next = g->r; g->r = g->l; } +void Tab::MakeRepetition(Graph *g, int rmin, int rmax) { + bool isOption = (rmin == 0 && rmax == 1); + MakeOptIter(g, Node::iter); + if(isOption) g->l->next = g->r; + Node *p = g->r; + g->r = g->l; + if(!isOption) { + while (p != NULL) { + Node *q = p->next; p->next = g->l; + p = q; + } + } +} + void Tab::Finish(Graph *g) { Node *p = g->r; while (p != NULL) { @@ -317,7 +338,7 @@ bool Tab::DelNode(const Node* p) { return DelSubGraph(p->sub) || (p->down != NULL && DelSubGraph(p->down)); } else { - return p->typ == Node::iter || p->typ == Node::opt || p->typ == Node::sem + return (p->typ == Node::iter && p->rmin == 0) || p->typ == Node::opt || p->typ == Node::sem || p->typ == Node::eps || p->typ == Node::rslv || p->typ == Node::sync; } } diff --git a/src/Tab.h b/src/Tab.h index a0ea2e2..9d6eca5 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -117,6 +117,7 @@ class Tab { 
void MakeSequence(Graph *g1, Graph *g2); void MakeIteration(Graph *g); void MakeOption(Graph *g); + void MakeRepetition(Graph *g, int rmin, int rmax); void Finish(Graph *g); //set all 'next' from g->r to NULL void DeleteNodes(); Graph* StrToGraph(const wchar_t* str); @@ -235,6 +236,8 @@ class Tab { void XRef(); void SetDDT(const wchar_t* s); void SetOption(const wchar_t* s); +private: + void MakeOptIter(Graph *g, int typ); }; From b8b387ef76dc2717973f7c7e5fb194b185898599 Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 27 Dec 2021 13:04:14 +0100 Subject: [PATCH 90/95] Fix genRREBNF when outputting ANY --- src/ParserGen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 9fc9175..62e6dad 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -486,6 +486,7 @@ int ParserGen::GenCodeRREBNF (const Node *p, int depth) { } case Node::any: { fputws(_SC(" ANY"), gen); + ++rc; break; } case Node::eps: break; // nothing From 8bc853962f3b81a8415a4ebafa1d9e7476ed283e Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 11 Jul 2022 21:20:17 +0200 Subject: [PATCH 91/95] Fix trace output --- src/Coco.atg | 20 ++++++++++---------- src/DFA.cpp | 12 +++++++----- src/Parser.cpp | 18 +++++++++--------- src/Scanner.cpp | 4 ++++ src/Scanner.frame | 5 +++++ src/Scanner.h | 1 + src/SortedList.cpp | 4 ++++ src/Tab.cpp | 35 ++++++++++++++++------------------- 8 files changed, 56 insertions(+), 43 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 98c93e1..e6206e1 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -80,7 +80,7 @@ CHARACTERS tab = '\t'. stringCh = ANY - '"' - '\\' - cr - lf. charCh = ANY - '\'' - '\\' - cr - lf. - printable = '\u0020' .. '\u007e'. + printable = '\u0020' .. '\u007e'. hex = "0123456789abcdef". TOKENS @@ -375,9 +375,9 @@ Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) else g = g2; .) { Factor (. tab->MakeSequence(g, g2); delete g2; .) } -| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) 
+| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) ) (. if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ @@ -394,7 +394,7 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt + sym = tab->NewSym(Node::nt, name, t->line, t->col); // forward nt else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); @@ -423,18 +423,18 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) | '{' Expression '}' (. tab->MakeIteration(g); .) -| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); +| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); .) -| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0, 0); // p.set is set in tab->SetupAnys +| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); .) -| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0, 0); +| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); .) ) (. if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) . @@ -488,7 +488,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) 
SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; + Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -505,7 +505,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) | '[' TokenExpr ']' (. tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) | '{' TokenExpr '}' (. tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) ) (. if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); .) + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ diff --git a/src/DFA.cpp b/src/DFA.cpp index 354fdd8..df685f1 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -42,11 +42,13 @@ typedef wchar_t wchar_t_10[SZWC10+1]; typedef wchar_t wchar_t_20[SZWC20+1]; //---------- Output primitives -static wchar_t* DFACh(int ch, wchar_t_10 &format) { +static wchar_t* DFACh(int ch, wchar_t_10 &format, bool noWrapper=false) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) coco_swprintf(format, SZWC10, _SC("%d"), (int) ch); - else - coco_swprintf(format, SZWC10, _SC("_SC('%") _CHFMT _SC("')"), (int) ch); + else { + const char *strFmt = noWrapper ? 
"'%" _CHFMT "'" : _SC("_SC('%") _CHFMT _SC("')"); + coco_swprintf(format, SZWC10, strFmt, (int) ch); + } format[SZWC10] = _SC('\0'); return format; } @@ -409,9 +411,9 @@ void DFA::PrintStates() { if (first) {fputws(_SC(" "), trace); first = false;} else fputws(_SC(" "), trace); if (action->typ == Node::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); - else fwprintf(trace, _SC("%3") _SFMT, DFACh(action->sym, fmt)); + else fwprintf(trace, _SC("%3") _SFMT, DFACh(action->sym, fmt, true)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { - fwprintf(trace, _SC("%3d"), targ->state->nr); + fwprintf(trace, _SC(" %3d"), targ->state->nr); } if (action->tc == Node::contextTrans) fputws(_SC(" context\n"), trace); else fputws(_SC("\n"), trace); } diff --git a/src/Parser.cpp b/src/Parser.cpp index b261db3..4828e94 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -747,10 +747,10 @@ void Parser::Term_NT(Graph* &g) { tab->MakeSequence(g, g2); delete g2; } } else if (StartOf(19 /* sem */)) { - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); } else SynErr(50); if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -799,7 +799,7 @@ void Parser::Factor_NT(Graph* &g) { bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0, 0); // forward nt + sym = tab->NewSym(Node::nt, name, t->line, t->col); // forward nt else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); @@ -870,7 +870,7 @@ void Parser::Factor_NT(Graph* &g) { } case 41 /* "(." 
*/: { SemText_NT(pos); - Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0, 0); + Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); @@ -881,7 +881,7 @@ void Parser::Factor_NT(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0, 0); // p.set is set in tab->SetupAnys + Node *p = tab->NewNode(Node::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); break; @@ -891,7 +891,7 @@ void Parser::Factor_NT(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0, 0); + Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); break; @@ -899,7 +899,7 @@ void Parser::Factor_NT(Graph* &g) { default: SynErr(51); break; } if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -1030,7 +1030,7 @@ void Parser::TokenFactor_NT(Graph* &g) { SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0, 0); p->val = c->n; + Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -1077,7 +1077,7 @@ void Parser::TokenFactor_NT(Graph* &g) { tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else SynErr(53); if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0, 0)); + g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 592cdfb..e4b452c 
100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -191,6 +191,10 @@ int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp(data1, data2); +} + unsigned int coco_string_hash(const wchar_t *data) { unsigned int h = 0; if (!data) { return 0; } diff --git a/src/Scanner.frame b/src/Scanner.frame index 104226c..ca72fc0 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -131,6 +131,7 @@ bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); unsigned int coco_string_hash(const wchar_t* data, size_t size); @@ -599,6 +600,10 @@ int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp(data1, data2); +} + unsigned int coco_string_hash(const wchar_t *data) { unsigned int h = 0; if (!data) { return 0; } diff --git a/src/Scanner.h b/src/Scanner.h index 37cfece..023f531 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -127,6 +127,7 @@ bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); unsigned int 
coco_string_hash(const wchar_t* data, size_t size); diff --git a/src/SortedList.cpp b/src/SortedList.cpp index 0ad855c..0b78eee 100644 --- a/src/SortedList.cpp +++ b/src/SortedList.cpp @@ -36,6 +36,10 @@ int Compare(const Symbol *x, const Symbol *y) { return coco_string_compareto(x->name, y->name); } +int CompareNocase(const Symbol *x, const Symbol *y) { + return coco_string_compareto_nocase(x->name, y->name); +} + SortedEntry::SortedEntry(const Symbol* Key, const void* Value) { this->Key = Key; this->Value = Value; diff --git a/src/Tab.cpp b/src/Tab.cpp index 1bd8667..b3fdd2a 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -113,7 +113,7 @@ int Tab::Num(const Node *p) { } void Tab::PrintSym(const Symbol *sym) { - fwprintf(trace, _SC("%3d %-14.14") _SFMT _SC(" %s"), sym->n, sym->name, nTyp[sym->typ]); + fwprintf(trace, _SC("%3d %-14.14") _SFMT _SC(" %s"), sym->n, sym->name, nTyp[sym->typ]); if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); if (sym->typ == Node::nt) { @@ -129,7 +129,7 @@ void Tab::PrintSymbolTable() { fwprintf(trace, _SC("%s"), "Symbol Table:\n" "------------\n\n" - " nr name typ hasAt graph del line tokenKind\n"); + " nr name typ hasAt graph del line tokenKind\n"); Symbol *sym; int i; @@ -193,7 +193,7 @@ Node* Tab::NewNode(int typ, Symbol *sym, int line, int col) { Node* Tab::NewNode(int typ, Node* sub) { - Node* node = NewNode(typ, (Symbol*)NULL, 0, 0); + Node* node = NewNode(typ, (Symbol*)NULL, sub->line, sub->col); node->sub = sub; return node; } @@ -206,7 +206,7 @@ Node* Tab::NewNode(int typ, int val, int line, int col) { void Tab::MakeFirstAlt(Graph *g) { - g->l = NewNode(Node::alt, g->l); g->l->line = g->l->sub->line; + g->l = NewNode(Node::alt, g->l); g->r->up = true; g->l->next = g->r; g->r = g->l; @@ -214,7 +214,7 @@ void Tab::MakeFirstAlt(Graph *g) { // The result will be in g1 void Tab::MakeAlternative(Graph *g1, Graph *g2) { - g2->l = NewNode(Node::alt, g2->l); g2->l->line = g2->l->sub->line; + g2->l 
= NewNode(Node::alt, g2->l); g2->l->up = true; g2->r->up = true; Node *p = g1->l; while (p->down != NULL) p = p->down; @@ -237,11 +237,7 @@ void Tab::MakeSequence(Graph *g1, Graph *g2) { } void Tab::MakeOptIter(Graph *g, int typ) { - int line = g->l->line; - int col = g->l->col; g->l = NewNode(typ, g->l); - g->l->line = line; - g->l->col = col; g->r->up = true; } @@ -366,10 +362,10 @@ static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { void Tab::PrintNodes() { fwprintf(trace, _SC("%s"), "Graph nodes:\n" - "----------------------------------------------------\n" - " n type name next down sub pos line\n" + "----------------------------------------------------------\n" + " n type name next down sub pos line col\n" " val code\n" - "----------------------------------------------------\n"); + "----------------------------------------------------------\n"); Node *p; wchar_t_10 format; @@ -397,7 +393,7 @@ void Tab::PrintNodes() { } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { fwprintf(trace, _SC(" ")); } - fwprintf(trace, _SC("%5d\n"), p->line); + fwprintf(trace, _SC("%5d %5d\n"), p->line, p->col); } fputws(_SC("\n"), trace); } @@ -461,7 +457,7 @@ void Tab::WriteCharSet(const CharSet *s) { if (r->from < r->to) { wchar_t *from = TabCh(r->from, fmt1); wchar_t *to = TabCh(r->to, fmt2); - fwprintf(trace, _SC("%") _SFMT _SC(" .. %") _SFMT _SC(" "), from, to); + fwprintf(trace, _SC("%") _SFMT _SC("..%") _SFMT _SC(" "), from, to); } else { wchar_t *from = TabCh(r->from, fmt1); @@ -530,9 +526,10 @@ BitArray* Tab::First(const Node *p) { BitArray *fs = First0(p, &mark); if (ddt[3]) { fputws(_SC("\n"), trace); - if (p != NULL) fwprintf(trace, _SC("First: node = %d\n"), p->n ); + if (p != NULL) fwprintf(trace, _SC("First: node = %d\tline = %d\tcol = %d\ttype = %s\t%s\n"), p->n, + p->line, p->col, this->nTyp[p->typ], p->sym ? 
p->sym->name : ""); else fputws(_SC("First: node = null\n"), trace); - PrintSet(fs, 0); + fwprintf(trace, _SC(" ")); PrintSet(fs, 10); } return fs; } @@ -783,7 +780,7 @@ void Tab::CompSymbolSets() { Symbol *sym; for (int i=0; iname); + fwprintf(trace, _SC("%") _SFMT _SC(" -> line: %d\n"), sym->name, sym->line); fputws(_SC("first: "), trace); PrintSet(sym->first, 10); fputws(_SC("follow: "), trace); PrintSet(sym->follow, 10); fputws(_SC("\n"), trace); @@ -799,8 +796,8 @@ void Tab::CompSymbolSets() { for (int i=0; ityp == Node::any || p->typ == Node::sync) { - fwprintf(trace, _SC("%4d %4s "), p->n, nTyp[p->typ]); - PrintSet(p->set, 11); + fwprintf(trace, _SC("Node: %4d %4s: Line: %4d\n"), p->n, nTyp[p->typ], p->line); + fwprintf(trace, _SC(" ")); PrintSet(p->set, 10); } } } From b36a8844ed27dfef081e7ae165e49f3cbeeaefda Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 14 Jul 2022 08:46:27 +0200 Subject: [PATCH 92/95] Change Node/Symbol type/kind to an independent header --- src/Action.cpp | 8 +-- src/Coco.atg | 58 ++++++++-------- src/DFA.cpp | 64 ++++++++--------- src/Node.cpp | 2 +- src/Node.h | 26 +------ src/NodeSymbolKind.h | 42 +++++++++++ src/Parser.cpp | 52 +++++++------- src/Parser.h | 12 ++-- src/ParserGen.cpp | 54 +++++++-------- src/Scanner.frame | 1 + src/Scanner.h | 1 + src/Symbol.cpp | 2 +- src/Symbol.h | 5 +- src/Tab.cpp | 162 +++++++++++++++++++++++-------------------- src/Tab.h | 10 +-- 15 files changed, 266 insertions(+), 233 deletions(-) create mode 100644 src/NodeSymbolKind.h diff --git a/src/Action.cpp b/src/Action.cpp index b76b7a0..23f24dd 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -61,12 +61,12 @@ void Action::AddTargets(Action *a) {// add copy of a.targets to action.targets for (Target *p = a->target; p != NULL; p = p->next) { AddTarget(p->state); } - if (a->tc == Node::contextTrans) tc = Node::contextTrans; + if (a->tc == TransitionCode::contextTrans) tc = TransitionCode::contextTrans; } CharSet* Action::Symbols(Tab *tab) { 
CharSet *s; - if (typ == Node::clas) + if (typ == NodeType::clas) s = tab->CharClassSet(sym)->Clone(); else { s = new CharSet(); s->Set(sym); @@ -77,14 +77,14 @@ CharSet* Action::Symbols(Tab *tab) { bool Action::ShiftWith(CharSet *s, Tab *tab) { //return true if it used the CharSet *s bool rc = false; if (s->Elements() == 1) { - typ = Node::chr; sym = s->First(); + typ = NodeType::chr; sym = s->First(); } else { CharClass *c = tab->FindCharClass(s); if (c == NULL) { c = tab->NewCharClass(_SC("#"), s); // class with dummy name rc = true; } - typ = Node::clas; sym = c->n; + typ = NodeType::clas; sym = c->n; } return rc; } diff --git a/src/Coco.atg b/src/Coco.atg index e6206e1..13da0e0 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -39,8 +39,8 @@ $namespace=Coco COMPILER Coco - int id; - int str; + NodeType id; + NodeType str; FILE* trace; // other Coco objects referenced in this ATG Tab *tab; @@ -57,8 +57,8 @@ COMPILER Coco tab = NULL; dfa = NULL; pgen = NULL; - id = 0; - str = 1; + id = NodeType::id; + str = NodeType::t; tokenString = NULL; noString = coco_string_create(_SC("-none-")); ignoreGammarErrors = false; @@ -129,13 +129,13 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); if (sym != NULL) SemErr(_SC("name declared twice")); else { - sym = tab->NewSym(Node::t, t->val, t->line, t->col); + sym = tab->NewSym(NodeType::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; }.) } ] /*from cocoxml*/ [ "CHARACTERS" { SetDecl }] - [ "TOKENS" { TokenDecl }] - [ "PRAGMAS" { TokenDecl }] + [ "TOKENS" { TokenDecl }] + [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) "FROM" TokenExpr "TO" TokenExpr @@ -151,9 +151,9 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra .) { ident (. 
sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); + if (undef) sym = tab->NewSym(NodeType::nt, t->val, t->line, t->col); else { - if (sym->typ == Node::nt) { + if (sym->typ == NodeType::nt) { if (sym->graph != NULL) SemErr(_SC("name declared twice")); } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; @@ -186,7 +186,7 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra if (sym->attrPos != NULL) SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, _SC("???"), 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(NodeType::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); @@ -293,7 +293,7 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. wchar_t* name = NULL; int kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; .) +TokenDecl (. wchar_t* name = NULL; NodeType kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; .) = Sym (. sym = tab->FindSym(name); if (sym != NULL) SemErr(_SC("name declared twice")); @@ -329,7 +329,7 @@ TokenDecl (. wchar_t* name = NULL; int kind, kindInherits; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != Node::pr) SemErr(_SC("semantic action not allowed here")); .) + [ SemText<.sym->semPos.> (. if (typ == NodeType::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != NodeType::pr) SemErr(_SC("semantic action not allowed here")); .) ] . @@ -368,21 +368,21 @@ Expression (. Graph *g2; .) Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) = -( [ (. 
rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); .) +( [ (. rslv = tab->NewNode(NodeType::rslv, (Symbol*)NULL, la->line, la->col); .) Resolver<.rslv->pos.> (. g = new Graph(rslv); .) ] Factor (. if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; .) { Factor (. tab->MakeSequence(g, g2); delete g2; .) } -| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) +| (. g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) ) (. if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ -Factor (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false; +Factor (. wchar_t* name = NULL; NodeType kind; Position *pos; bool weak = false; g = NULL; .) = @@ -394,9 +394,9 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, t->line, t->col); // forward nt + sym = tab->NewSym(NodeType::nt, name, t->line, t->col); // forward nt else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line, t->col); + sym = tab->NewSym(NodeType::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(_SC("undefined string in production")); @@ -404,11 +404,11 @@ Factor (. 
wchar_t* name = NULL; int kind; Position *pos; } } coco_string_delete(name); - int typ = sym->typ; - if (typ != Node::t && typ != Node::nt) + NodeType typ = sym->typ; + if (typ != NodeType::t && typ != NodeType::nt) SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { - if (typ == Node::t) typ = Node::wt; + if (typ == NodeType::t) typ = NodeType::wt; else SemErr(_SC("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); @@ -423,18 +423,18 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) | '{' Expression '}' (. tab->MakeIteration(g); .) -| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, t->line, t->col); +| SemText (. Node *p = tab->NewNode(NodeType::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); .) -| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys +| "ANY" (. Node *p = tab->NewNode(NodeType::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); .) -| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, t->line, t->col); +| "SYNC" (. Node *p = tab->NewNode(NodeType::sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); .) ) (. if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . @@ -479,7 +479,7 @@ TokenTerm (. Graph *g2; .) /*------------------------------------------------------------------------------------*/ -TokenFactor (. wchar_t* name = NULL; int kind; .) +TokenFactor (. wchar_t* name = NULL; NodeType kind; .) = (. g = NULL; .) ( Sym (. if (kind == id) { @@ -488,7 +488,7 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) 
SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; + Node *p = tab->NewNode(NodeType::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -505,12 +505,12 @@ TokenFactor (. wchar_t* name = NULL; int kind; .) | '[' TokenExpr ']' (. tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) | '{' TokenExpr '}' (. tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) ) (. if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); .) + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ -Sym +Sym = (. name = coco_string_create(_SC("???")); kind = id; .) ( ident (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .) | (string (. coco_string_delete(name); name = coco_string_create(t->val); .) 
diff --git a/src/DFA.cpp b/src/DFA.cpp index df685f1..4c593a9 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -93,7 +93,7 @@ void DFA::NewTransition(State *from, State *to, int typ, int sym, int tc) { Target *t = new Target(to); Action *a = new Action(typ, sym, tc); a->target = t; from->AddAction(a); - if (typ == Node::clas) curSy->tokenKind = Symbol::classToken; + if (typ == NodeType::clas) curSy->tokenKind = Symbol::classToken; } void DFA::CombineShifts() { @@ -173,11 +173,11 @@ void DFA::Step(State *from, const Node *p, BitArray *stepped) { if (p == NULL) return; stepped->Set(p->n, true); - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == NodeType::chr) { NewTransition(from, TheState(p->next), p->typ, p->val, p->code); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { Step(from, p->sub, stepped); Step(from, p->down, stepped); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { if (tab->DelSubGraph(p->sub)) { parser->SemErr(_SC("contents of {...} must not be deletable")); return; @@ -188,7 +188,7 @@ void DFA::Step(State *from, const Node *p, BitArray *stepped) { BitArray newStepped(tab->nodes.Count); Step(p->state, p, &newStepped); } - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); } @@ -204,19 +204,19 @@ void DFA::Step(State *from, const Node *p, BitArray *stepped) { void DFA::NumberNodes(Node *p, State *state, bool renumIter) { if (p == NULL) return; if (p->state != NULL) return; // already visited; - if ((state == NULL) || ((p->typ == Node::iter) && renumIter)) state = NewState(); + if ((state == NULL) || ((p->typ == NodeType::iter) && renumIter)) state = NewState(); p->state = state; if (tab->DelGraph(p)) state->endOf = curSy; - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == 
NodeType::chr) { NumberNodes(p->next, NULL, false); - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { NumberNodes(p->next, state, true); NumberNodes(p->sub, state, true); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); NumberNodes(p->down, state, renumIter); @@ -231,13 +231,13 @@ void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { Step(p->state, p, &stepped); // start of group of equally numbered nodes } - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == NodeType::chr) { FindTrans(p->next, true, marked); - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { FindTrans(p->next, true, marked); FindTrans(p->sub, false, marked); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { FindTrans(p->next, false, marked); FindTrans(p->sub, false, marked); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { FindTrans(p->sub, false, marked); FindTrans(p->down, false, marked); } } @@ -251,7 +251,7 @@ void DFA::ConvertToStates(Node *p, Symbol *sym) { NumberNodes(curGraph, firstState, true); BitArray ba(tab->nodes.Count); FindTrans(curGraph, true, &ba); - if (p->typ == Node::iter) { + if (p->typ == NodeType::iter) { ba.SetAll(false); Step(firstState, p, &ba); } @@ -277,14 +277,14 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } for (; i < len; i++) { // make new DFA for s[i..len-1] State *to = NewState(); - NewTransition(state, to, Node::chr, s[i], Node::normalTrans); + NewTransition(state, to, NodeType::chr, s[i], TransitionCode::normalTrans); state = to; } coco_string_delete(s); Symbol *matchedSym = state->endOf; if (state->endOf == NULL) { state->endOf = sym; - } else if 
(matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { + } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == TransitionCode::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off const size_t format_size = 200; wchar_t format[format_size]; @@ -317,7 +317,7 @@ bool DFA::SplitActions(State *state, Action *a, Action *b) { setb->Subtract(setc); if(!a->ShiftWith(seta, tab)) delete seta; if(!b->ShiftWith(setb, tab)) delete setb; - c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith + c = new Action(0, 0, TransitionCode::normalTrans); // typ and sym are set in ShiftWith c->AddTargets(a); c->AddTargets(b); if(!c->ShiftWith(setc, tab)) delete setc; @@ -330,12 +330,12 @@ bool DFA::SplitActions(State *state, Action *a, Action *b) { bool DFA::Overlap(const Action *a, const Action *b) { CharSet *seta, *setb; - if (a->typ == Node::chr) - if (b->typ == Node::chr) return (a->sym == b->sym); + if (a->typ == NodeType::chr) + if (b->typ == NodeType::chr) return (a->sym == b->sym); else {setb = tab->CharClassSet(b->sym); return setb->Get(a->sym);} else { seta = tab->CharClassSet(a->sym); - if (b->typ == Node::chr) return seta->Get(b->sym); + if (b->typ == NodeType::chr) return seta->Get(b->sym); else {setb = tab->CharClassSet(b->sym); return seta->Intersects(setb);} } } @@ -379,7 +379,7 @@ void DFA::MeltStates(State *state) { void DFA::FindCtxStates() { for (State *state = firstState; state != NULL; state = state->next) for (Action *a = state->firstAction; a != NULL; a = a->next) - if (a->tc == Node::contextTrans) a->target->state->ctx = true; + if (a->tc == TransitionCode::contextTrans) a->target->state->ctx = true; } void DFA::MakeDeterministic() { @@ -410,12 +410,12 @@ void DFA::PrintStates() { for (Action *action = state->firstAction; action != NULL; action = action->next) { if (first) {fputws(_SC(" "), trace); first = false;} else 
fputws(_SC(" "), trace); - if (action->typ == Node::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); + if (action->typ == NodeType::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); else fwprintf(trace, _SC("%3") _SFMT, DFACh(action->sym, fmt, true)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, _SC(" %3d"), targ->state->nr); } - if (action->tc == Node::contextTrans) fputws(_SC(" context\n"), trace); else fputws(_SC("\n"), trace); + if (action->tc == TransitionCode::contextTrans) fputws(_SC(" context\n"), trace); else fputws(_SC("\n"), trace); } } fputws(_SC("\n---------- character classes ----------\n"), trace); @@ -426,8 +426,8 @@ void DFA::PrintStates() { Action* DFA::FindAction(const State *state, int ch) { for (Action *a = state->firstAction; a != NULL; a = a->next) - if (a->typ == Node::chr && ch == a->sym) return a; - else if (a->typ == Node::clas) { + if (a->typ == NodeType::chr && ch == a->sym) return a; + else if (a->typ == NodeType::clas) { CharSet *s = tab->CharClassSet(a->sym); if (s->Get(ch)) return a; } @@ -500,9 +500,9 @@ Melted* DFA::StateWithSet(const BitArray *s) { wchar_t* DFA::CommentStr(const Node *p) { StringBuilder s; while (p != NULL) { - if (p->typ == Node::chr) { + if (p->typ == NodeType::chr) { s.Append((wchar_t)p->val); - } else if (p->typ == Node::clas) { + } else if (p->typ == NodeType::clas) { CharSet *set = tab->CharClassSet(p->val); if (set->Elements() != 1) parser->SemErr(_SC("character set contains more than 1 character")); s.Append((wchar_t) set->First()); @@ -728,13 +728,13 @@ void DFA::WriteState(const State *state) { for (Action *action = state->firstAction; action != NULL; action = action->next) { if (action == state->firstAction) fputws(_SC("\t\t\tif ("), gen); else fputws(_SC("\t\t\telse if ("), gen); - if (action->typ == Node::chr) { + if (action->typ == NodeType::chr) { wchar_t* res = DFAChCond(action->sym, fmt); fwprintf(gen, _SC("%") 
_SFMT, res); } else PutRange(tab->CharClassSet(action->sym)); fputws(_SC(") {"), gen); - if (action->tc == Node::contextTrans) { + if (action->tc == TransitionCode::contextTrans) { fputws(_SC("apx++; "), gen); ctxEnd = false; } else if (state->ctx) fputws(_SC("apx = 0; "), gen); @@ -757,7 +757,7 @@ void DFA::WriteState(const State *state) { fputws(_SC("goto case_0;}\n"), gen); } else { fwprintf(gen, _SC("t->kind = %d /* %") _SFMT _SC(" */; "), endOf->n, endOf->name); - if(endOf->semPos && endOf->typ == Node::t) { + if(endOf->semPos && endOf->typ == NodeType::t) { fputws(_SC(" {"), gen); CopySourcePart(endOf->semPos, 0); fputws(_SC("}"), gen); @@ -778,7 +778,7 @@ void DFA::WriteStartTab() { bool firstRange = true; for (Action *action = firstState->firstAction; action != NULL; action = action->next) { int targetState = action->target->state->nr; - if (action->typ == Node::chr) { + if (action->typ == NodeType::chr) { fwprintf(gen, _SC("\tstart.set(%d, %d);\n"), action->sym, targetState); } else { CharSet *s = tab->CharClassSet(action->sym); diff --git a/src/Node.cpp b/src/Node.cpp index d15fd36..9b7c261 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -32,7 +32,7 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { -Node::Node(int typ, Symbol *sym, int line, int col) { +Node::Node(NodeType typ, Symbol *sym, int line, int col) { this->n = 0; this->next = NULL; this->down = NULL; diff --git a/src/Node.h b/src/Node.h index 4453969..bfb7bd9 100644 --- a/src/Node.h +++ b/src/Node.h @@ -33,6 +33,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include "Position.h" #include "Scanner.h" #include "State.h" +#include "NodeSymbolKind.h" namespace Coco { @@ -41,30 +42,9 @@ class BitArray; class Node { public: - // constants for node kinds - enum { - t = 1, // terminal symbol - pr, // pragma - nt, // nonterminal symbol - clas, // character class - chr, // character - wt, // weak terminal symbol - any, // - eps, // empty - sync, // synchronization symbol - sem, // semantic action: (. .) - alt, // alternative: | - iter, // iteration: { } - opt, // option: [ ] - rslv, // resolver expr - }; - enum { - normalTrans, // transition codes - contextTrans, - }; int n; // node number - int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv + NodeType typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv Node *next; // to successor node Node *down; // alt: to next alternative Node *sub; // alt, iter, opt: to first node of substructure @@ -83,7 +63,7 @@ class Node { // (only used in DFA.ConvertToStates) int rmin, rmax; // repetition quantifiers - Node(int typ, Symbol *sym, int line, int col); + Node(NodeType typ, Symbol *sym, int line, int col); ~Node(); }; diff --git a/src/NodeSymbolKind.h b/src/NodeSymbolKind.h new file mode 100644 index 0000000..d1a7b71 --- /dev/null +++ b/src/NodeSymbolKind.h @@ -0,0 +1,42 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +/* + * File: NodeSymbolKind.h + * Author: mingo + * + * Created on July 14, 2022, 8:03 AM + */ + +#ifndef NODESYMBOLKIND_H +#define NODESYMBOLKIND_H + +// constants for node/symbol kinds +enum NodeType { + id, + t, // terminal symbol + pr, // pragma + nt, // nonterminal symbol + clas, // character class + chr, // character + wt, // weak terminal symbol + any, // + eps, // empty + sync, // synchronization symbol + sem, // semantic action: (. .) 
+ alt, // alternative: | + iter, // iteration: { } + opt, // option: [ ] + rslv, // resolver expr +}; +enum TransitionCode { + normalTrans, // transition codes + contextTrans, +}; + + +#endif /* NODESYMBOLKIND_H */ + diff --git a/src/Parser.cpp b/src/Parser.cpp index 4828e94..03a58cb 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -184,7 +184,7 @@ void Parser::Coco_NT() { sym = tab->FindSym(t->val); if (sym != NULL) SemErr(_SC("name declared twice")); else { - sym = tab->NewSym(Node::t, t->val, t->line, t->col); + sym = tab->NewSym(NodeType::t, t->val, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } } @@ -204,7 +204,7 @@ void Parser::Coco_NT() { AstAddTerminal(); #endif while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { - TokenDecl_NT(Node::t); + TokenDecl_NT(NodeType::t); } } if (IsKind(la, 11 /* "PRAGMAS" */)) { @@ -213,7 +213,7 @@ void Parser::Coco_NT() { AstAddTerminal(); #endif while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { - TokenDecl_NT(Node::pr); + TokenDecl_NT(NodeType::pr); } } while (IsKind(la, 12 /* "COMMENTS" */)) { @@ -264,9 +264,9 @@ void Parser::Coco_NT() { #endif sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line, t->col); + if (undef) sym = tab->NewSym(NodeType::nt, t->val, t->line, t->col); else { - if (sym->typ == Node::nt) { + if (sym->typ == NodeType::nt) { if (sym->graph != NULL) SemErr(_SC("name declared twice")); } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; @@ -312,7 +312,7 @@ void Parser::Coco_NT() { if (sym->attrPos != NULL) SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, _SC("???"), 0, 0); // noSym gets highest number + tab->noSym = tab->NewSym(NodeType::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); @@ -383,8 +383,8 @@ void 
Parser::SetDecl_NT() { #endif } -void Parser::TokenDecl_NT(int typ) { - wchar_t* name = NULL; int kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; +void Parser::TokenDecl_NT(NodeType typ) { + wchar_t* name = NULL; NodeType kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, _SC("TokenDecl"), la->line); #endif @@ -441,7 +441,7 @@ void Parser::TokenDecl_NT(int typ) { } else SynErr(46); if (IsKind(la, 41 /* "(." */)) { SemText_NT(sym->semPos); - if (typ == Node::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); + if (typ == NodeType::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -684,7 +684,7 @@ void Parser::Char_NT(int &n) { #endif } -void Parser::Sym_NT(wchar_t* &name, int &kind) { +void Parser::Sym_NT(wchar_t* &name, NodeType &kind) { #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, _SC("Sym"), la->line); #endif @@ -735,7 +735,7 @@ void Parser::Term_NT(Graph* &g) { #endif if (StartOf(17 /* opt */)) { if (IsKind(la, 39 /* "IF" */)) { - rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line, la->col); + rslv = tab->NewNode(NodeType::rslv, (Symbol*)NULL, la->line, la->col); Resolver_NT(rslv->pos); g = new Graph(rslv); } @@ -747,10 +747,10 @@ void Parser::Term_NT(Graph* &g) { tab->MakeSequence(g, g2); delete g2; } } else if (StartOf(19 /* sem */)) { - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); } else SynErr(50); if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif @@ -777,7 +777,7 @@ 
void Parser::Resolver_NT(Position* &pos) { } void Parser::Factor_NT(Graph* &g) { - wchar_t* name = NULL; int kind; Position *pos; bool weak = false; + wchar_t* name = NULL; NodeType kind; Position *pos; bool weak = false; g = NULL; #ifdef PARSER_WITH_AST @@ -799,9 +799,9 @@ void Parser::Factor_NT(Graph* &g) { bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, t->line, t->col); // forward nt + sym = tab->NewSym(NodeType::nt, name, t->line, t->col); // forward nt else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line, t->col); + sym = tab->NewSym(NodeType::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(_SC("undefined string in production")); @@ -809,11 +809,11 @@ void Parser::Factor_NT(Graph* &g) { } } coco_string_delete(name); - int typ = sym->typ; - if (typ != Node::t && typ != Node::nt) + NodeType typ = sym->typ; + if (typ != NodeType::t && typ != NodeType::nt) SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { - if (typ == Node::t) typ = Node::wt; + if (typ == NodeType::t) typ = NodeType::wt; else SemErr(_SC("only terminals may be weak")); } Node *p = tab->NewNode(typ, sym, t->line, t->col); @@ -870,7 +870,7 @@ void Parser::Factor_NT(Graph* &g) { } case 41 /* "(." 
*/: { SemText_NT(pos); - Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, t->line, t->col); + Node *p = tab->NewNode(NodeType::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); @@ -881,7 +881,7 @@ void Parser::Factor_NT(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys + Node *p = tab->NewNode(NodeType::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); break; @@ -891,7 +891,7 @@ void Parser::Factor_NT(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, t->line, t->col); + Node *p = tab->NewNode(NodeType::sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); break; @@ -899,7 +899,7 @@ void Parser::Factor_NT(Graph* &g) { default: SynErr(51); break; } if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); @@ -1017,7 +1017,7 @@ void Parser::TokenTerm_NT(Graph* &g) { } void Parser::TokenFactor_NT(Graph* &g) { - wchar_t* name = NULL; int kind; + wchar_t* name = NULL; NodeType kind; #ifdef PARSER_WITH_AST bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, _SC("TokenFactor"), la->line); #endif @@ -1030,7 +1030,7 @@ void Parser::TokenFactor_NT(Graph* &g) { SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; + Node *p = tab->NewNode(NodeType::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str @@ -1077,7 +1077,7 @@ void Parser::TokenFactor_NT(Graph* &g) { tab->MakeIteration(g); coco_string_delete(tokenString); 
tokenString = coco_string_create(noString); } else SynErr(53); if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, t->line, t->col)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); #ifdef PARSER_WITH_AST if(ntAdded) AstPopNonTerminal(); #endif diff --git a/src/Parser.h b/src/Parser.h index dd236f4..0c790e5 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -133,8 +133,8 @@ class Parser { void AstPopNonTerminal(); #endif -int id; - int str; +NodeType id; + NodeType str; FILE* trace; // other Coco objects referenced in this ATG Tab *tab; @@ -151,8 +151,8 @@ int id; tab = NULL; dfa = NULL; pgen = NULL; - id = 0; - str = 1; + id = NodeType::id; + str = NodeType::t; tokenString = NULL; noString = coco_string_create(_SC("-none-")); ignoreGammarErrors = false; @@ -174,7 +174,7 @@ int id; void Coco_NT(); void SetDecl_NT(); - void TokenDecl_NT(int typ); + void TokenDecl_NT(NodeType typ); void TokenExpr_NT(Graph* &g); void Set_NT(CharSet* &s); void AttrDecl_NT(Symbol *sym); @@ -182,7 +182,7 @@ int id; void Expression_NT(Graph* &g); void SimSet_NT(CharSet* &s); void Char_NT(int &n); - void Sym_NT(wchar_t* &name, int &kind); + void Sym_NT(wchar_t* &name, NodeType &kind); void Term_NT(Graph* &g); void Resolver_NT(Position* &pos); void Factor_NT(Graph* &g); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 62e6dad..7061da0 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -42,7 +42,7 @@ void ParserGen::Indent (int n) { // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning bool ParserGen::UseSwitch (const Node *p) { BitArray *s2; - if (p->typ != Node::alt) return false; + if (p->typ != NodeType::alt) return false; int nAlts = 0; BitArray s1(tab->terminals.Count); while (p != NULL) { @@ -53,7 +53,7 @@ bool ParserGen::UseSwitch (const Node *p) { delete s2; ++nAlts; // must not optimize with switch-statement, if alt uses a resolver expression - if 
(p->sub->typ == Node::rslv) return false; + if (p->sub->typ == NodeType::rslv) return false; p = p->down; } return nAlts > 5; @@ -148,7 +148,7 @@ int ParserGen::NewCondSet (const BitArray *s) { } void ParserGen::GenCond (const BitArray *s, const Node *p) { - if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); + if (p->typ == NodeType::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); if (n == 0) fputws(_SC("false"), gen); // happens if an ANY set matches no symbol @@ -208,12 +208,12 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { const Node *p2; BitArray *s1, *s2; while (p != NULL) { - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { Indent(indent); fwprintf(gen, _SC("%") _SFMT _SC("_NT("), p->sym->name); CopySourcePart(p->pos, 0); fputws(_SC(");\n"), gen); - } else if (p->typ == Node::t) { + } else if (p->typ == NodeType::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) { @@ -225,7 +225,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { fputws(_SC(");\n"), gen); } fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); - } if (p->typ == Node::wt) { + } if (p->typ == NodeType::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); @@ -233,7 +233,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { WriteSymbolOrCode(gen, p->sym); fwprintf(gen, _SC(", %d);\n"), NewCondSet(s1)); delete s1; - } if (p->typ == Node::any) { + } if (p->typ == NodeType::any) { Indent(indent); int acc = Sets::Elements(p->set); if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { @@ -245,18 +245,18 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { fputws(_SC("if ("), gen); GenCond(p->set, p); fwprintf(gen, _SC(") Get(); else SynErr(%d);\n"), errorNr); } else fwprintf(gen, _SC("SynErr(%d); 
// ANY node that matches no symbol\n"), errorNr); } - } if (p->typ == Node::eps) { // nothing - } if (p->typ == Node::rslv) { // nothing - } if (p->typ == Node::sem) { + } if (p->typ == NodeType::eps) { // nothing + } if (p->typ == NodeType::rslv) { // nothing + } if (p->typ == NodeType::sem) { CopySourcePart(p->pos, indent); - } if (p->typ == Node::sync) { + } if (p->typ == NodeType::sync) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); fputws(_SC("while (!("), gen); GenCond(s1, p); fputws(_SC(")) {"), gen); fwprintf(gen, _SC("SynErr(%d); Get();"), errorNr); fputws(_SC("}\n"), gen); delete s1; - } if (p->typ == Node::alt) { + } if (p->typ == NodeType::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); delete s1; @@ -294,11 +294,11 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { fputws(_SC("} "), gen); fwprintf(gen, _SC("else SynErr(%d);\n"), errorNr); } } - } if (p->typ == Node::iter) { + } if (p->typ == NodeType::iter) { Indent(indent); p2 = p->sub; fputws(_SC("while ("), gen); - if (p2->typ == Node::wt) { + if (p2->typ == NodeType::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); fputws(_SC("WeakSeparator("), gen); @@ -316,7 +316,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { GenCode(p2, indent + 1, s1); Indent(indent); fputws(_SC("}\n"), gen); delete s1; - } if (p->typ == Node::opt) { + } if (p->typ == NodeType::opt) { s1 = tab->First(p->sub); Indent(indent); fputws(_SC("if ("), gen); GenCond(s1, p->sub); fputws(_SC(") {\n"), gen); @@ -324,7 +324,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); fputws(_SC("}\n"), gen); delete s1; } - if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) + if (p->typ != NodeType::eps && p->typ != NodeType::sem && p->typ != NodeType::sync) isChecked->SetAll(false); // = new BitArray(Symbol.terminals.Count); if (p->up) break; p = 
p->next; @@ -474,30 +474,30 @@ int ParserGen::GenCodeRREBNF (const Node *p, int depth) { const Node *p2; while (p != NULL) { switch (p->typ) { - case Node::nt: - case Node::t: { + case NodeType::nt: + case NodeType::t: { fputws(_SC(" "), gen); fputws(p->sym->name, gen); ++rc; break; } - case Node::wt: { + case NodeType::wt: { break; } - case Node::any: { + case NodeType::any: { fputws(_SC(" ANY"), gen); ++rc; break; } - case Node::eps: break; // nothing - case Node::rslv: break; // nothing - case Node::sem: { + case NodeType::eps: break; // nothing + case NodeType::rslv: break; // nothing + case NodeType::sem: { break; } - case Node::sync: { + case NodeType::sync: { break; } - case Node::alt: { + case NodeType::alt: { bool need_close_alt = false; if(depth > 0 || loop_count || p->next) { fputws(" (", gen); @@ -512,14 +512,14 @@ int ParserGen::GenCodeRREBNF (const Node *p, int depth) { if(need_close_alt) fputws(_SC(" )"), gen); break; } - case Node::iter: { + case NodeType::iter: { if(p->sub->up == 0) fputws(_SC(" ("), gen); rc += GenCodeRREBNF(p->sub, depth+1); if(p->sub->up == 0) fputws(_SC(" )"), gen); fputws(_SC("*"), gen); break; } - case Node::opt: + case NodeType::opt: if(p->sub->up == 0) fputws(_SC(" ("), gen); rc += GenCodeRREBNF(p->sub, depth+1); if(p->sub->up == 0) fputws(_SC(" )"), gen); diff --git a/src/Scanner.frame b/src/Scanner.frame index ca72fc0..357569c 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -209,6 +209,7 @@ public: } void Clear() { + memset(Data, 0, Capacity*sizeof(T)); Count = 0; } diff --git a/src/Scanner.h b/src/Scanner.h index 023f531..989c4a0 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -205,6 +205,7 @@ class TArrayList } void Clear() { + memset(Data, 0, Capacity*sizeof(T)); Count = 0; } diff --git a/src/Symbol.cpp b/src/Symbol.cpp index b6a54c8..1980f47 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -37,7 +37,7 @@ const int Symbol::litToken = 2; const int Symbol::classLitToken = 3; -Symbol::Symbol(int typ, const 
wchar_t* name, int line, int col) { +Symbol::Symbol(NodeType typ, const wchar_t* name, int line, int col) { n = 0; graph = NULL; tokenKind = 0; diff --git a/src/Symbol.h b/src/Symbol.h index e8543f5..b696f6d 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -31,6 +31,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include "Scanner.h" #include "Position.h" +#include "NodeSymbolKind.h" namespace Coco { @@ -46,7 +47,7 @@ class Symbol { static const int classLitToken; // e.g. letter {letter} but without literals that have the same structure*/ int n; // symbol number - int typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ + NodeType typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ wchar_t *name; // symbol name Node *graph; // nt: to first node of syntax graph int tokenKind; // t: token kind (fixedToken, classToken, ...) @@ -62,7 +63,7 @@ class Symbol { // nt: pos of local declarations in source text (or null) Symbol *inherits; // optional, token from which this token derives - Symbol(int typ, const wchar_t* name, int line, int col); + Symbol(NodeType typ, const wchar_t* name, int line, int col); virtual ~Symbol(); }; diff --git a/src/Tab.cpp b/src/Tab.cpp index b3fdd2a..bd18b90 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -48,8 +48,8 @@ Tab::Tab(Parser *parser) { this->parser = parser; trace = parser->trace; errors = parser->errors; - eofSy = NewSym(Node::t, _SC("EOF"), 0, 0); - dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0, 0); + eofSy = NewSym(NodeType::t, _SC("EOF"), 0, 0); + dummyNode = NewNode(NodeType::eps, (Symbol*)NULL, 0, 0); checkEOF = true; visited = allSyncSets = NULL; srcName = srcDir = nsName = frameDir = outDir = NULL; @@ -76,18 +76,18 @@ Tab::~Tab() { } -Symbol* Tab::NewSym(int typ, const wchar_t* name, int line, int col) { +Symbol* Tab::NewSym(NodeType typ, const wchar_t* name, int line, int col) { if (coco_string_length(name) == 2 && name[0] == '"') { 
parser->SemErr(_SC("empty token not allowed")); name = coco_string_create(_SC("???")); } Symbol *sym = new Symbol(typ, name, line, col); - if (typ == Node::t) { + if (typ == NodeType::t) { sym->n = terminals.Count; terminals.Add(sym); - } else if (typ == Node::pr) { + } else if (typ == NodeType::pr) { pragmas.Add(sym); - } else if (typ == Node::nt) { + } else if (typ == NodeType::nt) { sym->n = nonterminals.Count; nonterminals.Add(sym); } @@ -116,7 +116,7 @@ void Tab::PrintSym(const Symbol *sym) { fwprintf(trace, _SC("%3d %-14.14") _SFMT _SC(" %s"), sym->n, sym->name, nTyp[sym->typ]); if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); - if (sym->typ == Node::nt) { + if (sym->typ == NodeType::nt) { fwprintf(trace, _SC("%5d"), Num(sym->graph)); if (sym->deletable) fputws(_SC(" true "), trace); else fputws(_SC(" false "), trace); } else @@ -184,7 +184,7 @@ void Tab::PrintSet(const BitArray *s, int indent) { // Syntax graph management //--------------------------------------------------------------------- -Node* Tab::NewNode(int typ, Symbol *sym, int line, int col) { +Node* Tab::NewNode(NodeType typ, Symbol *sym, int line, int col) { Node* node = new Node(typ, sym, line, col); node->n = nodes.Count; nodes.Add(node); @@ -192,13 +192,13 @@ Node* Tab::NewNode(int typ, Symbol *sym, int line, int col) { } -Node* Tab::NewNode(int typ, Node* sub) { +Node* Tab::NewNode(NodeType typ, Node* sub) { Node* node = NewNode(typ, (Symbol*)NULL, sub->line, sub->col); node->sub = sub; return node; } -Node* Tab::NewNode(int typ, int val, int line, int col) { +Node* Tab::NewNode(NodeType typ, int val, int line, int col) { Node* node = NewNode(typ, (Symbol*)NULL, line, col); node->val = val; return node; @@ -206,7 +206,7 @@ Node* Tab::NewNode(int typ, int val, int line, int col) { void Tab::MakeFirstAlt(Graph *g) { - g->l = NewNode(Node::alt, g->l); + g->l = NewNode(NodeType::alt, g->l); g->r->up = true; g->l->next = g->r; g->r = g->l; @@ -214,7 +214,7 
@@ void Tab::MakeFirstAlt(Graph *g) { // The result will be in g1 void Tab::MakeAlternative(Graph *g1, Graph *g2) { - g2->l = NewNode(Node::alt, g2->l); + g2->l = NewNode(NodeType::alt, g2->l); g2->l->up = true; g2->r->up = true; Node *p = g1->l; while (p->down != NULL) p = p->down; @@ -236,13 +236,13 @@ void Tab::MakeSequence(Graph *g1, Graph *g2) { g1->r = g2->r; } -void Tab::MakeOptIter(Graph *g, int typ) { +void Tab::MakeOptIter(Graph *g, NodeType typ) { g->l = NewNode(typ, g->l); g->r->up = true; } void Tab::MakeIteration(Graph *g) { - MakeOptIter(g, Node::iter); + MakeOptIter(g, NodeType::iter); Node *p = g->r; g->r = g->l; while (p != NULL) { @@ -252,14 +252,14 @@ void Tab::MakeIteration(Graph *g) { } void Tab::MakeOption(Graph *g) { - MakeOptIter(g, Node::opt); + MakeOptIter(g, NodeType::opt); g->l->next = g->r; g->r = g->l; } void Tab::MakeRepetition(Graph *g, int rmin, int rmax) { bool isOption = (rmin == 0 && rmax == 1); - MakeOptIter(g, Node::iter); + MakeOptIter(g, NodeType::iter); if(isOption) g->l->next = g->r; Node *p = g->r; g->r = g->l; @@ -282,7 +282,7 @@ void Tab::Finish(Graph *g) { void Tab::DeleteNodes() { for(int i=0; ir = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { - Node *p = NewNode(Node::chr, (int)s[i], 0, 0); + Node *p = NewNode(NodeType::chr, (int)s[i], 0, 0); g->r->next = p; g->r = p; } g->l = dummyNode->next; dummyNode->next = NULL; @@ -304,11 +304,11 @@ Graph* Tab::StrToGraph(const wchar_t* str) { void Tab::SetContextTrans(Node *p) { // set transition code in the graph rooted at p while (p != NULL) { - if (p->typ == Node::chr || p->typ == Node::clas) { - p->code = Node::contextTrans; - } else if (p->typ == Node::opt || p->typ == Node::iter) { + if (p->typ == NodeType::chr || p->typ == NodeType::clas) { + p->code = TransitionCode::contextTrans; + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { SetContextTrans(p->sub); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { 
SetContextTrans(p->sub); SetContextTrans(p->down); } if (p->up) break; @@ -327,15 +327,15 @@ bool Tab::DelSubGraph(const Node* p) { } bool Tab::DelNode(const Node* p) { - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { return p->sym->deletable; } - else if (p->typ == Node::alt) { + else if (p->typ == NodeType::alt) { return DelSubGraph(p->sub) || (p->down != NULL && DelSubGraph(p->down)); } else { - return (p->typ == Node::iter && p->rmin == 0) || p->typ == Node::opt || p->typ == Node::sem - || p->typ == Node::eps || p->typ == Node::rslv || p->typ == Node::sync; + return (p->typ == NodeType::iter && p->rmin == 0) || p->typ == NodeType::opt || p->typ == NodeType::sem + || p->typ == NodeType::eps || p->typ == NodeType::rslv || p->typ == NodeType::sync; } } @@ -374,25 +374,32 @@ void Tab::PrintNodes() { fwprintf(trace, _SC("%4d %s "), p->n, (nTyp[p->typ])); if (p->sym != NULL) { fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), p->sym->name); - } else if (p->typ == Node::clas) { + } else if (p->typ == NodeType::clas) { CharClass *c = classes[p->val]; fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), c->name); } else fputws(_SC(" "), trace); fwprintf(trace, _SC("%5d "), Ptr(p->next, p->up)); - if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { + switch(p->typ) { + case NodeType::t: case NodeType::nt: case NodeType::wt: fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); - } if (p->typ == Node::chr) { + break; + case NodeType::chr: fwprintf(trace, _SC("%5d %5d "), p->val, p->code); - } if (p->typ == Node::clas) { + break; + case NodeType::clas: fwprintf(trace, _SC(" %5d "), p->code); - } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { + break; + case NodeType::alt: case NodeType::iter: case NodeType::opt: fwprintf(trace, _SC("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); - } if (p->typ == Node::sem) { + break; + case NodeType::sem: fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); - } if (p->typ == 
Node::eps || p->typ == Node::any || p->typ == Node::sync) { + break; + case NodeType::eps: case NodeType::any: case NodeType::sync: fwprintf(trace, _SC(" ")); - } + break; + } fwprintf(trace, _SC("%5d %5d\n"), p->line, p->col); } fputws(_SC("\n"), trace); @@ -480,13 +487,12 @@ void Tab::WriteCharClasses () { //--------------------------------------------------------------------- // Symbol set computations //--------------------------------------------------------------------- - /* Computes the first set for the given Node. */ BitArray* Tab::First0(const Node *p, BitArray *mark) { BitArray *fs = new BitArray(terminals.Count); while (p != NULL && !((*mark)[p->n])) { mark->Set(p->n, true); - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { if (p->sym->firstReady) { fs->Or(p->sym->first); } else { @@ -495,13 +501,13 @@ BitArray* Tab::First0(const Node *p, BitArray *mark) { delete fs0; } } - else if (p->typ == Node::t || p->typ == Node::wt) { + else if (p->typ == NodeType::t || p->typ == NodeType::wt) { fs->Set(p->sym->n, true); } - else if (p->typ == Node::any) { + else if (p->typ == NodeType::any) { fs->Or(p->set); } - else if (p->typ == Node::alt) { + else if (p->typ == NodeType::alt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; @@ -509,7 +515,7 @@ BitArray* Tab::First0(const Node *p, BitArray *mark) { fs->Or(fs0); delete fs0; } - else if (p->typ == Node::iter || p->typ == Node::opt) { + else if (p->typ == NodeType::iter || p->typ == NodeType::opt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; @@ -544,8 +550,10 @@ void Tab::CompFirstSets() { sym->first = new BitArray(terminals.Count); sym->firstReady = false; } + fwprintf(trace, _SC("Computing First Sets: %d\n"), nonterminals.Count); for (i=0; iname, sym->line, sym->col); BitArray *saved = sym->first; sym->first = First(sym->graph); delete saved; @@ -556,15 +564,15 @@ void Tab::CompFirstSets() { void Tab::CompFollow(Node *p) { while (p != NULL && !((*visited)[p->n])) { 
visited->Set(p->n, true); - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { BitArray *s = First(p->next); p->sym->follow->Or(s); delete s; if (DelGraph(p->next)) p->sym->nts->Set(curSy->n, true); - } else if (p->typ == Node::opt || p->typ == Node::iter) { + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { CompFollow(p->sub); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { CompFollow(p->sub); CompFollow(p->down); } p = p->next; @@ -615,12 +623,12 @@ void Tab::CompFollowSets() { const Node* Tab::LeadingAny(const Node *p) { if (p == NULL) return NULL; const Node *a = NULL; - if (p->typ == Node::any) a = p; - else if (p->typ == Node::alt) { + if (p->typ == NodeType::any) a = p; + else if (p->typ == NodeType::alt) { a = LeadingAny(p->sub); if (a == NULL) a = LeadingAny(p->down); } - else if (p->typ == Node::opt || p->typ == Node::iter) a = LeadingAny(p->sub); + else if (p->typ == NodeType::opt || p->typ == NodeType::iter) a = LeadingAny(p->sub); if (a == NULL && DelNode(p) && !p->up) a = LeadingAny(p->next); return a; } @@ -628,13 +636,13 @@ const Node* Tab::LeadingAny(const Node *p) { void Tab::FindAS(const Node *p) { // find ANY sets const Node *a; while (p != NULL) { - if (p->typ == Node::opt || p->typ == Node::iter) { + if (p->typ == NodeType::opt || p->typ == NodeType::iter) { FindAS(p->sub); a = LeadingAny(p->sub); BitArray *ba = First(p->next); if (a != NULL) Sets::Subtract(a->set, ba); delete ba; - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { BitArray s1(terminals.Count); const Node *q = p; while (q != NULL) { @@ -661,7 +669,7 @@ void Tab::FindAS(const Node *p) { // find ANY sets if (DelNode(p)) { a = LeadingAny(p->next); if (a != NULL) { - Node *q = (p->typ == Node::nt) ? p->sym->graph : p->sub; + Node *q = (p->typ == NodeType::nt) ? 
p->sym->graph : p->sub; BitArray *ba = First(q); Sets::Subtract(a->set, ba); delete ba; @@ -690,21 +698,21 @@ BitArray* Tab::Expected(const Node *p, const Symbol *curSy) { // does not look behind resolvers; only called during LL(1) test and in CheckRes BitArray* Tab::Expected0(const Node *p, const Symbol *curSy) { - if (p->typ == Node::rslv) return new BitArray(terminals.Count); + if (p->typ == NodeType::rslv) return new BitArray(terminals.Count); else return Expected(p, curSy); } void Tab::CompSync(Node *p) { while (p != NULL && !(visited->Get(p->n))) { visited->Set(p->n, true); - if (p->typ == Node::sync) { + if (p->typ == NodeType::sync) { BitArray *s = Expected(p->next, curSy); s->Set(eofSy->n, true); allSyncSets->Or(s); p->set = s; - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { CompSync(p->sub); CompSync(p->down); - } else if (p->typ == Node::opt || p->typ == Node::iter) + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) CompSync(p->sub); p = p->next; } @@ -728,7 +736,7 @@ void Tab::SetupAnys() { Node *p; for (int i=0; ityp == Node::any) { + if (p->typ == NodeType::any) { p->set = new BitArray(terminals.Count, true); p->set->Set(eofSy->n, false); } @@ -795,7 +803,7 @@ void Tab::CompSymbolSets() { Node *p; for (int i=0; ityp == Node::any || p->typ == Node::sync) { + if (p->typ == NodeType::any || p->typ == NodeType::sync) { fwprintf(trace, _SC("Node: %4d %4s: Line: %4d\n"), p->n, nTyp[p->typ], p->line); fwprintf(trace, _SC(" ")); PrintSet(p->set, 10); } @@ -921,12 +929,12 @@ bool Tab::GrammarCheckAll() { void Tab::GetSingles(const Node *p, TArrayList &singles) { if (p == NULL) return; // end of graph - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { singles.Add(p->sym); - } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { + } else if (p->typ == NodeType::alt || p->typ == NodeType::iter || p->typ == NodeType::opt) { if (p->up || DelGraph(p->next)) { GetSingles(p->sub, singles); - 
if (p->typ == Node::alt) GetSingles(p->down, singles); + if (p->typ == NodeType::alt) GetSingles(p->down, singles); } } if (!p->up && DelNode(p)) GetSingles(p->next, singles); @@ -1012,7 +1020,7 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { while (p != NULL) { //if(p->sym) wprintf(_SC("%") _SFMT _SC("-> %") _SFMT _SC(":%d:\n", indent, p->sym->name, p->sym->line)); switch (p->typ) { - case Node::nt: { + case NodeType::nt: { if (p->sym->firstReady) { if(p->sym->first->Get(tok)) { if(coco_string_length(indent) == 1) @@ -1028,20 +1036,20 @@ void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { } break; } - case Node::t: case Node::wt: { + case NodeType::t: case NodeType::wt: { if(p->sym->n == tok) wprintf(_SC("%") _SFMT _SC("= %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->line, p->col); break; } - case Node::any: { + case NodeType::any: { break; } - case Node::alt: { + case NodeType::alt: { PrintFirstPath(p->sub, tok, indent); PrintFirstPath(p->down, tok, indent); break; } - case Node::iter: case Node::opt: { + case NodeType::iter: case NodeType::opt: { if (!DelNode(p->sub)) //prevent endless loop with some ill grammars PrintFirstPath(p->sub, tok, indent); break; @@ -1056,7 +1064,7 @@ int Tab::CheckAlts(Node *p) { int rc = 0; BitArray s0(terminals.Count), *s1, *s2; while (p != NULL) { - if (p->typ == Node::alt) { + if (p->typ == NodeType::alt) { Node *q = p; s0.SetAll(false); while (q != NULL) { // for all alternatives @@ -1078,7 +1086,7 @@ int Tab::CheckAlts(Node *p) { CheckAlts(q->sub); q = q->down; } - } else if (p->typ == Node::opt || p->typ == Node::iter) { + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { if (DelSubGraph(p->sub)) LL1Error(4, NULL); // e.g. 
[[...]] else { s1 = Expected0(p->sub, curSy); @@ -1098,7 +1106,7 @@ int Tab::CheckAlts(Node *p) { delete s1; delete s2; } CheckAlts(p->sub); - } else if (p->typ == Node::any) { + } else if (p->typ == NodeType::any) { if (Sets::Elements(p->set) == 0) LL1Error(3, NULL); // e.g. {ANY} ANY or [ANY] ANY or ( ANY | ANY ) } @@ -1128,7 +1136,7 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { while (p != NULL) { const Node *q; - if (p->typ == Node::alt) { + if (p->typ == NodeType::alt) { expected.SetAll(false); for (q = p; q != NULL; q = q->down) { BitArray *ba = Expected0(q->sub, curSy); @@ -1137,7 +1145,7 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { } soFar.SetAll(false); for (q = p; q != NULL; q = q->down) { - if (q->sub->typ == Node::rslv) { + if (q->sub->typ == NodeType::rslv) { BitArray *fs = Expected(q->sub->next, curSy); if (Sets::Intersect(fs, &soFar)) ResErr(q->sub, _SC("Warning: Resolver will never be evaluated. Place it at previous conflicting alternative.")); @@ -1151,8 +1159,8 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { } CheckRes(q->sub, true); } - } else if (p->typ == Node::iter || p->typ == Node::opt) { - if (p->sub->typ == Node::rslv) { + } else if (p->typ == NodeType::iter || p->typ == NodeType::opt) { + if (p->sub->typ == NodeType::rslv) { BitArray *fs = First(p->sub->next); BitArray *fsNext = Expected(p->next, curSy); bool bsi = Sets::Intersect(fs, fsNext); @@ -1161,7 +1169,7 @@ void Tab::CheckRes(const Node *p, bool rslvAllowed) { ResErr(p->sub, _SC("Warning: Misplaced resolver: no LL(1) conflict.")); } CheckRes(p->sub, true); - } else if (p->typ == Node::rslv) { + } else if (p->typ == NodeType::rslv) { if (!rslvAllowed) ResErr(p, _SC("Warning: Misplaced resolver: no alternative.")); } @@ -1199,12 +1207,12 @@ bool Tab::NtsComplete() { void Tab::MarkReachedNts(const Node *p) { while (p != NULL) { - if (p->typ == Node::nt && !((*visited)[p->sym->n])) { // new nt reached + if (p->typ == NodeType::nt && 
!((*visited)[p->sym->n])) { // new nt reached visited->Set(p->sym->n, true); MarkReachedNts(p->sym->graph); - } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { + } else if (p->typ == NodeType::alt || p->typ == NodeType::iter || p->typ == NodeType::opt) { MarkReachedNts(p->sub); - if (p->typ == Node::alt) MarkReachedNts(p->down); + if (p->typ == NodeType::alt) MarkReachedNts(p->down); } if (p->up) break; p = p->next; @@ -1232,8 +1240,8 @@ bool Tab::AllNtReached() { bool Tab::IsTerm(const Node *p, const BitArray *mark) { // true if graph can be derived to terminals while (p != NULL) { - if (p->typ == Node::nt && !((*mark)[p->sym->n])) return false; - if (p->typ == Node::alt && !IsTerm(p->sub, mark) + if (p->typ == NodeType::nt && !((*mark)[p->sym->n])) return false; + if (p->typ == NodeType::alt && !IsTerm(p->sub, mark) && (p->down == NULL || !IsTerm(p->down, mark))) return false; if (p->up) break; p = p->next; @@ -1287,7 +1295,7 @@ void Tab::XRef() { Node *n; for (i=0; ityp == Node::t || n->typ == Node::wt || n->typ == Node::nt) { + if (n->typ == NodeType::t || n->typ == NodeType::wt || n->typ == NodeType::nt) { TArrayList *list = (TArrayList*)(xref.Get(n->sym)); if (list == NULL) {list = new TArrayList(); xref.Set(n->sym, list);} list->Add(n->line); diff --git a/src/Tab.h b/src/Tab.h index 9d6eca5..a01c910 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -98,7 +98,7 @@ class Tab { static const char* tKind[]; - Symbol* NewSym(int typ, const wchar_t* name, int line, int col); + Symbol* NewSym(NodeType typ, const wchar_t* name, int line, int col); Symbol* FindSym(const wchar_t* name); int Num(const Node *p); void PrintSym(const Symbol *sym); @@ -109,9 +109,9 @@ class Tab { // Syntax graph management //--------------------------------------------------------------------- - Node* NewNode(int typ, Symbol *sym, int line, int col); - Node* NewNode(int typ, Node* sub); - Node* NewNode(int typ, int val, int line, int col); + Node* NewNode(NodeType 
typ, Symbol *sym, int line, int col); + Node* NewNode(NodeType typ, Node* sub); + Node* NewNode(NodeType typ, int val, int line, int col); void MakeFirstAlt(Graph *g); void MakeAlternative(Graph *g1, Graph *g2); void MakeSequence(Graph *g1, Graph *g2); @@ -237,7 +237,7 @@ class Tab { void SetDDT(const wchar_t* s); void SetOption(const wchar_t* s); private: - void MakeOptIter(Graph *g, int typ); + void MakeOptIter(Graph *g, NodeType typ); }; From 84e4333a34502dc279abb1d2c4081016a0029ce1 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 14 Jul 2022 10:20:12 +0200 Subject: [PATCH 93/95] Fix memory leak --- src/Coco.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Coco.cpp b/src/Coco.cpp index b8e5497..0226c9e 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -173,6 +173,7 @@ int main(int argc, char *argv_[]) { coco_string_delete(ddtString); coco_string_delete(chTrFileName); coco_string_delete(traceFileName); + coco_string_delete(outDir); return 0; } From cde75385e0f49acd2026c10ebf020f3d4132a26d Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 14 Jul 2022 10:25:04 +0200 Subject: [PATCH 94/95] Fix my mistake of calling "First" before testing, introduced here d608f8d5c5655c01d670cd5e2b88ebc82d940573 --- src/Tab.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Tab.cpp b/src/Tab.cpp index bd18b90..2d2eccc 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -639,9 +639,11 @@ void Tab::FindAS(const Node *p) { // find ANY sets if (p->typ == NodeType::opt || p->typ == NodeType::iter) { FindAS(p->sub); a = LeadingAny(p->sub); - BitArray *ba = First(p->next); - if (a != NULL) Sets::Subtract(a->set, ba); - delete ba; + if (a != NULL) { + BitArray *ba = First(p->next); + Sets::Subtract(a->set, ba); + delete ba; + } } else if (p->typ == NodeType::alt) { BitArray s1(terminals.Count); const Node *q = p; From cd146b6fb0ca37eace547468d0931c9bf16eb3de Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 27 Sep 2022 14:11:49 +0200 Subject: [PATCH 95/95] 
Fixes to build with https://github.com/jart/cosmopolitan --- src/DFA.cpp | 2 + src/NodeSymbolKind.h | 2 +- src/Parser.cpp | 2 +- src/ParserGen.cpp | 6 +- src/Tab.cpp | 25 +++---- src/mk-cocor-cpp-amalgamation.lua | 105 ++++++++++++++++++++++++++++++ src/mk-cocor.sh | 14 ++++ 7 files changed, 140 insertions(+), 16 deletions(-) create mode 100644 src/mk-cocor-cpp-amalgamation.lua create mode 100644 src/mk-cocor.sh diff --git a/src/DFA.cpp b/src/DFA.cpp index 4c593a9..1f79326 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -36,10 +36,12 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { +#ifndef SZWC10 #define SZWC10 10 #define SZWC20 20 typedef wchar_t wchar_t_10[SZWC10+1]; typedef wchar_t wchar_t_20[SZWC20+1]; +#endif //---------- Output primitives static wchar_t* DFACh(int ch, wchar_t_10 &format, bool noWrapper=false) { diff --git a/src/NodeSymbolKind.h b/src/NodeSymbolKind.h index d1a7b71..e894052 100644 --- a/src/NodeSymbolKind.h +++ b/src/NodeSymbolKind.h @@ -25,7 +25,7 @@ enum NodeType { wt, // weak terminal symbol any, // eps, // empty - sync, // synchronization symbol + nt_sync, // synchronization symbol sem, // semantic action: (. .) 
alt, // alternative: | iter, // iteration: { } diff --git a/src/Parser.cpp b/src/Parser.cpp index 03a58cb..4d04692 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -891,7 +891,7 @@ void Parser::Factor_NT(Graph* &g) { #ifdef PARSER_WITH_AST AstAddTerminal(); #endif - Node *p = tab->NewNode(NodeType::sync, (Symbol*)NULL, t->line, t->col); + Node *p = tab->NewNode(NodeType::nt_sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); break; diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index 7061da0..c184900 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -249,7 +249,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { } if (p->typ == NodeType::rslv) { // nothing } if (p->typ == NodeType::sem) { CopySourcePart(p->pos, indent); - } if (p->typ == NodeType::sync) { + } if (p->typ == NodeType::nt_sync) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); @@ -324,7 +324,7 @@ void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { Indent(indent); fputws(_SC("}\n"), gen); delete s1; } - if (p->typ != NodeType::eps && p->typ != NodeType::sem && p->typ != NodeType::sync) + if (p->typ != NodeType::eps && p->typ != NodeType::sem && p->typ != NodeType::nt_sync) isChecked->SetAll(false); // = new BitArray(Symbol.terminals.Count); if (p->up) break; p = p->next; @@ -494,7 +494,7 @@ int ParserGen::GenCodeRREBNF (const Node *p, int depth) { case NodeType::sem: { break; } - case NodeType::sync: { + case NodeType::nt_sync: { break; } case NodeType::alt: { diff --git a/src/Tab.cpp b/src/Tab.cpp index 2d2eccc..6c9eac5 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -335,7 +335,7 @@ bool Tab::DelNode(const Node* p) { } else { return (p->typ == NodeType::iter && p->rmin == 0) || p->typ == NodeType::opt || p->typ == NodeType::sem - || p->typ == NodeType::eps || p->typ == NodeType::rslv || p->typ == NodeType::sync; + || p->typ == NodeType::eps || p->typ == NodeType::rslv || p->typ == NodeType::nt_sync; } } @@ -347,14 
+347,17 @@ int Tab::Ptr(const Node *p, bool up) { else return p->n; } -static const size_t wchar_t_10_sz = 10; -typedef wchar_t wchar_t_10[wchar_t_10_sz]; +#ifndef SZWC10 +#define SZWC10 10 +#define SZWC20 20 +typedef wchar_t wchar_t_10[SZWC10+1]; +#endif static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { - coco_swprintf(format, wchar_t_10_sz, _SC(" ")); + coco_swprintf(format, SZWC10, _SC(" ")); } else { - coco_swprintf(format, wchar_t_10_sz, _SC("%5d"), pos->beg); + coco_swprintf(format, SZWC10, _SC("%5d"), pos->beg); } return format; } @@ -396,7 +399,7 @@ void Tab::PrintNodes() { case NodeType::sem: fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); break; - case NodeType::eps: case NodeType::any: case NodeType::sync: + case NodeType::eps: case NodeType::any: case NodeType::nt_sync: fwprintf(trace, _SC(" ")); break; } @@ -450,10 +453,10 @@ CharSet* Tab::CharClassSet(int i) { wchar_t* TabCh(const int ch, wchar_t_10 &format) { if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) { - coco_swprintf(format, wchar_t_10_sz, _SC("%d"), ch); + coco_swprintf(format, SZWC10, _SC("%d"), ch); return format; } else { - coco_swprintf(format, wchar_t_10_sz, _SC("'%") _CHFMT _SC("'"), ch); + coco_swprintf(format, SZWC10, _SC("'%") _CHFMT _SC("'"), ch); return format; } } @@ -707,7 +710,7 @@ BitArray* Tab::Expected0(const Node *p, const Symbol *curSy) { void Tab::CompSync(Node *p) { while (p != NULL && !(visited->Get(p->n))) { visited->Set(p->n, true); - if (p->typ == NodeType::sync) { + if (p->typ == NodeType::nt_sync) { BitArray *s = Expected(p->next, curSy); s->Set(eofSy->n, true); allSyncSets->Or(s); @@ -805,7 +808,7 @@ void Tab::CompSymbolSets() { Node *p; for (int i=0; ityp == NodeType::any || p->typ == NodeType::sync) { + if (p->typ == NodeType::any || p->typ == NodeType::nt_sync) { fwprintf(trace, _SC("Node: %4d %4s: Line: %4d\n"), p->n, nTyp[p->typ], p->line); fwprintf(trace, _SC(" ")); PrintSet(p->set, 10); } @@ 
-833,7 +836,7 @@ int Tab::Hex2Char(const wchar_t* s, int len) { } static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { - coco_swprintf(format, wchar_t_10_sz, _SC("\\0x%04x"), ch); + coco_swprintf(format, SZWC10, _SC("\\0x%04x"), ch); return format; } diff --git a/src/mk-cocor-cpp-amalgamation.lua b/src/mk-cocor-cpp-amalgamation.lua new file mode 100644 index 0000000..acbfec8 --- /dev/null +++ b/src/mk-cocor-cpp-amalgamation.lua @@ -0,0 +1,105 @@ +local base_dir = "/home/mingo/dev/c/A_grammars/CocoR-CPP/src/"; +local includes_base = {} +local sq_sources = [==[ +Action.cpp +BitArray.cpp +CharClass.cpp +CharSet.cpp +Comment.cpp +DFA.cpp +Generator.cpp +HashTable.cpp +Melted.cpp +Node.cpp +Parser.cpp +ParserGen.cpp +Position.cpp +Scanner.cpp +SortedList.cpp +State.cpp +StringBuilder.cpp +Symbol.cpp +Tab.cpp +Target.cpp +Coco.cpp +]==]; + +local included = {}; +local inc_sys = {}; +local inc_sys_count = 0; +local out = io.stdout + +function CopyWithInline(prefix, filename) + if included[filename] then return end + included[filename] = true + print('//--Start of', filename); + --if(filename:match("luac?.c")) + local inp = io.open(prefix .. filename, "r") + if not inp then + for idx in ipairs(includes_base) do + local sdir = includes_base[idx] + local fn = prefix .. sdir .. filename + --print(fn) + inp = io.open(fn, "r") + if inp then break end + end + end + if not inp then + if filename == "fzn_picat_sat_bc.h" then + print('//--End of', filename); + end + else + assert(inp) + for line in inp:lines() do + if line:match('#define LUA_USE_READLINE') then + out:write("//" .. line .. "\n") + else + local inc = line:match('#include%s+(["<].-)[">]') + if inc then + out:write("//" .. line .. "\n") + if inc:sub(1,1) == '"' or inc:match('[<"]sq') then + CopyWithInline(prefix, inc:sub(2)) + else + local fn = inc:sub(2) + if inc_sys[fn] == null then + inc_sys_count = inc_sys_count +1 + inc_sys[fn] = inc_sys_count + end + end + else + out:write(line .. 
"\n") + end + end + end + print('//--End of', filename); + end +end + +print([==[ +#ifdef WITH_COSMOPOLITAN + +STATIC_STACK_SIZE(0x400000); + +#endif + +#ifndef __COSMOPOLITAN__ +//g++ -g -Wall -Wextra -DWITHOUT_WCHAR -fno-rtti -fno-exceptions cocor-am.cpp -o Coco +#include //3 +#include //7 +#include //5 +#include //1 +#include //10 +#include //9 +#include //4 +#include //8 +#include //2 +//#include //6 + +#endif +]==]) + +local prefix = base_dir; local src_files = sq_sources; +for filename in src_files:gmatch('([^\n]+)') do + CopyWithInline(prefix, filename); +end +--for k, v in pairs(inc_sys) do print("#include <" .. k .. "> //" .. v ) end diff --git a/src/mk-cocor.sh b/src/mk-cocor.sh new file mode 100644 index 0000000..bdd7a3e --- /dev/null +++ b/src/mk-cocor.sh @@ -0,0 +1,14 @@ +# run gcc compiler in freestanding mode +optim=-O2 +g++ -g $optim -static -fno-pie -nostdlib -nostdinc \ + -fno-omit-frame-pointer -pg -mnop-mcount -mno-tls-direct-seg-refs \ + -fno-exceptions -fno-rtti -Wall -fno-strict-aliasing \ + -o cocor.com.dbg cocor-am.cpp \ + -DWITHOUT_WCHAR \ + -DWITH_COSMOPOLITAN \ + -Wl,--gc-sections -fuse-ld=bfd \ + -Wl,-T,ape.lds crt.o ape-no-modify-self.o cosmopolitan.a \ + -include cosmopolitan.h + +objcopy -S -O binary cocor.com.dbg cocor.com +