diff --git a/README.md b/README.md index 3451280..47e426e 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,29 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. http://ssw.jku.at/coco/ + +And these are my main modifications to the original: + +- Fix all known memory leaks + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow up to 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to create railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-geAST` to generate code to generate `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreGammarErrors` to make it easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with several non reachable non terminals + +- Add a `TERMINALS` section to generate user-defined tokens not managed by the Scanner (from cocoxml) + +- Refactor the code to allow compiling with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro + +- Generate between comments the corresponding representation of several magic numbers (mainly Tokens) +- Add the `_NT` suffix to non terminal functions to prevent name collision +- Add token inheritance from https://github.com/Lercher/CocoR + +See also https://github.com/mingodad/CocoR-Java and https://github.com/mingodad/CocoR-CSharp diff --git a/examples/bison.atg b/examples/bison.atg new file mode 100644 index 0000000..ffcf71e --- /dev/null +++ b/examples/bison.atg @@ -0,0 +1,346 @@ +$namespace=CocoBison + 
+COMPILER Bison + +TERMINALS + T_SYMBOL + +CHARACTERS + letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_". + digit = "0123456789". + cr = '\r'. + lf = '\n'. + tab = '\t'. + ff = '\f'. + stringCh = ANY - '"' - '\\' - cr - lf. + charCh = ANY - '\'' - '\\' - cr - lf. + printable = '\u0020' .. '\u007e'. + hex = "0123456789abcdef". + +TOKENS + ID = (letter | '.') { letter | digit | '.' | '-'}. + INT_LITERAL = digit { digit }. + STRING = '"' { stringCh | '\\' printable } '"'. + badString = '"' { stringCh | '\\' printable } (cr | lf). + CHAR_LITERAL = '\'' ( charCh | '\\' printable { hex } ) '\''. + + PERCENT_TOKEN = "%token". + PERCENT_NTERM = "%nterm". + + PERCENT_TYPE = "%type". + PERCENT_DESTRUCTOR = "%destructor". + PERCENT_PRINTER = "%printer". + + PERCENT_LEFT = "%left". + PERCENT_RIGHT = "%right". + PERCENT_NONASSOC = "%nonassoc". + PERCENT_PRECEDENCE = "%precedence". + + PERCENT_PREC = "%prec". + PERCENT_DPREC = "%dprec". + PERCENT_MERGE = "%merge". + + PERCENT_CODE = "%code". + PERCENT_DEFAULT_PREC = "%default-prec". + PERCENT_DEFINE = "%define". + PERCENT_DEFINES = "%defines". + PERCENT_ERROR_VERBOSE = "%error-verbose". + PERCENT_EXPECT = "%expect". + PERCENT_EXPECT_RR = "%expect-rr". + PERCENT_FLAG = "%". + PERCENT_FILE_PREFIX = "%file-prefix". + PERCENT_GLR_PARSER = "%glr-parser". + PERCENT_INITIAL_ACTION = "%initial-action". + PERCENT_LANGUAGE = "%language". + PERCENT_NAME_PREFIX = "%name-prefix". + PERCENT_NO_DEFAULT_PREC = "%no-default-prec". + PERCENT_NO_LINES = "%no-lines". + PERCENT_NONDETERMINISTIC_PARSER = "%nondeterministic-parser". + PERCENT_OUTPUT = "%output". + PERCENT_PURE_PARSER = "%pure-parser". + PERCENT_REQUIRE = "%require". + PERCENT_SKELETON = "%skeleton". + PERCENT_START = "%start". + PERCENT_TOKEN_TABLE = "%token-table". + PERCENT_VERBOSE = "%verbose". + PERCENT_YACC = "%yacc". + + //BRACED_CODE = "{...}". + //BRACED_PREDICATE = "%?{...}". + //BRACKETED_ID = "[identifier]". + //CHAR_LITERAL = "character literal". 
+ COLON = ":". + EPILOGUE = "epilogue". + EQUAL = "=". + //ID = "identifier". + //ID_COLON "identifier:". + PERCENT_PERCENT = "%%". + PIPE = "|". + PROLOGUE = "%{...%}". + SEMICOLON = ";". + //TAG = "". + //TAG_ANY = "<*>". + //TAG_NONE = "<>". + LEFT_BRACE = '{'. + RIGHT_BRACE = '}'. + LEFT_ANGLE_BRACK = '<'. + RIGHT_ANGLE_BRACK = '>'. + +PRAGMAS + +COMMENTS FROM "/*" TO "*/" NESTED +COMMENTS FROM "//" TO lf + +IGNORE cr + lf + tab + ff + +/*-------------------------------------------------------------------------*/ + +PRODUCTIONS + +Bison = + prologue_declarations "%%" grammar [epilogue] + EOF + . + +prologue_declarations = + prologue_declaration {prologue_declaration} + . + +prologue_declaration = + grammar_declaration + | "%{" {ANY} "%}" + | "%" + | "%define" variable [value] + | "%defines" [STRING] + | "%error-verbose" + | "%expect" INT_LITERAL + | "%expect-rr" INT_LITERAL + | "%file-prefix" STRING + | "%glr-parser" + | "%pure_parser" + | "%initial-action" params + | "%language" STRING + | "%name" ID + | "%name-prefix" ['='] STRING + | "%no-lines" + | "%nondeterministic-parser" + | "%output" STRING + | ("%param" | "%lex-param" | "%parse-param") params + | "%pure-parser" + | "%require" STRING + | "%skeleton" STRING + | "%token-table" + | "%verbose" + | "%yacc" + //| "%include-enum" STRING ID + | "%debug" + | "%locations" + //| error ";" + | /*FIXME: Err? What is this horror doing here? */ ";" + //| "BISONPRE_VERSION" '(' ANY {ANY} ')' + . + +params = + '{' (. // manage nested braces + if(la->kind != _RIGHT_BRACE) { + //print("==", la->line, la->kind, la->val); + for (int nested = 1; nested > 0;) { + //print("==1", la->line, la->kind, la->val, nested); + //print("==", la->line, nested, la->kind, la->val); + if(la->kind == _LEFT_BRACE) ++nested; + Get(); + if(la->kind == _RIGHT_BRACE) --nested; + else if(la->kind == _EOF) break; + //print("==2", la->line, la->kind, la->val, nested); + } + } + .) + {ANY} '}' + . 
+ +grammar_declaration = + symbol_declaration + | "%union" [union_name] params + | "%start" symbol + | code_props_type params generic_symlist + | "%default-prec" + | "%no-default-prec" + | "%code" [ID] params + . + +code_props_type = + "%destructor" + | "%printer" + . + +generic_symlist = + generic_symlist_item {generic_symlist_item} + . + +generic_symlist_item = + symbol + | tag + . + +union_name = + ID | tag + . + +symbol_declaration = + "%nterm" nterm_decls + | "%token" token_decls + | "%term" symbol_decls + | "%type" symbol_decls + | precedence_declarator token_decls_for_prec + . + +nterm_decls = + token_decls + . + +token_decls = + [tag] token_decl_1 {token_decl_1} + . + +token_decl_1 = + token_decl + . + +token_decl = + id [int_opt] [alias] + . + +int_opt = + INT_LITERAL + . + +alias = + string_as_id + | "_(" STRING ')' //TSTRING + . + +symbol_decls = + [tag] symbol_decl_1 {symbol_decl_1} + . + +symbol_decl_1 = + symbol + . + +precedence_declarator = + "%left" + | "%right" + | "%nonassoc" + | "%precedence" + | "%binary" + . + +token_decls_for_prec = + [tag] token_decl_for_prec_1 {token_decl_for_prec_1} + . + +// One or more token declarations for precedence declaration. +token_decl_for_prec_1 = + token_decl_for_prec + . + +token_decl_for_prec = + id [int_opt] + | string_as_id + . + +grammar = + rules_or_grammar_declaration {rules_or_grammar_declaration} + . + +rules_or_grammar_declaration = + rules + | grammar_declaration ";" + //| error ";" + . + +rules = + id_colon (. printf("%s ::= ", t->val); .) + [named_ref_opt | tag ] ":" rhses_1 (. printf("\n"); .) + . + +rhses_1 = + rhs { + '|' (. printf("| "); .) + rhs + } ';' + . + +rhs = + /*empty*/ (. printf("/*empty*/ "); .) + | "%empty" [params] + | rhs_symbol {rhs_symbol} + . + +rhs_symbol = + symbol (. printf("%s ", t->val); .) 
[named_ref_opt | tag] + | params + //| [tag] params //named_ref_opt + | "%?{" {ANY} '}' + | "%prec" symbol + | "%dprec" INT_LITERAL + | "%merge" tag + | "%expect" INT_LITERAL + | "%expect-rr" INT_LITERAL + . + +named_ref_opt = + '[' ID ']' //BRACKETED_ID + . + +epilogue = + "%%" {ANY} + . + +variable = + ID + . + +value = + ID + | STRING + | params + | INT_LITERAL + . + +id = + ID + | CHAR_LITERAL + . + +id_colon = + ID //':' + . + + +symbol = + id + | string_as_id + . + +string_as_id = + STRING + . + +tag = + '<' (. // manage nested angle brackets + if(la->kind != _RIGHT_ANGLE_BRACK) { + for (int nested = 1; nested > 0;) { + //print("==", la->line, nested, la->kind, la->val); + if(la->kind == _LEFT_ANGLE_BRACK) ++nested; + Get(); + if(la->kind == _RIGHT_ANGLE_BRACK) --nested; + else if(la->kind == _EOF) break; + } + } + .) + {ANY} '>' + . + + +END Bison. diff --git a/examples/build-cocobison.sh b/examples/build-cocobison.sh new file mode 100755 index 0000000..360c632 --- /dev/null +++ b/examples/build-cocobison.sh @@ -0,0 +1,4 @@ +../src/Coco -frames ../src bison.atg +g++ -g -Wall -o cocobison Parser.cpp Scanner.cpp cocobison.cpp +#./cocobison "postgresql-13.3/src/backend/parser/gram.y" + diff --git a/examples/cocobison.cpp b/examples/cocobison.cpp new file mode 100644 index 0000000..b7533b2 --- /dev/null +++ b/examples/cocobison.cpp @@ -0,0 +1,28 @@ +#include "Scanner.h" +#include "Parser.h" + +using namespace CocoBison; + +int main (int argc, char *argv[]) { + + if (argc == 2) { + wchar_t *fileName = coco_string_create(argv[1]); + CocoBison::Scanner scanner(fileName); + CocoBison::Parser parser(&scanner); + parser.Parse(); + if(parser.errors->count == 0) { +#ifdef PARSER_WITH_AST + if(parser.ast_root) { + parser.ast_root->dump_all(); + //parser.ast_root->dump_pruned(); + } +#endif + } + + coco_string_delete(fileName); + } else + wprintf(_SC("-- No source file specified\n")); + + return 0; + +} diff --git a/examples/readme-cocobison.txt 
b/examples/readme-cocobison.txt new file mode 100644 index 0000000..6b79e16 --- /dev/null +++ b/examples/readme-cocobison.txt @@ -0,0 +1,5 @@ +This example uses a bison grammar to generate an EBNF output (understood by https://www.bottlecaps.de/rr/ui) from an input bison parser file description. + +Example: + +./cocobison postgresql-13.3/src/backend/parser/gram.y diff --git a/src/Action.cpp b/src/Action.cpp index d6857b3..23f24dd 100644 --- a/src/Action.cpp +++ b/src/Action.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -39,28 +39,34 @@ Action::Action(int typ, int sym, int tc) { this->typ = typ; this->sym = sym; this->tc = tc; } -void Action::AddTarget(Target *t) { // add t to the action.targets +Action::~Action() { + delete this->target; + delete this->next; +} + +bool Action::AddTarget(State *state) { // add t to the action.targets Target *last = NULL; Target *p = target; - while (p != NULL && t->state->nr >= p->state->nr) { - if (t->state == p->state) return; + while (p != NULL && state->nr >= p->state->nr) { + if (state == p->state) return false; last = p; p = p->next; } + Target *t = new Target(state); t->next = p; if (p == target) target = t; else last->next = t; + return true; } void Action::AddTargets(Action *a) {// add copy of a.targets to action.targets for (Target *p = a->target; p != NULL; p = p->next) { - Target *t = new Target(p->state); - AddTarget(t); + AddTarget(p->state); } - if (a->tc == Node::contextTrans) tc = Node::contextTrans; + if (a->tc == TransitionCode::contextTrans) tc = TransitionCode::contextTrans; } CharSet* Action::Symbols(Tab *tab) { CharSet *s; - if (typ == Node::clas) + if (typ == NodeType::clas) s = tab->CharClassSet(sym)->Clone(); else { s = new CharSet(); s->Set(sym); @@ -68,14 +74,19 @@ CharSet* Action::Symbols(Tab *tab) { return s; } -void Action::ShiftWith(CharSet *s, Tab *tab) { +bool Action::ShiftWith(CharSet *s, Tab *tab) { //return true if it used the CharSet *s + bool rc = false; if (s->Elements() == 1) { - typ = Node::chr; sym = s->First(); + typ = NodeType::chr; sym = s->First(); } else { CharClass *c = tab->FindCharClass(s); - if (c == 
NULL) c = tab->NewCharClass(L"#", s); // class with dummy name - typ = Node::clas; sym = c->n; + if (c == NULL) { + c = tab->NewCharClass(_SC("#"), s); // class with dummy name + rc = true; + } + typ = NodeType::clas; sym = c->n; } + return rc; } }; // namespace diff --git a/src/Action.h b/src/Action.h index 4148b63..f929fbd 100644 --- a/src/Action.h +++ b/src/Action.h @@ -47,10 +47,11 @@ class Action // action of finite automaton Action *next; Action(int typ, int sym, int tc); - void AddTarget(Target *t); // add t to the action.targets + ~Action(); + bool AddTarget(State *state); // add t to the action.targets void AddTargets(Action *a); // add copy of a.targets to action.targets CharSet* Symbols(Tab *tab); - void ShiftWith(CharSet *s, Tab *tab); + bool ShiftWith(CharSet *s, Tab *tab); //return true if it used the CharSet *s }; }; // namespace diff --git a/src/ArrayList.cpp b/src/ArrayList.cpp deleted file mode 100644 index 0f50ddf..0000000 --- a/src/ArrayList.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/*------------------------------------------------------------------------- -Compiler Generator Coco/R, -Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz -extended by M. Loeberbauer & A. Woess, Univ. of Linz -ported to C++ by Csaba Balazs, University of Szeged -with improvements by Pat Terry, Rhodes University - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -As an exception, it is allowed to write an extension of Coco/R that is -used as a plugin in non-free software. - -If not otherwise stated, any source code generated by Coco/R (other than -Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ - -#include -#include "ArrayList.h" - -namespace Coco { - -ArrayList::ArrayList() { - Count = 0; - Capacity = 10; - Data = new void*[ Capacity ]; -} - -ArrayList::~ArrayList() { - delete [] Data; -} - -void ArrayList::Add(void *value) { - if (Count < Capacity) { - Data[Count] = value; - Count++; - } else { - Capacity *= 2; - void** newData = new void*[Capacity]; - for (int i=0; i>3) + BitArray::BitArray(const int length, const bool defaultValue) { Count = length; - Data = new unsigned char[ (length+7)>>3 ]; + unsigned int size = CALC_BIT_BYTES(length); + Data = new unsigned char[ size ]; if (defaultValue) - memset(Data, 0xFF, (length+7)>>3); + memset(Data, 0xFF, size); else - memset(Data, 0x00, (length+7)>>3); + memset(Data, 0x00, size); } BitArray::BitArray(const BitArray ©) { Count = copy.Count; - Data = new unsigned char[ (copy.Count+7)>>3 ]; - memcpy(Data, copy.Data, (copy.Count+7)>>3); + unsigned int size = CALC_BIT_BYTES(copy.Count); + Data = new unsigned char[ size ]; + memcpy(Data, copy.Data, size); } BitArray::~BitArray() @@ -54,7 +58,7 @@ BitArray::~BitArray() Data = NULL; } -int BitArray::getCount() { +int BitArray::getCount() const { return Count; } @@ -76,37 +80,41 @@ void BitArray::Set(const int index, const bool value) void BitArray::SetAll(const bool value) { + unsigned int size = CALC_BIT_BYTES(Count); if (value) - memset(Data, 0xFF, (Count+7)>>3); + memset(Data, 0xFF, size); else - memset(Data, 0x00, (Count+7)>>3); + 
memset(Data, 0x00, size); } void BitArray::Not() { - for (int i=0; i<(Count+7)>>3; i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count); i>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } void BitArray::Or(const BitArray *value) { - for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } void BitArray::Xor(const BitArray *value) { - for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { + for (int i=0, imax=CALC_BIT_BYTES(Count), vmax=CALC_BIT_BYTES(value->Count); + (iData[i]); } } @@ -115,7 +123,7 @@ BitArray* BitArray::Clone() const { BitArray *newBitArray = new BitArray(Count); newBitArray->Count = Count; - memcpy(newBitArray->Data, Data, (Count+7)>>3); + memcpy(newBitArray->Data, Data, CALC_BIT_BYTES(Count)); return newBitArray; } @@ -147,8 +155,9 @@ const BitArray &BitArray::operator=(const BitArray &right) if ( &right != this ) { // avoid self assignment delete [] Data; // prevents memory leak Count = right.Count; - Data = new unsigned char[ (Count+7)>>3 ]; - memcpy(Data, right.Data, (Count+7)>>3); + unsigned int size = CALC_BIT_BYTES(Count); + Data = new unsigned char[ size ]; + memcpy(Data, right.Data, size); } return *this; // enables cascaded assignments } diff --git a/src/BitArray.h b/src/BitArray.h index 31d0617..3694540 100644 --- a/src/BitArray.h +++ b/src/BitArray.h @@ -38,7 +38,7 @@ class BitArray BitArray(const BitArray © ); virtual ~BitArray(); - int getCount(); + int getCount() const; bool Get(const int index) const; void Set(const int index, const bool value); diff --git a/src/CharClass.cpp b/src/CharClass.cpp index deca847..80fae96 100644 --- a/src/CharClass.cpp +++ b/src/CharClass.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include "CharClass.h" -#include "Scanner.h" namespace Coco { @@ -37,6 +36,7 @@ CharClass::CharClass(const wchar_t* name, CharSet *s) { CharClass::~CharClass() { coco_string_delete(name); + delete this->set; } }; // namespace diff --git a/src/CharSet.cpp b/src/CharSet.cpp index 04267eb..def632e 100644 --- a/src/CharSet.cpp +++ b/src/CharSet.cpp @@ -29,9 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include -#include #include "CharSet.h" -#include "Scanner.h" namespace Coco { @@ -76,7 +74,7 @@ CharSet* CharSet::Clone() const { return s; } -bool CharSet::Equals(CharSet *s) const { +bool CharSet::Equals(const CharSet *s) const { Range *p = head, *q = s->head; while (p != NULL && q != NULL) { if (p->from != q->from || p->to != q->to) return false; @@ -96,49 +94,47 @@ int CharSet::First() const { return -1; } -void CharSet::Or(CharSet *s) { +void CharSet::Or(const CharSet *s) { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) Set(i); } -void CharSet::And(CharSet *s) { - CharSet *x = new CharSet(); +void CharSet::And(const CharSet *s) { + CharSet x; Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) - if (s->Get(i)) x->Set(i); + if (s->Get(i)) x.Set(i); Range *del = p; p = p->next; delete del; } - head = x->head; - x->head = NULL; - delete x; + head = x.head; + x.head = NULL; } -void CharSet::Subtract(CharSet *s) { - CharSet *x = new CharSet(); +void CharSet::Subtract(const CharSet *s) { + CharSet x; Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) - if (!s->Get(i)) x->Set(i); + if (!s->Get(i)) x.Set(i); Range *del = p; p = p->next; delete del; } - head = x->head; - x->head = NULL; - delete x; + head = x.head; + x.head = NULL; } -bool CharSet::Includes(CharSet *s) const { +bool CharSet::Includes(const CharSet *s) const { for (Range *p = s->head; p != NULL; p 
= p->next) for (int i = p->from; i <= p->to; i++) if (!Get(i)) return false; return true; } -bool CharSet::Intersects(CharSet *s) const { +bool CharSet::Intersects(const CharSet *s) const { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) if (Get(i)) return true; diff --git a/src/CharSet.h b/src/CharSet.h index 4164d2d..0d54f10 100644 --- a/src/CharSet.h +++ b/src/CharSet.h @@ -30,6 +30,7 @@ Coco/R itself) does not fall under the GNU General Public License. #define COCO_CHARSET_H__ #include +#include "Scanner.h" namespace Coco { @@ -51,14 +52,14 @@ class CharSet { bool Get(int i) const; void Set(int i); CharSet* Clone() const; - bool Equals(CharSet *s) const; + bool Equals(const CharSet *s) const; int Elements() const; int First() const; - void Or(CharSet *s); - void And(CharSet *s); - void Subtract(CharSet *s); - bool Includes(CharSet *s) const; - bool Intersects(CharSet *s) const; + void Or(const CharSet *s); + void And(const CharSet *s); + void Subtract(const CharSet *s); + bool Includes(const CharSet *s) const; + bool Intersects(const CharSet *s) const; void Clear(); void Fill(); }; diff --git a/src/Coco.atg b/src/Coco.atg index b63b9a2..13da0e0 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -6,24 +6,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -35,18 +35,19 @@ $namespace=Coco #include "Tab.h" #include "DFA.h" #include "ParserGen.h" +#define COCO_FRAME_PARSER COMPILER Coco - int id; - int str; + NodeType id; + NodeType str; FILE* trace; // other Coco objects referenced in this ATG Tab *tab; DFA *dfa; ParserGen *pgen; - bool genScanner; + bool genScanner, ignoreGammarErrors; wchar_t* tokenString; // used in declarations of literal tokens wchar_t* noString; // used in declarations of literal tokens @@ -56,10 +57,11 @@ COMPILER Coco tab = NULL; dfa = NULL; pgen = NULL; - id = 0; - str = 1; + id = NodeType::id; + str = NodeType::t; tokenString = NULL; - noString = coco_string_create(L"-none-"); + noString = coco_string_create(_SC("-none-")); + ignoreGammarErrors = false; } // Uncomment this method if cleanup is necessary, @@ -78,7 +80,7 @@ CHARACTERS tab = '\t'. stringCh = ANY - '"' - '\\' - cr - lf. charCh = ANY - '\'' - '\\' - cr - lf. - printable = '\u0020' .. '\u007e'. + printable = '\u0020' .. '\u007e'. hex = "0123456789abcdef". TOKENS @@ -115,82 +117,101 @@ Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gra pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line); } .) - - "COMPILER" (. genScanner = true; - tab->ignored = new CharSet(); .) + + "COMPILER" (. genScanner = true; + tab->ignored = new CharSet(); .) ident (. gramName = coco_string_create(t->val); beg = la->pos; line = la->line; .) { ANY } (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .) [ "IGNORECASE" (. dfa->ignoreCase = true; .) ] /* pdt */ + [ "TERMINALS" { ident (. sym = tab->FindSym(t->val); + if (sym != NULL) SemErr(_SC("name declared twice")); + else { + sym = tab->NewSym(NodeType::t, t->val, t->line, t->col); + sym->tokenKind = Symbol::fixedToken; + }.) 
+ } ] /*from cocoxml*/ [ "CHARACTERS" { SetDecl }] - [ "TOKENS" { TokenDecl }] - [ "PRAGMAS" { TokenDecl }] + [ "TOKENS" { TokenDecl }] + [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) - "FROM" TokenExpr + "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) - ] (. dfa->NewComment(g1->l, g2->l, nested); .) + ] (. dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; .) } - { "IGNORE" Set (. tab->ignored->Or(s); .) + { "IGNORE" Set (. tab->ignored->Or(s); delete s; .) } - SYNC + SYNC "PRODUCTIONS" (. if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); .) { ident (. sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); + if (undef) sym = tab->NewSym(NodeType::nt, t->val, t->line, t->col); else { - if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(L"name declared twice"); - } else SemErr(L"this symbol kind not allowed on left side of production"); - sym->line = t->line; + if (sym->typ == NodeType::nt) { + if (sym->graph != NULL) SemErr(_SC("name declared twice")); + } else SemErr(_SC("this symbol kind not allowed on left side of production")); + sym->line = t->line; + sym->col = t->col; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; .) [ AttrDecl ] (. if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); .) [ SemText<.sym->semPos.> ] WEAK '=' Expression (. sym->graph = g->l; tab->Finish(g); + delete g; .) WEAK '.' } "END" ident (. 
if (!coco_string_equal(gramName, t->val)) - SemErr(L"name does not match grammar name"); + SemErr(_SC("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); + coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(L"missing production for grammar name"); + SemErr(_SC("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(L"grammar symbol must not have attributes"); + SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number + tab->noSym = tab->NewSym(NodeType::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors->count == 0) { - wprintf(L"checking\n"); + wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); - if (tab->GrammarOk()) { - wprintf(L"parser"); + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab->GrammarCheckAll(); + } + else doGenCode = tab->GrammarOk(); + if(tab->genRREBNF && doGenCode) { + pgen->WriteRREBNF(); + } + if (doGenCode) { + wprintf(_SC("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(L" + scanner"); + wprintf(_SC(" + scanner")); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(L" generated\n"); + wprintf(_SC(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } @@ -205,10 +226,11 @@ SetDecl (. CharSet *s; .) = ident (. wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(L"name declared twice"); + if (c != NULL) SemErr(_SC("name declared twice")); .) - '=' Set (. if (s->Elements() == 0) SemErr(L"character set must not be empty"); + '=' Set (. if (s->Elements() == 0) SemErr(_SC("character set must not be empty")); tab->NewCharClass(name, s); + coco_string_delete(name); .) '.' . @@ -218,8 +240,8 @@ SetDecl (. CharSet *s; .) Set (. CharSet *s2; .) 
= SimSet - { '+' SimSet (. s->Or(s2); .) - | '-' SimSet (. s->Subtract(s2); .) + { '+' SimSet (. s->Or(s2); delete s2; .) + | '-' SimSet (. s->Subtract(s2); delete s2; .) } . @@ -228,27 +250,27 @@ Set (. CharSet *s2; .) SimSet (. int n1, n2; .) = (. s = new CharSet(); .) ( ident (. CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); + if (c == NULL) SemErr(_SC("undefined name")); else s->Or(c->set); .) | string (. - wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - wchar_t *name = tab->Unescape(subName2); - coco_string_delete(subName2); - wchar_t ch; - int len = coco_string_length(name); - for(int i=0; i < len; i++) { - ch = name[i]; - if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() - } - s->Set(ch); - } - coco_string_delete(name); - .) + wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + wchar_t *name = tab->Unescape(subName2); + coco_string_delete(subName2); + wchar_t ch; + int len = coco_string_length(name); + for(int i=0; i < len; i++) { + ch = name[i]; + if (dfa->ignoreCase) { + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) ch = ch - (_SC('A') - _SC('a')); // ch.ToLower() + } + s->Set(ch); + } + coco_string_delete(name); + .) | Char (. s->Set(n1); .) [ ".." Char (. for (int i = n1; i <= n2; i++) s->Set(i); .) ] -| "ANY" (. s = new CharSet(); s->Fill(); .) +| "ANY" (. delete s; s = new CharSet(); s->Fill(); .) ) . @@ -257,47 +279,57 @@ SimSet (. int n1, n2; .) Char = char (. 
n = 0; - wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - wchar_t* name = tab->Unescape(subName); - coco_string_delete(subName); - - // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ - if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(L"unacceptable character value"); - coco_string_delete(name); - if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; + wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + wchar_t* name = tab->Unescape(subName); + coco_string_delete(subName); + + // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ + if (coco_string_length(name) <= 1) n = name[0]; + else SemErr(_SC("unacceptable character value")); + coco_string_delete(name); + if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; .) . /*------------------------------------------------------------------------------------*/ -TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .) +TokenDecl (. wchar_t* name = NULL; NodeType kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; .) = Sym (. sym = tab->FindSym(name); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(_SC("name declared twice")); else { - sym = tab->NewSym(typ, name, t->line); + sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } - tokenString = NULL; + coco_string_delete(name); + coco_string_delete(tokenString); .) + [ ':' Sym + (. inheritsSym = tab->FindSym(name); + if (inheritsSym == NULL) SemErr(_SC("token can't inherit from unddeclared name")); + else if (inheritsSym == sym) SemErr(_SC("token can not inherit from itself")); + else if (inheritsSym->typ != typ) SemErr(_SC("token can't inherit from different token type")); + else sym->inherits = inheritsSym; + .) + ] SYNC - ( '=' TokenExpr '.' (. 
if (kind == str) SemErr(L"a literal must not be declared with a structure"); + ( '=' TokenExpr '.' (. if (kind == str) SemErr(_SC("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string - if ((*(tab->literals))[tokenString] != NULL) - SemErr(L"token string declared twice"); - tab->literals->Set(tokenString, sym); + if (tab->literals[tokenString] != NULL) + SemErr(_SC("token string declared twice")); + tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } + delete g; .) | (. if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); .) ) - [ SemText<.sym->semPos.> (. if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .) + [ SemText<.sym->semPos.> (. if (typ == NodeType::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); .) //(. if (typ != NodeType::pr) SemErr(_SC("semantic action not allowed here")); .) ] . @@ -307,13 +339,13 @@ AttrDecl = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(_SC("bad string in attributes")); .) } '>' (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY - | badString (. SemErr(L"bad string in attributes"); .) + | badString (. SemErr(_SC("bad string in attributes")); .) } ".>" (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) @@ -322,12 +354,12 @@ AttrDecl /*------------------------------------------------------------------------------------*/ Expression (. Graph *g2; .) -= += Term (. bool first = true; .) { WEAK '|' Term (. if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; .) } . 
@@ -336,21 +368,21 @@ Expression (. Graph *g2; .) Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) = -( [ (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); .) +( [ (. rslv = tab->NewNode(NodeType::rslv, (Symbol*)NULL, la->line, la->col); .) Resolver<.rslv->pos.> (. g = new Graph(rslv); .) ] - Factor (. if (rslv != NULL) tab->MakeSequence(g, g2); + Factor (. if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; .) - { Factor (. tab->MakeSequence(g, g2); .) + { Factor (. tab->MakeSequence(g, g2); delete g2; .) } -| (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) +| (. g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) ) (. if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ -Factor (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false; +Factor (. wchar_t* name = NULL; NodeType kind; Position *pos; bool weak = false; g = NULL; .) = @@ -358,56 +390,57 @@ Factor (. wchar_t* name = NULL; int kind; Position *pos; ] Sym (. 
Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) - sym = (Symbol*)((*(tab->literals))[name]); + sym = (Symbol*)tab->literals[name]; bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0); // forward nt - else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line); + sym = tab->NewSym(NodeType::nt, name, t->line, t->col); // forward nt + else if (genScanner) { + sym = tab->NewSym(NodeType::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(L"undefined string in production"); + SemErr(_SC("undefined string in production")); sym = tab->eofSy; // dummy } } - int typ = sym->typ; - if (typ != Node::t && typ != Node::nt) - SemErr(L"this symbol kind is not allowed in a production"); + coco_string_delete(name); + NodeType typ = sym->typ; + if (typ != NodeType::t && typ != NodeType::nt) + SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { - if (typ == Node::t) typ = Node::wt; - else SemErr(L"only terminals may be weak"); + if (typ == NodeType::t) typ = NodeType::wt; + else SemErr(_SC("only terminals may be weak")); } - Node *p = tab->NewNode(typ, sym, t->line); + Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); .) - [ Attribs

(. if (kind != id) SemErr(L"a literal must not have attributes"); .) + [ Attribs

(. if (kind != id) SemErr(_SC("a literal must not have attributes")); .) ] (. if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); .) | '(' Expression ')' | '[' Expression ']' (. tab->MakeOption(g); .) | '{' Expression '}' (. tab->MakeIteration(g); .) -| SemText (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); +| SemText (. Node *p = tab->NewNode(NodeType::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); .) -| "ANY" (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys +| "ANY" (. Node *p = tab->NewNode(NodeType::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); .) -| "SYNC" (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); +| "SYNC" (. Node *p = tab->NewNode(NodeType::sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); .) ) (. if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ -Resolver +Resolver = "IF" "(" (. int beg = la->pos; int col = la->col; int line = la->line; .) Condition (. pos = new Position(beg, t->pos, col, line); .) @@ -425,7 +458,7 @@ TokenExpr (. Graph *g2; .) { WEAK '|' TokenTerm (. if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; .) } . @@ -435,63 +468,67 @@ TokenExpr (. Graph *g2; .) TokenTerm (. Graph *g2; .) = TokenFactor - { TokenFactor (. tab->MakeSequence(g, g2); .) + { TokenFactor (. tab->MakeSequence(g, g2); delete g2; .) } [ "CONTEXT" '(' TokenExpr (. tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; - tab->MakeSequence(g, g2); .) 
+ tab->MakeSequence(g, g2); delete g2; .) ')' ] . /*------------------------------------------------------------------------------------*/ -TokenFactor (. wchar_t* name = NULL; int kind; .) +TokenFactor (. wchar_t* name = NULL; NodeType kind; .) = (. g = NULL; .) ( Sym (. if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(L"undefined name"); + SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; + Node *p = tab->NewNode(NodeType::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); - tokenString = coco_string_create(noString); + coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str g = tab->StrToGraph(name); if (tokenString == NULL) tokenString = coco_string_create(name); - else tokenString = coco_string_create(noString); + else { + coco_string_delete(tokenString); + tokenString = coco_string_create(noString); + } } + coco_string_delete(name); .) | '(' TokenExpr ')' -| '[' TokenExpr ']' (. tab->MakeOption(g); tokenString = coco_string_create(noString); .) -| '{' TokenExpr '}' (. tab->MakeIteration(g); tokenString = coco_string_create(noString); .) +| '[' TokenExpr ']' (. tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) +| '{' TokenExpr '}' (. tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); .) ) (. if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); .) . /*------------------------------------------------------------------------------------*/ -Sym -= (. name = coco_string_create(L"???"); kind = id; .) +Sym += (. name = coco_string_create(_SC("???")); kind = id; .) ( ident (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .) 
| (string (. coco_string_delete(name); name = coco_string_create(t->val); .) | char (. - wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - coco_string_delete(name); - name = coco_string_create_append(L"\"", subName); - coco_string_delete(subName); - coco_string_merge(name, L"\""); - .) + wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); + coco_string_delete(name); + name = coco_string_create_append(_SC("\""), subName); + coco_string_delete(subName); + coco_string_merge(name, _SC("\"")); + .) ) (. kind = str; if (dfa->ignoreCase) { - wchar_t *oldName = name; - name = coco_string_create_lower(name); - coco_string_delete(oldName); - } + wchar_t *oldName = name; + name = coco_string_create_lower(name); + coco_string_delete(oldName); + } if (coco_string_indexof(name, ' ') >= 0) - SemErr(L"literal tokens must not contain blanks"); .) + SemErr(_SC("literal tokens must not contain blanks")); .) ) . @@ -500,13 +537,13 @@ Sym Attribs = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) - { ANY - | badString (. SemErr(L"bad string in attributes"); .) + { ANY + | badString (. SemErr(_SC("bad string in attributes")); .) } '>' (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) - { ANY - | badString (. SemErr(L"bad string in attributes"); .) + { ANY + | badString (. SemErr(_SC("bad string in attributes")); .) } ".>" (. if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .) . @@ -517,12 +554,12 @@ SemText = "(." (. int beg = la->pos; int col = la->col; int line = t->line; .) { ANY - | badString (. SemErr(L"bad string in semantic action"); .) - | "(." (. SemErr(L"missing end of previous semantic action"); .) + | badString (. SemErr(_SC("bad string in semantic action")); .) + | "(." (. SemErr(_SC("missing end of previous semantic action")); .) } ".)" (. pos = new Position(beg, t->pos, col, line); .) . 
/*------------------------------------------------------------------------------------*/ - + END Coco. diff --git a/src/Coco.cpp b/src/Coco.cpp index f860e9f..0226c9e 100644 --- a/src/Coco.cpp +++ b/src/Coco.cpp @@ -49,7 +49,7 @@ Coco/R itself) does not fall under the GNU General Public License. using namespace Coco; -#ifdef _WIN32 +#if defined(_WIN32) && !defined(__MINGW32__) int wmain(int argc, wchar_t *argv[]) { #elif defined __GNUC__ int main(int argc, char *argv_[]) { @@ -61,19 +61,21 @@ int main(int argc, char *argv_[]) { #error unknown compiler! #endif - wprintf(L"Coco/R (Dec 01, 2018)\n"); + wprintf(_SC("%s"), "Coco/R (Dec 01, 2018)\n"); wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; char *chTrFileName = NULL; - bool emitLines = false; + bool emitLines = false, ignoreGammarErrors = false, genRREBNF = false; for (int i = 1; i < argc; i++) { - if (coco_string_equal(argv[i], L"-namespace") && i < argc - 1) nsName = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-frames") && i < argc - 1) frameDir = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-trace") && i < argc - 1) ddtString = coco_string_create(argv[++i]); - else if (coco_string_equal(argv[i], L"-o") && i < argc - 1) outDir = coco_string_create_append(argv[++i], L"/"); - else if (coco_string_equal(argv[i], L"-lines")) emitLines = true; + if (coco_string_equal(argv[i], _SC("-namespace")) && i < argc - 1) nsName = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-frames")) && i < argc - 1) frameDir = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-trace")) && i < argc - 1) ddtString = coco_string_create(argv[++i]); + else if (coco_string_equal(argv[i], _SC("-o")) && i < argc - 1) outDir = coco_string_create_append(argv[++i], _SC("/")); + else if (coco_string_equal(argv[i], _SC("-lines"))) emitLines = true; + else if 
(coco_string_equal(argv[i], _SC("-genRREBNF"))) genRREBNF = true; + else if (coco_string_equal(argv[i], _SC("-ignoreGammarErrors"))) ignoreGammarErrors = true; else srcName = coco_string_create(argv[i]); } @@ -90,76 +92,79 @@ int main(int argc, char *argv_[]) { wchar_t* file = coco_string_create(srcName); wchar_t* srcDir = coco_string_create(srcName, 0, pos+1); - Coco::Scanner *scanner = new Coco::Scanner(file); - Coco::Parser *parser = new Coco::Parser(scanner); + Coco::Scanner scanner(file); + Coco::Parser parser(&scanner); - traceFileName = coco_string_create_append(srcDir, L"trace.txt"); + traceFileName = coco_string_create_append(srcDir, _SC("trace.txt")); chTrFileName = coco_string_create_char(traceFileName); - if ((parser->trace = fopen(chTrFileName, "w")) == NULL) { - wprintf(L"-- could not open %hs\n", chTrFileName); + if ((parser.trace = fopen(chTrFileName, "w")) == NULL) { + wprintf(_SC("-- could not open %s\n"), chTrFileName); exit(1); } - parser->tab = new Coco::Tab(parser); - parser->dfa = new Coco::DFA(parser); - parser->pgen = new Coco::ParserGen(parser); + Coco::Tab tab(&parser); + tab.srcName = coco_string_create(srcName); + tab.srcDir = coco_string_create(srcDir); + tab.nsName = nsName ? coco_string_create(nsName) : NULL; + tab.frameDir = coco_string_create(frameDir); + tab.outDir = coco_string_create(outDir != NULL ? outDir : srcDir); + tab.emitLines = emitLines; + tab.genRREBNF = genRREBNF; + parser.ignoreGammarErrors = ignoreGammarErrors; + if (ddtString != NULL) tab.SetDDT(ddtString); + parser.tab = &tab; - parser->tab->srcName = coco_string_create(srcName); - parser->tab->srcDir = coco_string_create(srcDir); - parser->tab->nsName = nsName ? coco_string_create(nsName) : NULL; - parser->tab->frameDir = coco_string_create(frameDir); - parser->tab->outDir = coco_string_create(outDir != NULL ? 
outDir : srcDir); - parser->tab->emitLines = emitLines; + Coco::DFA dfa(&parser); + parser.dfa = &dfa; + Coco::ParserGen pgen(&parser); + parser.pgen = &pgen; - if (ddtString != NULL) parser->tab->SetDDT(ddtString); + parser.Parse(); - parser->Parse(); - - fclose(parser->trace); + fclose(parser.trace); // obtain the FileSize - parser->trace = fopen(chTrFileName, "r"); - fseek(parser->trace, 0, SEEK_END); - long fileSize = ftell(parser->trace); - fclose(parser->trace); + parser.trace = fopen(chTrFileName, "r"); + fseek(parser.trace, 0, SEEK_END); + long fileSize = ftell(parser.trace); + fclose(parser.trace); if (fileSize == 0) { remove(chTrFileName); } else { - wprintf(L"trace output is in %hs\n", chTrFileName); + wprintf(_SC("trace output is in %s\n"), chTrFileName); } - wprintf(L"%d errors detected\n", parser->errors->count); - if (parser->errors->count != 0) { + coco_string_delete(file); + coco_string_delete(srcDir); + + wprintf(_SC("%d errors detected\n"), parser.errors->count); + if (parser.errors->count != 0) { exit(1); } - delete parser->pgen; - delete parser->dfa; - delete parser->tab; - delete parser; - delete scanner; - coco_string_delete(file); - coco_string_delete(srcDir); } else { - wprintf(L"Usage: Coco Grammar.ATG {Option}\n"); - wprintf(L"Options:\n"); - wprintf(L" -namespace \n"); - wprintf(L" -frames \n"); - wprintf(L" -trace \n"); - wprintf(L" -o \n"); - wprintf(L" -lines\n"); - wprintf(L"Valid characters in the trace string:\n"); - wprintf(L" A trace automaton\n"); - wprintf(L" F list first/follow sets\n"); - wprintf(L" G print syntax graph\n"); - wprintf(L" I trace computation of first sets\n"); - wprintf(L" J list ANY and SYNC sets\n"); - wprintf(L" P print statistics\n"); - wprintf(L" S list symbol table\n"); - wprintf(L" X list cross reference table\n"); - wprintf(L"Scanner.frame and Parser.frame files needed in ATG directory\n"); - wprintf(L"or in a directory specified in the -frames option.\n"); + wprintf(_SC("%s"), + "Usage: Coco 
Grammar.ATG {Option}\n" + "Options:\n" + " -namespace \n" + " -frames \n" + " -trace \n" + " -o \n" + " -lines\n" + " -genRREBNF\n" + " -ignoreGammarErrors\n" + "Valid characters in the trace string:\n" + " A trace automaton\n" + " F list first/follow sets\n" + " G print syntax graph\n" + " I trace computation of first sets\n" + " J list ANY and SYNC sets\n" + " P print statistics\n" + " S list symbol table\n" + " X list cross reference table\n" + "Scanner.frame and Parser.frame files needed in ATG directory\n" + "or in a directory specified in the -frames option.\n"); } coco_string_delete(srcName); @@ -168,6 +173,7 @@ int main(int argc, char *argv_[]) { coco_string_delete(ddtString); coco_string_delete(chTrFileName); coco_string_delete(traceFileName); + coco_string_delete(outDir); return 0; } diff --git a/src/Comment.cpp b/src/Comment.cpp index 7c91746..d425b1b 100644 --- a/src/Comment.cpp +++ b/src/Comment.cpp @@ -27,19 +27,20 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Comment.h" -#include "Scanner.h" namespace Coco { -Comment::Comment(wchar_t* start, wchar_t* stop, bool nested) { - this->start = coco_string_create(start); - this->stop = coco_string_create(stop); +Comment::Comment(wchar_t* start, wchar_t* stop, bool nested, bool needCopy) { + this->start = needCopy ? coco_string_create(start) : start; + this->stop = needCopy ? coco_string_create(stop) : stop; this->nested = nested; + this->next = NULL; } Comment::~Comment() { coco_string_delete(start); coco_string_delete(stop); + delete next; } }; // namespace diff --git a/src/Comment.h b/src/Comment.h index ffc1c82..66dc3f2 100644 --- a/src/Comment.h +++ b/src/Comment.h @@ -29,7 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. 
#if !defined(COCO_COMMENT_H__) #define COCO_COMMENT_H__ -#include +#include "Scanner.h" namespace Coco { @@ -41,7 +41,7 @@ class Comment // info about comment syntax bool nested; Comment *next; - Comment(wchar_t* start, wchar_t* stop, bool nested); + Comment(wchar_t* start, wchar_t* stop, bool nested, bool needCopy=true); virtual ~Comment(); }; diff --git a/src/DFA.cpp b/src/DFA.cpp index cf4414f..1f79326 100644 --- a/src/DFA.cpp +++ b/src/DFA.cpp @@ -28,51 +28,56 @@ Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include -#include #include "DFA.h" #include "Tab.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" #include "Generator.h" namespace Coco { +#ifndef SZWC10 +#define SZWC10 10 +#define SZWC20 20 +typedef wchar_t wchar_t_10[SZWC10+1]; +typedef wchar_t wchar_t_20[SZWC20+1]; +#endif + //---------- Output primitives -wchar_t* DFA::Ch(wchar_t ch) { - wchar_t* format = new wchar_t[10]; - if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') - coco_swprintf(format, 10, L"%d\0", (int) ch); - else - coco_swprintf(format, 10, L"L'%lc'\0", (int) ch); +static wchar_t* DFACh(int ch, wchar_t_10 &format, bool noWrapper=false) { + if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) + coco_swprintf(format, SZWC10, _SC("%d"), (int) ch); + else { + const char *strFmt = noWrapper ? 
"'%" _CHFMT "'" : _SC("_SC('%") _CHFMT _SC("')"); + coco_swprintf(format, SZWC10, strFmt, (int) ch); + } + format[SZWC10] = _SC('\0'); return format; } -wchar_t* DFA::ChCond(wchar_t ch) { - wchar_t* format = new wchar_t[20]; - wchar_t* res = Ch(ch); - coco_swprintf(format, 20, L"ch == %ls\0", res); - delete [] res; +static wchar_t* DFAChCond(int ch, wchar_t_20 &format) { + wchar_t_10 fmt; + wchar_t* res = DFACh(ch, fmt); + coco_swprintf(format, SZWC20, _SC("ch == %") _SFMT, res); + format[SZWC20] = _SC('\0'); return format; } void DFA::PutRange(CharSet *s) { + wchar_t_10 fmt1, fmt2; for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { - wchar_t *from = Ch((wchar_t) r->from); - fwprintf(gen, L"ch == %ls", from); - delete [] from; + wchar_t *from = DFACh(r->from, fmt1); + fwprintf(gen, _SC("ch == %") _SFMT, from); } else if (r->from == 0) { - wchar_t *to = Ch((wchar_t) r->to); - fwprintf(gen, L"ch <= %ls", to); - delete [] to; + wchar_t *to = DFACh(r->to, fmt1); + fwprintf(gen, _SC("ch <= %") _SFMT, to); } else { - wchar_t *from = Ch((wchar_t) r->from); - wchar_t *to = Ch((wchar_t) r->to); - fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to); - delete [] from; delete [] to; + wchar_t *from = DFACh(r->from, fmt1); + wchar_t *to = DFACh(r->to, fmt2); + fwprintf(gen, _SC("(ch >= %") _SFMT _SC(" && ch <= %") _SFMT _SC(")"), from, to); } - if (r->next != NULL) fwprintf(gen, L" || "); + if (r->next != NULL) fputws(_SC(" || "), gen); } } @@ -90,7 +95,7 @@ void DFA::NewTransition(State *from, State *to, int typ, int sym, int tc) { Target *t = new Target(to); Action *a = new Action(typ, sym, tc); a->target = t; from->AddAction(a); - if (typ == Node::clas) curSy->tokenKind = Symbol::classToken; + if (typ == NodeType::clas) curSy->tokenKind = Symbol::classToken; } void DFA::CombineShifts() { @@ -104,75 +109,88 @@ void DFA::CombineShifts() { if (a->target->state == b->target->state && a->tc == b->tc) { seta = a->Symbols(tab); setb = 
b->Symbols(tab); seta->Or(setb); - a->ShiftWith(seta, tab); + if(!a->ShiftWith(seta, tab)) delete seta; c = b; b = b->next; state->DetachAction(c); + delete setb; } else b = b->next; } } } -void DFA::FindUsedStates(State *state, BitArray *used) { +void DFA::FindUsedStates(const State *state, BitArray *used) { if ((*used)[state->nr]) return; used->Set(state->nr, true); for (Action *a = state->firstAction; a != NULL; a = a->next) FindUsedStates(a->target->state, used); } +static void deleteOnlyThisState(State **state) { + (*state)->next = NULL; + delete *state; + *state = NULL; +} + void DFA::DeleteRedundantStates() { //State *newState = new State[State::lastNr + 1]; State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1)); - BitArray *used = new BitArray(lastStateNr + 1); - FindUsedStates(firstState, used); + BitArray used(lastStateNr + 1); + FindUsedStates(firstState, &used); // combine equal final states for (State *s1 = firstState->next; s1 != NULL; s1 = s1->next) // firstState cannot be final - if ((*used)[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx)) + if (used[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx)) for (State *s2 = s1->next; s2 != NULL; s2 = s2->next) - if ((*used)[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) { - used->Set(s2->nr, false); newState[s2->nr] = s1; + if (used[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) { + used.Set(s2->nr, false); newState[s2->nr] = s1; } State *state; for (state = firstState; state != NULL; state = state->next) - if ((*used)[state->nr]) + if (used[state->nr]) for (Action *a = state->firstAction; a != NULL; a = a->next) - if (!((*used)[a->target->state->nr])) + if (!(used[a->target->state->nr])) a->target->state = newState[a->target->state->nr]; // delete unused states lastState = firstState; lastStateNr = 0; // firstState has number 0 - for (state = firstState->next; state != NULL; state = 
state->next) - if ((*used)[state->nr]) {state->nr = ++lastStateNr; lastState = state;} - else lastState->next = state->next; + State *state_to_delete = NULL; + for (state = firstState->next; state != NULL; state = state->next) { + if(state_to_delete) deleteOnlyThisState(&state_to_delete); + if (used[state->nr]) {state->nr = ++lastStateNr; lastState = state;} + else { lastState->next = state->next; state_to_delete = state;} + } + if(state_to_delete) deleteOnlyThisState(&state_to_delete); free (newState); - delete used; } -State* DFA::TheState(Node *p) { +State* DFA::TheState(const Node *p) { State *state; if (p == NULL) {state = NewState(); state->endOf = curSy; return state;} else return p->state; } -void DFA::Step(State *from, Node *p, BitArray *stepped) { +static bool IsIterOpt(Node *p) { + return p->rmin == 0 && p->rmax == 1; +} + +void DFA::Step(State *from, const Node *p, BitArray *stepped) { if (p == NULL) return; stepped->Set(p->n, true); - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == NodeType::chr) { NewTransition(from, TheState(p->next), p->typ, p->val, p->code); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { Step(from, p->sub, stepped); Step(from, p->down, stepped); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { if (tab->DelSubGraph(p->sub)) { - parser->SemErr(L"contents of {...} must not be deletable"); + parser->SemErr(_SC("contents of {...} must not be deletable")); return; } if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); if (p->state != from) { - BitArray *newStepped = new BitArray(tab->nodes->Count); - Step(p->state, p, newStepped); - delete newStepped; + BitArray newStepped(tab->nodes.Count); + Step(p->state, p, &newStepped); } - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); 
Step(from, p->sub, stepped); } @@ -188,57 +206,56 @@ void DFA::Step(State *from, Node *p, BitArray *stepped) { void DFA::NumberNodes(Node *p, State *state, bool renumIter) { if (p == NULL) return; if (p->state != NULL) return; // already visited; - if ((state == NULL) || ((p->typ == Node::iter) && renumIter)) state = NewState(); + if ((state == NULL) || ((p->typ == NodeType::iter) && renumIter)) state = NewState(); p->state = state; if (tab->DelGraph(p)) state->endOf = curSy; - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == NodeType::chr) { NumberNodes(p->next, NULL, false); - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { NumberNodes(p->next, state, true); NumberNodes(p->sub, state, true); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); NumberNodes(p->down, state, renumIter); } } -void DFA::FindTrans (Node *p, bool start, BitArray *marked) { +void DFA::FindTrans (const Node *p, bool start, BitArray *marked) { if (p == NULL || (*marked)[p->n]) return; marked->Set(p->n, true); if (start) { - BitArray *stepped = new BitArray(tab->nodes->Count); - Step(p->state, p, stepped); // start of group of equally numbered nodes - delete stepped; + BitArray stepped(tab->nodes.Count); + Step(p->state, p, &stepped); // start of group of equally numbered nodes } - if (p->typ == Node::clas || p->typ == Node::chr) { + if (p->typ == NodeType::clas || p->typ == NodeType::chr) { FindTrans(p->next, true, marked); - } else if (p->typ == Node::opt) { + } else if (p->typ == NodeType::opt) { FindTrans(p->next, true, marked); FindTrans(p->sub, false, marked); - } else if (p->typ == Node::iter) { + } else if (p->typ == NodeType::iter) { FindTrans(p->next, false, marked); 
FindTrans(p->sub, false, marked); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { FindTrans(p->sub, false, marked); FindTrans(p->down, false, marked); } } void DFA::ConvertToStates(Node *p, Symbol *sym) { curGraph = p; curSy = sym; - if (tab->DelGraph(curGraph)) { - parser->SemErr(L"token might be empty"); - return; - } + if (tab->DelGraph(curGraph)) { + parser->SemErr(_SC("token might be empty")); + return; + } NumberNodes(curGraph, firstState, true); - FindTrans(curGraph, true, new BitArray(tab->nodes->Count)); - if (p->typ == Node::iter) { - BitArray *stepped = new BitArray(tab->nodes->Count); - Step(firstState, p, stepped); - delete stepped; + BitArray ba(tab->nodes.Count); + FindTrans(curGraph, true, &ba); + if (p->typ == NodeType::iter) { + ba.SetAll(false); + Step(firstState, p, &ba); } } @@ -262,17 +279,18 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } for (; i < len; i++) { // make new DFA for s[i..len-1] State *to = NewState(); - NewTransition(state, to, Node::chr, s[i], Node::normalTrans); + NewTransition(state, to, NodeType::chr, s[i], TransitionCode::normalTrans); state = to; } coco_string_delete(s); Symbol *matchedSym = state->endOf; if (state->endOf == NULL) { state->endOf = sym; - } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == Node::contextTrans)) { + } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && a->tc == TransitionCode::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off - wchar_t format[200]; - coco_swprintf(format, 200, L"tokens %ls and %ls cannot be distinguished", sym->name, matchedSym->name); + const size_t format_size = 200; + wchar_t format[format_size]; + coco_swprintf(format, format_size, _SC("tokens %") _SFMT _SC(" and %") _SFMT _SC(" cannot be distinguished"), sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken 
matchedSym->tokenKind = Symbol::classLitToken; @@ -280,42 +298,46 @@ void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { } } -void DFA::SplitActions(State *state, Action *a, Action *b) { +bool DFA::SplitActions(State *state, Action *a, Action *b) { + bool rc = false; Action *c; CharSet *seta, *setb, *setc; seta = a->Symbols(tab); setb = b->Symbols(tab); if (seta->Equals(setb)) { a->AddTargets(b); - state->DetachAction(b); + rc = state->DetachAction(b); } else if (seta->Includes(setb)) { setc = seta->Clone(); setc->Subtract(setb); b->AddTargets(a); - a->ShiftWith(setc, tab); + if(!a->ShiftWith(setc, tab)) delete setc; } else if (setb->Includes(seta)) { setc = setb->Clone(); setc->Subtract(seta); a->AddTargets(b); - b->ShiftWith(setc, tab); + if(!b->ShiftWith(setc, tab)) delete setc; } else { setc = seta->Clone(); setc->And(setb); seta->Subtract(setc); setb->Subtract(setc); - a->ShiftWith(seta, tab); - b->ShiftWith(setb, tab); - c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith + if(!a->ShiftWith(seta, tab)) delete seta; + if(!b->ShiftWith(setb, tab)) delete setb; + c = new Action(0, 0, TransitionCode::normalTrans); // typ and sym are set in ShiftWith c->AddTargets(a); c->AddTargets(b); - c->ShiftWith(setc, tab); + if(!c->ShiftWith(setc, tab)) delete setc; state->AddAction(c); + return rc; //don't need to delete anything } + delete seta; delete setb; + return rc; } -bool DFA::Overlap(Action *a, Action *b) { +bool DFA::Overlap(const Action *a, const Action *b) { CharSet *seta, *setb; - if (a->typ == Node::chr) - if (b->typ == Node::chr) return (a->sym == b->sym); + if (a->typ == NodeType::chr) + if (b->typ == NodeType::chr) return (a->sym == b->sym); else {setb = tab->CharClassSet(b->sym); return setb->Get(a->sym);} else { seta = tab->CharClassSet(a->sym); - if (b->typ == Node::chr) return seta->Get(b->sym); + if (b->typ == NodeType::chr) return seta->Get(b->sym); else {setb = tab->CharClassSet(b->sym); return seta->Intersects(setb);} } } @@ 
-323,11 +345,13 @@ bool DFA::Overlap(Action *a, Action *b) { bool DFA::MakeUnique(State *state) { // return true if actions were split bool changed = false; for (Action *a = state->firstAction; a != NULL; a = a->next) - for (Action *b = a->next; b != NULL; b = b->next) + for (Action *b = a->next; b != NULL;) if (Overlap(a, b)) { - SplitActions(state, a, b); + //because an action can be deleted in SplitActions we need two pointers + Action *c = b; b = b->next; + SplitActions(state, a, c); changed = true; - } + } else b = b->next; return changed; } @@ -346,6 +370,8 @@ void DFA::MeltStates(State *state) { do {changed = MakeUnique(s);} while (changed); melt = NewMelted(targets, s); } + else delete targets; + delete action->target->next; action->target->next = NULL; action->target->state = melt->state; } @@ -355,7 +381,7 @@ void DFA::MeltStates(State *state) { void DFA::FindCtxStates() { for (State *state = firstState; state != NULL; state = state->next) for (Action *a = state->firstAction; a != NULL; a = a->next) - if (a->tc == Node::contextTrans) a->target->state->ctx = true; + if (a->tc == TransitionCode::contextTrans) a->target->state->ctx = true; } void DFA::MakeDeterministic() { @@ -373,39 +399,37 @@ void DFA::MakeDeterministic() { } void DFA::PrintStates() { - fwprintf(trace, L"\n"); - fwprintf(trace, L"---------- states ----------\n"); + fwprintf(trace, _SC("\n---------- states ----------\n")); + wchar_t_10 fmt; for (State *state = firstState; state != NULL; state = state->next) { bool first = true; - if (state->endOf == NULL) fwprintf(trace, L" "); + if (state->endOf == NULL) fputws(_SC(" "), trace); else { - wchar_t *paddedName = tab->Name(state->endOf->name); - fwprintf(trace, L"E(%12s)", paddedName); - coco_string_delete(paddedName); + fwprintf(trace, _SC("E(%-12.12") _SFMT _SC(")"), state->endOf->name); } - fwprintf(trace, L"%3d:", state->nr); - if (state->firstAction == NULL) fwprintf(trace, L"\n"); + fwprintf(trace, _SC("%3d:"), state->nr); + if 
(state->firstAction == NULL) fputws(_SC("\n"), trace); for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); + if (first) {fputws(_SC(" "), trace); first = false;} else fputws(_SC(" "), trace); - if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)(*tab->classes)[action->sym])->name); - else fwprintf(trace, L"%3s", Ch((wchar_t)action->sym)); + if (action->typ == NodeType::clas) fwprintf(trace, _SC("%") _SFMT, tab->classes[action->sym]->name); + else fwprintf(trace, _SC("%3") _SFMT, DFACh(action->sym, fmt, true)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { - fwprintf(trace, L"%3d", targ->state->nr); + fwprintf(trace, _SC(" %3d"), targ->state->nr); } - if (action->tc == Node::contextTrans) fwprintf(trace, L" context\n"); else fwprintf(trace, L"\n"); + if (action->tc == TransitionCode::contextTrans) fputws(_SC(" context\n"), trace); else fputws(_SC("\n"), trace); } } - fwprintf(trace, L"\n---------- character classes ----------\n"); + fputws(_SC("\n---------- character classes ----------\n"), trace); tab->WriteCharClasses(); } //---------------------------- actions -------------------------------- -Action* DFA::FindAction(State *state, wchar_t ch) { +Action* DFA::FindAction(const State *state, int ch) { for (Action *a = state->firstAction; a != NULL; a = a->next) - if (a->typ == Node::chr && ch == a->sym) return a; - else if (a->typ == Node::clas) { + if (a->typ == NodeType::chr && ch == a->sym) return a; + else if (a->typ == NodeType::clas) { CharSet *s = tab->CharClassSet(a->sym); if (s->Get(ch)) return a; } @@ -413,7 +437,7 @@ Action* DFA::FindAction(State *state, wchar_t ch) { } -void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) { +void DFA::GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) { // compute the set of target states targets = new 
BitArray(maxStates); endOf = NULL; ctx = false; @@ -426,7 +450,7 @@ void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &c endOf = t->state->endOf; } else { - wprintf(L"Tokens %ls and %ls cannot be distinguished\n", endOf->name, t->state->endOf->name); + wprintf(_SC("Tokens %") _SFMT _SC(" and %") _SFMT _SC(" cannot be distinguished\n"), endOf->name, t->state->endOf->name); errors->count++; } } @@ -456,7 +480,7 @@ Melted* DFA::NewMelted(BitArray *set, State *state) { } -BitArray* DFA::MeltedSet(int nr) { +const BitArray* DFA::MeltedSet(int nr) { Melted *m = firstMelted; while (m != NULL) { if (m->state->nr == nr) return m->set; else m = m->next; @@ -466,7 +490,7 @@ BitArray* DFA::MeltedSet(int nr) { return NULL; } -Melted* DFA::StateWithSet(BitArray *s) { +Melted* DFA::StateWithSet(const BitArray *s) { for (Melted *m = firstMelted; m != NULL; m = m->next) if (Sets::Equals(s, m->set)) return m; return NULL; @@ -475,119 +499,136 @@ Melted* DFA::StateWithSet(BitArray *s) { //------------------------ comments -------------------------------- -wchar_t* DFA::CommentStr(Node *p) { - StringBuilder s = StringBuilder(); +wchar_t* DFA::CommentStr(const Node *p) { + StringBuilder s; while (p != NULL) { - if (p->typ == Node::chr) { + if (p->typ == NodeType::chr) { s.Append((wchar_t)p->val); - } else if (p->typ == Node::clas) { + } else if (p->typ == NodeType::clas) { CharSet *set = tab->CharClassSet(p->val); - if (set->Elements() != 1) parser->SemErr(L"character set contains more than 1 character"); + if (set->Elements() != 1) parser->SemErr(_SC("character set contains more than 1 character")); s.Append((wchar_t) set->First()); } - else parser->SemErr(L"comment delimiters may not be structured"); + else parser->SemErr(_SC("comment delimiters may not be structured")); p = p->next; } - if (s.GetLength() == 0 || s.GetLength() > 2) { - parser->SemErr(L"comment delimiters must be 1 or 2 characters long"); - s = StringBuilder(L"?"); + if (s.GetLength() == 0 
|| s.GetLength() > 8) { + parser->SemErr(_SC("comment delimiters must be 1 or 8 characters long")); + s = StringBuilder(_SC("?")); } return s.ToString(); } -void DFA::NewComment(Node *from, Node *to, bool nested) { - Comment *c = new Comment(CommentStr(from), CommentStr(to), nested); +void DFA::NewComment(const Node *from, const Node *to, bool nested) { + Comment *c = new Comment(CommentStr(from), CommentStr(to), nested, false); c->next = firstComment; firstComment = c; } //------------------------ scanner generation ---------------------- -void DFA::GenComBody(Comment *com) { - fwprintf(gen, L"\t\tfor(;;) {\n"); +void DFA::GenCommentIndented(int n, const wchar_t *s) { + for(int i= 1; i < n; ++i) fputws(_SC("\t"), gen); + fputws(s, gen); +} + +void DFA::GenComBody(const Comment *com) { + int imax = coco_string_length(com->start)-1; + int imaxStop = coco_string_length(com->stop)-1; + GenCommentIndented(imax, _SC("\t\tfor(;;) {\n")); - wchar_t* res = ChCond(com->stop[0]); - fwprintf(gen, L"\t\t\tif (%ls) ", res); - fwprintf(gen, L"{\n"); - delete [] res; + wchar_t_20 fmt; + wchar_t* res = DFAChCond(com->stop[0], fmt); + GenCommentIndented(imax, _SC("\t\t\tif (")); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), res); - if (coco_string_length(com->stop) == 1) { - fwprintf(gen, L"\t\t\t\tlevel--;\n"); - fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); - fwprintf(gen, L"\t\t\t\tNextCh();\n"); + if (imaxStop == 0) { + fwprintf(gen, _SC("%s"), + "\t\t\t\tlevel--;\n" + "\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n" + "\t\t\t\tNextCh();\n"); } else { - fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = ChCond(com->stop[1]); - fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res); - delete [] res; - fwprintf(gen, L"\t\t\t\t\tlevel--;\n"); - fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); - fwprintf(gen, L"\t\t\t\t\tNextCh();\n"); - fwprintf(gen, 
L"\t\t\t\t}\n"); + int currIndent, indent = imax - 1; + for(int sidx = 1; sidx <= imaxStop; ++sidx) { + currIndent = indent + sidx; + GenCommentIndented(currIndent, _SC("\t\t\t\tNextCh();\n")); + GenCommentIndented(currIndent, _SC("\t\t\t\tif (")); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->stop[sidx], fmt)); + } + currIndent = indent + imax; + GenCommentIndented(currIndent, _SC("\t\t\tlevel--;\n")); + GenCommentIndented(currIndent, _SC("\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }\n")); + GenCommentIndented(currIndent, _SC("\t\t\tNextCh();\n")); + for(int sidx = imaxStop; sidx > 0; --sidx) { + GenCommentIndented(indent + sidx, _SC("\t\t\t\t}\n")); + } } if (com->nested) { - fwprintf(gen, L"\t\t\t}"); - wchar_t* res = ChCond(com->start[0]); - fwprintf(gen, L" else if (%ls) ", res); - delete [] res; - fwprintf(gen, L"{\n"); - if (coco_string_length(com->stop) == 1) - fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n"); + GenCommentIndented(imax, _SC("\t\t\t}")); + wchar_t* res = DFAChCond(com->start[0], fmt); + fwprintf(gen, _SC(" else if (%") _SFMT _SC(") {\n"), res); + if (imaxStop == 0) + fputws(_SC("\t\t\tlevel++; NextCh();\n"), gen); else { - fwprintf(gen, L"\t\t\t\tNextCh();\n"); - wchar_t* res = ChCond(com->start[1]); - fwprintf(gen, L"\t\t\t\tif (%ls) ", res); - delete [] res; - fwprintf(gen, L"{\n"); - fwprintf(gen, L"\t\t\t\t\tlevel++; NextCh();\n"); - fwprintf(gen, L"\t\t\t\t}\n"); + int indent = imax - 1; + for(int sidx = 1; sidx <= imax; ++sidx) { + int loopIndent = indent + sidx; + GenCommentIndented(loopIndent, _SC("\t\t\t\tNextCh();\n")); + GenCommentIndented(loopIndent, _SC("\t\t\t\tif (")); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->start[sidx], fmt)); + } + GenCommentIndented(indent + imax, _SC("\t\t\t\t\tlevel++; NextCh();\n")); + for(int sidx = imax; sidx > 0; --sidx) { + GenCommentIndented(indent + sidx, _SC("\t\t\t\t}\n")); + } } } - fwprintf(gen, L"\t\t\t} else if (ch == 
buffer->EoF) return false;\n"); - fwprintf(gen, L"\t\t\telse NextCh();\n"); - fwprintf(gen, L"\t\t}\n"); + GenCommentIndented(imax, _SC("\t\t\t} else if (ch == buffer->EoF) return false;\n")); + GenCommentIndented(imax, _SC("\t\t\telse NextCh();\n")); + GenCommentIndented(imax, _SC("\t\t}\n")); } -void DFA::GenCommentHeader(Comment *com, int i) { - fwprintf(gen, L"\tbool Comment%d();\n", i); +void DFA::GenCommentHeader(const Comment *com, int i) { + fwprintf(gen, _SC("\tbool Comment%d();\n"), i); } -void DFA::GenComment(Comment *com, int i) { - fwprintf(gen, L"\n"); - fwprintf(gen, L"bool Scanner::Comment%d() ", i); - fwprintf(gen, L"{\n"); - fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n"); - if (coco_string_length(com->start) == 1) { - fwprintf(gen, L"\tNextCh();\n"); +void DFA::GenComment(const Comment *com, int i) { + wchar_t_20 fmt; + fwprintf(gen, _SC("\nbool Scanner::Comment%d() {\n"), i); + fwprintf(gen, _SC("%s"), + "\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n" + "\tNextCh();\n"); + int imax = coco_string_length(com->start)-1; + if (imax == 0) { GenComBody(com); } else { - fwprintf(gen, L"\tNextCh();\n"); - wchar_t* res = ChCond(com->start[1]); - fwprintf(gen, L"\tif (%ls) ", res); - delete [] res; - fwprintf(gen, L"{\n"); - - fwprintf(gen, L"\t\tNextCh();\n"); - GenComBody(com); - - fwprintf(gen, L"\t} else {\n"); - fwprintf(gen, L"\t\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n"); - fwprintf(gen, L"\t}\n"); - fwprintf(gen, L"\treturn false;\n"); - } - fwprintf(gen, L"}\n"); -} - -wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literals + for(int sidx = 1; sidx <= imax; ++sidx) { + GenCommentIndented(sidx, _SC("\tif (")); + fwprintf(gen, _SC("%") _SFMT _SC(") {\n"), DFAChCond(com->start[sidx], fmt)); + GenCommentIndented(sidx, _SC("\t\tNextCh();\n")); + } + GenComBody(com); + for(int sidx = imax; sidx > 0; --sidx) { + 
GenCommentIndented(sidx, _SC("\t}\n")); + } + fwprintf(gen, _SC("%s"), + "\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n" + "\treturn false;\n"); + } + fputws(_SC("}\n"), gen); +} + +const wchar_t* DFA::SymName(const Symbol *sym) { // real name value is stored in Tab.literals if (('a'<=sym->name[0] && sym->name[0]<='z') || ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0]) - Iterator *iter = tab->literals->GetIterator(); + Iterator *iter = tab->literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - if (e->val == sym) { return e->key; } + if (e->val == sym) { delete iter; return e->key; } } + delete iter; } return sym->name; } @@ -595,31 +636,27 @@ wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literal void DFA::GenLiterals () { Symbol *sym; - ArrayList *ts[2]; - ts[0] = tab->terminals; - ts[1] = tab->pragmas; + TArrayList *ts[2]; + ts[0] = &tab->terminals; + ts[1] = &tab->pragmas; for (int i = 0; i < 2; ++i) { for (int j = 0; j < ts[i]->Count; j++) { sym = (Symbol*) ((*(ts[i]))[j]); if (sym->tokenKind == Symbol::litToken) { - wchar_t* name = coco_string_create(SymName(sym)); - if (ignoreCase) { - wchar_t *oldName = name; - name = coco_string_create_lower(name); - coco_string_delete(oldName); - } + const wchar_t* name = SymName(sym); + if (ignoreCase) name = coco_string_create_lower(name); // sym.name stores literals with quotes, e.g. "\"Literal\"" - fwprintf(gen, L"\tkeywords.set(L"); + fputws(_SC("\tkeywords.set(_SC("), gen); // write keyword, escape non printable characters - for (int k = 0; name[k] != L'\0'; k++) { - wchar_t c = name[k]; - fwprintf(gen, (c >= 32 && c <= 127) ? L"%lc" : L"\\x%04x", c); + for (int k = 0; name[k] != _SC('\0'); k++) { + int c = name[k]; + fwprintf(gen, (c >= 32 && c <= 127) ? 
_SC("%") _CHFMT : _SC("\\x%04x"), c); } - fwprintf(gen, L", %d);\n", sym->n); + fwprintf(gen, _SC("), %d);\n"), sym->n); - coco_string_delete(name); + if (ignoreCase) coco_string_delete((wchar_t*&)name); } } } @@ -635,9 +672,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"namespace %ls {\n", curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("namespace %.*") _SFMT _SC(" {\n"), curLen, nsName+startPos); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; @@ -649,7 +684,7 @@ int DFA::GenNamespaceOpen(const wchar_t *nsName) { void DFA::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, L"} // namespace\n"); + fputws(_SC("} // namespace\n"), gen); } } @@ -669,59 +704,74 @@ void DFA::CheckLabels() { } } -void DFA::WriteState(State *state) { +/* TODO better interface for CopySourcePart */ +void DFA::CopySourcePart (const Position *pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser->pgen->buffer->GetPos(); // Pos is modified by CopySourcePart + FILE* prevGen = parser->pgen->gen; + parser->pgen->gen = gen; + parser->pgen->CopySourcePart(pos, 0); + parser->pgen->gen = prevGen; + parser->pgen->buffer->SetPos(oldPos); +} + +void DFA::WriteState(const State *state) { Symbol *endOf = state->endOf; - fwprintf(gen, L"\t\tcase %d:\n", state->nr); + fwprintf(gen, _SC("\t\tcase %d:\n"), state->nr); if (existLabel[state->nr]) - fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr); + fwprintf(gen, _SC("\t\t\tcase_%d:\n"), state->nr); if (endOf != NULL && state->firstAction != NULL) { - fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d;\n", endOf->n); + fwprintf(gen, _SC("\t\t\trecEnd = pos; recKind = %d /* %") _SFMT _SC(" 
*/;\n"), endOf->n, endOf->name); } bool ctxEnd = state->ctx; + wchar_t_20 fmt; for (Action *action = state->firstAction; action != NULL; action = action->next) { - if (action == state->firstAction) fwprintf(gen, L"\t\t\tif ("); - else fwprintf(gen, L"\t\t\telse if ("); - if (action->typ == Node::chr) { - wchar_t* res = ChCond((wchar_t)action->sym); - fwprintf(gen, L"%ls", res); - delete [] res; + if (action == state->firstAction) fputws(_SC("\t\t\tif ("), gen); + else fputws(_SC("\t\t\telse if ("), gen); + if (action->typ == NodeType::chr) { + wchar_t* res = DFAChCond(action->sym, fmt); + fwprintf(gen, _SC("%") _SFMT, res); } else PutRange(tab->CharClassSet(action->sym)); - fwprintf(gen, L") {"); + fputws(_SC(") {"), gen); - if (action->tc == Node::contextTrans) { - fwprintf(gen, L"apx++; "); ctxEnd = false; + if (action->tc == TransitionCode::contextTrans) { + fputws(_SC("apx++; "), gen); ctxEnd = false; } else if (state->ctx) - fwprintf(gen, L"apx = 0; "); - fwprintf(gen, L"AddCh(); goto case_%d;", action->target->state->nr); - fwprintf(gen, L"}\n"); + fputws(_SC("apx = 0; "), gen); + fwprintf(gen, _SC("AddCh(); goto case_%d;}\n"), action->target->state->nr); } if (state->firstAction == NULL) - fwprintf(gen, L"\t\t\t{"); + fputws(_SC("\t\t\t{"), gen); else - fwprintf(gen, L"\t\t\telse {"); + fputws(_SC("\t\t\telse {"), gen); if (ctxEnd) { // final context state: cut appendix - fwprintf(gen, L"\n"); - fwprintf(gen, L"\t\t\t\ttlen -= apx;\n"); - fwprintf(gen, L"\t\t\t\tSetScannerBehindT();"); - - fwprintf(gen, L"\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n"); - fwprintf(gen, L"\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n"); - fwprintf(gen, L"\t\t\t\t"); + fwprintf(gen, _SC("%s"), + "\n" + "\t\t\t\ttlen -= apx;\n" + "\t\t\t\tSetScannerBehindT();" + "\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n" + "\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n" + "\t\t\t\t"); } if (endOf == NULL) { - fwprintf(gen, 
L"goto case_0;}\n"); + fputws(_SC("goto case_0;}\n"), gen); } else { - fwprintf(gen, L"t->kind = %d; ", endOf->n); + fwprintf(gen, _SC("t->kind = %d /* %") _SFMT _SC(" */; "), endOf->n, endOf->name); + if(endOf->semPos && endOf->typ == NodeType::t) { + fputws(_SC(" {"), gen); + CopySourcePart(endOf->semPos, 0); + fputws(_SC("}"), gen); + } if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { - fwprintf(gen, L"wchar_t *literal = coco_string_create_lower(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, true); break;}\n"); } else { - fwprintf(gen, L"wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n"); + fwprintf(gen, _SC("%s"), "t->kind = keywords.get(tval, tlen, t->kind, false); break;}\n"); } } else { - fwprintf(gen, L"break;}\n"); + fputws(_SC(" break;}\n"), gen); } } } @@ -730,46 +780,46 @@ void DFA::WriteStartTab() { bool firstRange = true; for (Action *action = firstState->firstAction; action != NULL; action = action->next) { int targetState = action->target->state->nr; - if (action->typ == Node::chr) { - fwprintf(gen, L"\tstart.set(%d, %d);\n", action->sym, targetState); + if (action->typ == NodeType::chr) { + fwprintf(gen, _SC("\tstart.set(%d, %d);\n"), action->sym, targetState); } else { CharSet *s = tab->CharClassSet(action->sym); for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (firstRange) { firstRange = false; - fwprintf(gen, L"\tint i;\n"); + fputws(_SC("\tint i;\n"), gen); } - fwprintf(gen, L"\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n", r->from, r->to, targetState); + fwprintf(gen, _SC("\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n"), r->from, r->to, targetState); } } } - fwprintf(gen, L"\t\tstart.set(Buffer::EoF, -1);\n"); + fwprintf(gen, _SC("%s"), "\t\tstart.set(Buffer::EoF, -1);\n"); } void 
DFA::WriteScanner() { - Generator g = Generator(tab, errors); - fram = g.OpenFrame(L"Scanner.frame"); - gen = g.OpenGen(L"Scanner.h"); + Generator g(tab, errors); + fram = g.OpenFrame(_SC("Scanner.frame")); + gen = g.OpenGen(_SC("Scanner.h")); if (dirtyDFA) MakeDeterministic(); // Header g.GenCopyright(); - g.SkipFramePart(L"-->begin"); - - g.CopyFramePart(L"-->prefix"); + g.SkipFramePart(_SC("-->begin")); + + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->namespace_open"); + g.CopyFramePart(_SC("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->casing0"); + g.CopyFramePart(_SC("-->casing0")); if (ignoreCase) { - fwprintf(gen, L"\twchar_t valCh; // current input character (for token.val)\n"); + fwprintf(gen, _SC("%s"), "\twchar_t valCh; // current input character (for token.val)\n"); } - g.CopyFramePart(L"-->commentsheader"); + g.CopyFramePart(_SC("-->commentsheader")); Comment *com = firstComment; int cmdIdx = 0; while (com != NULL) { @@ -777,63 +827,65 @@ void DFA::WriteScanner() { com = com->next; cmdIdx++; } - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(L"-->implementation"); + g.CopyFramePart(_SC("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(L"Scanner.cpp"); + gen = g.OpenGen(_SC("Scanner.cpp")); g.GenCopyright(); - g.SkipFramePart(L"-->begin"); - g.CopyFramePart(L"-->namespace_open"); + g.SkipFramePart(_SC("-->begin")); + g.CopyFramePart(_SC("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->declarations"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count - 1); - fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n); + g.CopyFramePart(_SC("-->declarations")); + fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count - 1); + fwprintf(gen, 
_SC("\tnoSym = %d;\n"), tab->noSym->n); WriteStartTab(); GenLiterals(); - g.CopyFramePart(L"-->initialization"); - g.CopyFramePart(L"-->casing1"); + g.CopyFramePart(_SC("-->initialization")); + g.CopyFramePart(_SC("-->casing1")); if (ignoreCase) { - fwprintf(gen, L"\t\tvalCh = ch;\n"); - fwprintf(gen, L"\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); + fwprintf(gen, _SC("%s"), + "\t\tvalCh = ch;\n" + "\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); } - g.CopyFramePart(L"-->casing2"); - fwprintf(gen, L"\t\ttval[tlen++] = "); - if (ignoreCase) fwprintf(gen, L"valCh;"); else fwprintf(gen, L"ch;"); + g.CopyFramePart(_SC("-->casing2")); + fputws(_SC("\t\ttval[tlen++] = "), gen); + if (ignoreCase) fputws(_SC("valCh;"), gen); else fputws(_SC("ch;"), gen); - g.CopyFramePart(L"-->comments"); + g.CopyFramePart(_SC("-->comments")); com = firstComment; cmdIdx = 0; while (com != NULL) { GenComment(com, cmdIdx); com = com->next; cmdIdx++; } - g.CopyFramePart(L"-->scan1"); - fwprintf(gen, L"\t\t\t"); - if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, L"false"); } + g.CopyFramePart(_SC("-->scan1")); + fputws(_SC("\t\t\t"), gen); + if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fputws(_SC("false"), gen); } - g.CopyFramePart(L"-->scan2"); + g.CopyFramePart(_SC("-->scan2")); if (firstComment != NULL) { - fwprintf(gen, L"\tif ("); + fputws(_SC("\t\tif ("), gen); com = firstComment; cmdIdx = 0; + wchar_t_20 fmt; while (com != NULL) { - wchar_t* res = ChCond(com->start[0]); - fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx); - delete [] res; + wchar_t* res = DFAChCond(com->start[0], fmt); + fwprintf(gen, _SC("(%") _SFMT _SC(" && Comment%d())"), res, cmdIdx); if (com->next != NULL) { - fwprintf(gen, L" || "); + fputws(_SC(" || "), gen); } com = com->next; cmdIdx++; } - fwprintf(gen, L") return NextToken();"); + fputws(_SC(") continue;"), gen); } - if (hasCtxMoves) { fwprintf(gen, 
L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */ - g.CopyFramePart(L"-->scan3"); + g.CopyFramePart(_SC("-->scan22")); + if (hasCtxMoves) { fputws(_SC("\n\tint apx = 0;"), gen); } /* pdt */ + g.CopyFramePart(_SC("-->scan3")); /* CSB 02-10-05 check the Labels */ existLabel = new bool[lastStateNr+1]; @@ -842,7 +894,7 @@ void DFA::WriteScanner() { WriteState(state); delete [] existLabel; - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); @@ -862,4 +914,10 @@ DFA::DFA(Parser *parser) { hasCtxMoves = false; } +DFA::~DFA() { + delete firstState; + delete firstComment; + delete firstMelted; +} + }; // namespace diff --git a/src/DFA.h b/src/DFA.h index 57ed846..b59e3d4 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -30,7 +30,7 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_DFA_H__) #define COCO_DFA_H__ -#include +#include "Scanner.h" #include "Action.h" #include "Comment.h" #include "State.h" @@ -72,25 +72,23 @@ class DFA Comment *firstComment; // list of comments //---------- Output primitives - wchar_t* Ch(wchar_t ch); - wchar_t* ChCond(wchar_t ch); void PutRange(CharSet *s); //---------- State handling State* NewState(); void NewTransition(State *from, State *to, int typ, int sym, int tc); void CombineShifts(); - void FindUsedStates(State *state, BitArray *used); + void FindUsedStates(const State *state, BitArray *used); void DeleteRedundantStates(); - State* TheState(Node *p); - void Step(State *from, Node *p, BitArray *stepped); + State* TheState(const Node *p); + void Step(State *from, const Node *p, BitArray *stepped); void NumberNodes(Node *p, State *state, bool renumIter); - void FindTrans (Node *p, bool start, BitArray *marked); + void FindTrans (const Node *p, bool start, BitArray *marked); void ConvertToStates(Node *p, Symbol *sym); // match string against current automaton; store it either as a fixedToken or as a litToken void 
MatchLiteral(wchar_t* s, Symbol *sym); - void SplitActions(State *state, Action *a, Action *b); - bool Overlap(Action *a, Action *b); + bool SplitActions(State *state, Action *a, Action *b); + bool Overlap(const Action *a, const Action *b); bool MakeUnique(State *state); // return true if actions were split void MeltStates(State *state); void FindCtxStates(); @@ -99,32 +97,35 @@ class DFA void CheckLabels(); //---------------------------- actions -------------------------------- - Action* FindAction(State *state, wchar_t ch); - void GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); + Action* FindAction(const State *state, int ch); + void GetTargetStates(const Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); //------------------------- melted states ------------------------------ Melted* NewMelted(BitArray *set, State *state); - BitArray* MeltedSet(int nr); - Melted* StateWithSet(BitArray *s); + const BitArray* MeltedSet(int nr); + Melted* StateWithSet(const BitArray *s); //------------------------ comments -------------------------------- - wchar_t* CommentStr(Node *p); - void NewComment(Node *from, Node *to, bool nested); + wchar_t* CommentStr(const Node *p); + void NewComment(const Node *from, const Node *to, bool nested); //------------------------ scanner generation ---------------------- - void GenComBody(Comment *com); - void GenCommentHeader(Comment *com, int i); - void GenComment(Comment *com, int i); + void GenCommentIndented(int n, const wchar_t *s); + void GenComBody(const Comment *com); + void GenCommentHeader(const Comment *com, int i); + void GenComment(const Comment *com, int i); void CopyFramePart(const wchar_t* stop); - wchar_t* SymName(Symbol *sym); // real name value is stored in Tab.literals + const wchar_t* SymName(const Symbol *sym); // real name value is stored in Tab.literals void GenLiterals (); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); - void WriteState(State 
*state); + void CopySourcePart (const Position *pos, int indent); + void WriteState(const State *state); void WriteStartTab(); void OpenGen(const wchar_t* genName, bool backUp); /* pdt */ void WriteScanner(); DFA(Parser *parser); + ~DFA(); }; }; // namespace diff --git a/src/Generator.cpp b/src/Generator.cpp index f742b03..f937bd1 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -27,7 +27,6 @@ Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #include "Generator.h" -#include "Scanner.h" namespace Coco { @@ -39,9 +38,14 @@ namespace Coco { frameFile = NULL; } + Generator::~Generator() { + coco_string_delete(frameFile); + if(fram) fclose(fram); + } + FILE* Generator::OpenFrame(const wchar_t* frame) { if (coco_string_length(tab->frameDir) != 0) { - frameFile = coco_string_create_append(tab->frameDir, L"/"); + frameFile = coco_string_create_append(tab->frameDir, _SC("/")); coco_string_merge(frameFile, frame); char *chFrameFile = coco_string_create_char(frameFile); fram = fopen(chFrameFile, "r"); @@ -55,7 +59,7 @@ namespace Coco { delete [] chFrameFile; } if (fram == NULL) { - wchar_t *message = coco_string_create_append(L"-- Cannot find : ", frame); + wchar_t *message = coco_string_create_append(_SC("-- Cannot find : "), frame); errors->Exception(message); delete [] message; } @@ -70,14 +74,14 @@ namespace Coco { if ((gen = fopen(chFn, "r")) != NULL) { fclose(gen); - wchar_t *oldName = coco_string_create_append(fn, L".old"); + wchar_t *oldName = coco_string_create_append(fn, _SC(".old")); char *chOldName = coco_string_create_char(oldName); remove(chOldName); rename(chFn, chOldName); // copy with overwrite coco_string_delete(chOldName); coco_string_delete(oldName); } if ((gen = fopen(chFn, "w")) == NULL) { - wchar_t *message = coco_string_create_append(L"-- Cannot generate : ", genName); + wchar_t *message = coco_string_create_append(_SC("-- Cannot generate : "), 
genName); errors->Exception(message); delete [] message; } @@ -92,14 +96,14 @@ namespace Coco { FILE *file = NULL; if (coco_string_length(tab->frameDir) != 0) { - wchar_t *copyFr = coco_string_create_append(tab->frameDir, L"/Copyright.frame"); + wchar_t *copyFr = coco_string_create_append(tab->frameDir, _SC("/Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; delete [] chCopyFr; } if (file == NULL) { - wchar_t *copyFr = coco_string_create_append(tab->srcDir, L"Copyright.frame"); + wchar_t *copyFr = coco_string_create_append(tab->srcDir, _SC("Copyright.frame")); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; @@ -128,9 +132,7 @@ namespace Coco { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"%ls_", curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("%.*") _SFMT _SC("_"), curLen, nsName+startPos); startPos = startPos + curLen + 1; } while (startPos < len); } @@ -153,30 +155,28 @@ namespace Coco { endOfStopString = coco_string_length(stop)-1; } - fwscanf(fram, L"%lc", &ch); // fram.ReadByte(); + fwscanf(fram, _SC("%") _CHFMT, &ch); // fram.ReadByte(); while (!feof(fram)) { // ch != EOF if (stop != NULL && ch == startCh) { int i = 0; do { if (i == endOfStopString) return; // stop[0..i] found - fwscanf(fram, L"%lc", &ch); i++; + fwscanf(fram, _SC("%") _CHFMT, &ch); i++; } while (ch == stop[i]); // stop[0..i-1] found; continue with last read character if (generateOutput) { - wchar_t *subStop = coco_string_create(stop, 0, i); - fwprintf(gen, L"%ls", subStop); - coco_string_delete(subStop); + fwprintf(gen, _SC("%.*") _SFMT, i, stop); } } else { - if (generateOutput) { fwprintf(gen, L"%lc", ch); } - fwscanf(fram, L"%lc", &ch); + if (generateOutput) { fwprintf(gen, _SC("%") _CHFMT, ch); } 
+ fwscanf(fram, _SC("%") _CHFMT, &ch); } } if (stop != NULL) { - wchar_t *message = coco_string_create_append(L" -- Incomplete or corrupt frame file: ", frameFile); + wchar_t *message = coco_string_create_append(_SC(" -- Incomplete or corrupt frame file: "), frameFile); errors->Exception(message); delete [] message; } } -} \ No newline at end of file +} diff --git a/src/Generator.h b/src/Generator.h index cb7e1d1..edab4a6 100644 --- a/src/Generator.h +++ b/src/Generator.h @@ -38,6 +38,7 @@ namespace Coco { class Generator { public: Generator(Tab *tab, Errors *errors); + ~Generator(); FILE* OpenFrame(const wchar_t* frame); FILE* OpenGen(const wchar_t *genName); void GenCopyright(); diff --git a/src/HashTable.cpp b/src/HashTable.cpp index da0a92a..5e7b551 100644 --- a/src/HashTable.cpp +++ b/src/HashTable.cpp @@ -29,7 +29,6 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include "HashTable.h" -#include "Scanner.h" namespace Coco { @@ -45,6 +44,7 @@ HashTable::~HashTable() { while (o != NULL) { Obj *del = o; o = o->next; + coco_string_delete(del->key); delete del; } } @@ -52,7 +52,7 @@ HashTable::~HashTable() { data = NULL; }; -HashTable::Obj* HashTable::Get0(wchar_t *key) const { +HashTable::Obj* HashTable::Get0(const wchar_t *key) const { int k = coco_string_hash(key) % size; HashTable::Obj *o = data[k]; while (o != NULL && !coco_string_equal(key, o->key)) { @@ -61,13 +61,13 @@ HashTable::Obj* HashTable::Get0(wchar_t *key) const { return o; } -void HashTable::Set(wchar_t *key, void *val) { +void HashTable::Set(const wchar_t *key, void *val) { HashTable::Obj *o = Get0(key); if (o == NULL) { // new entry int k = coco_string_hash(key) % size; o = new Obj(); - o->key = key; + o->key = coco_string_create(key); o->val = val; o->next = data[k]; data[k] = o; @@ -77,7 +77,7 @@ void HashTable::Set(wchar_t *key, void *val) { } } -void* HashTable::Get(wchar_t *key) const { +void* HashTable::Get(const wchar_t *key) const { HashTable::Obj 
*o = Get0(key); if (o != NULL) { return o->val; @@ -95,6 +95,11 @@ HashTable::Iter::Iter(HashTable *ht) { this->cur = NULL; } +void HashTable::Iter::Reset() { + this->pos = 0; + this->cur = NULL; +} + bool HashTable::Iter::HasNext() { while (cur == NULL && pos < ht->size) { cur = ht->data[pos]; diff --git a/src/HashTable.h b/src/HashTable.h index 487f1b9..8d4e049 100644 --- a/src/HashTable.h +++ b/src/HashTable.h @@ -29,7 +29,7 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_HASHTABLE_H__) #define COCO_HASHTABLE_H__ -#include +#include "Scanner.h" namespace Coco { @@ -41,7 +41,9 @@ class DictionaryEntry { class Iterator { public: + virtual ~Iterator() {}; virtual bool HasNext() = 0; + virtual void Reset() = 0; virtual DictionaryEntry* Next() = 0; }; @@ -51,9 +53,9 @@ class HashTable HashTable(int size = 128); virtual ~HashTable(); - virtual void Set(wchar_t *key, void *value); - virtual void* Get(wchar_t *key) const; - inline void* operator[](wchar_t *key) const { return Get(key); }; + virtual void Set(const wchar_t *key, void *value); + virtual void* Get(const wchar_t *key) const; + inline void* operator[](const wchar_t *key) const { return Get(key); }; virtual Iterator* GetIterator(); private: @@ -71,10 +73,11 @@ class HashTable public: Iter(HashTable *ht); virtual bool HasNext(); + virtual void Reset(); virtual DictionaryEntry* Next(); }; - Obj* Get0(wchar_t *key) const; + Obj* Get0(const wchar_t *key) const; Obj **data; int size; }; diff --git a/src/Makefile b/src/Makefile index b6fe18e..ed41589 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,6 @@ all: - g++ *.cpp -o Coco $(CFLAGS) + g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o Coco $(CFLAGS) + #x86_64-w64-mingw32-g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o Coco.exe $(CFLAGS) clean: rm -f Coco diff --git a/src/Melted.cpp b/src/Melted.cpp index f63c9f0..7e71f93 100644 --- a/src/Melted.cpp +++ b/src/Melted.cpp @@ -27,13 +27,19 @@ Coco/R itself) does not fall 
under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Melted.h" +#include "BitArray.h" namespace Coco { class BitArray; Melted::Melted(BitArray *set, State *state) { - this->set = set; this->state = state; + this->set = set; this->state = state; this->next = NULL; +} + +Melted::~Melted() { + delete set; + delete next; } }; // namespace diff --git a/src/Melted.h b/src/Melted.h index 960faa2..08137e6 100644 --- a/src/Melted.h +++ b/src/Melted.h @@ -39,11 +39,12 @@ class BitArray; class Melted // info about melted states { public: - BitArray *set; // set of old states + const BitArray *set; // set of old states State *state; // new state Melted *next; Melted(BitArray *set, State *state); + ~Melted(); }; }; // namespace diff --git a/src/Node.cpp b/src/Node.cpp index 4fd8adb..9b7c261 100644 --- a/src/Node.cpp +++ b/src/Node.cpp @@ -5,52 +5,34 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Node.h" +#include "BitArray.h" namespace Coco { -// constants for node kinds -int Node::t = 1; // terminal symbol -int Node::pr = 2; // pragma -int Node::nt = 3; // nonterminal symbol -int Node::clas = 4; // character class -int Node::chr = 5; // character -int Node::wt = 6; // weak terminal symbol -int Node::any = 7; // -int Node::eps = 8; // empty -int Node::sync = 9; // synchronization symbol -int Node::sem = 10; // semantic action: (. .) 
-int Node::alt = 11; // alternative: | -int Node::iter = 12; // iteration: { } -int Node::opt = 13; // option: [ ] -int Node::rslv = 14; // resolver expr -int Node::normalTrans = 0; // transition codes -int Node::contextTrans = 1; - - -Node::Node(int typ, Symbol *sym, int line) { +Node::Node(NodeType typ, Symbol *sym, int line, int col) { this->n = 0; this->next = NULL; this->down = NULL; @@ -61,9 +43,14 @@ Node::Node(int typ, Symbol *sym, int line) { this->set = NULL; this->pos = NULL; this->state = NULL; - this->state = 0; + this->rmin = this->rmax = 0; + + this->typ = typ; this->sym = sym; this->line = line; this->col = col; +} - this->typ = typ; this->sym = sym; this->line = line; +Node::~Node() { + delete pos; + delete set; } }; // namespace diff --git a/src/Node.h b/src/Node.h index 882da38..bfb7bd9 100644 --- a/src/Node.h +++ b/src/Node.h @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -31,8 +31,9 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include "Position.h" -#include "State.h" #include "Scanner.h" +#include "State.h" +#include "NodeSymbolKind.h" namespace Coco { @@ -41,45 +42,30 @@ class BitArray; class Node { public: - // constants for node kinds - static int t; // terminal symbol - static int pr; // pragma - static int nt; // nonterminal symbol - static int clas; // character class - static int chr; // character - static int wt; // weak terminal symbol - static int any; // - static int eps; // empty - static int sync; // synchronization symbol - static int sem; // semantic action: (. .) 
- static int alt; // alternative: | - static int iter; // iteration: { } - static int opt; // option: [ ] - static int rslv; // resolver expr - - static int normalTrans; // transition codes - static int contextTrans; int n; // node number - int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv + NodeType typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv Node *next; // to successor node Node *down; // alt: to next alternative Node *sub; // alt, iter, opt: to first node of substructure - bool up; // true: "next" leads to successor in enclosing structure + bool up; // true: "next" leads to successor in enclosing structure Symbol *sym; // nt, t, wt: symbol represented by this node int val; // chr: ordinal character value - // clas: index of character class + // clas: index of character class int code; // chr, clas: transition code BitArray *set; // any, sync: the set represented by this node Position *pos; // nt, t, wt: pos of actual attributes - // sem: pos of semantic action in source text - // rslv: pos of resolver in source text + // sem: pos of semantic action in source text + // rslv: pos of resolver in source text int line; // source text line number of item in this node + int col; // source text line column number of item in this node State *state; // DFA state corresponding to this node - // (only used in DFA.ConvertToStates) + // (only used in DFA.ConvertToStates) + int rmin, rmax; // repetition quantifiers - Node(int typ, Symbol *sym, int line); -}; + Node(NodeType typ, Symbol *sym, int line, int col); + ~Node(); +}; }; // namespace diff --git a/src/NodeSymbolKind.h b/src/NodeSymbolKind.h new file mode 100644 index 0000000..e894052 --- /dev/null +++ b/src/NodeSymbolKind.h @@ -0,0 +1,42 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ + +/* + * File: NodeSymbolKind.h + * Author: mingo + * + * Created on July 14, 2022, 8:03 AM + */ + +#ifndef NODESYMBOLKIND_H +#define NODESYMBOLKIND_H + +// constants for node/symbol kinds +enum NodeType { + id, + t, // terminal symbol + pr, // pragma + nt, // nonterminal symbol + clas, // character class + chr, // character + wt, // weak terminal symbol + any, // + eps, // empty + nt_sync, // synchronization symbol + sem, // semantic action: (. .) + alt, // alternative: | + iter, // iteration: { } + opt, // option: [ ] + rslv, // resolver expr +}; +enum TransitionCode { + normalTrans, // transition codes + contextTrans, +}; + + +#endif /* NODESYMBOLKIND_H */ + diff --git a/src/Parser.cpp b/src/Parser.cpp index ce0f16e..4d04692 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -27,14 +27,37 @@ Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ -#include -#include "Parser.h" #include "Scanner.h" +#include "Parser.h" namespace Coco { +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + void Parser::SynErr(int n) { if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; @@ -71,12 +94,27 @@ void Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { + static const int tBase[44] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if 
(k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -84,7 +122,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -95,10 +133,13 @@ bool Parser::WeakSeparator(int n, int syFol, int repFol) { } } -void Parser::Coco() { +void Parser::Coco_NT() { Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Coco; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Coco"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif int beg = la->pos; int line = la->line; - while (StartOf(1)) { + while (StartOf(1 /* any */)) { Get(); } if (la->pos != beg) { @@ -106,313 +147,527 @@ void Parser::Coco() { } Expect(6 /* "COMPILER" */); - genScanner = true; +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + genScanner = true; tab->ignored = new CharSet(); Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif gramName = coco_string_create(t->val); beg = la->pos; line = la->line; - while (StartOf(2)) { + while (StartOf(2 /* any */)) { Get(); } tab->semDeclPos = new Position(beg, la->pos, 0, line); - if (la->kind == 7 /* "IGNORECASE" */) { + if (IsKind(la, 7 /* "IGNORECASE" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif dfa->ignoreCase = true; } - if (la->kind == 8 /* "CHARACTERS" */) { + if (IsKind(la, 8 /* "TERMINALS" */)) { Get(); - while (la->kind == _ident) { - SetDecl(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while 
(IsKind(la, _ident)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + sym = tab->FindSym(t->val); + if (sym != NULL) SemErr(_SC("name declared twice")); + else { + sym = tab->NewSym(NodeType::t, t->val, t->line, t->col); + sym->tokenKind = Symbol::fixedToken; + } } } - if (la->kind == 9 /* "TOKENS" */) { + if (IsKind(la, 9 /* "CHARACTERS" */)) { Get(); - while (la->kind == _ident || la->kind == _string || la->kind == _char) { - TokenDecl(Node::t); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (IsKind(la, _ident)) { + SetDecl_NT(); } } - if (la->kind == 10 /* "PRAGMAS" */) { + if (IsKind(la, 10 /* "TOKENS" */)) { Get(); - while (la->kind == _ident || la->kind == _string || la->kind == _char) { - TokenDecl(Node::pr); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { + TokenDecl_NT(NodeType::t); } } - while (la->kind == 11 /* "COMMENTS" */) { + if (IsKind(la, 11 /* "PRAGMAS" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { + TokenDecl_NT(NodeType::pr); + } + } + while (IsKind(la, 12 /* "COMMENTS" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif bool nested = false; - Expect(12 /* "FROM" */); - TokenExpr(g1); - Expect(13 /* "TO" */); - TokenExpr(g2); - if (la->kind == 14 /* "NESTED" */) { + Expect(13 /* "FROM" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g1); + Expect(14 /* "TO" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g2); + if (IsKind(la, 15 /* "NESTED" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif nested = true; } - dfa->NewComment(g1->l, g2->l, nested); + dfa->NewComment(g1->l, g2->l, nested); delete g1; delete g2; } - while (la->kind == 15 /* "IGNORE" */) { + while (IsKind(la, 16 /* "IGNORE" */)) { Get(); - Set(s); - tab->ignored->Or(s); +#ifdef PARSER_WITH_AST + 
AstAddTerminal(); +#endif + Set_NT(s); + tab->ignored->Or(s); delete s; } - while (!(la->kind == _EOF || la->kind == 16 /* "PRODUCTIONS" */)) {SynErr(42); Get();} - Expect(16 /* "PRODUCTIONS" */); + while (!(IsKind(la, _EOF) || IsKind(la, 17 /* "PRODUCTIONS" */))) {SynErr(44); Get();} + Expect(17 /* "PRODUCTIONS" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); - while (la->kind == _ident) { + while (IsKind(la, _ident)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif sym = tab->FindSym(t->val); bool undef = (sym == NULL); - if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); + if (undef) sym = tab->NewSym(NodeType::nt, t->val, t->line, t->col); else { - if (sym->typ == Node::nt) { - if (sym->graph != NULL) SemErr(L"name declared twice"); - } else SemErr(L"this symbol kind not allowed on left side of production"); + if (sym->typ == NodeType::nt) { + if (sym->graph != NULL) SemErr(_SC("name declared twice")); + } else SemErr(_SC("this symbol kind not allowed on left side of production")); sym->line = t->line; + sym->col = t->col; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; - if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { - AttrDecl(sym); + if (IsKind(la, 26 /* "<" */) || IsKind(la, 28 /* "<." */)) { + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); - if (la->kind == 39 /* "(." */) { - SemText(sym->semPos); + if (IsKind(la, 41 /* "(." */)) { + SemText_NT(sym->semPos); } - ExpectWeak(17 /* "=" */, 3); - Expression(g); + ExpectWeak(18 /* "=" */, 3); + Expression_NT(g); sym->graph = g->l; tab->Finish(g); + delete g; - ExpectWeak(18 /* "." */, 4); + ExpectWeak(19 /* "." 
*/, 4); } - Expect(19 /* "END" */); + Expect(20 /* "END" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (!coco_string_equal(gramName, t->val)) - SemErr(L"name does not match grammar name"); + SemErr(_SC("name does not match grammar name")); tab->gramSy = tab->FindSym(gramName); + coco_string_delete(gramName); if (tab->gramSy == NULL) - SemErr(L"missing production for grammar name"); + SemErr(_SC("missing production for grammar name")); else { sym = tab->gramSy; if (sym->attrPos != NULL) - SemErr(L"grammar symbol must not have attributes"); + SemErr(_SC("grammar symbol must not have attributes")); } - tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number + tab->noSym = tab->NewSym(NodeType::t, _SC("???"), 0, 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors->count == 0) { - wprintf(L"checking\n"); + wprintf(_SC("checking\n")); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); - if (tab->GrammarOk()) { - wprintf(L"parser"); + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab->GrammarCheckAll(); + } + else doGenCode = tab->GrammarOk(); + if(tab->genRREBNF && doGenCode) { + pgen->WriteRREBNF(); + } + if (doGenCode) { + wprintf(_SC("parser")); pgen->WriteParser(); if (genScanner) { - wprintf(L" + scanner"); + wprintf(_SC(" + scanner")); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } - wprintf(L" generated\n"); + wprintf(_SC(" generated\n")); if (tab->ddt[8]) pgen->WriteStatistics(); } } if (tab->ddt[6]) tab->PrintSymbolTable(); - Expect(18 /* "." */); + Expect(19 /* "." 
*/); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif } -void Parser::SetDecl() { +void Parser::SetDecl_NT() { CharSet *s; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SetDecl, _SC("SetDecl"), la->line); +#endif Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); - if (c != NULL) SemErr(L"name declared twice"); + if (c != NULL) SemErr(_SC("name declared twice")); - Expect(17 /* "=" */); - Set(s); - if (s->Elements() == 0) SemErr(L"character set must not be empty"); + Expect(18 /* "=" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Set_NT(s); + if (s->Elements() == 0) SemErr(_SC("character set must not be empty")); tab->NewCharClass(name, s); + coco_string_delete(name); - Expect(18 /* "." */); + Expect(19 /* "." */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::TokenDecl(int typ) { - wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; - Sym(name, kind); +void Parser::TokenDecl_NT(NodeType typ) { + wchar_t* name = NULL; NodeType kind, kindInherits; Symbol *sym, *inheritsSym; Graph *g; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenDecl, _SC("TokenDecl"), la->line); +#endif + Sym_NT(name, kind); sym = tab->FindSym(name); - if (sym != NULL) SemErr(L"name declared twice"); + if (sym != NULL) SemErr(_SC("name declared twice")); else { - sym = tab->NewSym(typ, name, t->line); + sym = tab->NewSym(typ, name, t->line, t->col); sym->tokenKind = Symbol::fixedToken; } - tokenString = NULL; + coco_string_delete(name); + coco_string_delete(tokenString); - while (!(StartOf(5))) {SynErr(43); Get();} - if (la->kind == 17 /* "=" */) { + if (IsKind(la, 25 /* ":" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Sym_NT(name, kindInherits); + 
inheritsSym = tab->FindSym(name); + if (inheritsSym == NULL) SemErr(_SC("token can't inherit from unddeclared name")); + else if (inheritsSym == sym) SemErr(_SC("token can not inherit from itself")); + else if (inheritsSym->typ != typ) SemErr(_SC("token can't inherit from different token type")); + else sym->inherits = inheritsSym; + + } + while (!(StartOf(5 /* sync */))) {SynErr(45); Get();} + if (IsKind(la, 18 /* "=" */)) { Get(); - TokenExpr(g); - Expect(18 /* "." */); - if (kind == str) SemErr(L"a literal must not be declared with a structure"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g); + Expect(19 /* "." */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + if (kind == str) SemErr(_SC("a literal must not be declared with a structure")); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string - if ((*(tab->literals))[tokenString] != NULL) - SemErr(L"token string declared twice"); - tab->literals->Set(tokenString, sym); + if (tab->literals[tokenString] != NULL) + SemErr(_SC("token string declared twice")); + tab->literals.Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } + delete g; - } else if (StartOf(6)) { + } else if (StartOf(6 /* sem */)) { if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); - } else SynErr(44); - if (la->kind == 39 /* "(." */) { - SemText(sym->semPos); - if (typ != Node::pr) SemErr(L"semantic action not allowed here"); + } else SynErr(46); + if (IsKind(la, 41 /* "(." 
*/)) { + SemText_NT(sym->semPos); + if (typ == NodeType::t) errors->Warning(_SC("Warning semantic action on token declarations require a custom Scanner")); } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::TokenExpr(Graph* &g) { +void Parser::TokenExpr_NT(Graph* &g) { Graph *g2; - TokenTerm(g); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenExpr, _SC("TokenExpr"), la->line); +#endif + TokenTerm_NT(g); bool first = true; - while (WeakSeparator(28 /* "|" */,8,7) ) { - TokenTerm(g2); + while (WeakSeparator(30 /* "|" */,8,7) ) { + TokenTerm_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Set(CharSet* &s) { +void Parser::Set_NT(CharSet* &s) { CharSet *s2; - SimSet(s); - while (la->kind == 20 /* "+" */ || la->kind == 21 /* "-" */) { - if (la->kind == 20 /* "+" */) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Set, _SC("Set"), la->line); +#endif + SimSet_NT(s); + while (IsKind(la, 21 /* "+" */) || IsKind(la, 22 /* "-" */)) { + if (IsKind(la, 21 /* "+" */)) { Get(); - SimSet(s2); - s->Or(s2); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SimSet_NT(s2); + s->Or(s2); delete s2; } else { Get(); - SimSet(s2); - s->Subtract(s2); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SimSet_NT(s2); + s->Subtract(s2); delete s2; } } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::AttrDecl(Symbol *sym) { - if (la->kind == 24 /* "<" */) { +void Parser::AttrDecl_NT(Symbol *sym) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_AttrDecl, _SC("AttrDecl"), la->line); +#endif + if (IsKind(la, 26 /* "<" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(9)) { - if 
(StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr(L"bad string in attributes"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("bad string in attributes")); } } - Expect(25 /* ">" */); + Expect(27 /* ">" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else if (la->kind == 26 /* "<." */) { + } else if (IsKind(la, 28 /* "<." */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr(L"bad string in attributes"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("bad string in attributes")); } } - Expect(27 /* ".>" */); + Expect(29 /* ".>" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); - } else SynErr(45); + } else SynErr(47); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::SemText(Position* &pos) { - Expect(39 /* "(." */); +void Parser::SemText_NT(Position* &pos) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SemText, _SC("SemText"), la->line); +#endif + Expect(41 /* "(." 
*/); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = t->line; - while (StartOf(13)) { - if (StartOf(14)) { + while (StartOf(13 /* alt */)) { + if (StartOf(14 /* any */)) { Get(); - } else if (la->kind == _badString) { + } else if (IsKind(la, _badString)) { Get(); - SemErr(L"bad string in semantic action"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("bad string in semantic action")); } else { Get(); - SemErr(L"missing end of previous semantic action"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("missing end of previous semantic action")); } } - Expect(40 /* ".)" */); + Expect(42 /* ".)" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif pos = new Position(beg, t->pos, col, line); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Expression(Graph* &g) { +void Parser::Expression_NT(Graph* &g) { Graph *g2; - Term(g); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Expression, _SC("Expression"), la->line); +#endif + Term_NT(g); bool first = true; - while (WeakSeparator(28 /* "|" */,16,15) ) { - Term(g2); + while (WeakSeparator(30 /* "|" */,16,15) ) { + Term_NT(g2); if (first) { tab->MakeFirstAlt(g); first = false; } - tab->MakeAlternative(g, g2); + tab->MakeAlternative(g, g2); delete g2; } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::SimSet(CharSet* &s) { +void Parser::SimSet_NT(CharSet* &s) { int n1, n2; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_SimSet, _SC("SimSet"), la->line); +#endif s = new CharSet(); - if (la->kind == _ident) { + if (IsKind(la, _ident)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif CharClass *c = tab->FindCharClass(t->val); - if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); + if (c == NULL) SemErr(_SC("undefined name")); else s->Or(c->set); - } else if (la->kind == _string) { + } else if 
(IsKind(la, _string)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t *name = tab->Unescape(subName2); coco_string_delete(subName2); - wchar_t ch; - int len = coco_string_length(name); - for(int i=0; i < len; i++) { - ch = name[i]; - if (dfa->ignoreCase) { - if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() - } - s->Set(ch); - } + wchar_t ch; + int len = coco_string_length(name); + for(int i=0; i < len; i++) { + ch = name[i]; + if (dfa->ignoreCase) { + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) ch = ch - (_SC('A') - _SC('a')); // ch.ToLower() + } + s->Set(ch); + } coco_string_delete(name); - - } else if (la->kind == _char) { - Char(n1); + + } else if (IsKind(la, _char)) { + Char_NT(n1); s->Set(n1); - if (la->kind == 22 /* ".." */) { + if (IsKind(la, 23 /* ".." */)) { Get(); - Char(n2); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Char_NT(n2); for (int i = n1; i <= n2; i++) s->Set(i); } - } else if (la->kind == 23 /* "ANY" */) { + } else if (IsKind(la, 24 /* "ANY" */)) { Get(); - s = new CharSet(); s->Fill(); - } else SynErr(46); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + delete s; s = new CharSet(); s->Fill(); + } else SynErr(48); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Char(int &n) { +void Parser::Char_NT(int &n) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Char, _SC("Char"), la->line); +#endif Expect(_char); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif n = 0; wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t* name = tab->Unescape(subName); @@ -420,28 +675,43 @@ void Parser::Char(int &n) { // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; - else SemErr(L"unacceptable character value"); + else SemErr(_SC("unacceptable character value")); 
coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Sym(wchar_t* &name, int &kind) { - name = coco_string_create(L"???"); kind = id; - if (la->kind == _ident) { +void Parser::Sym_NT(wchar_t* &name, NodeType &kind) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Sym, _SC("Sym"), la->line); +#endif + name = coco_string_create(_SC("???")); kind = id; + if (IsKind(la, _ident)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif kind = id; coco_string_delete(name); name = coco_string_create(t->val); - } else if (la->kind == _string || la->kind == _char) { - if (la->kind == _string) { + } else if (IsKind(la, _string) || IsKind(la, _char)) { + if (IsKind(la, _string)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif coco_string_delete(name); name = coco_string_create(t->val); } else { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); - coco_string_delete(name); - name = coco_string_create_append(L"\"", subName); + coco_string_delete(name); + name = coco_string_create_append(_SC("\""), subName); coco_string_delete(subName); - coco_string_merge(name, L"\""); + coco_string_merge(name, _SC("\"")); } kind = str; @@ -451,232 +721,366 @@ void Parser::Sym(wchar_t* &name, int &kind) { coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) - SemErr(L"literal tokens must not contain blanks"); - } else SynErr(47); + SemErr(_SC("literal tokens must not contain blanks")); + } else SynErr(49); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Term(Graph* &g) { +void Parser::Term_NT(Graph* &g) { Graph *g2; Node *rslv = NULL; g = NULL; - if (StartOf(17)) { - if (la->kind == 37 /* "IF" */) { - rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); - 
Resolver(rslv->pos); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Term, _SC("Term"), la->line); +#endif + if (StartOf(17 /* opt */)) { + if (IsKind(la, 39 /* "IF" */)) { + rslv = tab->NewNode(NodeType::rslv, (Symbol*)NULL, la->line, la->col); + Resolver_NT(rslv->pos); g = new Graph(rslv); } - Factor(g2); - if (rslv != NULL) tab->MakeSequence(g, g2); + Factor_NT(g2); + if (rslv != NULL) {tab->MakeSequence(g, g2); delete g2;} else g = g2; - while (StartOf(18)) { - Factor(g2); - tab->MakeSequence(g, g2); + while (StartOf(18 /* nt */)) { + Factor_NT(g2); + tab->MakeSequence(g, g2); delete g2; } - } else if (StartOf(19)) { - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); - } else SynErr(48); + } else if (StartOf(19 /* sem */)) { + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); + } else SynErr(50); if (g == NULL) // invalid start of Term - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Resolver(Position* &pos) { - Expect(37 /* "IF" */); - Expect(30 /* "(" */); +void Parser::Resolver_NT(Position* &pos) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Resolver, _SC("Resolver"), la->line); +#endif + Expect(39 /* "IF" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(32 /* "(" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; - Condition(); + Condition_NT(); pos = new Position(beg, t->pos, col, line); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Factor(Graph* &g) { - wchar_t* name = NULL; int kind; Position *pos; bool weak = false; +void Parser::Factor_NT(Graph* &g) { + wchar_t* name = NULL; NodeType kind; Position *pos; bool weak = false; g = NULL; +#ifdef PARSER_WITH_AST + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_Factor, _SC("Factor"), la->line); +#endif switch (la->kind) { - case _ident: case _string: case _char: case 29 /* "WEAK" */: { - if (la->kind == 29 /* "WEAK" */) { + case _ident: case _string: case _char: case 31 /* "WEAK" */: { + if (IsKind(la, 31 /* "WEAK" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif weak = true; } - Sym(name, kind); + Sym_NT(name, kind); Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) - sym = (Symbol*)((*(tab->literals))[name]); + sym = (Symbol*)tab->literals[name]; bool undef = (sym == NULL); if (undef) { if (kind == id) - sym = tab->NewSym(Node::nt, name, 0); // forward nt - else if (genScanner) { - sym = tab->NewSym(Node::t, name, t->line); + sym = tab->NewSym(NodeType::nt, name, t->line, t->col); // forward nt + else if (genScanner) { + sym = tab->NewSym(NodeType::t, name, t->line, t->col); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production - SemErr(L"undefined string in production"); + SemErr(_SC("undefined string in production")); sym = tab->eofSy; // dummy } } - int typ = sym->typ; - if (typ != Node::t && typ != Node::nt) - SemErr(L"this symbol kind is not allowed in a production"); + coco_string_delete(name); + NodeType typ = sym->typ; + if (typ != NodeType::t && typ != NodeType::nt) + SemErr(_SC("this symbol kind is not allowed in a production")); if (weak) { - if (typ == Node::t) typ = Node::wt; - else SemErr(L"only terminals may be weak"); + if (typ == NodeType::t) typ = NodeType::wt; + else SemErr(_SC("only terminals may be weak")); } - Node *p = tab->NewNode(typ, sym, t->line); + Node *p = tab->NewNode(typ, sym, t->line, t->col); g = new Graph(p); - if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { - Attribs(p); - if (kind != id) SemErr(L"a literal must not have attributes"); + if (IsKind(la, 26 /* "<" */) || IsKind(la, 28 /* "<." 
*/)) { + Attribs_NT(p); + if (kind != id) SemErr(_SC("a literal must not have attributes")); } if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) - SemErr(L"attribute mismatch between declaration and use of this symbol"); + SemErr(_SC("attribute mismatch between declaration and use of this symbol")); break; } - case 30 /* "(" */: { + case 32 /* "(" */: { Get(); - Expression(g); - Expect(31 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expression_NT(g); + Expect(33 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif break; } - case 32 /* "[" */: { + case 34 /* "[" */: { Get(); - Expression(g); - Expect(33 /* "]" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expression_NT(g); + Expect(35 /* "]" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeOption(g); break; } - case 34 /* "{" */: { + case 36 /* "{" */: { Get(); - Expression(g); - Expect(35 /* "}" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expression_NT(g); + Expect(37 /* "}" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif tab->MakeIteration(g); break; } - case 39 /* "(." */: { - SemText(pos); - Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); + case 41 /* "(." 
*/: { + SemText_NT(pos); + Node *p = tab->NewNode(NodeType::sem, (Symbol*)NULL, t->line, t->col); p->pos = pos; g = new Graph(p); break; } - case 23 /* "ANY" */: { + case 24 /* "ANY" */: { Get(); - Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Node *p = tab->NewNode(NodeType::any, (Symbol*)NULL, t->line, t->col); // p.set is set in tab->SetupAnys g = new Graph(p); break; } - case 36 /* "SYNC" */: { + case 38 /* "SYNC" */: { Get(); - Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Node *p = tab->NewNode(NodeType::nt_sync, (Symbol*)NULL, t->line, t->col); g = new Graph(p); break; } - default: SynErr(49); break; + default: SynErr(51); break; } if (g == NULL) // invalid start of Factor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Attribs(Node *p) { - if (la->kind == 24 /* "<" */) { +void Parser::Attribs_NT(Node *p) { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Attribs, _SC("Attribs"), la->line); +#endif + if (IsKind(la, 26 /* "<" */)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(9)) { - if (StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr(L"bad string in attributes"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("bad string in attributes")); } } - Expect(25 /* ">" */); + Expect(27 /* ">" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else if (la->kind == 26 /* "<." */) { + } else if (IsKind(la, 28 /* "<." 
*/)) { Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif int beg = la->pos; int col = la->col; int line = la->line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr(L"bad string in attributes"); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + SemErr(_SC("bad string in attributes")); } } - Expect(27 /* ".>" */); + Expect(29 /* ".>" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); - } else SynErr(50); + } else SynErr(52); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::Condition() { - while (StartOf(20)) { - if (la->kind == 30 /* "(" */) { +void Parser::Condition_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Condition, _SC("Condition"), la->line); +#endif + while (StartOf(20 /* alt */)) { + if (IsKind(la, 32 /* "(" */)) { Get(); - Condition(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Condition_NT(); } else { Get(); } } - Expect(31 /* ")" */); + Expect(33 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::TokenTerm(Graph* &g) { +void Parser::TokenTerm_NT(Graph* &g) { Graph *g2; - TokenFactor(g); - while (StartOf(8)) { - TokenFactor(g2); - tab->MakeSequence(g, g2); +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenTerm, _SC("TokenTerm"), la->line); +#endif + TokenFactor_NT(g); + while (StartOf(8 /* nt */)) { + TokenFactor_NT(g2); + tab->MakeSequence(g, g2); delete g2; } - if (la->kind == 38 /* "CONTEXT" */) { + if (IsKind(la, 40 /* "CONTEXT" */)) { Get(); - Expect(30 /* "(" */); - TokenExpr(g2); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(32 /* "(" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves 
= true; - tab->MakeSequence(g, g2); - Expect(31 /* ")" */); + tab->MakeSequence(g, g2); delete g2; + Expect(33 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } -void Parser::TokenFactor(Graph* &g) { - wchar_t* name = NULL; int kind; +void Parser::TokenFactor_NT(Graph* &g) { + wchar_t* name = NULL; NodeType kind; +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_TokenFactor, _SC("TokenFactor"), la->line); +#endif g = NULL; - if (la->kind == _ident || la->kind == _string || la->kind == _char) { - Sym(name, kind); + if (IsKind(la, _ident) || IsKind(la, _string) || IsKind(la, _char)) { + Sym_NT(name, kind); if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { - SemErr(L"undefined name"); + SemErr(_SC("undefined name")); c = tab->NewCharClass(name, new CharSet()); } - Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; + Node *p = tab->NewNode(NodeType::clas, (Symbol*)NULL, t->line, t->col); p->val = c->n; g = new Graph(p); - tokenString = coco_string_create(noString); + coco_string_delete(tokenString); tokenString = coco_string_create(noString); } else { // str g = tab->StrToGraph(name); if (tokenString == NULL) tokenString = coco_string_create(name); - else tokenString = coco_string_create(noString); + else { + coco_string_delete(tokenString); + tokenString = coco_string_create(noString); + } } + coco_string_delete(name); - } else if (la->kind == 30 /* "(" */) { + } else if (IsKind(la, 32 /* "(" */)) { Get(); - TokenExpr(g); - Expect(31 /* ")" */); - } else if (la->kind == 32 /* "[" */) { +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g); + Expect(33 /* ")" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 34 /* "[" */)) { Get(); - TokenExpr(g); - Expect(33 /* "]" */); - tab->MakeOption(g); tokenString = coco_string_create(noString); - } else if (la->kind == 34 /* "{" 
*/) { +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g); + Expect(35 /* "]" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + tab->MakeOption(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); + } else if (IsKind(la, 36 /* "{" */)) { Get(); - TokenExpr(g); - Expect(35 /* "}" */); - tab->MakeIteration(g); tokenString = coco_string_create(noString); - } else SynErr(51); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + TokenExpr_NT(g); + Expect(37 /* "}" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + tab->MakeIteration(g); coco_string_delete(tokenString); tokenString = coco_string_create(noString); + } else SynErr(53); if (g == NULL) // invalid start of TokenFactor - g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); + g = new Graph(tab->NewNode(NodeType::eps, (Symbol*)NULL, t->line, t->col)); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif } @@ -696,7 +1100,7 @@ struct ParserInitExistsRecognizer { struct InitIsMissingType { char dummy1; }; - + struct InitExistsType { char dummy1; char dummy2; }; @@ -720,7 +1124,7 @@ struct ParserDestroyExistsRecognizer { struct DestroyIsMissingType { char dummy1; }; - + struct DestroyExistsType { char dummy1; char dummy2; }; @@ -773,14 +1177,14 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(L"Dummy Token"); + la->val = coco_string_create(_SC("Dummy Token")); Get(); - Coco(); + Coco_NT(); Expect(0); } Parser::Parser(Scanner *scanner) { - maxT = 41; + maxT = 43; ParserInitCaller::CallInit(this); dummyToken = NULL; @@ -788,35 +1192,35 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; errDist = minErrDist; this->scanner = scanner; - errors = new Errors(); + this->errors = new Errors(scanner->GetParserFileName()); } bool Parser::StartOf(int s) { const bool T = true; const bool x = false; - static bool set[21][43] = { - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, 
T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,x, x,x,x,x, T,T,T,x, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,T,x, x,x,x,T, x,x,x,x, T,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,T, x,T,T,T, T,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, T,x,T,x, x,x,x,x, x,x,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, - {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,x}, - {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, x,T,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,T, x,x,x,x, T,T,T,T, T,T,T,T, T,T,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, - {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,x,x,T, x,x,x}, - {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, T,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, - {x,T,T,T, 
T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,x} + static const bool set[21][45] = { + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,x, x,x,x,x, x,T,T,T, x,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,T, x,x,x,x, T,x,x,x, x,x,T,T, T,x,T,x, T,x,T,T, x,T,x,x, x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {T,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,T,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,T, T,x,x,x, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,x, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,T,x, T,x,x,x, x,x,x,x, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x,T, x}, + {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,x,T, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, T,x,x,x, x,x,T,T, T,T,T,T, T,T,T,T, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, 
x,x,x,T, T,x,T,x, T,x,T,T, x,T,x,x, x}, + {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, T,x,x,x, x,x,x,T, T,x,T,x, T,x,T,x, x,T,x,x, x}, + {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,x,x,x, x,x,T,x, x,T,x,T, x,T,x,x, x,x,x,x, x}, + {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, x} }; @@ -826,100 +1230,216 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); - delete errors; delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif } -Errors::Errors() { +Errors::Errors(const char * FileName) { count = 0; + file = FileName; } void Errors::SynErr(int line, int col, int n) { - wchar_t* s; + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; switch (n) { - case 0: s = coco_string_create(L"EOF expected"); break; - case 1: s = coco_string_create(L"ident expected"); break; - case 2: s = coco_string_create(L"number expected"); break; - case 3: s = coco_string_create(L"string expected"); break; - case 4: s = coco_string_create(L"badString expected"); break; - case 5: s = coco_string_create(L"char expected"); break; - case 6: s = coco_string_create(L"\"COMPILER\" expected"); break; - case 7: s = coco_string_create(L"\"IGNORECASE\" expected"); break; - case 8: s = coco_string_create(L"\"CHARACTERS\" expected"); break; - case 9: s = coco_string_create(L"\"TOKENS\" expected"); break; - case 10: s = coco_string_create(L"\"PRAGMAS\" expected"); break; - case 11: s = coco_string_create(L"\"COMMENTS\" expected"); break; - case 12: s = coco_string_create(L"\"FROM\" expected"); break; - case 13: s = coco_string_create(L"\"TO\" expected"); break; - case 14: s = coco_string_create(L"\"NESTED\" expected"); break; - case 15: s = coco_string_create(L"\"IGNORE\" expected"); break; - case 16: s = 
coco_string_create(L"\"PRODUCTIONS\" expected"); break; - case 17: s = coco_string_create(L"\"=\" expected"); break; - case 18: s = coco_string_create(L"\".\" expected"); break; - case 19: s = coco_string_create(L"\"END\" expected"); break; - case 20: s = coco_string_create(L"\"+\" expected"); break; - case 21: s = coco_string_create(L"\"-\" expected"); break; - case 22: s = coco_string_create(L"\"..\" expected"); break; - case 23: s = coco_string_create(L"\"ANY\" expected"); break; - case 24: s = coco_string_create(L"\"<\" expected"); break; - case 25: s = coco_string_create(L"\">\" expected"); break; - case 26: s = coco_string_create(L"\"<.\" expected"); break; - case 27: s = coco_string_create(L"\".>\" expected"); break; - case 28: s = coco_string_create(L"\"|\" expected"); break; - case 29: s = coco_string_create(L"\"WEAK\" expected"); break; - case 30: s = coco_string_create(L"\"(\" expected"); break; - case 31: s = coco_string_create(L"\")\" expected"); break; - case 32: s = coco_string_create(L"\"[\" expected"); break; - case 33: s = coco_string_create(L"\"]\" expected"); break; - case 34: s = coco_string_create(L"\"{\" expected"); break; - case 35: s = coco_string_create(L"\"}\" expected"); break; - case 36: s = coco_string_create(L"\"SYNC\" expected"); break; - case 37: s = coco_string_create(L"\"IF\" expected"); break; - case 38: s = coco_string_create(L"\"CONTEXT\" expected"); break; - case 39: s = coco_string_create(L"\"(.\" expected"); break; - case 40: s = coco_string_create(L"\".)\" expected"); break; - case 41: s = coco_string_create(L"??? 
expected"); break; - case 42: s = coco_string_create(L"this symbol not expected in Coco"); break; - case 43: s = coco_string_create(L"this symbol not expected in TokenDecl"); break; - case 44: s = coco_string_create(L"invalid TokenDecl"); break; - case 45: s = coco_string_create(L"invalid AttrDecl"); break; - case 46: s = coco_string_create(L"invalid SimSet"); break; - case 47: s = coco_string_create(L"invalid Sym"); break; - case 48: s = coco_string_create(L"invalid Term"); break; - case 49: s = coco_string_create(L"invalid Factor"); break; - case 50: s = coco_string_create(L"invalid Attribs"); break; - case 51: s = coco_string_create(L"invalid TokenFactor"); break; + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("number expected"); break; + case 3: s = _SC("string expected"); break; + case 4: s = _SC("badString expected"); break; + case 5: s = _SC("char expected"); break; + case 6: s = _SC("\"COMPILER\" expected"); break; + case 7: s = _SC("\"IGNORECASE\" expected"); break; + case 8: s = _SC("\"TERMINALS\" expected"); break; + case 9: s = _SC("\"CHARACTERS\" expected"); break; + case 10: s = _SC("\"TOKENS\" expected"); break; + case 11: s = _SC("\"PRAGMAS\" expected"); break; + case 12: s = _SC("\"COMMENTS\" expected"); break; + case 13: s = _SC("\"FROM\" expected"); break; + case 14: s = _SC("\"TO\" expected"); break; + case 15: s = _SC("\"NESTED\" expected"); break; + case 16: s = _SC("\"IGNORE\" expected"); break; + case 17: s = _SC("\"PRODUCTIONS\" expected"); break; + case 18: s = _SC("\"=\" expected"); break; + case 19: s = _SC("\".\" expected"); break; + case 20: s = _SC("\"END\" expected"); break; + case 21: s = _SC("\"+\" expected"); break; + case 22: s = _SC("\"-\" expected"); break; + case 23: s = _SC("\"..\" expected"); break; + case 24: s = _SC("\"ANY\" expected"); break; + case 25: s = _SC("\":\" expected"); break; + case 26: s = _SC("\"<\" expected"); break; + case 27: s = _SC("\">\" 
expected"); break; + case 28: s = _SC("\"<.\" expected"); break; + case 29: s = _SC("\".>\" expected"); break; + case 30: s = _SC("\"|\" expected"); break; + case 31: s = _SC("\"WEAK\" expected"); break; + case 32: s = _SC("\"(\" expected"); break; + case 33: s = _SC("\")\" expected"); break; + case 34: s = _SC("\"[\" expected"); break; + case 35: s = _SC("\"]\" expected"); break; + case 36: s = _SC("\"{\" expected"); break; + case 37: s = _SC("\"}\" expected"); break; + case 38: s = _SC("\"SYNC\" expected"); break; + case 39: s = _SC("\"IF\" expected"); break; + case 40: s = _SC("\"CONTEXT\" expected"); break; + case 41: s = _SC("\"(.\" expected"); break; + case 42: s = _SC("\".)\" expected"); break; + case 43: s = _SC("??? expected"); break; + case 44: s = _SC("this symbol not expected in Coco"); break; + case 45: s = _SC("this symbol not expected in TokenDecl"); break; + case 46: s = _SC("invalid TokenDecl"); break; + case 47: s = _SC("invalid AttrDecl"); break; + case 48: s = _SC("invalid SimSet"); break; + case 49: s = _SC("invalid Sym"); break; + case 50: s = _SC("invalid Term"); break; + case 51: s = _SC("invalid Factor"); break; + case 52: s = _SC("invalid Attribs"); break; + case 53: s = _SC("invalid TokenFactor"); break; default: { - wchar_t format[20]; - coco_swprintf(format, 20, L"error %d", n); - s = coco_string_create(format); + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; } break; } - wprintf(L"-- line %d col %d: %ls\n", line, col, s); - coco_string_delete(s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, 
line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(L"%ls\n", s); + wprintf(_SC("%") _SFMT _SC("\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(_SC("%") _SFMT _SC(""), s); exit(1); } +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + } // namespace + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/Parser.frame b/src/Parser.frame index 85bd8b5..de13092 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ @@ -41,11 +41,27 @@ Parser.h Specification -->namespace_open +#ifdef PARSER_WITH_AST + +struct SynTree { + SynTree(Token *t ): tok(t){} + ~SynTree(); + + Token *tok; + TArrayList children; + + void dump_all(int indent=0, bool isLast=false); + void dump_pruned(int indent=0, bool isLast=false); +}; + +#endif + class Errors { public: int count; // number of errors detected + const char * file; - Errors(); + Errors(const char * FileName); void SynErr(int line, int col, int n); void Error(int line, int col, const wchar_t *s); void Warning(int line, int col, const wchar_t *s); @@ -63,6 +79,7 @@ private: void SynErr(int n); void Get(); + bool IsKind(Token *t, int n); void Expect(int n); bool StartOf(int s); void ExpectWeak(int n, int follow); @@ -75,6 +92,14 @@ public: Token *t; // last recognized token Token *la; // lookahead token +#ifdef PARSER_WITH_AST + SynTree *ast_root; + TArrayList ast_stack; + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line); + void AstPopNonTerminal(); +#endif + -->declarations Parser(Scanner *scanner); @@ -98,13 +123,36 @@ Parser.cpp Specification -->begin -#include -#include "Parser.h" #include "Scanner.h" +#include "Parser.h" -->namespace_open +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + void Parser::SynErr(int n) { if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; @@ -135,12 +183,22 @@ void 
Parser::Get() { } } +bool Parser::IsKind(Token *t, int n) { +-->tbase + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + void Parser::Expect(int n) { - if (la->kind==n) Get(); else { SynErr(n); } + if (IsKind(la, n)) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { - if (la->kind == n) Get(); + if (IsKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -148,7 +206,7 @@ void Parser::ExpectWeak(int n, int follow) { } bool Parser::WeakSeparator(int n, int syFol, int repFol) { - if (la->kind == n) {Get(); return true;} + if (IsKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -176,7 +234,7 @@ struct ParserInitExistsRecognizer { struct InitIsMissingType { char dummy1; }; - + struct InitExistsType { char dummy1; char dummy2; }; @@ -200,7 +258,7 @@ struct ParserDestroyExistsRecognizer { struct DestroyIsMissingType { char dummy1; }; - + struct DestroyExistsType { char dummy1; char dummy2; }; @@ -253,7 +311,7 @@ struct ParserDestroyCaller { void Parser::Parse() { t = NULL; la = dummyToken = new Token(); - la->val = coco_string_create(L"Dummy Token"); + la->val = coco_string_create(_SC("Dummy Token")); Get(); -->parseRoot } @@ -266,7 +324,7 @@ Parser::Parser(Scanner *scanner) { minErrDist = 2; errDist = minErrDist; this->scanner = scanner; - errors = new Errors(); + this->errors = new Errors(scanner->GetParserFileName()); } bool Parser::StartOf(int s) { @@ -280,47 +338,161 @@ bool Parser::StartOf(int s) { Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); - delete errors; delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif } -Errors::Errors() { +Errors::Errors(const char * FileName) { count = 0; + file = FileName; } void Errors::SynErr(int line, int col, int n) { - wchar_t* s; + const 
wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; switch (n) { -->errors default: { - wchar_t format[20]; - coco_swprintf(format, 20, L"error %d", n); - s = coco_string_create(format); + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; } break; } - wprintf(L"-- line %d col %d: %ls\n", line, col, s); - coco_string_delete(s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { - wprintf(L"-- line %d col %d: %ls\n", line, col, s); + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); } void Errors::Warning(const wchar_t *s) { - wprintf(L"%ls\n", s); + wprintf(_SC("%") _SFMT _SC("\n"), s); } void Errors::Exception(const wchar_t* s) { - wprintf(L"%ls", s); + wprintf(_SC("%") _SFMT _SC(""), s); exit(1); } +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + -->namespace_close + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator 
delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/Parser.h b/src/Parser.h index c02102d..0c790e5 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -33,6 +33,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include "Tab.h" #include "DFA.h" #include "ParserGen.h" +#define COCO_FRAME_PARSER #include "Scanner.h" @@ -40,11 +41,27 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { +#ifdef PARSER_WITH_AST + +struct SynTree { + SynTree(Token *t ): tok(t){} + ~SynTree(); + + Token *tok; + TArrayList children; + + void dump_all(int indent=0, bool isLast=false); + void dump_pruned(int indent=0, bool isLast=false); +}; + +#endif + class Errors { public: int count; // number of errors detected + const char * file; - Errors(); + Errors(const char * FileName); void SynErr(int line, int col, int n); void Error(int line, int col, const wchar_t *s); void Warning(int line, int col, const wchar_t *s); @@ -62,9 +79,31 @@ class Parser { _string=3, _badString=4, _char=5, - _ddtSym=42, - _optionSym=43 + _ddtSym=44, + _optionSym=45, + }; +#ifdef PARSER_WITH_AST + enum eNonTerminals{ + _Coco=0, + _SetDecl=1, + _TokenDecl=2, + _TokenExpr=3, + _Set=4, + _AttrDecl=5, + _SemText=6, + _Expression=7, + _SimSet=8, + _Char=9, + _Sym=10, + _Term=11, + _Resolver=12, + _Factor=13, + _Attribs=14, + _Condition=15, + _TokenTerm=16, + _TokenFactor=17 }; +#endif int maxT; Token *dummyToken; @@ -73,6 +112,7 @@ class Parser { void SynErr(int n); void Get(); + bool IsKind(Token *t, int n); void Expect(int n); bool StartOf(int s); void ExpectWeak(int n, int follow); @@ -85,15 +125,23 @@ class Parser { Token *t; // last recognized token Token *la; // lookahead token -int id; - int str; +#ifdef PARSER_WITH_AST + SynTree *ast_root; + TArrayList ast_stack; + void AstAddTerminal(); + bool AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line); + void AstPopNonTerminal(); +#endif + +NodeType id; + 
NodeType str; FILE* trace; // other Coco objects referenced in this ATG Tab *tab; DFA *dfa; ParserGen *pgen; - bool genScanner; + bool genScanner, ignoreGammarErrors; wchar_t* tokenString; // used in declarations of literal tokens wchar_t* noString; // used in declarations of literal tokens @@ -103,10 +151,11 @@ int id; tab = NULL; dfa = NULL; pgen = NULL; - id = 0; - str = 1; + id = NodeType::id; + str = NodeType::t; tokenString = NULL; - noString = coco_string_create(L"-none-"); + noString = coco_string_create(_SC("-none-")); + ignoreGammarErrors = false; } // Uncomment this method if cleanup is necessary, @@ -123,24 +172,24 @@ int id; ~Parser(); void SemErr(const wchar_t* msg); - void Coco(); - void SetDecl(); - void TokenDecl(int typ); - void TokenExpr(Graph* &g); - void Set(CharSet* &s); - void AttrDecl(Symbol *sym); - void SemText(Position* &pos); - void Expression(Graph* &g); - void SimSet(CharSet* &s); - void Char(int &n); - void Sym(wchar_t* &name, int &kind); - void Term(Graph* &g); - void Resolver(Position* &pos); - void Factor(Graph* &g); - void Attribs(Node *p); - void Condition(); - void TokenTerm(Graph* &g); - void TokenFactor(Graph* &g); + void Coco_NT(); + void SetDecl_NT(); + void TokenDecl_NT(NodeType typ); + void TokenExpr_NT(Graph* &g); + void Set_NT(CharSet* &s); + void AttrDecl_NT(Symbol *sym); + void SemText_NT(Position* &pos); + void Expression_NT(Graph* &g); + void SimSet_NT(CharSet* &s); + void Char_NT(int &n); + void Sym_NT(wchar_t* &name, NodeType &kind); + void Term_NT(Graph* &g); + void Resolver_NT(Position* &pos); + void Factor_NT(Graph* &g); + void Attribs_NT(Node *p); + void Condition_NT(); + void TokenTerm_NT(Graph* &g); + void TokenFactor_NT(Graph* &g); void Parse(); diff --git a/src/ParserGen.cpp b/src/ParserGen.cpp index ee8e938..c184900 100644 --- a/src/ParserGen.cpp +++ b/src/ParserGen.cpp @@ -28,38 +28,37 @@ Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include -#include "ArrayList.h" #include "ParserGen.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" #include "Generator.h" namespace Coco { void ParserGen::Indent (int n) { - for (int i = 1; i <= n; i++) fwprintf(gen, L"\t"); + for (int i = 1; i <= n; i++) fputws(_SC("\t"), gen); } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning -bool ParserGen::UseSwitch (Node *p) { - BitArray *s1, *s2; - if (p->typ != Node::alt) return false; +bool ParserGen::UseSwitch (const Node *p) { + BitArray *s2; + if (p->typ != NodeType::alt) return false; int nAlts = 0; - s1 = new BitArray(tab->terminals->Count); + BitArray s1(tab->terminals.Count); while (p != NULL) { s2 = tab->Expected0(p->sub, curSy); // must not optimize with switch statement, if there are ll1 warnings - if (s1->Overlaps(s2)) { return false; } - s1->Or(s2); + if (s1.Overlaps(s2)) {delete s2; return false; } + s1.Or(s2); + delete s2; ++nAlts; // must not optimize with switch-statement, if alt uses a resolver expression - if (p->sub->typ == Node::rslv) return false; + if (p->sub->typ == NodeType::rslv) return false; p = p->down; } return nAlts > 5; } - + int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { if (nsName == NULL || coco_string_length(nsName) == 0) { return 0; @@ -70,9 +69,7 @@ int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } - wchar_t *curNs = coco_string_create(nsName, startPos, curLen); - fwprintf(gen, L"namespace %ls {\n", curNs); - coco_string_delete(curNs); + fwprintf(gen, _SC("namespace %.*") _SFMT _SC(" {\n"), curLen, nsName+startPos); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; @@ -84,22 +81,22 @@ int ParserGen::GenNamespaceOpen(const 
wchar_t *nsName) { void ParserGen::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { - fwprintf(gen, L"} // namespace\n"); + fputws(_SC("} // namespace\n"), gen); } } -void ParserGen::CopySourcePart (Position *pos, int indent) { +void ParserGen::CopySourcePart (const Position *pos, int indent) { // Copy text described by pos from atg to gen int ch, i; if (pos != NULL) { buffer->SetPos(pos->beg); ch = buffer->Read(); if (tab->emitLines && pos->line) { - fwprintf(gen, L"\n#line %d \"%ls\"\n", pos->line, tab->srcName); + fwprintf(gen, _SC("\n#line %d \"%") _SFMT _SC("\"\n"), pos->line, tab->srcName); } Indent(indent); while (buffer->GetPos() <= pos->end) { while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF - fwprintf(gen, L"\n"); Indent(indent); + fputws(_SC("\n"), gen); Indent(indent); if (ch == CR) { ch = buffer->Read(); } // skip CR if (ch == LF) { ch = buffer->Read(); } // skip LF for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) { @@ -108,189 +105,226 @@ void ParserGen::CopySourcePart (Position *pos, int indent) { } if (buffer->GetPos() > pos->end) goto done; } - fwprintf(gen, L"%lc", ch); + fwprintf(gen, _SC("%") _CHFMT, ch); ch = buffer->Read(); } done: - if (indent > 0) fwprintf(gen, L"\n"); + if (indent > 0) fputws(_SC("\n"), gen); } } -void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { +void ParserGen::GenErrorMsg (int errTyp, const Symbol *sym) { errorNr++; - const int formatLen = 1000; + const size_t formatLen = 1000; wchar_t format[formatLen]; - coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = coco_string_create(L\"", errorNr); + coco_swprintf(format, formatLen, _SC("\t\t\tcase %d: s = _SC(\""), errorNr); coco_string_merge(err, format); if (errTyp == tErr) { - if (sym->name[0] == L'"') { - coco_swprintf(format, formatLen, L"%ls expected", tab->Escape(sym->name)); + if (sym->name[0] == _SC('"')) { + wchar_t *se = tab->Escape(sym->name); + coco_swprintf(format, formatLen, _SC("%") _SFMT _SC(" expected"), 
se); coco_string_merge(err, format); + coco_string_delete(se); } else { - coco_swprintf(format, formatLen, L"%ls expected", sym->name); + coco_swprintf(format, formatLen, _SC("%") _SFMT _SC(" expected"), sym->name); coco_string_merge(err, format); } } else if (errTyp == altErr) { - coco_swprintf(format, formatLen, L"invalid %ls", sym->name); + coco_swprintf(format, formatLen, _SC("invalid %") _SFMT, sym->name); coco_string_merge(err, format); } else if (errTyp == syncErr) { - coco_swprintf(format, formatLen, L"this symbol not expected in %ls", sym->name); + coco_swprintf(format, formatLen, _SC("this symbol not expected in %") _SFMT, sym->name); coco_string_merge(err, format); } - coco_swprintf(format, formatLen, L"\"); break;\n"); + coco_swprintf(format, formatLen, _SC("\"); break;\n")); coco_string_merge(err, format); } -int ParserGen::NewCondSet (BitArray *s) { - for (int i = 1; i < symSet->Count; i++) // skip symSet[0] (reserved for union of SYNC sets) - if (Sets::Equals(s, (BitArray*)(*symSet)[i])) return i; - symSet->Add(s->Clone()); - return symSet->Count - 1; +int ParserGen::NewCondSet (const BitArray *s) { + for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) + if (Sets::Equals(s, symSet[i])) return i; + symSet.Add(s->Clone()); + return symSet.Count - 1; } -void ParserGen::GenCond (BitArray *s, Node *p) { - if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); +void ParserGen::GenCond (const BitArray *s, const Node *p) { + if (p->typ == NodeType::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); - if (n == 0) fwprintf(gen, L"false"); // happens if an ANY set matches no symbol + if (n == 0) fputws(_SC("false"), gen); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; - for (int i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (int i=0; iterminals.Count; i++) { + sym = (Symbol*)tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, L"la->kind == 
"); + fputws(_SC("IsKind(la, "), gen); WriteSymbolOrCode(gen, sym); + fputws(_SC(")"), gen); --n; - if (n > 0) fwprintf(gen, L" || "); + if (n > 0) fputws(_SC(" || "), gen); } } } else - fwprintf(gen, L"StartOf(%d)", NewCondSet(s)); + fwprintf(gen, _SC("StartOf(%d /* %s */)"), NewCondSet(s), (tab->nTyp[p->typ])); } } -void ParserGen::PutCaseLabels (BitArray *s) { +void ParserGen::PutCaseLabels (const BitArray *s0) { Symbol *sym; - for (int i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + BitArray *s = DerivationsOf(s0); + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; if ((*s)[sym->n]) { - fwprintf(gen, L"case "); + fputws(_SC("case "), gen); WriteSymbolOrCode(gen, sym); - fwprintf(gen, L": "); + fputws(_SC(": "), gen); + } + } + delete s; +} + +BitArray *ParserGen::DerivationsOf(const BitArray *s0) { + BitArray *s = s0->Clone(); + bool done = false; + while (!done) { + done = true; + for (int i=0; iterminals.Count; i++) { + Symbol *sym = tab->terminals[i]; + if ((*s)[sym->n]) { + for (int i=0; iterminals.Count; i++) { + Symbol *baseSym = tab->terminals[i]; + if (baseSym->inherits == sym && !(*s)[baseSym->n]) { + s->Set(baseSym->n, true); + done = false; + } + } + } } } + return s; } -void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { - Node *p2; +void ParserGen::GenCode (const Node *p, int indent, BitArray *isChecked) { + const Node *p2; BitArray *s1, *s2; while (p != NULL) { - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { Indent(indent); - fwprintf(gen, L"%ls(", p->sym->name); + fwprintf(gen, _SC("%") _SFMT _SC("_NT("), p->sym->name); CopySourcePart(p->pos, 0); - fwprintf(gen, L");\n"); - } else if (p->typ == Node::t) { + fputws(_SC(");\n"), gen); + } else if (p->typ == NodeType::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n - if ((*isChecked)[p->sym->n]) fwprintf(gen, L"Get();\n"); + if ((*isChecked)[p->sym->n]) { + 
fputws(_SC("Get();\n"), gen); + } else { - fwprintf(gen, L"Expect("); + fputws(_SC("Expect("), gen); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, L");\n"); + fputws(_SC(");\n"), gen); } - } if (p->typ == Node::wt) { + fputws(_SC("#ifdef PARSER_WITH_AST\n\tAstAddTerminal();\n#endif\n"), gen); + } if (p->typ == NodeType::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); - fwprintf(gen, L"ExpectWeak("); + fputws(_SC("ExpectWeak("), gen); WriteSymbolOrCode(gen, p->sym); - fwprintf(gen, L", %d);\n", NewCondSet(s1)); - } if (p->typ == Node::any) { + fwprintf(gen, _SC(", %d);\n"), NewCondSet(s1)); + delete s1; + } if (p->typ == NodeType::any) { Indent(indent); int acc = Sets::Elements(p->set); - if (tab->terminals->Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { + if (tab->terminals.Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here - fwprintf(gen, L"Get();\n"); + fputws(_SC("Get();\n"), gen); } else { GenErrorMsg(altErr, curSy); if (acc > 0) { - fwprintf(gen, L"if ("); GenCond(p->set, p); fwprintf(gen, L") Get(); else SynErr(%d);\n", errorNr); - } else fwprintf(gen, L"SynErr(%d); // ANY node that matches no symbol\n", errorNr); + fputws(_SC("if ("), gen); GenCond(p->set, p); fwprintf(gen, _SC(") Get(); else SynErr(%d);\n"), errorNr); + } else fwprintf(gen, _SC("SynErr(%d); // ANY node that matches no symbol\n"), errorNr); } - } if (p->typ == Node::eps) { // nothing - } if (p->typ == Node::rslv) { // nothing - } if (p->typ == Node::sem) { + } if (p->typ == NodeType::eps) { // nothing + } if (p->typ == NodeType::rslv) { // nothing + } if (p->typ == NodeType::sem) { CopySourcePart(p->pos, indent); - } if (p->typ == Node::sync) { + } if (p->typ == NodeType::nt_sync) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); - fwprintf(gen, L"while (!("); GenCond(s1, p); fwprintf(gen, L")) {"); 
- fwprintf(gen, L"SynErr(%d); Get();", errorNr); fwprintf(gen, L"}\n"); - } if (p->typ == Node::alt) { + fputws(_SC("while (!("), gen); GenCond(s1, p); fputws(_SC(")) {"), gen); + fwprintf(gen, _SC("SynErr(%d); Get();"), errorNr); fputws(_SC("}\n"), gen); + delete s1; + } if (p->typ == NodeType::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); + delete s1; bool useSwitch = UseSwitch(p); - if (useSwitch) { Indent(indent); fwprintf(gen, L"switch (la->kind) {\n"); } + if (useSwitch) { Indent(indent); fputws(_SC("switch (la->kind) {\n"), gen); } p2 = p; while (p2 != NULL) { s1 = tab->Expected(p2->sub, curSy); Indent(indent); if (useSwitch) { - PutCaseLabels(s1); fwprintf(gen, L"{\n"); + PutCaseLabels(s1); fputws(_SC("{\n"), gen); } else if (p2 == p) { - fwprintf(gen, L"if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); - } else if (p2->down == NULL && equal) { fwprintf(gen, L"} else {\n"); + fputws(_SC("if ("), gen); GenCond(s1, p2->sub); fputws(_SC(") {\n"), gen); + } else if (p2->down == NULL && equal) { fputws(_SC("} else {\n"), gen); } else { - fwprintf(gen, L"} else if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); + fputws(_SC("} else if ("), gen); GenCond(s1, p2->sub); fputws(_SC(") {\n"), gen); } GenCode(p2->sub, indent + 1, s1); if (useSwitch) { - Indent(indent); fwprintf(gen, L"\tbreak;\n"); - Indent(indent); fwprintf(gen, L"}\n"); + Indent(indent); fputws(_SC("\tbreak;\n"), gen); + Indent(indent); fputws(_SC("}\n"), gen); } p2 = p2->down; + delete s1; } Indent(indent); if (equal) { - fwprintf(gen, L"}\n"); + fputws(_SC("}\n"), gen); } else { GenErrorMsg(altErr, curSy); if (useSwitch) { - fwprintf(gen, L"default: SynErr(%d); break;\n", errorNr); - Indent(indent); fwprintf(gen, L"}\n"); + fwprintf(gen, _SC("default: SynErr(%d); break;\n"), errorNr); + Indent(indent); fputws(_SC("}\n"), gen); } else { - fwprintf(gen, L"} "); fwprintf(gen, L"else SynErr(%d);\n", errorNr); + fputws(_SC("} "), gen); fwprintf(gen, _SC("else 
SynErr(%d);\n"), errorNr); } } - } if (p->typ == Node::iter) { + } if (p->typ == NodeType::iter) { Indent(indent); p2 = p->sub; - fwprintf(gen, L"while ("); - if (p2->typ == Node::wt) { + fputws(_SC("while ("), gen); + if (p2->typ == NodeType::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); - fwprintf(gen, L"WeakSeparator("); + fputws(_SC("WeakSeparator("), gen); WriteSymbolOrCode(gen, p2->sym); - fwprintf(gen, L",%d,%d) ", NewCondSet(s1), NewCondSet(s2)); - s1 = new BitArray(tab->terminals->Count); // for inner structure + fwprintf(gen, _SC(",%d,%d) "), NewCondSet(s1), NewCondSet(s2)); + delete s1; + delete s2; + s1 = new BitArray(tab->terminals.Count); // for inner structure if (p2->up || p2->next == NULL) p2 = NULL; else p2 = p2->next; } else { s1 = tab->First(p2); GenCond(s1, p2); } - fwprintf(gen, L") {\n"); + fputws(_SC(") {\n"), gen); GenCode(p2, indent + 1, s1); - Indent(indent); fwprintf(gen, L"}\n"); - } if (p->typ == Node::opt) { + Indent(indent); fputws(_SC("}\n"), gen); + delete s1; + } if (p->typ == NodeType::opt) { s1 = tab->First(p->sub); Indent(indent); - fwprintf(gen, L"if ("); GenCond(s1, p->sub); fwprintf(gen, L") {\n"); + fputws(_SC("if ("), gen); GenCond(s1, p->sub); fputws(_SC(") {\n"), gen); GenCode(p->sub, indent + 1, s1); - Indent(indent); fwprintf(gen, L"}\n"); + Indent(indent); fputws(_SC("}\n"), gen); + delete s1; } - if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) + if (p->typ != NodeType::eps && p->typ != NodeType::sem && p->typ != NodeType::nt_sync) isChecked->SetAll(false); // = new BitArray(Symbol.terminals.Count); if (p->up) break; p = p->next; @@ -303,151 +337,295 @@ void ParserGen::GenTokensHeader() { int i; bool isFirst = true; - fwprintf(gen, L"\tenum {\n"); + fputws(_SC("\tenum {\n"), gen); // tokens - for (i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; if (!isalpha(sym->name[0])) { 
continue; } if (isFirst) { isFirst = false; } - else { fwprintf(gen , L",\n"); } + else { fputws(_SC("\n"), gen); } - fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d,"), sym->name, sym->n); + if(sym->inherits) { + fwprintf(gen , _SC(" // INHERITS -> %") _SFMT, sym->inherits->name); + } } // pragmas - for (i=0; ipragmas->Count; i++) { + for (i=0; ipragmas.Count; i++) { if (isFirst) { isFirst = false; } - else { fwprintf(gen , L",\n"); } + else { fputws(_SC("\n"), gen); } - sym = (Symbol*)((*(tab->pragmas))[i]); - fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); + sym = tab->pragmas[i]; + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d,"), sym->name, sym->n); } - fwprintf(gen, L"\n\t};\n"); + fputws(_SC("\n\t};\n"), gen); + + // nonterminals + fputws(_SC("#ifdef PARSER_WITH_AST\n\tenum eNonTerminals{\n"), gen); + isFirst = true; + for (i=0; inonterminals.Count; i++) { + sym = tab->nonterminals[i]; + if (isFirst) { isFirst = false; } + else { fputws(_SC(",\n"), gen); } + + fwprintf(gen , _SC("\t\t_%") _SFMT _SC("=%d"), sym->name, sym->n); + } + fputws(_SC("\n\t};\n#endif\n"), gen); + } void ParserGen::GenCodePragmas() { Symbol *sym; - for (int i=0; ipragmas->Count; i++) { - sym = (Symbol*)((*(tab->pragmas))[i]); - fwprintf(gen, L"\t\tif (la->kind == "); + for (int i=0; ipragmas.Count; i++) { + sym = tab->pragmas[i]; + fputws(_SC("\t\tif (la->kind == "), gen); WriteSymbolOrCode(gen, sym); - fwprintf(gen, L") {\n"); + fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 4); - fwprintf(gen, L"\t\t}\n"); + fputws(_SC("\t\t}\n"), gen); } } +void ParserGen::GenTokenBase() { + Symbol *sym; + fwprintf(gen, _SC("\tstatic const int tBase[%d] = {"), tab->terminals.Count); + + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; + if((i % 20) == 0) fputws(_SC("\n\t\t"), gen); + if (sym->inherits == NULL) + fputws(_SC("-1,"), gen); // not inherited + else + fwprintf(gen, _SC("%d,"), sym->inherits->n); + } + 
fputws(_SC("\n\t};\n"), gen); +} + void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { if (!isalpha(sym->name[0])) { - fwprintf(gen, L"%d /* %ls */", sym->n, sym->name); + fwprintf(gen, _SC("%d /* %") _SFMT _SC(" */"), sym->n, sym->name); } else { - fwprintf(gen, L"_%ls", sym->name); + fwprintf(gen, _SC("_%") _SFMT, sym->name); } } void ParserGen::GenProductionsHeader() { Symbol *sym; - for (int i=0; inonterminals->Count; i++) { - sym = (Symbol*)((*(tab->nonterminals))[i]); + for (int i=0; inonterminals.Count; i++) { + sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, L"\tvoid %ls(", sym->name); + fwprintf(gen, _SC("\tvoid %") _SFMT _SC("_NT("), sym->name); CopySourcePart(sym->attrPos, 0); - fwprintf(gen, L");\n"); + fputws(_SC(");\n"), gen); } } void ParserGen::GenProductions() { Symbol *sym; - for (int i=0; inonterminals->Count; i++) { - sym = (Symbol*)((*(tab->nonterminals))[i]); + BitArray ba(tab->terminals.Count); + for (int i=0; inonterminals.Count; i++) { + sym = tab->nonterminals[i]; curSy = sym; - fwprintf(gen, L"void Parser::%ls(", sym->name); + fwprintf(gen, _SC("void Parser::%") _SFMT _SC("_NT("), sym->name); CopySourcePart(sym->attrPos, 0); - fwprintf(gen, L") {\n"); + fputws(_SC(") {\n"), gen); CopySourcePart(sym->semPos, 2); - GenCode(sym->graph, 2, new BitArray(tab->terminals->Count)); - fwprintf(gen, L"}\n"); fwprintf(gen, L"\n"); + fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); + if(i == 0) fwprintf(gen, _SC("\t\tToken *ntTok = new Token(); ntTok->kind = eNonTerminals::_%") _SFMT _SC("; ntTok->line = 0; ntTok->val = coco_string_create(_SC(\"%") _SFMT _SC("\"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);\n"), sym->name, sym->name); + else { + fwprintf(gen, _SC("\t\tbool ntAdded = AstAddNonTerminal(eNonTerminals::_%") _SFMT _SC(", _SC(\"%") _SFMT _SC("\"), la->line);\n"), sym->name, sym->name); + } + fputws(_SC("#endif\n"), gen); + ba.SetAll(false); + GenCode(sym->graph, 2, &ba); + 
fputws(_SC("#ifdef PARSER_WITH_AST\n"), gen); + if(i == 0) fputws(_SC("\t\tAstPopNonTerminal();\n"), gen); + else fputws(_SC("\t\tif(ntAdded) AstPopNonTerminal();\n"), gen); + fputws(_SC("#endif\n}\n\n"), gen); } } void ParserGen::InitSets() { - fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet->Count, tab->terminals->Count+1); + fwprintf(gen, _SC("\tstatic const bool set[%d][%d] = {\n"), symSet.Count, tab->terminals.Count+1); - for (int i = 0; i < symSet->Count; i++) { - BitArray *s = (BitArray*)(*symSet)[i]; - fwprintf(gen, L"\t\t{"); + for (int i = 0; i < symSet.Count; i++) { + BitArray *s = DerivationsOf(symSet[i]); + fputws(_SC("\t\t{"), gen); int j = 0; Symbol *sym; - for (int k=0; kterminals->Count; k++) { - sym = (Symbol*)((*(tab->terminals))[k]); - if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,"); + for (int k=0; kterminals.Count; k++) { + sym = tab->terminals[k]; + fputws(((*s)[sym->n]) ? _SC("T,") : _SC("x,"), gen); ++j; - if (j%4 == 0) fwprintf(gen, L" "); + if (j%4 == 0) fputws(_SC(" "), gen); } - if (i == symSet->Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n"); + if (i == symSet.Count-1) fputws(_SC("x}\n"), gen); else fputws(_SC("x},\n"), gen); + delete s; } - fwprintf(gen, L"\t};\n\n"); + fputws(_SC("\t};\n\n"), gen); +} + +int ParserGen::GenCodeRREBNF (const Node *p, int depth) { + int rc = 0, loop_count = 0; + const Node *p2; + while (p != NULL) { + switch (p->typ) { + case NodeType::nt: + case NodeType::t: { + fputws(_SC(" "), gen); + fputws(p->sym->name, gen); + ++rc; + break; + } + case NodeType::wt: { + break; + } + case NodeType::any: { + fputws(_SC(" ANY"), gen); + ++rc; + break; + } + case NodeType::eps: break; // nothing + case NodeType::rslv: break; // nothing + case NodeType::sem: { + break; + } + case NodeType::nt_sync: { + break; + } + case NodeType::alt: { + bool need_close_alt = false; + if(depth > 0 || loop_count || p->next) { + fputws(" (", gen); + need_close_alt = true; + } + p2 = p; + while 
(p2 != NULL) { + rc += GenCodeRREBNF(p2->sub, depth+1); + p2 = p2->down; + if(p2 != NULL) fputws(_SC(" |"), gen); + } + if(need_close_alt) fputws(_SC(" )"), gen); + break; + } + case NodeType::iter: { + if(p->sub->up == 0) fputws(_SC(" ("), gen); + rc += GenCodeRREBNF(p->sub, depth+1); + if(p->sub->up == 0) fputws(_SC(" )"), gen); + fputws(_SC("*"), gen); + break; + } + case NodeType::opt: + if(p->sub->up == 0) fputws(_SC(" ("), gen); + rc += GenCodeRREBNF(p->sub, depth+1); + if(p->sub->up == 0) fputws(_SC(" )"), gen); + fputws(_SC("?"), gen); + break; + } + if (p->up) break; + p = p->next; + ++loop_count; + } + return rc; +} + +void ParserGen::WriteRREBNF () { + Symbol *sym; + Generator g(tab, errors); + gen = g.OpenGen(_SC("Parser.ebnf")); + + fwprintf(gen, _SC("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n")); + fwprintf(gen, _SC("\n//\n// productions\n//\n\n")); + for (int i=0; inonterminals.Count; i++) { + sym = tab->nonterminals[i]; + fwprintf(gen, _SC("%s ::= "), sym->name); + if(GenCodeRREBNF(sym->graph, 0) == 0) { + fputws(_SC("\"\?\?()\?\?\""), gen); + } + fputws(_SC("\n"), gen); + } + fwprintf(gen, _SC("\n//\n// tokens\n//\n\n")); + Iterator *iter = tab->literals.GetIterator(); + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; + if (isalpha(sym->name[0])) { + iter->Reset(); + while (iter->HasNext()) { + DictionaryEntry *e = iter->Next(); + if (e->val == sym) { + fwprintf(gen, _SC("%s ::= %s\n"), sym->name, e->key); + break; + } + } + } else { + //fwprintf(gen, _SC("%d /* %s */"), sym->n, sym->name)); + } + } + delete iter; + fclose(gen); } void ParserGen::WriteParser () { - Generator g = Generator(tab, errors); + Generator g(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart - symSet->Add(tab->allSyncSets); + symSet.Add(tab->allSyncSets); - fram = g.OpenFrame(L"Parser.frame"); - gen = g.OpenGen(L"Parser.h"); + fram = 
g.OpenFrame(_SC("Parser.frame")); + gen = g.OpenGen(_SC("Parser.h")); Symbol *sym; - for (int i=0; iterminals->Count; i++) { - sym = (Symbol*)((*(tab->terminals))[i]); + for (int i=0; iterminals.Count; i++) { + sym = tab->terminals[i]; GenErrorMsg(tErr, sym); } g.GenCopyright(); - g.SkipFramePart(L"-->begin"); + g.SkipFramePart(_SC("-->begin")); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->prefix"); + g.CopyFramePart(_SC("-->prefix")); g.GenPrefixFromNamespace(); - g.CopyFramePart(L"-->headerdef"); + g.CopyFramePart(_SC("-->headerdef")); - if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, L"\n");} - g.CopyFramePart(L"-->namespace_open"); + if (usingPos != NULL) {CopySourcePart(usingPos, 0); fputws(_SC("\n"), gen);} + g.CopyFramePart(_SC("-->namespace_open")); int nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->constantsheader"); + g.CopyFramePart(_SC("-->constantsheader")); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ - fwprintf(gen, L"\tint maxT;\n"); - g.CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0); - g.CopyFramePart(L"-->productionsheader"); GenProductionsHeader(); - g.CopyFramePart(L"-->namespace_close"); + fputws(_SC("\tint maxT;\n"), gen); + g.CopyFramePart(_SC("-->declarations")); CopySourcePart(tab->semDeclPos, 0); + g.CopyFramePart(_SC("-->productionsheader")); GenProductionsHeader(); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); - g.CopyFramePart(L"-->implementation"); + g.CopyFramePart(_SC("-->implementation")); fclose(gen); // Source - gen = g.OpenGen(L"Parser.cpp"); + gen = g.OpenGen(_SC("Parser.cpp")); g.GenCopyright(); - g.SkipFramePart(L"-->begin"); - g.CopyFramePart(L"-->namespace_open"); + g.SkipFramePart(_SC("-->begin")); + g.CopyFramePart(_SC("-->namespace_open")); nrOfNs = GenNamespaceOpen(tab->nsName); - g.CopyFramePart(L"-->pragmas"); GenCodePragmas(); - 
g.CopyFramePart(L"-->productions"); GenProductions(); - g.CopyFramePart(L"-->parseRoot"); fwprintf(gen, L"\t%ls();\n", tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, L"\tExpect(0);"); - g.CopyFramePart(L"-->constants"); - fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count-1); - g.CopyFramePart(L"-->initialization"); InitSets(); - g.CopyFramePart(L"-->errors"); fwprintf(gen, L"%ls", err); - g.CopyFramePart(L"-->namespace_close"); + g.CopyFramePart(_SC("-->pragmas")); GenCodePragmas(); + g.CopyFramePart(_SC("-->tbase")); GenTokenBase(); // write all tokens base types + g.CopyFramePart(_SC("-->productions")); GenProductions(); + g.CopyFramePart(_SC("-->parseRoot")); fwprintf(gen, _SC("\t%") _SFMT _SC("_NT();\n"), tab->gramSy->name); if (tab->checkEOF) fputws(_SC("\tExpect(0);"), gen); + g.CopyFramePart(_SC("-->constants")); + fwprintf(gen, _SC("\tmaxT = %d;\n"), tab->terminals.Count-1); + g.CopyFramePart(_SC("-->initialization")); InitSets(); + g.CopyFramePart(_SC("-->errors")); fwprintf(gen, _SC("%") _SFMT, err); + g.CopyFramePart(_SC("-->namespace_close")); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); fclose(gen); @@ -456,12 +634,11 @@ void ParserGen::WriteParser () { void ParserGen::WriteStatistics () { - fwprintf(trace, L"\n"); - fwprintf(trace, L"%d terminals\n", tab->terminals->Count); - fwprintf(trace, L"%d symbols\n", tab->terminals->Count + tab->pragmas->Count + - tab->nonterminals->Count); - fwprintf(trace, L"%d nodes\n", tab->nodes->Count); - fwprintf(trace, L"%d sets\n", symSet->Count); + fwprintf(trace, _SC("\n%d terminals\n"), tab->terminals.Count); + fwprintf(trace, _SC("%d symbols\n"), tab->terminals.Count + tab->pragmas.Count + + tab->nonterminals.Count); + fwprintf(trace, _SC("%d nodes\n"), tab->nodes.Count); + fwprintf(trace, _SC("%d sets\n"), symSet.Count); } @@ -479,8 +656,13 @@ ParserGen::ParserGen (Parser *parser) { errorNr = -1; usingPos = NULL; - symSet = new ArrayList(); err = NULL; } +ParserGen::~ParserGen () { + for(int i=0; 
i symSet; Tab *tab; // other Coco objects FILE* trace; @@ -68,18 +67,20 @@ class ParserGen Buffer *buffer; void Indent(int n); - bool UseSwitch(Node *p); + bool UseSwitch(const Node *p); void CopyFramePart(const wchar_t* stop); - void CopySourcePart(Position *pos, int indent); + void CopySourcePart(const Position *pos, int indent); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); - void GenErrorMsg(int errTyp, Symbol *sym); - int NewCondSet(BitArray *s); - void GenCond(BitArray *s, Node *p); - void PutCaseLabels(BitArray *s); - void GenCode(Node *p, int indent, BitArray *isChecked); + void GenErrorMsg(int errTyp, const Symbol *sym); + int NewCondSet(const BitArray *s); + void GenCond(const BitArray *s, const Node *p); + void PutCaseLabels(const BitArray *s); + BitArray *DerivationsOf(const BitArray *s); + void GenCode(const Node *p, int indent, BitArray *isChecked); void GenTokens(); void GenTokensHeader(); + void GenTokenBase(); void GenPragmas(); void GenPragmasHeader(); void GenCodePragmas(); @@ -87,10 +88,14 @@ class ParserGen void GenProductionsHeader(); void InitSets(); void OpenGen(const wchar_t* genName, bool backUp); + int GenCodeRREBNF(const Node *p, int depth=0); + void WriteRREBNF(); void WriteParser(); void WriteStatistics(); void WriteSymbolOrCode(FILE *gen, const Symbol *sym); + void CheckAstGen(); ParserGen (Parser *parser); + ~ParserGen(); }; diff --git a/src/Scanner.cpp b/src/Scanner.cpp index d58d8f7..e4b452c 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -75,13 +75,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((L'a' <= data[i]) && (data[i] <= L'z')) { - newData[i] = data[i] + (L'A' - L'a'); + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = L'\0'; + newData[dataLen] = _SC('\0'); return newData; } 
@@ -98,12 +98,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((L'A' <= ch) && (ch <= L'Z')) { - newData[i] = ch - (L'A' - L'a'); + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); } else { newData[i] = ch; } } - newData[dataLen] = L'\0'; + newData[dataLen] = _SC('\0'); return newData; } @@ -125,7 +125,7 @@ wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { return data; } -wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { int targetLen = coco_string_length(target); wchar_t* data = new wchar_t[targetLen + 2]; wcsncpy(data, target, targetLen); @@ -150,14 +150,14 @@ bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); } -int coco_string_indexof(const wchar_t* data, const wchar_t value) { +int coco_string_indexof(const wchar_t* data, const int value) { const wchar_t* chr = wcschr(data, value); if (chr) { return (chr-data); } return -1; } -int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { +int coco_string_lastindexof(const wchar_t* data, const int value) { const wchar_t* chr = wcsrchr(data, value); if (chr) { return (chr-data); } @@ -175,10 +175,26 @@ bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { return wcscmp( data1, data2 ) == 0; } +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + int 
coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp(data1, data2); +} + unsigned int coco_string_hash(const wchar_t *data) { unsigned int h = 0; if (!data) { return 0; } @@ -189,6 +205,16 @@ unsigned int coco_string_hash(const wchar_t *data) { return h; } +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char* value) { @@ -200,6 +226,12 @@ wchar_t* coco_string_create(const char* value) { return data; } +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + char* coco_string_create_char(const wchar_t *value) { int len = coco_string_length(value); char *res = new char[len + 1]; @@ -208,12 +240,6 @@ char* coco_string_create_char(const wchar_t *value) { return res; } -void coco_string_delete(char* &data) { - delete [] data; - data = NULL; -} - - Token::Token() { kind = 0; pos = 0; @@ -223,6 +249,17 @@ Token::Token() { next = NULL; } +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + Token::~Token() { coco_string_delete(val); } @@ -243,7 +280,7 @@ Buffer::Buffer(FILE* s, bool isUserStream) { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; - buf = new unsigned char[bufCapacity]; + buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); @@ -273,7 +310,7 @@ Buffer::Buffer(const unsigned char* buf, int len) { } Buffer::~Buffer() { - Close(); + Close(); if (buf != NULL) { delete [] buf; buf = NULL; @@ -316,9 +353,8 @@ wchar_t* Buffer::GetString(int beg, int end) { SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); - wchar_t *res = coco_string_create(buf, 0, len); - coco_string_delete(buf); - return res; + buf[len] = 0; + return buf; } int Buffer::GetPos() { @@ -335,7 +371,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(L"--- buffer out of bounds access, position: %d\n", value); + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -413,23 +449,24 @@ int UTF8Buffer::Read() { Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); + parseFileName = NULL; Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; - char *chFileName = coco_string_create_char(fileName); - if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(L"--- Cannot open file %ls\n", fileName); + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); exit(1); } - coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); + parseFileName = NULL; Init(); } @@ -443,13 +480,14 @@ Scanner::~Scanner() { } delete [] tval; delete buffer; + if(parseFileName) coco_string_delete(parseFileName); } void Scanner::Init() { EOL = '\n'; eofSym = 0; - maxT = 41; - noSym = 41; + maxT = 43; + noSym = 43; int 
i; for (i = 65; i <= 90; ++i) start.set(i, 1); for (i = 95; i <= 95; ++i) start.set(i, 1); @@ -459,36 +497,38 @@ void Scanner::Init() { start.set(39, 5); start.set(36, 13); start.set(61, 16); - start.set(46, 31); + start.set(46, 32); start.set(43, 17); start.set(45, 18); - start.set(60, 32); - start.set(62, 20); - start.set(124, 23); - start.set(40, 33); - start.set(41, 24); - start.set(91, 25); - start.set(93, 26); - start.set(123, 27); - start.set(125, 28); + start.set(58, 20); + start.set(60, 33); + start.set(62, 21); + start.set(124, 24); + start.set(40, 34); + start.set(41, 25); + start.set(91, 26); + start.set(93, 27); + start.set(123, 28); + start.set(125, 29); start.set(Buffer::EoF, -1); - keywords.set(L"COMPILER", 6); - keywords.set(L"IGNORECASE", 7); - keywords.set(L"CHARACTERS", 8); - keywords.set(L"TOKENS", 9); - keywords.set(L"PRAGMAS", 10); - keywords.set(L"COMMENTS", 11); - keywords.set(L"FROM", 12); - keywords.set(L"TO", 13); - keywords.set(L"NESTED", 14); - keywords.set(L"IGNORE", 15); - keywords.set(L"PRODUCTIONS", 16); - keywords.set(L"END", 19); - keywords.set(L"ANY", 23); - keywords.set(L"WEAK", 29); - keywords.set(L"SYNC", 36); - keywords.set(L"IF", 37); - keywords.set(L"CONTEXT", 38); + keywords.set(_SC("COMPILER"), 6); + keywords.set(_SC("IGNORECASE"), 7); + keywords.set(_SC("TERMINALS"), 8); + keywords.set(_SC("CHARACTERS"), 9); + keywords.set(_SC("TOKENS"), 10); + keywords.set(_SC("PRAGMAS"), 11); + keywords.set(_SC("COMMENTS"), 12); + keywords.set(_SC("FROM"), 13); + keywords.set(_SC("TO"), 14); + keywords.set(_SC("NESTED"), 15); + keywords.set(_SC("IGNORE"), 16); + keywords.set(_SC("PRODUCTIONS"), 17); + keywords.set(_SC("END"), 20); + keywords.set(_SC("ANY"), 24); + keywords.set(_SC("WEAK"), 31); + keywords.set(_SC("SYNC"), 38); + keywords.set(_SC("IF"), 39); + keywords.set(_SC("CONTEXT"), 40); tvalLength = 128; @@ -501,7 +541,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - 
wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -512,7 +552,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(L"Illegal byte order mark at start of file"); + wprintf(_SC("Illegal byte order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -533,7 +573,7 @@ void Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } @@ -557,7 +597,7 @@ void Scanner::AddCh() { bool Scanner::Comment0() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == L'/') { + if (ch == _SC('/')) { NextCh(); for(;;) { if (ch == 10) { @@ -567,36 +607,34 @@ bool Scanner::Comment0() { } else if (ch == buffer->EoF) return false; else NextCh(); } - } else { - buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } bool Scanner::Comment1() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); - if (ch == L'*') { + if (ch == _SC('*')) { NextCh(); for(;;) { - if (ch == L'*') { + if (ch == _SC('*')) { NextCh(); - if (ch == L'/') { - level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } - NextCh(); + if (ch == _SC('/')) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); } - } else if (ch == L'/') { + } else if (ch == _SC('/')) { NextCh(); - if (ch == L'*') { + if (ch == _SC('*')) { level++; NextCh(); } } else if (ch == buffer->EoF) return false; else NextCh(); } - } else { - buffer->SetPos(pos0); NextCh(); line = line0; col = col0; 
charPos = charPos0; } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -636,7 +674,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too long token value\n"); + wprintf(_SC("--- Too long token value\n")); exit(1); } CreateHeapBlock(); @@ -645,14 +683,18 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = L'\0'; + t->val[tlen] = _SC('\0'); } Token* Scanner::NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == _SC(' ') || (ch >= 9 && ch <= 10) || ch == 13 - ) NextCh(); - if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken(); + ) NextCh(); + if ((ch == _SC('/') && Comment0()) || (ch == _SC('/') && Comment1())) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = CreateToken(); @@ -660,34 +702,34 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - switch (state) { - case -1: { t->kind = eofSym; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; break; - } // NextCh already done + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done case 1: case_1: - recEnd = pos; recKind = 1; - if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} - else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch 
>= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} case 2: case_2: - recEnd = pos; recKind = 2; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;} - else {t->kind = 2; break;} + recEnd = pos; recKind = 2 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_2;} + else {t->kind = 2 /* number */; break;} case 3: case_3: - {t->kind = 3; break;} + {t->kind = 3 /* string */; break;} case 4: case_4: - {t->kind = 4; break;} + {t->kind = 4 /* badString */; break;} case 5: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_6;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('&')) || (ch >= _SC('(') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} else {goto case_0;} case 6: @@ -696,100 +738,102 @@ Token* Scanner::NextToken() { else {goto case_0;} case 7: case_7: - if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_8;} + if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_8;} else {goto case_0;} case 8: case_8: - if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_8;} + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('a') && ch <= _SC('f'))) {AddCh(); goto case_8;} else if (ch == 39) {AddCh(); goto case_9;} else {goto case_0;} case 9: case_9: - {t->kind = 5; break;} + {t->kind = 5 /* char */; break;} case 10: case_10: - recEnd = pos; recKind = 42; - if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} - else {t->kind = 42; break;} + recEnd = pos; recKind = 44 /* ddtSym */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && 
ch <= _SC('z'))) {AddCh(); goto case_10;} + else {t->kind = 44 /* ddtSym */; break;} case 11: case_11: - recEnd = pos; recKind = 43; - if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} - else {t->kind = 43; break;} + recEnd = pos; recKind = 45 /* optionSym */; + if ((ch >= _SC('-') && ch <= _SC('.')) || (ch >= _SC('0') && ch <= _SC(':')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_11;} + else {t->kind = 45 /* optionSym */; break;} case 12: case_12: - if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= _SC('!')) || (ch >= _SC('#') && ch <= _SC('[')) || (ch >= _SC(']') && ch <= 255)) {AddCh(); goto case_12;} else if (ch == 10 || ch == 13) {AddCh(); goto case_4;} - else if (ch == L'"') {AddCh(); goto case_3;} + else if (ch == _SC('"')) {AddCh(); goto case_3;} else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: - recEnd = pos; recKind = 42; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} - else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else {t->kind = 42; break;} + recEnd = pos; recKind = 44 /* ddtSym */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} + else {t->kind = 44 /* ddtSym */; break;} case 14: case_14: - if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} + if ((ch >= _SC(' ') && ch <= _SC('~'))) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: - recEnd = pos; recKind = 42; - if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} - else if ((ch >= L'A' && ch <= 
L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} - else if (ch == L'=') {AddCh(); goto case_11;} - else {t->kind = 42; break;} + recEnd = pos; recKind = 44 /* ddtSym */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if ((ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('_') || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_15;} + else if (ch == _SC('=')) {AddCh(); goto case_11;} + else {t->kind = 44 /* ddtSym */; break;} case 16: - {t->kind = 17; break;} + {t->kind = 18 /* "=" */; break;} case 17: - {t->kind = 20; break;} + {t->kind = 21 /* "+" */; break;} case 18: - {t->kind = 21; break;} + {t->kind = 22 /* "-" */; break;} case 19: case_19: - {t->kind = 22; break;} + {t->kind = 23 /* ".." */; break;} case 20: - {t->kind = 25; break;} + {t->kind = 25 /* ":" */; break;} case 21: - case_21: - {t->kind = 26; break;} + {t->kind = 27 /* ">" */; break;} case 22: case_22: - {t->kind = 27; break;} + {t->kind = 28 /* "<." */; break;} case 23: - {t->kind = 28; break;} + case_23: + {t->kind = 29 /* ".>" */; break;} case 24: - {t->kind = 31; break;} + {t->kind = 30 /* "|" */; break;} case 25: - {t->kind = 32; break;} + {t->kind = 33 /* ")" */; break;} case 26: - {t->kind = 33; break;} + {t->kind = 34 /* "[" */; break;} case 27: - {t->kind = 34; break;} + {t->kind = 35 /* "]" */; break;} case 28: - {t->kind = 35; break;} + {t->kind = 36 /* "{" */; break;} case 29: - case_29: - {t->kind = 39; break;} + {t->kind = 37 /* "}" */; break;} case 30: case_30: - {t->kind = 40; break;} + {t->kind = 41 /* "(." */; break;} case 31: - recEnd = pos; recKind = 18; - if (ch == L'.') {AddCh(); goto case_19;} - else if (ch == L'>') {AddCh(); goto case_22;} - else if (ch == L')') {AddCh(); goto case_30;} - else {t->kind = 18; break;} + case_31: + {t->kind = 42 /* ".)" */; break;} case 32: - recEnd = pos; recKind = 24; - if (ch == L'.') {AddCh(); goto case_21;} - else {t->kind = 24; break;} + recEnd = pos; recKind = 19 /* "." 
*/; + if (ch == _SC('.')) {AddCh(); goto case_19;} + else if (ch == _SC('>')) {AddCh(); goto case_23;} + else if (ch == _SC(')')) {AddCh(); goto case_31;} + else {t->kind = 19 /* "." */; break;} case 33: - recEnd = pos; recKind = 30; - if (ch == L'.') {AddCh(); goto case_29;} - else {t->kind = 30; break;} - - } + recEnd = pos; recKind = 26 /* "<" */; + if (ch == _SC('.')) {AddCh(); goto case_22;} + else {t->kind = 26 /* "<" */; break;} + case 34: + recEnd = pos; recKind = 32 /* "(" */; + if (ch == _SC('.')) {AddCh(); goto case_30;} + else {t->kind = 32 /* "(" */; break;} + + } AppendVal(t); return t; } diff --git a/src/Scanner.frame b/src/Scanner.frame index 0c36f7b..357569c 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ @@ -39,7 +39,7 @@ Scanner.h Specification #include #include #include -#include +#include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 @@ -47,6 +47,48 @@ Scanner.h Specification #include #endif +#define WITHOUT_WCHAR + +#ifdef WITHOUT_WCHAR +#define wchar_t char +#define _CHFMT "c" +#define _SFMT "s" +#define _SC(s) s +#define fputws fputs +#define wprintf printf +#define swprintf snprintf +#define fwprintf fprintf +#define fwscanf fscanf +#define swscanf sscanf +#define wcslen strlen +#define wcscpy strcpy +#define wcsncpy strncpy +#define wcscmp strcmp +#define wcsncmp strncmp +#define wcschr strchr +#define wcsrchr strrchr +#define wcscasecmp strcasecmp +#define wcsncasecmp strncasecmp + +#if _MSC_VER >= 1400 +#define coco_swprintf snprintf_s +#elif _MSC_VER >= 1300 +#define coco_swprintf _snprintf +#elif defined __MINGW32__ +#define coco_swprintf _snprintf +#else +// assume every other compiler knows sprintf +#define coco_swprintf snprintf +#endif + +#define COCO_WCHAR_MAX 255 + +#else +#include +#define _CHFMT L"lc" +#define _SFMT L"ls" +#define _SC(s) L##s + #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 @@ -59,10 +101,13 @@ Scanner.h Specification 
#endif #define COCO_WCHAR_MAX 65535 + +#endif + #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR L':' +#define COCO_CPP_NAMESPACE_SEPARATOR _SC(':') -->namespace_open @@ -74,24 +119,109 @@ wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); -wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); +wchar_t* coco_string_create_append(const wchar_t* data, const int value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t *value); -int coco_string_indexof(const wchar_t* data, const wchar_t value); -int coco_string_lastindexof(const wchar_t* data, const wchar_t value); +int coco_string_indexof(const wchar_t* data, const int value); +int coco_string_lastindexof(const wchar_t* data, const int value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); +unsigned int coco_string_hash(const wchar_t* data, size_t size); +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char *value); -char* coco_string_create_char(const wchar_t *value); void 
coco_string_delete(char* &data); +#endif +char* coco_string_create_char(const wchar_t *value); +template +class TArrayList +{ + T *Data; +public: + typedef int tsize_t; + tsize_t Count; + tsize_t Capacity; + + TArrayList() { + Count = 0; + Capacity = 10; + Data = new T[ Capacity ]; + } + virtual ~TArrayList() { + delete [] Data; + } + + void Add(T value) { + if (Count < Capacity) { + Data[Count] = value; + Count++; + } else { + Capacity *= 2; + T* newData = new T[Capacity]; + for (tsize_t i=0; inext = tab[k]; tab[k] = e; } - int get(const wchar_t *key, int defaultVal) { - Elem *e = tab[coco_string_hash(key) % 128]; - while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; + int get(const wchar_t *key, size_t size, int defaultVal, bool ignoreCase) { + Elem *e = tab[coco_string_hash(key, size) % 128]; + if(ignoreCase) { + while (e != NULL && !coco_string_equal_nocase_n(e->key, key, size)) e = e->next; + } + else { + while (e != NULL && !coco_string_equal_n(e->key, key, size)) e = e->next; + } return e == NULL ? defaultVal : e->val; } }; @@ -242,7 +378,6 @@ private: int eofSym; int noSym; int maxT; - int charSetSize; StartStates start; KeywordMap keywords; @@ -262,6 +397,8 @@ private: int col; // column number of current character int oldEols; // EOLs that appeared in a comment; + char *parseFileName; + void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); @@ -275,7 +412,7 @@ private: public: Buffer *buffer; // scanner buffer - + Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); @@ -283,6 +420,9 @@ public: Token* Scan(); Token* Peek(); void ResetPeek(); + const char *GetParserFileName() { + return parseFileName ? 
parseFileName : "unknown"; + }; }; // end Scanner @@ -345,13 +485,13 @@ wchar_t* coco_string_create_upper(const wchar_t* data) { wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { - if ((L'a' <= data[i]) && (data[i] <= L'z')) { - newData[i] = data[i] + (L'A' - L'a'); + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); } else { newData[i] = data[i]; } } - newData[dataLen] = L'\0'; + newData[dataLen] = _SC('\0'); return newData; } @@ -368,12 +508,12 @@ wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataL for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; - if ((L'A' <= ch) && (ch <= L'Z')) { - newData[i] = ch - (L'A' - L'a'); + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); } else { newData[i] = ch; } } - newData[dataLen] = L'\0'; + newData[dataLen] = _SC('\0'); return newData; } @@ -395,7 +535,7 @@ wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { return data; } -wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { int targetLen = coco_string_length(target); wchar_t* data = new wchar_t[targetLen + 2]; wcsncpy(data, target, targetLen); @@ -420,14 +560,14 @@ bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); } -int coco_string_indexof(const wchar_t* data, const wchar_t value) { +int coco_string_indexof(const wchar_t* data, const int value) { const wchar_t* chr = wcschr(data, value); if (chr) { return (chr-data); } return -1; } -int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { +int coco_string_lastindexof(const wchar_t* data, const int value) { const wchar_t* chr = wcsrchr(data, value); if (chr) { return (chr-data); } @@ -445,10 +585,26 @@ bool 
coco_string_equal(const wchar_t* data1, const wchar_t* data2) { return wcscmp( data1, data2 ) == 0; } +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp(data1, data2); +} + unsigned int coco_string_hash(const wchar_t *data) { unsigned int h = 0; if (!data) { return 0; } @@ -459,6 +615,16 @@ unsigned int coco_string_hash(const wchar_t *data) { return h; } +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char* value) { @@ -470,6 +636,12 @@ wchar_t* coco_string_create(const char* value) { return data; } +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + char* coco_string_create_char(const wchar_t *value) { int len = coco_string_length(value); char *res = new char[len + 1]; @@ -478,12 +650,6 @@ char* coco_string_create_char(const wchar_t *value) { return res; } -void coco_string_delete(char* &data) { - delete [] data; - data = NULL; -} - - Token::Token() { kind = 0; pos = 0; @@ -493,6 +659,17 @@ Token::Token() { next = NULL; } +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + Token::~Token() { 
coco_string_delete(val); } @@ -513,7 +690,7 @@ Buffer::Buffer(FILE* s, bool isUserStream) { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; - buf = new unsigned char[bufCapacity]; + buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); @@ -543,7 +720,7 @@ Buffer::Buffer(const unsigned char* buf, int len) { } Buffer::~Buffer() { - Close(); + Close(); if (buf != NULL) { delete [] buf; buf = NULL; @@ -586,9 +763,8 @@ wchar_t* Buffer::GetString(int beg, int end) { SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); - wchar_t *res = coco_string_create(buf, 0, len); - coco_string_delete(buf); - return res; + buf[len] = 0; + return buf; } int Buffer::GetPos() { @@ -605,7 +781,7 @@ void Buffer::SetPos(int value) { } if ((value < 0) || (value > fileLen)) { - wprintf(L"--- buffer out of bounds access, position: %d\n", value); + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); exit(1); } @@ -683,23 +859,24 @@ int UTF8Buffer::Read() { Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); + parseFileName = NULL; Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; - char *chFileName = coco_string_create_char(fileName); - if ((stream = fopen(chFileName, "rb")) == NULL) { - wprintf(L"--- Cannot open file %ls\n", fileName); + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); exit(1); } - coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); + parseFileName = NULL; Init(); } @@ -713,6 +890,7 @@ Scanner::~Scanner() { } delete [] tval; delete buffer; + 
if(parseFileName) coco_string_delete(parseFileName); } void Scanner::Init() { @@ -730,7 +908,7 @@ void Scanner::Init() { *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); exit(1); } @@ -741,7 +919,7 @@ void Scanner::Init() { NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { - wprintf(L"Illegal byte order mark at start of file"); + wprintf(_SC("Illegal byte order mark at start of file")); exit(1); } Buffer *oldBuf = buffer; @@ -762,7 +940,7 @@ void Scanner::NextCh() { ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac - if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; if (ch == EOL) { line++; col = 0; } } -->casing1 @@ -819,7 +997,7 @@ void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { - wprintf(L"--- Too long token value\n"); + wprintf(_SC("--- Too long token value\n")); exit(1); } CreateHeapBlock(); @@ -828,14 +1006,18 @@ void Scanner::AppendVal(Token *t) { heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); - t->val[tlen] = L'\0'; + t->val[tlen] = _SC('\0'); } Token* Scanner::NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == _SC(' ') || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = CreateToken(); @@ -843,18 +1025,18 @@ Token* Scanner::NextToken() { int state = start.state(ch); tlen = 0; AddCh(); - switch (state) { - case -1: { t->kind = eofSym; break; } // NextCh already done - case 0: { - case_0: - if (recKind != noSym) { - tlen = recEnd - t->pos; - SetScannerBehindT(); - } - t->kind = recKind; break; - } // NextCh already done 
+ switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done -->scan3 - } + } AppendVal(t); return t; } diff --git a/src/Scanner.h b/src/Scanner.h index b183771..989c4a0 100644 --- a/src/Scanner.h +++ b/src/Scanner.h @@ -34,7 +34,7 @@ Coco/R itself) does not fall under the GNU General Public License. #include #include #include -#include +#include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 @@ -42,6 +42,48 @@ Coco/R itself) does not fall under the GNU General Public License. #include #endif +#define WITHOUT_WCHAR + +#ifdef WITHOUT_WCHAR +#define wchar_t char +#define _CHFMT "c" +#define _SFMT "s" +#define _SC(s) s +#define fputws fputs +#define wprintf printf +#define swprintf snprintf +#define fwprintf fprintf +#define fwscanf fscanf +#define swscanf sscanf +#define wcslen strlen +#define wcscpy strcpy +#define wcsncpy strncpy +#define wcscmp strcmp +#define wcsncmp strncmp +#define wcschr strchr +#define wcsrchr strrchr +#define wcscasecmp strcasecmp +#define wcsncasecmp strncasecmp + +#if _MSC_VER >= 1400 +#define coco_swprintf snprintf_s +#elif _MSC_VER >= 1300 +#define coco_swprintf _snprintf +#elif defined __MINGW32__ +#define coco_swprintf _snprintf +#else +// assume every other compiler knows sprintf +#define coco_swprintf snprintf +#endif + +#define COCO_WCHAR_MAX 255 + +#else +#include +#define _CHFMT L"lc" +#define _SFMT L"ls" +#define _SC(s) L##s + #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 @@ -54,10 +96,13 @@ Coco/R itself) does not fall under the GNU General Public License. 
#endif #define COCO_WCHAR_MAX 65535 + +#endif + #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) -#define COCO_CPP_NAMESPACE_SEPARATOR L':' +#define COCO_CPP_NAMESPACE_SEPARATOR _SC(':') namespace Coco { @@ -70,24 +115,109 @@ wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); -wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); +wchar_t* coco_string_create_append(const wchar_t* data, const int value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t *value); -int coco_string_indexof(const wchar_t* data, const wchar_t value); -int coco_string_lastindexof(const wchar_t* data, const wchar_t value); +int coco_string_indexof(const wchar_t* data, const int value); +int coco_string_lastindexof(const wchar_t* data, const int value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2); +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size); +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); +int coco_string_compareto_nocase(const wchar_t* data1, const wchar_t* data2); unsigned int coco_string_hash(const wchar_t* data); +unsigned int coco_string_hash(const wchar_t* data, size_t size); +#ifndef WITHOUT_WCHAR // string handling, ascii character wchar_t* coco_string_create(const char *value); -char* coco_string_create_char(const wchar_t *value); void 
coco_string_delete(char* &data); +#endif +char* coco_string_create_char(const wchar_t *value); +template +class TArrayList +{ + T *Data; +public: + typedef int tsize_t; + tsize_t Count; + tsize_t Capacity; + + TArrayList() { + Count = 0; + Capacity = 10; + Data = new T[ Capacity ]; + } + virtual ~TArrayList() { + delete [] Data; + } + + void Add(T value) { + if (Count < Capacity) { + Data[Count] = value; + Count++; + } else { + Capacity *= 2; + T* newData = new T[Capacity]; + for (tsize_t i=0; inext = tab[k]; tab[k] = e; } - int get(const wchar_t *key, int defaultVal) { - Elem *e = tab[coco_string_hash(key) % 128]; - while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; + int get(const wchar_t *key, size_t size, int defaultVal, bool ignoreCase) { + Elem *e = tab[coco_string_hash(key, size) % 128]; + if(ignoreCase) { + while (e != NULL && !coco_string_equal_nocase_n(e->key, key, size)) e = e->next; + } + else { + while (e != NULL && !coco_string_equal_n(e->key, key, size)) e = e->next; + } return e == NULL ? defaultVal : e->val; } }; @@ -238,7 +374,6 @@ class Scanner { int eofSym; int noSym; int maxT; - int charSetSize; StartStates start; KeywordMap keywords; @@ -258,6 +393,8 @@ class Scanner { int col; // column number of current character int oldEols; // EOLs that appeared in a comment; + char *parseFileName; + void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); @@ -273,7 +410,7 @@ class Scanner { public: Buffer *buffer; // scanner buffer - + Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); @@ -281,6 +418,9 @@ class Scanner { Token* Scan(); Token* Peek(); void ResetPeek(); + const char *GetParserFileName() { + return parseFileName ? 
parseFileName : "unknown"; + }; }; // end Scanner diff --git a/src/Sets.h b/src/Sets.h index 4acd050..c09f407 100644 --- a/src/Sets.h +++ b/src/Sets.h @@ -35,14 +35,14 @@ namespace Coco { class Sets { public: - static int First(BitArray *s) { + static int First(const BitArray *s) { int max = s->getCount(); for (int i=0; igetCount(); int n = 0; for (int i=0; igetCount(); for (int i=0; i b ? + static bool Includes(const BitArray *a, const BitArray *b) { // a > b ? int max = a->getCount(); for (int i=0; igetCount(); for (int i=0; iClone(); c->Not(); a->And(c); diff --git a/src/SortedList.cpp b/src/SortedList.cpp index 656100e..0b78eee 100644 --- a/src/SortedList.cpp +++ b/src/SortedList.cpp @@ -32,17 +32,22 @@ Coco/R itself) does not fall under the GNU General Public License. namespace Coco { -int Compare(Symbol *x, Symbol *y) { +int Compare(const Symbol *x, const Symbol *y) { return coco_string_compareto(x->name, y->name); } -SortedEntry::SortedEntry(Symbol* Key, void* Value) { +int CompareNocase(const Symbol *x, const Symbol *y) { + return coco_string_compareto_nocase(x->name, y->name); +} + +SortedEntry::SortedEntry(const Symbol* Key, const void* Value) { this->Key = Key; this->Value = Value; this->next = NULL; } SortedEntry::~SortedEntry() { + delete next; }; SortedList::SortedList() { @@ -51,9 +56,10 @@ SortedList::SortedList() { } SortedList::~SortedList() { + delete Data; } -bool SortedList::Find(Symbol* key) { +bool SortedList::Find(const Symbol* key) { SortedEntry* pSortedEntry = Data; while (pSortedEntry) { if (!Compare(pSortedEntry->Key, key)) @@ -63,7 +69,7 @@ bool SortedList::Find(Symbol* key) { return false; } -void SortedList::Set(Symbol *key, void *value) { +void SortedList::Set(const Symbol *key, const void *value) { if (!Find(key)) { // new entry SortedEntry* pSortedEntry = Data; @@ -101,7 +107,7 @@ void SortedList::Set(Symbol *key, void *value) { } } -void* SortedList::Get( Symbol* key ) const // Value +const void* SortedList::Get( const Symbol* key 
) const // Value { SortedEntry* pSortedEntry = Data; while (pSortedEntry) { @@ -113,7 +119,7 @@ void* SortedList::Get( Symbol* key ) const // Value } -void* SortedList::GetKey( int index ) const // Key +const void* SortedList::GetKey( int index ) const // Key { if (0 <= index && index < Count) { SortedEntry* pSortedEntry = Data; diff --git a/src/SortedList.h b/src/SortedList.h index 5939675..2008d8f 100644 --- a/src/SortedList.h +++ b/src/SortedList.h @@ -36,11 +36,11 @@ class Symbol; class SortedEntry { public: - Symbol* Key; - void* Value; + const Symbol* Key; + const void* Value; SortedEntry* next; - SortedEntry(Symbol* Key, void* Value); + SortedEntry(const Symbol* Key, const void* Value); virtual ~SortedEntry(); }; @@ -50,14 +50,14 @@ class SortedList SortedList(); virtual ~SortedList(); - void Set(Symbol *key, void *value); - void* Get( Symbol* key ) const; // Value - void* GetKey( int index ) const ;// Key + void Set(const Symbol *key, const void *value); + const void* Get( const Symbol* key ) const; // Value + const void* GetKey( int index ) const ;// Key SortedEntry* operator[]( int index ) const; int Count; private: - bool Find(Symbol* key); + bool Find(const Symbol* key); SortedEntry *Data; diff --git a/src/State.cpp b/src/State.cpp index 1f7eeb6..42e43eb 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ @@ -36,6 +36,12 @@ State::State() { this->endOf = NULL; this->ctx = false; this->next = NULL; + this->rmin = this->rmax = 0; +} + +State::~State() { + delete firstAction; + delete next; } void State::AddAction(Action *act) { @@ -49,9 +55,9 @@ void State::AddAction(Action *act) { else { lasta->next = act; } -} +} -void State::DetachAction(Action *act) { +bool State::DetachAction(Action *act) { Action *lasta = NULL, *a = firstAction; while (a != NULL && a != act) {lasta = a; a = a->next;} if (a != NULL) { @@ -61,7 +67,10 @@ void State::DetachAction(Action *act) { else { lasta->next = a->next; } + a->next = NULL; delete a; + return true; } + return false; } diff --git a/src/State.h b/src/State.h index 318d77a..3073e22 100644 --- a/src/State.h +++ b/src/State.h @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ @@ -43,10 +43,12 @@ class State // state of finite automaton Symbol *endOf; // recognized token if state is final bool ctx; // true if state is reached via contextTrans State *next; - + int rmin, rmax; // repetition quantifiers + State(); + ~State(); void AddAction(Action *act); - void DetachAction(Action *act); + bool DetachAction(Action *act); void MeltWith(State *s); }; diff --git a/src/StringBuilder.cpp b/src/StringBuilder.cpp index a9cf4b4..038519f 100644 --- a/src/StringBuilder.cpp +++ b/src/StringBuilder.cpp @@ -28,13 +28,12 @@ Coco/R itself) does not fall under the GNU General Public License. 
#include #include "StringBuilder.h" -#include "Scanner.h" namespace Coco { void StringBuilder::Init(int capacity) { length = 0; - this->capacity = capacity; + this->_capacity = capacity; data = new wchar_t[capacity + 1]; data[0] = 0; } @@ -44,8 +43,7 @@ StringBuilder::StringBuilder(int capacity) { } StringBuilder::StringBuilder(const wchar_t *val) { - capacity = length = wcslen(val); - Init(capacity); + Init(wcslen(val)); wcscpy(data, val); } @@ -54,30 +52,35 @@ StringBuilder::~StringBuilder() { delete [] data; data = NULL; length = 0; - capacity = 0; + _capacity = 0; } } -void StringBuilder::Append(const wchar_t value) { - if (length == capacity) { - int oldCap = capacity; - capacity = capacity * 2; - wchar_t *nData = new wchar_t[capacity + 1]; - memcpy(nData, data, oldCap * sizeof(int)); - delete [] data; - data = nData; +void StringBuilder::capacity(int new_capacity) { + wchar_t *nData = new wchar_t[new_capacity + 1]; + memcpy(nData, data, _capacity * sizeof(wchar_t)); + delete [] data; + data = nData; + _capacity = new_capacity; +} + +void StringBuilder::Append(const int value) { + if (length == _capacity) { + capacity(_capacity * 2); } data[length] = value; length++; - data[length] = '\0'; + data[length] = _SC('\0'); } void StringBuilder::Append(const wchar_t *value) { - if (length + (int)wcslen(value) < capacity) { - wcscpy(data + length, value); - length += wcslen(value); - } + int slen = (int)wcslen(value); + if (length + slen >= _capacity) { + capacity(length + slen + 1); + } + wcscpy(data + length, value); + length += slen; } diff --git a/src/StringBuilder.h b/src/StringBuilder.h index 35c8cd4..6367f1a 100644 --- a/src/StringBuilder.h +++ b/src/StringBuilder.h @@ -1,7 +1,7 @@ #if !defined(COCO_STRINGBUILDER_H__) #define COCO_STRINGBUILDER_H__ -#include +#include "Scanner.h" namespace Coco { @@ -12,15 +12,16 @@ class StringBuilder StringBuilder(const wchar_t *val); virtual ~StringBuilder(); - void Append(const wchar_t val); + void Append(const int val); 
void Append(const wchar_t *val); + void capacity(int new_capacity); wchar_t* ToString(); int GetLength() { return length; }; private: void Init(int capacity); wchar_t *data; - int capacity; + int _capacity; int length; }; diff --git a/src/Symbol.cpp b/src/Symbol.cpp index a340d99..1980f47 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -5,39 +5,39 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. 
-If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Symbol.h" -#include "Scanner.h" +#include "BitArray.h" namespace Coco { -int Symbol::fixedToken = 0; -int Symbol::classToken = 1; -int Symbol::litToken = 2; -int Symbol::classLitToken = 3; +const int Symbol::fixedToken = 0; +const int Symbol::classToken = 1; +const int Symbol::litToken = 2; +const int Symbol::classLitToken = 3; -Symbol::Symbol(int typ, const wchar_t* name, int line) { +Symbol::Symbol(NodeType typ, const wchar_t* name, int line, int col) { n = 0; graph = NULL; tokenKind = 0; @@ -48,14 +48,21 @@ Symbol::Symbol(int typ, const wchar_t* name, int line) { nts = NULL; attrPos = NULL; semPos = NULL; + inherits = NULL; this->typ = typ; this->name = coco_string_create(name); this->line = line; + this->col = col; } Symbol::~Symbol() { coco_string_delete(name); + delete this->first; + delete this->follow; + delete this->nts; + delete this->semPos; + delete this->attrPos; } }; // namespace diff --git a/src/Symbol.h b/src/Symbol.h index 63cb8e8..b696f6d 100644 --- a/src/Symbol.h +++ b/src/Symbol.h @@ -29,8 +29,9 @@ Coco/R itself) does not fall under the GNU General Public License. #if !defined(COCO_SYMBOL_H__) #define COCO_SYMBOL_H__ -#include "Position.h" #include "Scanner.h" +#include "Position.h" +#include "NodeSymbolKind.h" namespace Coco { @@ -40,13 +41,13 @@ class BitArray; class Symbol { public: // token kinds - static int fixedToken; // e.g. 'a' ('b' | 'c') (structure of literals) - static int classToken; // e.g. digit {digit} (at least one char class) - static int litToken; // e.g. "while" - static int classLitToken; // e.g. letter {letter} but without literals that have the same structure*/ + static const int fixedToken; // e.g. 
'a' ('b' | 'c') (structure of literals) + static const int classToken; // e.g. digit {digit} (at least one char class) + static const int litToken; // e.g. "while" + static const int classLitToken; // e.g. letter {letter} but without literals that have the same structure*/ int n; // symbol number - int typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ + NodeType typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ wchar_t *name; // symbol name Node *graph; // nt: to first node of syntax graph int tokenKind; // t: token kind (fixedToken, classToken, ...) @@ -56,12 +57,13 @@ class Symbol { BitArray *follow; // nt: terminal followers BitArray *nts; // nt: nonterminals whose followers have to be added to this sym int line; // source text line number of item in this node + int col; // source text line column number of item in this node Position *attrPos; // nt: position of attributes in source text (or null) Position *semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) + Symbol *inherits; // optional, token from which this token derives - - Symbol(int typ, const wchar_t* name, int line); + Symbol(NodeType typ, const wchar_t* name, int line, int col); virtual ~Symbol(); }; diff --git a/src/Tab.cpp b/src/Tab.cpp index 731ec12..6c9eac5 100644 --- a/src/Tab.cpp +++ b/src/Tab.cpp @@ -27,11 +27,9 @@ If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ -#include #include "Tab.h" #include "Parser.h" #include "BitArray.h" -#include "Scanner.h" namespace Coco { @@ -44,37 +42,53 @@ const char* Tab::tKind[] = {"fixedToken", "classToken", "litToken", "classLitTok Tab::Tab(Parser *parser) { for (int i=0; i<10; i++) ddt[i] = false; - terminals = new ArrayList(); - pragmas = new ArrayList(); - nonterminals = new ArrayList(); - nodes = new ArrayList(); dummyNode = NULL; - classes= new ArrayList(); dummyName = 'A'; this->parser = parser; trace = parser->trace; errors = parser->errors; - eofSy = NewSym(Node::t, L"EOF", 0); - dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); - literals = new HashTable(); + eofSy = NewSym(NodeType::t, _SC("EOF"), 0, 0); + dummyNode = NewNode(NodeType::eps, (Symbol*)NULL, 0, 0); checkEOF = true; -} - - -Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { + visited = allSyncSets = NULL; + srcName = srcDir = nsName = frameDir = outDir = NULL; + genRREBNF = false; +} + +Tab::~Tab() { + for(int i=0; iSemErr(L"empty token not allowed"); - name = coco_string_create(L"???"); + parser->SemErr(_SC("empty token not allowed")); + name = coco_string_create(_SC("???")); } - Symbol *sym = new Symbol(typ, name, line); + Symbol *sym = new Symbol(typ, name, line, col); - if (typ == Node::t) { - sym->n = terminals->Count; terminals->Add(sym); - } else if (typ == Node::pr) { - pragmas->Add(sym); - } else if (typ == Node::nt) { - sym->n = nonterminals->Count; nonterminals->Add(sym); + if (typ == NodeType::t) { + sym->n = terminals.Count; terminals.Add(sym); + } else if (typ == NodeType::pr) { + pragmas.Add(sym); + } else if (typ == NodeType::nt) { + sym->n = nonterminals.Count; nonterminals.Add(sym); } return sym; @@ -83,115 +97,116 @@ Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { Symbol* Tab::FindSym(const wchar_t* name) { Symbol *s; int i; - for (i=0; iCount; i++) { - s = (Symbol*)((*terminals)[i]); + for (i=0; 
iname, name)) return s; } - for (i=0; iCount; i++) { - s = (Symbol*)((*nonterminals)[i]); + for (i=0; iname, name)) return s; } return NULL; } -int Tab::Num(Node *p) { +int Tab::Num(const Node *p) { if (p == NULL) return 0; else return p->n; } -void Tab::PrintSym(Symbol *sym) { - wchar_t *paddedName = Name(sym->name); - fwprintf(trace, L"%3d %14s %ls", sym->n, paddedName, nTyp[sym->typ]); - coco_string_delete(paddedName); +void Tab::PrintSym(const Symbol *sym) { + fwprintf(trace, _SC("%3d %-14.14") _SFMT _SC(" %s"), sym->n, sym->name, nTyp[sym->typ]); - if (sym->attrPos==NULL) fwprintf(trace, L" false "); else fwprintf(trace, L" true "); - if (sym->typ == Node::nt) { - fwprintf(trace, L"%5d", Num(sym->graph)); - if (sym->deletable) fwprintf(trace, L" true "); else fwprintf(trace, L" false "); + if (sym->attrPos==NULL) fputws(_SC(" false "), trace); else fputws(_SC(" true "), trace); + if (sym->typ == NodeType::nt) { + fwprintf(trace, _SC("%5d"), Num(sym->graph)); + if (sym->deletable) fputws(_SC(" true "), trace); else fputws(_SC(" false "), trace); } else - fwprintf(trace, L" "); + fputws(_SC(" "), trace); - fwprintf(trace, L"%5d %ls\n", sym->line, tKind[sym->tokenKind]); + fwprintf(trace, _SC("%5d %s\n"), sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { - fwprintf(trace, L"Symbol Table:\n"); - fwprintf(trace, L"------------\n\n"); - fwprintf(trace, L" nr name typ hasAt graph del line tokenKind\n"); + fwprintf(trace, _SC("%s"), + "Symbol Table:\n" + "------------\n\n" + " nr name typ hasAt graph del line tokenKind\n"); Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (i=0; iCount; i++) { - sym = (Symbol*)((*pragmas)[i]); + for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; iGetIterator(); + Iterator *iter = literals.GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); - fwprintf(trace, L"_%ls = %ls.\n", ((Symbol*) (e->val))->name, e->key); + fwprintf(trace, 
_SC("_%") _SFMT _SC(" = %") _SFMT _SC(".\n"), ((Symbol*) (e->val))->name, e->key); } - fwprintf(trace, L"\n"); + delete iter; + fputws(_SC("\n"), trace); } -void Tab::PrintSet(BitArray *s, int indent) { +void Tab::PrintSet(const BitArray *s, int indent) { int col, len; col = indent; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (int i=0; in]) { len = coco_string_length(sym->name); if (col + len >= 80) { - fwprintf(trace, L"\n"); - for (col = 1; col < indent; col++) fwprintf(trace, L" "); + fputws(_SC("\n"), trace); + for (col = 1; col < indent; col++) fputws(_SC(" "), trace); } - fwprintf(trace, L"%ls ", sym->name); + fwprintf(trace, _SC("%") _SFMT _SC(" "), sym->name); col += len + 1; } } - if (col == indent) fwprintf(trace, L"-- empty set --"); - fwprintf(trace, L"\n"); + if (col == indent) fputws(_SC("-- empty set --"), trace); + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- // Syntax graph management //--------------------------------------------------------------------- -Node* Tab::NewNode(int typ, Symbol *sym, int line) { - Node* node = new Node(typ, sym, line); - node->n = nodes->Count; - nodes->Add(node); +Node* Tab::NewNode(NodeType typ, Symbol *sym, int line, int col) { + Node* node = new Node(typ, sym, line, col); + node->n = nodes.Count; + nodes.Add(node); return node; } -Node* Tab::NewNode(int typ, Node* sub) { - Node* node = NewNode(typ, (Symbol*)NULL, 0); +Node* Tab::NewNode(NodeType typ, Node* sub) { + Node* node = NewNode(typ, (Symbol*)NULL, sub->line, sub->col); node->sub = sub; return node; } -Node* Tab::NewNode(int typ, int val, int line) { - Node* node = NewNode(typ, (Symbol*)NULL, line); +Node* Tab::NewNode(NodeType typ, int val, int line, int col) { + Node* node = NewNode(typ, (Symbol*)NULL, line, col); node->val = val; return node; } void Tab::MakeFirstAlt(Graph *g) { - g->l = NewNode(Node::alt, g->l); g->l->line = g->l->sub->line; + g->l = 
NewNode(NodeType::alt, g->l); g->r->up = true; g->l->next = g->r; g->r = g->l; @@ -199,7 +214,7 @@ void Tab::MakeFirstAlt(Graph *g) { // The result will be in g1 void Tab::MakeAlternative(Graph *g1, Graph *g2) { - g2->l = NewNode(Node::alt, g2->l); g2->l->line = g2->l->sub->line; + g2->l = NewNode(NodeType::alt, g2->l); g2->l->up = true; g2->r->up = true; Node *p = g1->l; while (p->down != NULL) p = p->down; @@ -221,9 +236,13 @@ void Tab::MakeSequence(Graph *g1, Graph *g2) { g1->r = g2->r; } -void Tab::MakeIteration(Graph *g) { - g->l = NewNode(Node::iter, g->l); +void Tab::MakeOptIter(Graph *g, NodeType typ) { + g->l = NewNode(typ, g->l); g->r->up = true; +} + +void Tab::MakeIteration(Graph *g) { + MakeOptIter(g, NodeType::iter); Node *p = g->r; g->r = g->l; while (p != NULL) { @@ -233,12 +252,25 @@ void Tab::MakeIteration(Graph *g) { } void Tab::MakeOption(Graph *g) { - g->l = NewNode(Node::opt, g->l); - g->r->up = true; + MakeOptIter(g, NodeType::opt); g->l->next = g->r; g->r = g->l; } +void Tab::MakeRepetition(Graph *g, int rmin, int rmax) { + bool isOption = (rmin == 0 && rmax == 1); + MakeOptIter(g, NodeType::iter); + if(isOption) g->l->next = g->r; + Node *p = g->r; + g->r = g->l; + if(!isOption) { + while (p != NULL) { + Node *q = p->next; p->next = g->l; + p = q; + } + } +} + void Tab::Finish(Graph *g) { Node *p = g->r; while (p != NULL) { @@ -248,19 +280,20 @@ void Tab::Finish(Graph *g) { } void Tab::DeleteNodes() { - nodes = new ArrayList(); - dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); + for(int i=0; iSemErr(L"empty token not allowed"); + if (coco_string_length(s) == 0) parser->SemErr(_SC("empty token not allowed")); Graph *g = new Graph(); g->r = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { - Node *p = NewNode(Node::chr, (int)s[i], 0); + Node *p = NewNode(NodeType::chr, (int)s[i], 0, 0); g->r->next = p; g->r = p; } g->l = dummyNode->next; dummyNode->next = NULL; @@ -271,11 +304,11 @@ Graph* Tab::StrToGraph(const wchar_t* str) { 
void Tab::SetContextTrans(Node *p) { // set transition code in the graph rooted at p while (p != NULL) { - if (p->typ == Node::chr || p->typ == Node::clas) { - p->code = Node::contextTrans; - } else if (p->typ == Node::opt || p->typ == Node::iter) { + if (p->typ == NodeType::chr || p->typ == NodeType::clas) { + p->code = TransitionCode::contextTrans; + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { SetContextTrans(p->sub); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { SetContextTrans(p->sub); SetContextTrans(p->down); } if (p->up) break; @@ -285,93 +318,94 @@ void Tab::SetContextTrans(Node *p) { // set transition code in the graph rooted //------------ graph deletability check ----------------- -bool Tab::DelGraph(Node* p) { +bool Tab::DelGraph(const Node* p) { return p == NULL || (DelNode(p) && DelGraph(p->next)); } -bool Tab::DelSubGraph(Node* p) { +bool Tab::DelSubGraph(const Node* p) { return p == NULL || (DelNode(p) && (p->up || DelSubGraph(p->next))); } -bool Tab::DelNode(Node* p) { - if (p->typ == Node::nt) { +bool Tab::DelNode(const Node* p) { + if (p->typ == NodeType::nt) { return p->sym->deletable; } - else if (p->typ == Node::alt) { + else if (p->typ == NodeType::alt) { return DelSubGraph(p->sub) || (p->down != NULL && DelSubGraph(p->down)); } else { - return p->typ == Node::iter || p->typ == Node::opt || p->typ == Node::sem - || p->typ == Node::eps || p->typ == Node::rslv || p->typ == Node::sync; + return (p->typ == NodeType::iter && p->rmin == 0) || p->typ == NodeType::opt || p->typ == NodeType::sem + || p->typ == NodeType::eps || p->typ == NodeType::rslv || p->typ == NodeType::nt_sync; } } //----------------- graph printing ---------------------- -int Tab::Ptr(Node *p, bool up) { +int Tab::Ptr(const Node *p, bool up) { if (p == NULL) return 0; else if (up) return -(p->n); else return p->n; } -wchar_t* Tab::Pos(Position *pos) { - wchar_t* format = new wchar_t[10]; +#ifndef SZWC10 +#define SZWC10 10 
+#define SZWC20 20 +typedef wchar_t wchar_t_10[SZWC10+1]; +#endif + +static wchar_t* TabPos(Position *pos, wchar_t_10 &format) { if (pos == NULL) { - coco_swprintf(format, 10, L" "); + coco_swprintf(format, SZWC10, _SC(" ")); } else { - coco_swprintf(format, 10, L"%5d", pos->beg); + coco_swprintf(format, SZWC10, _SC("%5d"), pos->beg); } return format; } -wchar_t* Tab::Name(const wchar_t *name) { - wchar_t *name2 = coco_string_create_append(name, L" "); - wchar_t *subName2 = coco_string_create(name2, 0, 12); - coco_string_delete(name2); - return subName2; - // found no simpler way to get the first 12 characters of the name - // padded with blanks on the right -} - void Tab::PrintNodes() { - fwprintf(trace, L"Graph nodes:\n"); - fwprintf(trace, L"----------------------------------------------------\n"); - fwprintf(trace, L" n type name next down sub pos line\n"); - fwprintf(trace, L" val code\n"); - fwprintf(trace, L"----------------------------------------------------\n"); + fwprintf(trace, _SC("%s"), + "Graph nodes:\n" + "----------------------------------------------------------\n" + " n type name next down sub pos line col\n" + " val code\n" + "----------------------------------------------------------\n"); Node *p; - for (int i=0; iCount; i++) { - p = (Node*)((*nodes)[i]); - fwprintf(trace, L"%4d %ls ", p->n, (nTyp[p->typ])); + wchar_t_10 format; + for (int i=0; in, (nTyp[p->typ])); if (p->sym != NULL) { - wchar_t *paddedName = Name(p->sym->name); - fwprintf(trace, L"%12s ", paddedName); - coco_string_delete(paddedName); - } else if (p->typ == Node::clas) { - CharClass *c = (CharClass*)(*classes)[p->val]; - wchar_t *paddedName = Name(c->name); - fwprintf(trace, L"%12s ", paddedName); - coco_string_delete(paddedName); - } else fwprintf(trace, L" "); - fwprintf(trace, L"%5d ", Ptr(p->next, p->up)); - - if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { - fwprintf(trace, L" %5s", Pos(p->pos)); - } if (p->typ == Node::chr) { - fwprintf(trace, L"%5d 
%5d ", p->val, p->code); - } if (p->typ == Node::clas) { - fwprintf(trace, L" %5d ", p->code); - } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { - fwprintf(trace, L"%5d %5d ", Ptr(p->down, false), Ptr(p->sub, false)); - } if (p->typ == Node::sem) { - fwprintf(trace, L" %5s", Pos(p->pos)); - } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { - fwprintf(trace, L" "); - } - fwprintf(trace, L"%5d\n", p->line); - } - fwprintf(trace, L"\n"); + fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), p->sym->name); + } else if (p->typ == NodeType::clas) { + CharClass *c = classes[p->val]; + fwprintf(trace, _SC("%-12.12") _SFMT _SC(" "), c->name); + } else fputws(_SC(" "), trace); + fwprintf(trace, _SC("%5d "), Ptr(p->next, p->up)); + + switch(p->typ) { + case NodeType::t: case NodeType::nt: case NodeType::wt: + fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); + break; + case NodeType::chr: + fwprintf(trace, _SC("%5d %5d "), p->val, p->code); + break; + case NodeType::clas: + fwprintf(trace, _SC(" %5d "), p->code); + break; + case NodeType::alt: case NodeType::iter: case NodeType::opt: + fwprintf(trace, _SC("%5d %5d "), Ptr(p->down, false), Ptr(p->sub, false)); + break; + case NodeType::sem: + fwprintf(trace, _SC(" %5") _SFMT, TabPos(p->pos, format)); + break; + case NodeType::eps: case NodeType::any: case NodeType::nt_sync: + fwprintf(trace, _SC(" ")); + break; + } + fwprintf(trace, _SC("%5d %5d\n"), p->line, p->col); + } + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- @@ -381,98 +415,87 @@ void Tab::PrintNodes() { CharClass* Tab::NewCharClass(const wchar_t* name, CharSet *s) { CharClass *c; - if (coco_string_equal(name, L"#")) { + if (coco_string_equal(name, _SC("#"))) { wchar_t* temp = coco_string_create_append(name, (wchar_t) dummyName++); c = new CharClass(temp, s); coco_string_delete(temp); } else { c = new CharClass(name, s); } - c->n = classes->Count; - 
classes->Add(c); + c->n = classes.Count; + classes.Add(c); return c; } CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); + for (int i=0; iname, name)) return c; } return NULL; } -CharClass* Tab::FindCharClass(CharSet *s) { +CharClass* Tab::FindCharClass(const CharSet *s) { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); + for (int i=0; iEquals(c->set)) return c; } return NULL; } CharSet* Tab::CharClassSet(int i) { - return ((CharClass*)((*classes)[i]))->set; + return classes[i]->set; } //----------- character class printing -wchar_t* Tab::Ch(const wchar_t ch) { - wchar_t* format = new wchar_t[10]; - if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') { - coco_swprintf(format, 10, L"%d", ch); +wchar_t* TabCh(const int ch, wchar_t_10 &format) { + if (ch < _SC(' ') || ch >= 127 || ch == _SC('\'') || ch == _SC('\\')) { + coco_swprintf(format, SZWC10, _SC("%d"), ch); return format; } else { - coco_swprintf(format, 10, L"'%lc'", ch); + coco_swprintf(format, SZWC10, _SC("'%") _CHFMT _SC("'"), ch); return format; } } -void Tab::WriteCharSet(CharSet *s) { +void Tab::WriteCharSet(const CharSet *s) { + wchar_t_10 fmt1, fmt2; for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from < r->to) { - wchar_t *from = Ch(r->from); - wchar_t *to = Ch(r->to); - fwprintf(trace, L"%ls .. 
%ls ", from, to); - delete [] from; - delete [] to; + wchar_t *from = TabCh(r->from, fmt1); + wchar_t *to = TabCh(r->to, fmt2); + fwprintf(trace, _SC("%") _SFMT _SC("..%") _SFMT _SC(" "), from, to); } else { - wchar_t *from = Ch(r->from); - fwprintf(trace, L"%ls ", from); - delete [] from; + wchar_t *from = TabCh(r->from, fmt1); + fwprintf(trace, _SC("%") _SFMT _SC(" "), from); } } } void Tab::WriteCharClasses () { CharClass *c; - for (int i=0; iCount; i++) { - c = (CharClass*)((*classes)[i]); - - wchar_t* format2 = coco_string_create_append(c->name, L" "); - wchar_t* format = coco_string_create(format2, 0, 10); - coco_string_merge(format, L": "); - fwprintf(trace, format); - + for (int i=0; iname); WriteCharSet(c->set); - fwprintf(trace, L"\n"); - coco_string_delete(format); - coco_string_delete(format2); + fputws(_SC("\n"), trace); } - fwprintf(trace, L"\n"); + fputws(_SC("\n"), trace); } //--------------------------------------------------------------------- // Symbol set computations //--------------------------------------------------------------------- - /* Computes the first set for the given Node. 
*/ -BitArray* Tab::First0(Node *p, BitArray *mark) { - BitArray *fs = new BitArray(terminals->Count); +BitArray* Tab::First0(const Node *p, BitArray *mark) { + BitArray *fs = new BitArray(terminals.Count); while (p != NULL && !((*mark)[p->n])) { mark->Set(p->n, true); - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { if (p->sym->firstReady) { fs->Or(p->sym->first); } else { @@ -481,13 +504,13 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { delete fs0; } } - else if (p->typ == Node::t || p->typ == Node::wt) { + else if (p->typ == NodeType::t || p->typ == NodeType::wt) { fs->Set(p->sym->n, true); } - else if (p->typ == Node::any) { + else if (p->typ == NodeType::any) { fs->Or(p->set); } - else if (p->typ == Node::alt) { + else if (p->typ == NodeType::alt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; @@ -495,7 +518,7 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { fs->Or(fs0); delete fs0; } - else if (p->typ == Node::iter || p->typ == Node::opt) { + else if (p->typ == NodeType::iter || p->typ == NodeType::opt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; @@ -507,15 +530,15 @@ BitArray* Tab::First0(Node *p, BitArray *mark) { return fs; } -BitArray* Tab::First(Node *p) { - BitArray *mark = new BitArray(nodes->Count); - BitArray *fs = First0(p, mark); - delete mark; +BitArray* Tab::First(const Node *p) { + BitArray mark(nodes.Count); + BitArray *fs = First0(p, &mark); if (ddt[3]) { - fwprintf(trace, L"\n"); - if (p != NULL) fwprintf(trace, L"First: node = %d\n", p->n ); - else fwprintf(trace, L"First: node = null\n"); - PrintSet(fs, 0); + fputws(_SC("\n"), trace); + if (p != NULL) fwprintf(trace, _SC("First: node = %d\tline = %d\tcol = %d\ttype = %s\t%s\n"), p->n, + p->line, p->col, this->nTyp[p->typ], p->sym ? 
p->sym->name : ""); + else fputws(_SC("First: node = null\n"), trace); + fwprintf(trace, _SC(" ")); PrintSet(fs, 10); } return fs; } @@ -524,14 +547,19 @@ BitArray* Tab::First(Node *p) { void Tab::CompFirstSets() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - sym->first = new BitArray(terminals->Count); + for (i=0; ifirst; + sym->first = new BitArray(terminals.Count); sym->firstReady = false; } - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + fwprintf(trace, _SC("Computing First Sets: %d\n"), nonterminals.Count); + for (i=0; iname, sym->line, sym->col); + BitArray *saved = sym->first; sym->first = First(sym->graph); + delete saved; sym->firstReady = true; } } @@ -539,14 +567,15 @@ void Tab::CompFirstSets() { void Tab::CompFollow(Node *p) { while (p != NULL && !((*visited)[p->n])) { visited->Set(p->n, true); - if (p->typ == Node::nt) { + if (p->typ == NodeType::nt) { BitArray *s = First(p->next); p->sym->follow->Or(s); + delete s; if (DelGraph(p->next)) p->sym->nts->Set(curSy->n, true); - } else if (p->typ == Node::opt || p->typ == Node::iter) { + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { CompFollow(p->sub); - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { CompFollow(p->sub); CompFollow(p->down); } p = p->next; @@ -557,8 +586,8 @@ void Tab::Complete(Symbol *sym) { if (!((*visited)[sym->n])) { visited->Set(sym->n, true); Symbol *s; - for (int i=0; iCount; i++) { - s = (Symbol*)((*nonterminals)[i]); + for (int i=0; ints))[s->n]) { Complete(s); sym->follow->Or(s->follow); @@ -571,60 +600,67 @@ void Tab::Complete(Symbol *sym) { void Tab::CompFollowSets() { Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - sym->follow = new BitArray(terminals->Count); - sym->nts = new BitArray(nonterminals->Count); + for (i=0; ifollow = new BitArray(terminals.Count); + sym->nts = new BitArray(nonterminals.Count); } 
gramSy->follow->Set(eofSy->n, true); - visited = new BitArray(nodes->Count); - for (i=0; iCount; i++) { // get direct successors of nonterminals - sym = (Symbol*)((*nonterminals)[i]); + delete visited; + visited = new BitArray(nodes.Count); + for (i=0; igraph); } - for (i=0; iCount; i++) { // add indirect successors to followers - sym = (Symbol*)((*nonterminals)[i]); - visited = new BitArray(nonterminals->Count); + for (i=0; ityp == Node::any) a = p; - else if (p->typ == Node::alt) { + const Node *a = NULL; + if (p->typ == NodeType::any) a = p; + else if (p->typ == NodeType::alt) { a = LeadingAny(p->sub); if (a == NULL) a = LeadingAny(p->down); } - else if (p->typ == Node::opt || p->typ == Node::iter) a = LeadingAny(p->sub); + else if (p->typ == NodeType::opt || p->typ == NodeType::iter) a = LeadingAny(p->sub); if (a == NULL && DelNode(p) && !p->up) a = LeadingAny(p->next); return a; } -void Tab::FindAS(Node *p) { // find ANY sets - Node *a; +void Tab::FindAS(const Node *p) { // find ANY sets + const Node *a; while (p != NULL) { - if (p->typ == Node::opt || p->typ == Node::iter) { + if (p->typ == NodeType::opt || p->typ == NodeType::iter) { FindAS(p->sub); a = LeadingAny(p->sub); - if (a != NULL) Sets::Subtract(a->set, First(p->next)); - } else if (p->typ == Node::alt) { - BitArray *s1 = new BitArray(terminals->Count); - Node *q = p; + if (a != NULL) { + BitArray *ba = First(p->next); + Sets::Subtract(a->set, ba); + delete ba; + } + } else if (p->typ == NodeType::alt) { + BitArray s1(terminals.Count); + const Node *q = p; while (q != NULL) { FindAS(q->sub); a = LeadingAny(q->sub); if (a != NULL) { BitArray *tmp = First(q->down); - tmp->Or(s1); + tmp->Or(&s1); Sets::Subtract(a->set, tmp); + delete tmp; } else { BitArray *f = First(q->sub); - s1->Or(f); + s1.Or(f); delete f; } q = q->down; @@ -638,8 +674,10 @@ void Tab::FindAS(Node *p) { // find ANY sets if (DelNode(p)) { a = LeadingAny(p->next); if (a != NULL) { - Node *q = (p->typ == Node::nt) ? 
p->sym->graph : p->sub; - Sets::Subtract(a->set, First(q)); + Node *q = (p->typ == NodeType::nt) ? p->sym->graph : p->sub; + BitArray *ba = First(q); + Sets::Subtract(a->set, ba); + delete ba; } } @@ -650,13 +688,13 @@ void Tab::FindAS(Node *p) { // find ANY sets void Tab::CompAnySets() { Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } } -BitArray* Tab::Expected(Node *p, Symbol *curSy) { +BitArray* Tab::Expected(const Node *p, const Symbol *curSy) { BitArray *s = First(p); if (DelGraph(p)) s->Or(curSy->follow); @@ -664,35 +702,36 @@ BitArray* Tab::Expected(Node *p, Symbol *curSy) { } // does not look behind resolvers; only called during LL(1) test and in CheckRes -BitArray* Tab::Expected0(Node *p, Symbol *curSy) { - if (p->typ == Node::rslv) return new BitArray(terminals->Count); +BitArray* Tab::Expected0(const Node *p, const Symbol *curSy) { + if (p->typ == NodeType::rslv) return new BitArray(terminals.Count); else return Expected(p, curSy); } void Tab::CompSync(Node *p) { while (p != NULL && !(visited->Get(p->n))) { visited->Set(p->n, true); - if (p->typ == Node::sync) { + if (p->typ == NodeType::nt_sync) { BitArray *s = Expected(p->next, curSy); s->Set(eofSy->n, true); allSyncSets->Or(s); p->set = s; - } else if (p->typ == Node::alt) { + } else if (p->typ == NodeType::alt) { CompSync(p->sub); CompSync(p->down); - } else if (p->typ == Node::opt || p->typ == Node::iter) + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) CompSync(p->sub); p = p->next; } } void Tab::CompSyncSets() { - allSyncSets = new BitArray(terminals->Count); + allSyncSets = new BitArray(terminals.Count); allSyncSets->Set(eofSy->n, true); - visited = new BitArray(nodes->Count); + delete visited; + visited = new BitArray(nodes.Count); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } @@ -700,10 +739,10 @@ void Tab::CompSyncSets() { void Tab::SetupAnys() { Node 
*p; - for (int i=0; iCount; i++) { - p = (Node*)((*nodes)[i]); - if (p->typ == Node::any) { - p->set = new BitArray(terminals->Count, true); + for (int i=0; ityp == NodeType::any) { + p->set = new BitArray(terminals.Count, true); p->set->Set(eofSy->n, false); } } @@ -715,26 +754,26 @@ void Tab::CompDeletableSymbols() { int i; do { changed = false; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; ideletable && sym->graph != NULL && DelGraph(sym->graph)) { sym->deletable = true; changed = true; } } } while (changed); - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (i=0; ideletable) - wprintf(L" %ls deletable\n", sym->name); + wprintf(_SC(" %") _SFMT _SC(" deletable\n"), sym->name); } } void Tab::RenumberPragmas() { - int n = terminals->Count; + int n = terminals.Count; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*pragmas)[i]); + for (int i=0; in = n++; } } @@ -746,30 +785,32 @@ void Tab::CompSymbolSets() { CompFollowSets(); CompSyncSets(); if (ddt[1]) { - fwprintf(trace, L"\n"); - fwprintf(trace, L"First & follow symbols:\n"); - fwprintf(trace, L"----------------------\n\n"); + fwprintf(trace, _SC("%s"), + "\n" + "First & follow symbols:\n" + "----------------------\n\n"); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - fwprintf(trace, L"%ls\n", sym->name); - fwprintf(trace, L"first: "); PrintSet(sym->first, 10); - fwprintf(trace, L"follow: "); PrintSet(sym->follow, 10); - fwprintf(trace, L"\n"); + for (int i=0; i line: %d\n"), sym->name, sym->line); + fputws(_SC("first: "), trace); PrintSet(sym->first, 10); + fputws(_SC("follow: "), trace); PrintSet(sym->follow, 10); + fputws(_SC("\n"), trace); } } if (ddt[4]) { - fwprintf(trace, L"\n"); - fwprintf(trace, L"ANY and SYNC sets:\n"); - fwprintf(trace, L"-----------------\n"); + fwprintf(trace, _SC("%s"), + "\n" + "ANY and SYNC sets:\n" + "-----------------\n"); Node *p; - for (int i=0; iCount; i++) { - p = 
(Node*)((*nodes)[i]); - if (p->typ == Node::any || p->typ == Node::sync) { - fwprintf(trace, L"%4d %4s ", p->n, nTyp[p->typ]); - PrintSet(p->set, 11); + for (int i=0; ityp == NodeType::any || p->typ == NodeType::nt_sync) { + fwprintf(trace, _SC("Node: %4d %4s: Line: %4d\n"), p->n, nTyp[p->typ], p->line); + fwprintf(trace, _SC(" ")); PrintSet(p->set, 10); } } } @@ -779,58 +820,54 @@ void Tab::CompSymbolSets() { // String handling //--------------------------------------------------------------------- -wchar_t Tab::Hex2Char(const wchar_t* s) { +int Tab::Hex2Char(const wchar_t* s, int len) { int val = 0; - int len = coco_string_length(s); for (int i = 0; i < len; i++) { wchar_t ch = s[i]; if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0'); else if ('a' <= ch && ch <= 'f') val = 16 * val + (10 + ch - 'a'); else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A'); - else parser->SemErr(L"bad escape sequence in string or character"); + else parser->SemErr(_SC("bad escape sequence in string or character")); } - if (val >= COCO_WCHAR_MAX) {/* pdt */ - parser->SemErr(L"bad escape sequence in string or character"); + if (val > COCO_WCHAR_MAX) {/* pdt */ + parser->SemErr(_SC("bad escape sequence in string or character")); } - return (wchar_t) val; + return val; } -wchar_t* Tab::Char2Hex(const wchar_t ch) { - wchar_t* format = new wchar_t[10]; - coco_swprintf(format, 10, L"\\0x%04x", ch); +static wchar_t* TabChar2Hex(const wchar_t ch, wchar_t_10 &format) { + coco_swprintf(format, SZWC10, _SC("\\0x%04x"), ch); return format; } wchar_t* Tab::Unescape (const wchar_t* s) { /* replaces escape sequences in s by their Unicode values. 
*/ - StringBuilder buf = StringBuilder(); + StringBuilder buf; int i = 0; int len = coco_string_length(s); while (i < len) { - if (s[i] == '\\') { + if (s[i] == _SC('\\')) { switch (s[i+1]) { - case L'\\': buf.Append(L'\\'); i += 2; break; - case L'\'': buf.Append(L'\''); i += 2; break; - case L'\"': buf.Append(L'\"'); i += 2; break; - case L'r': buf.Append(L'\r'); i += 2; break; - case L'n': buf.Append(L'\n'); i += 2; break; - case L't': buf.Append(L'\t'); i += 2; break; - case L'0': buf.Append(L'\0'); i += 2; break; - case L'a': buf.Append(L'\a'); i += 2; break; - case L'b': buf.Append(L'\b'); i += 2; break; - case L'f': buf.Append(L'\f'); i += 2; break; - case L'v': buf.Append(L'\v'); i += 2; break; - case L'u': case L'x': + case _SC('\\'): buf.Append(_SC('\\')); i += 2; break; + case _SC('\''): buf.Append(_SC('\'')); i += 2; break; + case _SC('\"'): buf.Append(_SC('\"')); i += 2; break; + case _SC('r'): buf.Append(_SC('\r')); i += 2; break; + case _SC('n'): buf.Append(_SC('\n')); i += 2; break; + case _SC('t'): buf.Append(_SC('\t')); i += 2; break; + case _SC('0'): buf.Append(_SC('\0')); i += 2; break; + case _SC('a'): buf.Append(_SC('\a')); i += 2; break; + case _SC('b'): buf.Append(_SC('\b')); i += 2; break; + case _SC('f'): buf.Append(_SC('\f')); i += 2; break; + case _SC('v'): buf.Append(_SC('\v')); i += 2; break; + case _SC('u'): case _SC('x'): if (i + 6 <= coco_string_length(s)) { - wchar_t *subS = coco_string_create(s, i+2, 4); - buf.Append(Hex2Char(subS)); i += 6; break; - coco_string_delete(subS); + buf.Append(Hex2Char(s +i+2, 4)); i += 6; break; } else { - parser->SemErr(L"bad escape sequence in string or character"); + parser->SemErr(_SC("bad escape sequence in string or character")); i = coco_string_length(s); break; } default: - parser->SemErr(L"bad escape sequence in string or character"); + parser->SemErr(_SC("bad escape sequence in string or character")); i += 2; break; } } else { @@ -844,23 +881,23 @@ wchar_t* Tab::Unescape (const wchar_t* s) { 
wchar_t* Tab::Escape (const wchar_t* s) { - StringBuilder buf = StringBuilder(); - wchar_t ch; + StringBuilder buf; + int ch; int len = coco_string_length(s); + wchar_t_10 fmt; for (int i=0; i < len; i++) { ch = s[i]; switch(ch) { - case L'\\': buf.Append(L"\\\\"); break; - case L'\'': buf.Append(L"\\'"); break; - case L'\"': buf.Append(L"\\\""); break; - case L'\t': buf.Append(L"\\t"); break; - case L'\r': buf.Append(L"\\r"); break; - case L'\n': buf.Append(L"\\n"); break; + case _SC('\\'): buf.Append(_SC("\\\\")); break; + case _SC('\''): buf.Append(_SC("\\'")); break; + case _SC('\"'): buf.Append(_SC("\\\"")); break; + case _SC('\t'): buf.Append(_SC("\\t")); break; + case _SC('\r'): buf.Append(_SC("\\r")); break; + case _SC('\n'): buf.Append(_SC("\\n")); break; default: - if ((ch < L' ') || (ch > 0x7f)) { - wchar_t* res = Char2Hex(ch); + if ((ch < _SC(' ')) || (ch > 0x7f)) { + wchar_t* res = TabChar2Hex(ch, fmt); buf.Append(res); - delete [] res; } else buf.Append(ch); break; @@ -883,17 +920,26 @@ bool Tab::GrammarOk() { return ok; } +bool Tab::GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) exit(1); + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; +} //--------------- check for circular productions ---------------------- -void Tab::GetSingles(Node *p, ArrayList *singles) { +void Tab::GetSingles(const Node *p, TArrayList &singles) { if (p == NULL) return; // end of graph - if (p->typ == Node::nt) { - if (p->up || DelGraph(p->next)) singles->Add(p->sym); - } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { + if (p->typ == NodeType::nt) { + singles.Add(p->sym); + } else if (p->typ == NodeType::alt || p->typ == NodeType::iter || p->typ == NodeType::opt) { if (p->up || DelGraph(p->next)) { GetSingles(p->sub, singles); - if (p->typ == Node::alt) GetSingles(p->down, singles); + if (p->typ == NodeType::alt) 
GetSingles(p->down, singles); } } if (!p->up && DelNode(p)) GetSingles(p->next, singles); @@ -901,108 +947,184 @@ void Tab::GetSingles(Node *p, ArrayList *singles) { bool Tab::NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; - ArrayList *list = new ArrayList(); + TArrayList list; Symbol *sym; int i; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - ArrayList *singles = new ArrayList(); + for (i=0; i singles; GetSingles(sym->graph, singles); // get nonterminals s such that sym-->s Symbol *s; - for (int j=0; jCount; j++) { - s = (Symbol*)((*singles)[j]); - list->Add(new CNode(sym, s)); + for (int j=0; jCount; i++) { - n = (CNode*)(*list)[i]; + for (i = 0; i < list.Count; i++) { + n = list[i]; onLeftSide = false; onRightSide = false; CNode *m; - for (int j=0; jCount; j++) { - m = (CNode*)((*list)[j]); + for (int j=0; jleft == m->right) onRightSide = true; if (n->right == m->left) onLeftSide = true; } if (!onLeftSide || !onRightSide) { - list->Remove(n); i--; changed = true; + delete n; + list.Remove(n); i--; changed = true; } } } while(changed); ok = true; - for (i=0; iCount; i++) { - n = (CNode*)((*list)[i]); + for (i=0; icount++; - wprintf(L" %ls --> %ls", n->left->name, n->right->name); + wprintf(_SC(" %") _SFMT _SC(":%d --> %") _SFMT _SC(":%d\n"), n->left->name, n->left->line, n->right->name, n->right->line); } + for(int i=0; iname); - if (sym != NULL) wprintf(L"%ls is ", sym->name); +void Tab::LL1Error(int cond, const Symbol *sym) { + wprintf(_SC(" LL1 warning in %") _SFMT _SC(":%d:%d: "), curSy->name, curSy->line, curSy->col); + if (sym != NULL) wprintf(_SC("%") _SFMT _SC(" is "), sym->name); switch (cond) { - case 1: wprintf(L"start of several alternatives\n"); break; - case 2: wprintf(L"start & successor of deletable structure\n"); break; - case 3: wprintf(L"an ANY node that matches no symbol\n"); break; - case 4: wprintf(L"contents of [...] 
or {...} must not be deletable\n"); break; + case 1: wprintf(_SC("%s"), "start of several alternatives\n"); break; + case 2: wprintf(_SC("%s"), "start & successor of deletable structure\n"); break; + case 3: wprintf(_SC("%s"), "an ANY node that matches no symbol\n"); break; + case 4: wprintf(_SC("%s"), "contents of [...] or {...} must not be deletable\n"); break; } } -void Tab::CheckOverlap(BitArray *s1, BitArray *s2, int cond) { +int Tab::CheckOverlap(const BitArray *s1, const BitArray *s2, int cond) { + int overlaped = 0; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*terminals)[i]); + for (int i=0; in] && (*s2)[sym->n]) { LL1Error(cond, sym); + ++overlaped; } } -} - -void Tab::CheckAlts(Node *p) { - BitArray *s1, *s2; + return overlaped; +} + +/* print the path for first set that contains token tok for the graph rooted at p */ +void Tab::PrintFirstPath(const Node *p, int tok, const wchar_t *indent) { + while (p != NULL) { + //if(p->sym) wprintf(_SC("%") _SFMT _SC("-> %") _SFMT _SC(":%d:\n", indent, p->sym->name, p->sym->line)); + switch (p->typ) { + case NodeType::nt: { + if (p->sym->firstReady) { + if(p->sym->first->Get(tok)) { + if(coco_string_length(indent) == 1) + wprintf(_SC("%") _SFMT _SC("=> %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->line, p->col); + wprintf(_SC("%") _SFMT _SC("-> %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->sym->line, p->sym->col); + if(p->sym->graph) { + wchar_t *new_indent = coco_string_create_append(indent, _SC(" ")); + PrintFirstPath(p->sym->graph, tok, new_indent); + coco_string_delete(new_indent); + } + return; + } + } + break; + } + case NodeType::t: case NodeType::wt: { + if(p->sym->n == tok) + wprintf(_SC("%") _SFMT _SC("= %") _SFMT _SC(":%d:%d:\n"), indent, p->sym->name, p->line, p->col); + break; + } + case NodeType::any: { + break; + } + case NodeType::alt: { + PrintFirstPath(p->sub, tok, indent); + PrintFirstPath(p->down, tok, indent); + break; + } + case NodeType::iter: case NodeType::opt: 
{ + if (!DelNode(p->sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p->sub, tok, indent); + break; + } + } + if (!DelNode(p)) break; + p = p->next; + } +} + +int Tab::CheckAlts(Node *p) { + int rc = 0; + BitArray s0(terminals.Count), *s1, *s2; while (p != NULL) { - if (p->typ == Node::alt) { + if (p->typ == NodeType::alt) { Node *q = p; - s1 = new BitArray(terminals->Count); + s0.SetAll(false); while (q != NULL) { // for all alternatives s2 = Expected0(q->sub, curSy); - CheckOverlap(s1, s2, 1); - s1->Or(s2); + int overlaped = CheckOverlap(&s0, s2, 1); + if(overlaped > 0) { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i=0; in) && s2->Get(sym->n)) {overlapToken = sym->n; break;} + } + //print(format("\t-> %s:%d: %d", first_overlap.sub.sym.name, first_overlap.sub.sym.line, overlaped)); + PrintFirstPath( p, overlapToken); + rc += overlaped; + } + s0.Or(s2); + delete s2; CheckAlts(q->sub); q = q->down; } - } else if (p->typ == Node::opt || p->typ == Node::iter) { + } else if (p->typ == NodeType::opt || p->typ == NodeType::iter) { if (DelSubGraph(p->sub)) LL1Error(4, NULL); // e.g. [[...]] else { s1 = Expected0(p->sub, curSy); s2 = Expected(p->next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if(overlaped > 0) { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i=0; iGet(sym->n) && s2->Get(sym->n)) {overlapToken = sym->n; break;} + } + //print(format("\t=>:%d: %d", p.line, overlaped)); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } + delete s1; delete s2; } CheckAlts(p->sub); - } else if (p->typ == Node::any) { + } else if (p->typ == NodeType::any) { if (Sets::Elements(p->set) == 0) LL1Error(3, NULL); // e.g. 
{ANY} ANY or [ANY] ANY or ( ANY | ANY ) } if (p->up) break; p = p->next; } + return rc; } void Tab::CheckLL1() { Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph); } @@ -1010,40 +1132,51 @@ void Tab::CheckLL1() { //------------- check if resolvers are legal -------------------- -void Tab::ResErr(Node *p, const wchar_t* msg) { +void Tab::ResErr(const Node *p, const wchar_t* msg) { errors->Warning(p->line, p->pos->col, msg); } -void Tab::CheckRes(Node *p, bool rslvAllowed) { +void Tab::CheckRes(const Node *p, bool rslvAllowed) { + BitArray expected(terminals.Count), soFar(terminals.Count); while (p != NULL) { - Node *q; - if (p->typ == Node::alt) { - BitArray *expected = new BitArray(terminals->Count); - for (q = p; q != NULL; q = q->down) - expected->Or(Expected0(q->sub, curSy)); - BitArray *soFar = new BitArray(terminals->Count); + const Node *q; + if (p->typ == NodeType::alt) { + expected.SetAll(false); + for (q = p; q != NULL; q = q->down) { + BitArray *ba = Expected0(q->sub, curSy); + expected.Or(ba); + delete ba; + } + soFar.SetAll(false); for (q = p; q != NULL; q = q->down) { - if (q->sub->typ == Node::rslv) { + if (q->sub->typ == NodeType::rslv) { BitArray *fs = Expected(q->sub->next, curSy); - if (Sets::Intersect(fs, soFar)) - ResErr(q->sub, L"Warning: Resolver will never be evaluated. Place it at previous conflicting alternative."); - if (!Sets::Intersect(fs, expected)) - ResErr(q->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); - } else soFar->Or(Expected(q->sub, curSy)); + if (Sets::Intersect(fs, &soFar)) + ResErr(q->sub, _SC("Warning: Resolver will never be evaluated. 
Place it at previous conflicting alternative.")); + if (!Sets::Intersect(fs, &expected)) + ResErr(q->sub, _SC("Warning: Misplaced resolver: no LL(1) conflict.")); + delete fs; + } else { + BitArray *ba = Expected(q->sub, curSy); + soFar.Or(ba); + delete ba; + } CheckRes(q->sub, true); } - } else if (p->typ == Node::iter || p->typ == Node::opt) { - if (p->sub->typ == Node::rslv) { + } else if (p->typ == NodeType::iter || p->typ == NodeType::opt) { + if (p->sub->typ == NodeType::rslv) { BitArray *fs = First(p->sub->next); BitArray *fsNext = Expected(p->next, curSy); - if (!Sets::Intersect(fs, fsNext)) - ResErr(p->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); + bool bsi = Sets::Intersect(fs, fsNext); + delete fs; delete fsNext; + if (!bsi) + ResErr(p->sub, _SC("Warning: Misplaced resolver: no LL(1) conflict.")); } CheckRes(p->sub, true); - } else if (p->typ == Node::rslv) { + } else if (p->typ == NodeType::rslv) { if (!rslvAllowed) - ResErr(p, L"Warning: Misplaced resolver: no alternative."); + ResErr(p, _SC("Warning: Misplaced resolver: no alternative.")); } if (p->up) break; @@ -1053,8 +1186,8 @@ void Tab::CheckRes(Node *p, bool rslvAllowed) { } void Tab::CheckResolvers() { - for (int i=0; iCount; i++) { - curSy = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph, false); } } @@ -1065,11 +1198,11 @@ void Tab::CheckResolvers() { bool Tab::NtsComplete() { bool complete = true; Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; igraph == NULL) { complete = false; errors->count++; - wprintf(L" No production for %ls\n", sym->name); + wprintf(_SC(" No production for %") _SFMT _SC("\n"), sym->name); } } return complete; @@ -1077,14 +1210,14 @@ bool Tab::NtsComplete() { //-------------- check if every nts can be reached ----------------- -void Tab::MarkReachedNts(Node *p) { +void Tab::MarkReachedNts(const Node *p) { while (p != NULL) { - if (p->typ == Node::nt && !((*visited)[p->sym->n])) { // new nt reached + 
if (p->typ == NodeType::nt && !((*visited)[p->sym->n])) { // new nt reached visited->Set(p->sym->n, true); MarkReachedNts(p->sym->graph); - } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { + } else if (p->typ == NodeType::alt || p->typ == NodeType::iter || p->typ == NodeType::opt) { MarkReachedNts(p->sub); - if (p->typ == Node::alt) MarkReachedNts(p->down); + if (p->typ == NodeType::alt) MarkReachedNts(p->down); } if (p->up) break; p = p->next; @@ -1093,15 +1226,16 @@ void Tab::MarkReachedNts(Node *p) { bool Tab::AllNtReached() { bool ok = true; - visited = new BitArray(nonterminals->Count); + delete visited; + visited = new BitArray(nonterminals.Count); visited->Set(gramSy->n, true); MarkReachedNts(gramSy->graph); Symbol *sym; - for (int i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); + for (int i=0; in])) { ok = false; errors->count++; - wprintf(L" %ls cannot be reached\n", sym->name); + wprintf(_SC(" %") _SFMT _SC(" cannot be reached\n"), sym->name); } } return ok; @@ -1109,10 +1243,10 @@ bool Tab::AllNtReached() { //--------- check if every nts can be derived to terminals ------------ -bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to terminals +bool Tab::IsTerm(const Node *p, const BitArray *mark) { // true if graph can be derived to terminals while (p != NULL) { - if (p->typ == Node::nt && !((*mark)[p->sym->n])) return false; - if (p->typ == Node::alt && !IsTerm(p->sub, mark) + if (p->typ == NodeType::nt && !((*mark)[p->sym->n])) return false; + if (p->typ == NodeType::alt && !IsTerm(p->sub, mark) && (p->down == NULL || !IsTerm(p->down, mark))) return false; if (p->up) break; p = p->next; @@ -1123,25 +1257,25 @@ bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to t bool Tab::AllNtToTerm() { bool changed, ok = true; - BitArray *mark = new BitArray(nonterminals->Count); + BitArray mark(nonterminals.Count); // a nonterminal is marked if it can be derived to 
terminal symbols Symbol *sym; int i; do { changed = false; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - if (!((*mark)[sym->n]) && IsTerm(sym->graph, mark)) { - mark->Set(sym->n, true); changed = true; + for (i=0; in] && IsTerm(sym->graph, &mark)) { + mark.Set(sym->n, true); changed = true; } } } while (changed); - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - if (!((*mark)[sym->n])) { + for (i=0; in]) { ok = false; errors->count++; - wprintf(L" %ls cannot be derived to terminals\n", sym->name); + wprintf(_SC(" %") _SFMT _SC(" cannot be derived to terminals\n"), sym->name); } } return ok; @@ -1152,52 +1286,60 @@ bool Tab::AllNtToTerm() { //--------------------------------------------------------------------- void Tab::XRef() { - SortedList *xref = new SortedList(); + SortedList xref; // collect lines where symbols have been defined Symbol *sym; int i, j; - for (i=0; iCount; i++) { - sym = (Symbol*)((*nonterminals)[i]); - ArrayList *list = (ArrayList*)(xref->Get(sym)); - if (list == NULL) {list = new ArrayList(); xref->Set(sym, list);} - int *intg = new int(- sym->line); - list->Add(intg); + for (i=0; i *list = (TArrayList*)(xref.Get(sym)); + if (list == NULL) {list = new TArrayList(); xref.Set(sym, list);} + list->Add(-sym->line); } // collect lines where symbols have been referenced Node *n; - for (i=0; iCount; i++) { - n = (Node*)((*nodes)[i]); - if (n->typ == Node::t || n->typ == Node::wt || n->typ == Node::nt) { - ArrayList *list = (ArrayList*)(xref->Get(n->sym)); - if (list == NULL) {list = new ArrayList(); xref->Set(n->sym, list);} - int *intg = new int(n->line); - list->Add(intg); + for (i=0; ityp == NodeType::t || n->typ == NodeType::wt || n->typ == NodeType::nt) { + TArrayList *list = (TArrayList*)(xref.Get(n->sym)); + if (list == NULL) {list = new TArrayList(); xref.Set(n->sym, list);} + list->Add(n->line); } } // print cross reference list - fwprintf(trace, L"\n"); - fwprintf(trace, L"Cross reference list:\n"); - 
fwprintf(trace, L"--------------------\n\n"); - - for (i=0; iCount; i++) { - sym = (Symbol*)(xref->GetKey(i)); - wchar_t *paddedName = Name(sym->name); - fwprintf(trace, L" %12ls", paddedName); - coco_string_delete(paddedName); - ArrayList *list = (ArrayList*)(xref->Get(sym)); + fwprintf(trace, _SC("%s"), + "\n" + "Cross reference list:\n" + "--------------------\n\n"); + + for (i=0; iname); + TArrayList *list = (TArrayList*)(xref.Get(sym)); int col = 14; int line; for (j=0; jCount; j++) { - line = *(int*)((*list)[j]); + line = (*list)[j]; if (col + 5 > 80) { - fwprintf(trace, L"\n"); - for (col = 1; col <= 14; col++) fwprintf(trace, L" "); + fputws(_SC("\n"), trace); + for (col = 1; col <= 14; col++) fputws(_SC(" "), trace); } - fwprintf(trace, L"%5d", line); col += 5; + fwprintf(trace, _SC("%5d"), line); col += 5; } - fwprintf(trace, L"\n"); - } - fwprintf(trace, L"\n\n"); + fputws(_SC("\n"), trace); + } + fputws(_SC("\n\n"), trace); + for(int i=0; i < xref.Count; ++i) { + SortedEntry *se = xref[i]; + /* + while(se->next) { + SortedEntry *tmp = se->next; + delete (ArrayList*)tmp->Value; + se->next = tmp; + } + */ + delete (TArrayList*)se->Value; + } } void Tab::SetDDT(const wchar_t* s) { @@ -1206,16 +1348,16 @@ void Tab::SetDDT(const wchar_t* s) { int len = coco_string_length(st); for (int i = 0; i < len; i++) { ch = st[i]; - if (L'0' <= ch && ch <= L'9') ddt[ch - L'0'] = true; + if (_SC('0') <= ch && ch <= _SC('9')) ddt[ch - _SC('0')] = true; else switch (ch) { - case L'A' : ddt[0] = true; break; // trace automaton - case L'F' : ddt[1] = true; break; // list first/follow sets - case L'G' : ddt[2] = true; break; // print syntax graph - case L'I' : ddt[3] = true; break; // trace computation of first sets - case L'J' : ddt[4] = true; break; // print ANY and SYNC sets - case L'P' : ddt[8] = true; break; // print statistics - case L'S' : ddt[6] = true; break; // list symbol table - case L'X' : ddt[7] = true; break; // list cross reference table + case _SC('A') : 
ddt[0] = true; break; // trace automaton + case _SC('F') : ddt[1] = true; break; // list first/follow sets + case _SC('G') : ddt[2] = true; break; // print syntax graph + case _SC('I') : ddt[3] = true; break; // trace computation of first sets + case _SC('J') : ddt[4] = true; break; // print ANY and SYNC sets + case _SC('P') : ddt[8] = true; break; // print statistics + case _SC('S') : ddt[6] = true; break; // list symbol table + case _SC('X') : ddt[7] = true; break; // list cross reference table default : break; } } @@ -1231,17 +1373,11 @@ void Tab::SetOption(const wchar_t* s) { int nameLenght = coco_string_indexof(s, '='); int valueIndex = nameLenght + 1; - wchar_t *name = coco_string_create(s, 0, nameLenght); - wchar_t *value = coco_string_create(s, valueIndex); - - if (coco_string_equal(L"$namespace", name)) { - if (nsName == NULL) nsName = coco_string_create(value); - } else if (coco_string_equal(L"$checkEOF", name)) { - checkEOF = coco_string_equal(L"true", value); + if (coco_string_equal_n(_SC("$namespace"), s, nameLenght)) { + if (nsName == NULL) nsName = coco_string_create(s + valueIndex); + } else if (coco_string_equal_n(_SC("$checkEOF"), s, nameLenght)) { + checkEOF = coco_string_equal(_SC("true"), s + valueIndex); } - - delete [] name; - delete [] value; } diff --git a/src/Tab.h b/src/Tab.h index ae788aa..a01c910 100644 --- a/src/Tab.h +++ b/src/Tab.h @@ -30,11 +30,10 @@ Coco/R itself) does not fall under the GNU General Public License. 
#if !defined(COCO_TAB_H__) #define COCO_TAB_H__ -#include "ArrayList.h" +#include "Scanner.h" #include "HashTable.h" #include "StringBuilder.h" #include "SortedList.h" -#include "Scanner.h" #include "Position.h" #include "Symbol.h" #include "Node.h" @@ -52,44 +51,45 @@ class Tab { public: Position *semDeclPos; // position of global semantic declarations CharSet *ignored; // characters ignored by the scanner - bool ddt[10]; // debug and test switches + bool ddt[10]; // debug and test switches + bool genRREBNF; //generate EBNF for railroad diagram Symbol *gramSy; // root nonterminal; filled by ATG Symbol *eofSy; // end of file symbol Symbol *noSym; // used in case of an error BitArray *allSyncSets; // union of all synchronisation sets - HashTable *literals; // symbols that are used as literals + HashTable literals; // symbols that are used as literals - wchar_t* srcName; // name of the atg file (including path) - wchar_t* srcDir; // directory path of the atg file - wchar_t* nsName; // namespace for generated files - wchar_t* frameDir; // directory containing the frame files - wchar_t* outDir; // directory for generated files - bool checkEOF; // should coco generate a check for EOF at - // the end of Parser.Parse(): - bool emitLines; // emit line directives in generated parser + wchar_t* srcName; // name of the atg file (including path) + wchar_t* srcDir; // directory path of the atg file + wchar_t* nsName; // namespace for generated files + wchar_t* frameDir; // directory containing the frame files + wchar_t* outDir; // directory for generated files + bool checkEOF; // should coco generate a check for EOF at + // the end of Parser.Parse(): + bool emitLines; // emit line directives in generated parser - BitArray *visited; // mark list for graph traversals - Symbol *curSy; // current symbol in computation of sets + BitArray *visited; // mark list for graph traversals + Symbol *curSy; // current symbol in computation of sets - Parser *parser; // other Coco objects + 
Parser *parser; // other Coco objects FILE* trace; Errors *errors; - ArrayList *terminals; - ArrayList *pragmas; - ArrayList *nonterminals; + TArrayList terminals; + TArrayList pragmas; + TArrayList nonterminals; - ArrayList *nodes; + TArrayList nodes; static const char* nTyp[]; Node *dummyNode; - ArrayList *classes; + TArrayList classes; int dummyName; - Tab(Parser *parser); + ~Tab(); //--------------------------------------------------------------------- // Symbol list management @@ -98,41 +98,40 @@ class Tab { static const char* tKind[]; - Symbol* NewSym(int typ, const wchar_t* name, int line); + Symbol* NewSym(NodeType typ, const wchar_t* name, int line, int col); Symbol* FindSym(const wchar_t* name); - int Num(Node *p); - void PrintSym(Symbol *sym); + int Num(const Node *p); + void PrintSym(const Symbol *sym); void PrintSymbolTable(); - void PrintSet(BitArray *s, int indent); + void PrintSet(const BitArray *s, int indent); //--------------------------------------------------------------------- // Syntax graph management //--------------------------------------------------------------------- - Node* NewNode(int typ, Symbol *sym, int line); - Node* NewNode(int typ, Node* sub); - Node* NewNode(int typ, int val, int line); + Node* NewNode(NodeType typ, Symbol *sym, int line, int col); + Node* NewNode(NodeType typ, Node* sub); + Node* NewNode(NodeType typ, int val, int line, int col); void MakeFirstAlt(Graph *g); void MakeAlternative(Graph *g1, Graph *g2); void MakeSequence(Graph *g1, Graph *g2); void MakeIteration(Graph *g); void MakeOption(Graph *g); - void Finish(Graph *g); + void MakeRepetition(Graph *g, int rmin, int rmax); + void Finish(Graph *g); //set all 'next' from g->r to NULL void DeleteNodes(); Graph* StrToGraph(const wchar_t* str); void SetContextTrans(Node *p); // set transition code in the graph rooted at p //------------ graph deletability check ----------------- - bool DelGraph(Node* p); - bool DelSubGraph(Node* p); - bool DelNode(Node* p); + bool 
DelGraph(const Node* p); + bool DelSubGraph(const Node* p); + bool DelNode(const Node* p); //----------------- graph printing ---------------------- - int Ptr(Node *p, bool up); - wchar_t* Pos(Position *pos); - wchar_t* Name(const wchar_t* name); + int Ptr(const Node *p, bool up); void PrintNodes(); //--------------------------------------------------------------------- @@ -141,13 +140,12 @@ class Tab { CharClass* NewCharClass(const wchar_t* name, CharSet *s); CharClass* FindCharClass(const wchar_t* name); - CharClass* FindCharClass(CharSet *s); + CharClass* FindCharClass(const CharSet *s); CharSet* CharClassSet(int i); //----------- character class printing - wchar_t* Ch(const wchar_t ch); - void WriteCharSet(CharSet *s); + void WriteCharSet(const CharSet *s); void WriteCharClasses (); //--------------------------------------------------------------------- @@ -155,18 +153,18 @@ class Tab { //--------------------------------------------------------------------- /* Computes the first set for the given Node. 
*/ - BitArray* First0(Node *p, BitArray *mark); - BitArray* First(Node *p); + BitArray* First0(const Node *p, BitArray *mark); + BitArray* First(const Node *p); void CompFirstSets(); void CompFollow(Node *p); void Complete(Symbol *sym); void CompFollowSets(); - Node* LeadingAny(Node *p); - void FindAS(Node *p); // find ANY sets + const Node* LeadingAny(const Node *p); + void FindAS(const Node *p); // find ANY sets void CompAnySets(); - BitArray* Expected(Node *p, Symbol *curSy); + BitArray* Expected(const Node *p, const Symbol *curSy); // does not look behind resolvers; only called during LL(1) test and in CheckRes - BitArray* Expected0(Node *p, Symbol *curSy); + BitArray* Expected0(const Node *p, const Symbol *curSy); void CompSync(Node *p); void CompSyncSets(); void SetupAnys(); @@ -178,8 +176,7 @@ class Tab { // String handling //--------------------------------------------------------------------- - wchar_t Hex2Char(const wchar_t* s); - wchar_t* Char2Hex(const wchar_t ch); + int Hex2Char(const wchar_t* s, int len); wchar_t* Unescape(const wchar_t* s); wchar_t* Escape(const wchar_t* s); @@ -188,32 +185,34 @@ class Tab { //--------------------------------------------------------------------- bool GrammarOk(); + bool GrammarCheckAll(); //--------------- check for circular productions ---------------------- class CNode { // node of list for finding circular productions public: - Symbol *left, *right; + const Symbol *left, *right; - CNode (Symbol *l, Symbol *r) { + CNode (const Symbol *l, const Symbol *r) { left = l; right = r; } }; - void GetSingles(Node *p, ArrayList *singles); + void GetSingles(const Node *p, TArrayList &singles); bool NoCircularProductions(); //--------------- check for LL(1) errors ---------------------- - void LL1Error(int cond, Symbol *sym); - void CheckOverlap(BitArray *s1, BitArray *s2, int cond); - void CheckAlts(Node *p); + void LL1Error(int cond, const Symbol *sym); + int CheckOverlap(const BitArray *s1, const BitArray *s2, int cond); + 
void PrintFirstPath(const Node *p, int tok, const wchar_t *indent=_SC("\t")); + int CheckAlts(Node *p); void CheckLL1(); //------------- check if resolvers are legal -------------------- - void ResErr(Node *p, const wchar_t* msg); - void CheckRes(Node *p, bool rslvAllowed); + void ResErr(const Node *p, const wchar_t* msg); + void CheckRes(const Node *p, bool rslvAllowed); void CheckResolvers(); //------------- check if every nts has a production -------------------- @@ -222,12 +221,12 @@ class Tab { //-------------- check if every nts can be reached ----------------- - void MarkReachedNts(Node *p); + void MarkReachedNts(const Node *p); bool AllNtReached(); //--------- check if every nts can be derived to terminals ------------ - bool IsTerm(Node *p, BitArray *mark); // true if graph can be derived to terminals + bool IsTerm(const Node *p, const BitArray *mark); // true if graph can be derived to terminals bool AllNtToTerm(); //--------------------------------------------------------------------- @@ -237,6 +236,8 @@ class Tab { void XRef(); void SetDDT(const wchar_t* s); void SetOption(const wchar_t* s); +private: + void MakeOptIter(Graph *g, NodeType typ); }; diff --git a/src/Target.cpp b/src/Target.cpp index dcbeefe..363dacf 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -38,4 +38,8 @@ Target::Target(State *s) { state = s; } +Target::~Target() { + delete next; +} + }; // namespace diff --git a/src/Target.h b/src/Target.h index c54d4ca..4943e97 100644 --- a/src/Target.h +++ b/src/Target.h @@ -37,6 +37,7 @@ class Target // set of states that are reached by an action { public: Target (State *s); + ~Target(); State *state; // target state Target *next; diff --git a/src/Taste/CodeGenerator.h b/src/Taste/CodeGenerator.h new file mode 100644 index 0000000..0176338 --- /dev/null +++ b/src/Taste/CodeGenerator.h @@ -0,0 +1,244 @@ +#if !defined(TASTE_CODEGENERATOR_H__) +#define TASTE_CODEGENERATOR_H__ + +#include "Scanner.h" +#include +#include + +namespace Taste { + 
+class CodeGenerator +{ +public: + // opcodes + int + ADD, SUB, MUL, DIV, EQU, LSS, GTR, NEG, + LOAD, LOADG, STO, STOG, CONST, + CALL, RET, ENTER, LEAVE, + JMP, FJMP, READ, WRITE; + +#define OPCODE_SIZE 21 + wchar_t* opcode[OPCODE_SIZE]; + //memset(opcode, 0, OPCODE_SIZE * sizeof(wchar_t*)); + + int progStart; // address of first instruction of main program + int pc; // program counter + char *code; + + // data for Interpret + int *globals; + int *stack; + int top; // top of stack + int bp; // base pointer + + + CodeGenerator() { + // opcodes + ADD = 0; SUB = 1; MUL = 2; DIV = 3; EQU = 4; LSS = 5; GTR = 6; NEG = 7; + LOAD = 8; LOADG = 9; STO = 10; STOG = 11; CONST = 12; + CALL = 13; RET = 14; ENTER = 15; LEAVE = 16; + JMP = 17; FJMP = 18; READ = 19; WRITE = 20; + + opcode[ 0] = coco_string_create("ADD "); + opcode[ 1] = coco_string_create("SUB "); + opcode[ 2] = coco_string_create("MUL "); + opcode[ 3] = coco_string_create("DIV "); + opcode[ 4] = coco_string_create("EQU "); + opcode[ 5] = coco_string_create("LSS "); + opcode[ 6] = coco_string_create("GTR "); + opcode[ 7] = coco_string_create("NEG "); + opcode[ 8] = coco_string_create("LOAD "); + opcode[ 9] = coco_string_create("LOADG"); + opcode[10] = coco_string_create("STO "); + opcode[11] = coco_string_create("STOG "); + opcode[12] = coco_string_create("CONST"); + opcode[13] = coco_string_create("CALL "); + opcode[14] = coco_string_create("RET "); + opcode[15] = coco_string_create("ENTER"); + opcode[16] = coco_string_create("LEAVE"); + opcode[17] = coco_string_create("JMP "); + opcode[18] = coco_string_create("FJMP "); + opcode[19] = coco_string_create("READ "); + opcode[20] = coco_string_create("WRITE"); + +#define CODE_SIZE 3000 +#define GLOBALS_SIZE 100 + code = new char[CODE_SIZE]; + memset(code, 0, CODE_SIZE); + globals = new int[GLOBALS_SIZE]; + memset(globals, 0, GLOBALS_SIZE * sizeof(*globals)); + stack = new int[GLOBALS_SIZE]; + memset(stack, 0, GLOBALS_SIZE * sizeof(*stack)); + + progStart = 0; + + pc 
= 1; + } + + ~CodeGenerator() { + coco_string_delete(opcode[ 0]); + coco_string_delete(opcode[ 1]); + coco_string_delete(opcode[ 2]); + coco_string_delete(opcode[ 3]); + coco_string_delete(opcode[ 4]); + coco_string_delete(opcode[ 5]); + coco_string_delete(opcode[ 6]); + coco_string_delete(opcode[ 7]); + coco_string_delete(opcode[ 8]); + coco_string_delete(opcode[ 9]); + coco_string_delete(opcode[10]); + coco_string_delete(opcode[11]); + coco_string_delete(opcode[12]); + coco_string_delete(opcode[13]); + coco_string_delete(opcode[14]); + coco_string_delete(opcode[15]); + coco_string_delete(opcode[16]); + coco_string_delete(opcode[17]); + coco_string_delete(opcode[18]); + coco_string_delete(opcode[19]); + coco_string_delete(opcode[20]); + delete[] code; + delete[] globals; + delete[] stack; + } + + //----- code generation methods ----- + + void Emit (int op) { + //printf("Emit : %d\n", op); + code[pc++] = (char)op; + } + + void Emit (int op, int val) { + //printf("Emit : %d, %d\n", op, val); + Emit(op); Emit(val>>8); Emit(val); + } + + void Patch (int adr, int val) { + code[adr] = (char)(val>>8); code[adr+1] = (char)val; + } + + void Decode() { + int maxPc = pc; + pc = 1; + while (pc < maxPc) { + int code = Next(); + printf("%3d: %" _SFMT " ", pc-1, opcode[code]); + if (code == LOAD || code == LOADG || code == CONST || code == STO || code == STOG || + code == CALL || code == ENTER || code == JMP || code == FJMP) + printf("%d\n", Next2()); + else + if (code == ADD || code == SUB || code == MUL || code == DIV || code == NEG || + code == EQU || code == LSS || code == GTR || code == RET || code == LEAVE || + code == READ || code == WRITE) + printf("\n"); + } + } + + //----- interpreter methods ----- + + int Next () { + return code[pc++]; + } + + int Next2 () { + int x,y; + x = code[pc++]; y = code[pc++]; + return (x << 8) + y; + } + + int Int (bool b) { + if (b) return 1; else return 0; + } + + void Push (int val) { + //printf("Push : %d\n", top); + stack[top++] = val; 
+ } + + int Pop() { + //printf("Pop : %d\n", top); + return stack[--top]; + } + + int ReadInt(FILE* s) { + int sign; + char ch; + do {fscanf(s, "%c", &ch);} while (!((ch >= '0' && ch <= '9') || ch == '-')); + + if (ch == '-') {sign = -1; fscanf(s, "%c", &ch);} else sign = 1; + int n = 0; + while (ch >= '0' && ch <= '9') { + n = 10 * n + (ch - '0'); + if (fscanf(s, "%c", &ch) <= 0) + break; + } + return n * sign; + } + + void Interpret (const char* data) { + int val; + FILE* s; + if ((s = fopen(data, "r")) == NULL) { + printf("--- Error accessing file %s\n", (char*)data); + exit(1); + } + printf("\n"); + pc = progStart; stack[0] = 0; top = 1; bp = 0; + for (;;) { + int nxt = Next(); + if (nxt == CONST) + Push(Next2()); + else if (nxt == LOAD) + Push(stack[bp+Next2()]); + else if (nxt == LOADG) + Push(globals[Next2()]); + else if (nxt == STO) + stack[bp+Next2()] = Pop(); + else if (nxt == STOG) + globals[Next2()] = Pop(); + else if (nxt == ADD) + Push(Pop()+Pop()); + else if (nxt == SUB) + Push(-Pop()+Pop()); + else if (nxt == DIV) + {val = Pop(); Push(Pop()/val);} + else if (nxt == MUL) + Push(Pop()*Pop()); + else if (nxt == NEG) + Push(-Pop()); + else if (nxt == EQU) + Push(Int(Pop()==Pop())); + else if (nxt == LSS) + Push(Int(Pop()>Pop())); + else if (nxt == GTR) + Push(Int(Pop()errors; + topScope = NULL; + curLevel = -1; + undefObj = new Obj(); + undefObj->name = coco_string_create("undef"); undefObj->type = undef; undefObj->kind = var; + undefObj->adr = 0; undefObj->level = 0; undefObj->next = NULL; +} + +SymbolTable::~SymbolTable() { + delete undefObj; + delete topScope; +} + +void SymbolTable::Err(const wchar_t* msg) { + errors->Error(0, 0, msg); +} + + +// open a new scope and make it the current scope (topScope) +void SymbolTable::OpenScope () { + Obj *scop = new Obj(); + scop->name = coco_string_create(""); scop->kind = scope; + scop->locals = NULL; scop->nextAdr = 0; + scop->next = topScope; topScope = scop; + curLevel++; +} + + +// close the current scope 
+void SymbolTable::CloseScope () { + Obj *scop = topScope; + topScope = topScope->next; curLevel--; + scop->next = NULL; + delete scop; +} + +// create a new object node in the current scope +Obj* SymbolTable::NewObj (const wchar_t* name, int kind, int type) { + Obj *p, *last, *obj = new Obj(); + obj->name = coco_string_create(name); obj->kind = kind; obj->type = type; + obj->level = curLevel; + p = topScope->locals; last = NULL; + while (p != NULL) { + if (coco_string_equal(p->name, name)) Err(_SC("name declared twice")); + last = p; p = p->next; + } + if (last == NULL) topScope->locals = obj; else last->next = obj; + if (kind == var) obj->adr = topScope->nextAdr++; + return obj; +} + + +// search the name in all open scopes and return its object node +Obj* SymbolTable::Find (const wchar_t* name) { + Obj *obj, *scope; + scope = topScope; + while (scope != NULL) { // for all open scopes + obj = scope->locals; + while (obj != NULL) { // for all objects in this scope + if (coco_string_equal(obj->name, name)) return obj; + obj = obj->next; + } + scope = scope->next; + } + wchar_t str[100]; + coco_swprintf(str, 100, _SC("%") _SFMT _SC(" is undeclared"), name); + Err(str); + return undefObj; +} + +}; // namespace diff --git a/src/Taste/SymbolTable.h b/src/Taste/SymbolTable.h new file mode 100644 index 0000000..b08d826 --- /dev/null +++ b/src/Taste/SymbolTable.h @@ -0,0 +1,78 @@ +#if !defined(TASTE_SYMBOLTABLE_H__) +#define TASTE_SYMBOLTABLE_H__ + +#include "Scanner.h" + +namespace Taste { + +class Parser; +class Errors; + +class Obj { // object describing a declared name +public: + wchar_t* name; // name of the object + int type; // type of the object (undef for proc) + Obj *next; // to next object in same scope + int kind; // var, proc, scope + int adr; // address in memory or start of proc + int level; // nesting level; 0=global, 1=local + Obj *locals; // scopes: to locally declared objects + int nextAdr; // scopes: next free address in this scope + + Obj() { + name = 
NULL; + type = 0; + next = NULL; + kind = 0; + adr = 0; + level = 0; + locals = NULL; + nextAdr = 0; + } + + ~Obj() { + coco_string_delete(name); + delete locals; + delete next; + } + + +}; + +class SymbolTable +{ +public: + const int // types + undef, integer, boolean; + + const int // object kinds + var, proc, scope; + + + int curLevel; // nesting level of current scope + Obj *undefObj; // object node for erroneous symbols + Obj *topScope; // topmost procedure scope + + Errors *errors; + + SymbolTable(Parser *parser); + ~SymbolTable(); + void Err(const wchar_t* msg); + + // open a new scope and make it the current scope (topScope) + void OpenScope (); + + // close the current scope + void CloseScope (); + + // create a new object node in the current scope + Obj* NewObj (const wchar_t* name, int kind, int type); + + // search the name in all open scopes and return its object node + Obj* Find (const wchar_t* name); + +}; + +}; // namespace + +#endif // !defined(TASTE_SYMBOLTABLE_H__) diff --git a/src/Taste/Taste.IN b/src/Taste/Taste.IN new file mode 100644 index 0000000..4ae0db3 --- /dev/null +++ b/src/Taste/Taste.IN @@ -0,0 +1 @@ +3 5 100 0 \ No newline at end of file diff --git a/src/Taste/Taste.atg b/src/Taste/Taste.atg new file mode 100644 index 0000000..2b671b8 --- /dev/null +++ b/src/Taste/Taste.atg @@ -0,0 +1,203 @@ +#include "SymbolTable.h" +#include "CodeGenerator.h" + +$namespace=Taste + +COMPILER Taste + + + int // operators + plus, minus, times, slash, equ, lss, gtr; + + int // types + undef, integer, boolean; + + int // object kinds + var, proc; + + int // opcodes + ADD, SUB, MUL, DIV, EQU, LSS, GTR, NEG, + LOAD, LOADG, STO, STOG, CONST, + CALL, RET, ENTER, LEAVE, + JMP, FJMP, READ, WRITE; + + SymbolTable *tab; + CodeGenerator *gen; + + void Err(const wchar_t* msg) { + errors->Error(la->line, la->col, msg); + } + + void InitDeclarations() { // it must exist + plus = 0; minus = 1; times = 2; slash = 3; equ = 4; lss = 5; gtr = 6; // operators + undef = 
0; integer = 1; boolean = 2; // types + var = 0; proc = 1; // object kinds + + // opcodes + ADD = 0; SUB = 1; MUL = 2; DIV = 3; EQU = 4; LSS = 5; GTR = 6; NEG = 7; + LOAD = 8; LOADG = 9; STO = 10; STOG = 11; CONST = 12; + CALL = 13; RET = 14; ENTER = 15; LEAVE = 16; + JMP = 17; FJMP = 18; READ = 19; WRITE = 20; + } + + + +/*--------------------------------------------------------------------------*/ +CHARACTERS + letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz". + digit = "0123456789". + cr = '\r'. + lf = '\n'. + tab = '\t'. + +TOKENS + ident = letter {letter | digit}. + number = digit {digit}. + +COMMENTS FROM "/*" TO "*/" NESTED +COMMENTS FROM "//" TO lf + +IGNORE cr + lf + tab + + + +PRODUCTIONS +/*------------------------------------------------------------------------*/ +AddOp += (. op = -1; .) + ( '+' (. op = plus; .) + | '-' (. op = minus; .) + ). +/*------------------------------------------------------------------------*/ +Expr (. int type1, op; .) += SimExpr + [ RelOp + SimExpr (. if (type != type1) Err(_SC("incompatible types")); + gen->Emit(op); type = boolean; .) + ]. +/*------------------------------------------------------------------------*/ +Factor (. int n; Obj *obj; wchar_t* name; .) += (. type = undef; .) + ( Ident (. obj = tab->Find(name); coco_string_delete(name); type = obj->type; + if (obj->kind == var) { + if (obj->level == 0) gen->Emit(LOADG, obj->adr); + else gen->Emit(LOAD, obj->adr); + } else Err(_SC("variable expected")); .) + | number (. swscanf(t->val, _SC("%d"), &n); //n = Convert.ToInt32(t->val); + gen->Emit(CONST, n); type = integer; .) + | '-' + Factor (. if (type != integer) { + Err(_SC("integer type expected")); type = integer; + } + gen->Emit(NEG); .) + | "true" (. gen->Emit(CONST, 1); type = boolean; .) + | "false" (. gen->Emit(CONST, 0); type = boolean; .) + ). +/*------------------------------------------------------------------------*/ +Ident += ident (. name = coco_string_create(t->val); .). 
+/*------------------------------------------------------------------------*/ +MulOp += (. op = -1; .) + ( '*' (. op = times; .) + | '/' (. op = slash; .) + ). +/*------------------------------------------------------------------------*/ +ProcDecl (. wchar_t* name; Obj *obj; int adr; .) += "void" + Ident (. obj = tab->NewObj(name, proc, undef); obj->adr = gen->pc; + if (coco_string_equal(name, _SC("Main"))) gen->progStart = gen->pc; + tab->OpenScope(); coco_string_delete(name); .) + '(' ')' + '{' (. gen->Emit(ENTER, 0); adr = gen->pc - 2; .) + { VarDecl | Stat } + '}' (. gen->Emit(LEAVE); gen->Emit(RET); + gen->Patch(adr, tab->topScope->nextAdr); + tab->CloseScope(); .). +/*------------------------------------------------------------------------*/ +RelOp += (. op = -1; .) + ( "==" (. op = equ; .) + | '<' (. op = lss; .) + | '>' (. op = gtr; .) + ). +/*------------------------------------------------------------------------*/ +SimExpr (. int type1, op; .) += Term + { AddOp + Term (. if (type != integer || type1 != integer) + Err(_SC("integer type expected")); + gen->Emit(op); .) + }. +/*------------------------------------------------------------------------*/ +Stat (. int type; wchar_t* name; Obj *obj; + int adr, adr2, loopstart; .) += Ident (. obj = tab->Find(name); coco_string_delete(name); .) + ( '=' (. if (obj->kind != var) Err(_SC("cannot assign to procedure")); .) + Expr ';' + (. if (type != obj->type) Err(_SC("incompatible types")); + if (obj->level == 0) gen->Emit(STOG, obj->adr); + else gen->Emit(STO, obj->adr); .) + | '(' ')' ';' (. if (obj->kind != proc) Err(_SC("object is not a procedure")); + gen->Emit(CALL, obj->adr); .) + ) + +| "if" + '(' Expr ')' (. if (type != boolean) Err(_SC("boolean type expected")); + gen->Emit(FJMP, 0); adr = gen->pc - 2; .) + Stat + [ "else" (. gen->Emit(JMP, 0); adr2 = gen->pc - 2; + gen->Patch(adr, gen->pc); + adr = adr2; .) + Stat + ] (. gen->Patch(adr, gen->pc); .) + +| "while" (. loopstart = gen->pc; .) 
+ '(' Expr ')' (. if (type != boolean) Err(_SC("boolean type expected")); + gen->Emit(FJMP, 0); adr = gen->pc - 2; .) + Stat (. gen->Emit(JMP, loopstart); gen->Patch(adr, gen->pc); .) + +| "read" + Ident ';' (. obj = tab->Find(name); coco_string_delete(name); + if (obj->type != integer) Err(_SC("integer type expected")); + gen->Emit(READ); + if (obj->level == 0) gen->Emit(STOG, obj->adr); + else gen->Emit(STO, obj->adr); .) + +| "write" + Expr ';' (. if (type != integer) Err(_SC("integer type expected")); + gen->Emit(WRITE); .) + +| '{' { Stat | VarDecl } '}' . +/*------------------------------------------------------------------------*/ +Taste (. wchar_t* name; + InitDeclarations(); .) += "program" + Ident (. coco_string_delete(name); tab->OpenScope(); .) + '{' + { VarDecl } + { ProcDecl } + '}' (. tab->CloseScope(); .). +/*------------------------------------------------------------------------*/ +Term (. int type1, op; .) += Factor + { MulOp + Factor (. if (type != integer || type1 != integer) + Err(_SC("integer type expected")); + gen->Emit(op); + .) + }. +/*------------------------------------------------------------------------*/ +Type += (. type = undef; .) + ( "int" (. type = integer; .) + | "bool" (. type = boolean; .) + ). +/*------------------------------------------------------------------------*/ +VarDecl (. wchar_t* name; int type; .) += Type + Ident (. tab->NewObj(name, var, type); coco_string_delete(name); .) + { ',' Ident (. tab->NewObj(name, var, type); coco_string_delete(name); .) + } ';'. + +END Taste. 
diff --git a/src/Taste/Taste.cpp b/src/Taste/Taste.cpp new file mode 100644 index 0000000..ff36370 --- /dev/null +++ b/src/Taste/Taste.cpp @@ -0,0 +1,34 @@ +#include + +#include "SymbolTable.h" + +#include "Scanner.h" +#include "Parser.h" + +using namespace Taste; + +int main (int argc, char *argv[]) { + + if (argc == 2) { + wchar_t *fileName = coco_string_create(argv[1]); + Taste::Scanner *scanner = new Taste::Scanner(fileName); + Taste::Parser *parser = new Taste::Parser(scanner); + parser->tab = new Taste::SymbolTable(parser); + parser->gen = new Taste::CodeGenerator(); + parser->Parse(); + if (parser->errors->count == 0) { + parser->gen->Decode(); + parser->gen->Interpret("Taste.IN"); + } + + coco_string_delete(fileName); + delete parser->gen; + delete parser->tab; + delete parser; + delete scanner; + } else + printf("-- No source file specified\n"); + + return 0; + +} diff --git a/src/Taste/Test.TAS b/src/Taste/Test.TAS new file mode 100644 index 0000000..fcedcfd --- /dev/null +++ b/src/Taste/Test.TAS @@ -0,0 +1,31 @@ + +// This is a test program which can be compiled by the Taste-compiler. +// It reads a sequence of numbers and computes the sum of all integers +// up to these numbers. + +program Test { + int i; + + void Foo() { + int a, b, max; + read a; read b; + if (a > b) max = a; else max = b; + write max; + } + + void SumUp() { + int sum; + sum = 0; + while (i > 0) { sum = sum + i; i = i - 1; } + write sum; + } + + void Main() { + read i; + while (i > 0) { + SumUp(); + read i; + } + } +} + diff --git a/src/Taste/build.sh b/src/Taste/build.sh new file mode 100755 index 0000000..8d7be51 --- /dev/null +++ b/src/Taste/build.sh @@ -0,0 +1,4 @@ +../Coco -frames .. 
Taste.atg +make +#myvalgrind --leak-check=full +./Taste Test.TAS diff --git a/src/TestSuite/TestAlts.ATG b/src/TestSuite/TestAlts.ATG new file mode 100644 index 0000000..1b7f47c --- /dev/null +++ b/src/TestSuite/TestAlts.ATG @@ -0,0 +1,21 @@ +$01246 +/*------------------------------------------------------------------------- +Test of alternatives in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = a A b. +A = (a | (b|c|) | d) e. + + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestAlts_Output.txt b/src/TestSuite/TestAlts_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestAlts_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAlts_Parser.cpp b/src/TestSuite/TestAlts_Parser.cpp new file mode 100644 index 0000000..c9a89c2 --- /dev/null +++ b/src/TestSuite/TestAlts_Parser.cpp @@ -0,0 +1,475 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; 
+} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + A_NT(); + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b) || IsKind(la, _c) || IsKind(la, _e)) { + if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _c)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + } + } else if (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid A"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestAlts_Scanner.cpp b/src/TestSuite/TestAlts_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestAlts_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) {
+    return wcscmp(data1, data2);
+}
+
+// Hash of a NUL-terminated string: h = (h * 7) ^ ch for every character.
+// Returns 0 for a NULL pointer.
+unsigned int coco_string_hash(const wchar_t *data) {
+    unsigned int h = 0;
+    if (!data) { return 0; }
+    while (*data != 0) {
+        h = (h * 7) ^ *data;
+        ++data;
+    }
+    return h;
+}
+
+// Same hash as above, computed over exactly `size` characters of `data`
+// (no terminator check). Returns 0 for a NULL pointer.
+unsigned int coco_string_hash(const wchar_t *data, size_t size) {
+    unsigned int h = 0;
+    if (!data) { return 0; }
+    for (size_t i=0; i < size; ++i) {
+        h = (h * 7) ^ data[i];
+    }
+    return h;
+}
+
+#ifndef WITHOUT_WCHAR
+// string handling, ascii character
+
+// Widens a narrow string character by character into a new[]-allocated
+// wide string; a NULL input yields an empty string.
+wchar_t* coco_string_create(const char* value) {
+    int len = 0;
+    if (value) { len = strlen(value); }
+    wchar_t* data = new wchar_t[len + 1];
+    for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; }
+    data[len] = 0;
+    return data;
+}
+
+// Releases a new[]-allocated narrow string and resets the pointer to NULL.
+void coco_string_delete(char* &data) {
+    delete [] data;
+    data = NULL;
+}
+#endif
+
+// Narrows a string character by character (plain cast, no encoding
+// conversion) into a new[]-allocated char array.
+char* coco_string_create_char(const wchar_t *value) {
+    int len = coco_string_length(value);
+    char *res = new char[len + 1];
+    for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; }
+    res[len] = 0;
+    return res;
+}
+
+// Creates an empty token: all position fields zero, no value, no successor.
+Token::Token() {
+    kind = 0;
+    pos = 0;
+    col = 0;
+    line = 0;
+    charPos = 0; // the scanner assigns and reads t->charPos, so never leave it indeterminate
+    val = NULL;
+    next = NULL;
+}
+
+// Returns a new heap-allocated copy of this token. The value string is
+// duplicated, so the copy owns its own `val`; `next` is copied as-is.
+Token *Token::Clone() {
+    Token *tk = new Token();
+    tk->kind = kind;
+    tk->pos = pos;
+    tk->col = col;
+    tk->line = line;
+    tk->charPos = charPos; // keep the character position together with pos/col/line
+    tk->val = coco_string_create(val);
+    tk->next = next;
+    return tk;
+}
+
+// Releases the token's value string.
+Token::~Token() {
+    coco_string_delete(val);
+}
+
+Buffer::Buffer(FILE* s, bool isUserStream) {
+// ensure binary read on windows
+#if _MSC_VER >= 1300
+    _setmode(_fileno(s), _O_BINARY);
+#endif
+    stream = s; this->isUserStream = isUserStream;
+    if (CanSeek()) {
+        fseek(s, 0, SEEK_END);
+        fileLen = ftell(s);
+        fseek(s, 0, SEEK_SET);
+        bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH;
+        bufStart = INT_MAX; // nothing in the buffer so far
+    } else {
+        fileLen = bufLen = bufStart = 0;
+    }
+    bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH;
+    buf = new unsigned char[bufCapacity];
+    if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start)
+    else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
+    if (bufLen == fileLen && CanSeek()) Close();
+}
+
+// Takes over the byte buffer and the stream of `b`; `b` is left with
+// NULL buf/stream so that destroying it releases nothing.
+Buffer::Buffer(Buffer *b) {
+    buf = b->buf;
+    bufCapacity = b->bufCapacity;
+    b->buf = NULL;
+    bufStart = b->bufStart;
+    bufLen = b->bufLen;
+    fileLen = b->fileLen;
+    bufPos = b->bufPos;
+    stream = b->stream;
+    b->stream = NULL;
+    isUserStream = b->isUserStream;
+}
+
+// Creates a buffer over a private copy of `len` bytes; no stream is attached.
+Buffer::Buffer(const unsigned char* buf, int len) {
+    this->buf = new unsigned char[len];
+    memcpy(this->buf, buf, len*sizeof(unsigned char));
+    bufStart = 0;
+    bufCapacity = bufLen = len;
+    fileLen = len;
+    bufPos = 0;
+    stream = NULL;
+    isUserStream = false; // Close() reads this flag from the destructor; give it a defined value
+}
+
+Buffer::~Buffer() {
+    Close();
+    if (buf != NULL) {
+        delete [] buf;
+        buf = NULL;
+    }
+}
+
+// Closes the stream unless it was supplied by the user (isUserStream).
+void Buffer::Close() {
+    if (!isUserStream && stream != NULL) {
+        fclose(stream);
+        stream = NULL;
+    }
+}
+
+// Returns the next byte and advances the position, refilling the buffer
+// when needed; returns EoF when no more input is available.
+int Buffer::Read() {
+    if (bufPos < bufLen) {
+        return buf[bufPos++];
+    } else if (GetPos() < fileLen) {
+        SetPos(GetPos()); // shift buffer start to Pos
+        return buf[bufPos++];
+    } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
+        return buf[bufPos++];
+    } else {
+        return EoF;
+    }
+}
+
+// Returns the next character without consuming it (position is restored).
+int Buffer::Peek() {
+    int curPos = GetPos();
+    int ch = Read();
+    SetPos(curPos);
+    return ch;
+}
+
+// beg .. begin, zero-based, inclusive, in byte
+// end .. end, zero-based, exclusive, in byte
+// Returns a new[]-allocated, NUL-terminated string holding the characters
+// read from [beg, end); the read position is restored afterwards.
+wchar_t* Buffer::GetString(int beg, int end) {
+    int len = 0;
+    // One extra element for the terminator: up to (end - beg) characters
+    // are stored below, and buf[len] = 0 is written after them.
+    wchar_t *buf = new wchar_t[end - beg + 1];
+    int oldPos = GetPos();
+    SetPos(beg);
+    while (GetPos() < end) buf[len++] = (wchar_t) Read();
+    SetPos(oldPos);
+    buf[len] = 0;
+    return buf;
+}
+
+// Absolute position: offset inside the buffer plus the buffer's start.
+int Buffer::GetPos() {
+    return bufPos + bufStart;
+}
+
+void Buffer::SetPos(int value) {
+    if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
+        // Wanted position is after buffer and the stream
+        // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+        bufCapacity = bufLen * 2;
+        unsigned char *newBuf = new unsigned char[bufCapacity];
+        memcpy(newBuf, buf, bufLen*sizeof(unsigned char));
+        delete [] buf;
+        buf = newBuf;
+        free = bufLen;
+    }
+    int read = fread(buf + bufLen, sizeof(unsigned char), free, stream);
+    if (read > 0) {
+        fileLen = bufLen = (bufLen + read);
+        return read;
+    }
+    // end of stream reached
+    return 0;
+}
+
+// True when a stream is attached and ftell() succeeds on it.
+bool Buffer::CanSeek() {
+    return (stream != NULL) && (ftell(stream) != -1);
+}
+
+// Reads one UTF-8 encoded character (1 to 4 bytes) and returns its code
+// point; bytes that are not a sequence start are skipped first.
+int UTF8Buffer::Read() {
+    int ch;
+    do {
+        ch = Buffer::Read();
+        // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
+    } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF));
+    if (ch < 128 || ch == EoF) {
+        // nothing to do, first 127 chars are the same in ascii and utf8
+        // 0xxxxxxx or end of file character
+    } else if ((ch & 0xF0) == 0xF0) {
+        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        int c1 = ch & 0x07; ch = Buffer::Read();
+        int c2 = ch & 0x3F; ch = Buffer::Read();
+        int c3 = ch & 0x3F; ch = Buffer::Read();
+        int c4 = ch & 0x3F;
+        ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
+    } else if ((ch & 0xE0) == 0xE0) {
+        // 1110xxxx 10xxxxxx 10xxxxxx
+        int c1 = ch & 0x0F; ch = Buffer::Read();
+        int c2 = ch & 0x3F; ch = Buffer::Read();
+        int c3 = ch & 0x3F;
+        ch = (((c1 << 6) | c2) << 6) | c3;
+    } else if ((ch & 0xC0) == 0xC0) {
+        // 110xxxxx 10xxxxxx
+        int c1 = ch & 0x1F; ch = Buffer::Read();
+        int c2 = ch & 0x3F;
+        ch = (c1 << 6) | c2;
+    }
+    return ch;
+}
+
+// Scans an in-memory byte array (the Buffer makes its own copy).
+Scanner::Scanner(const unsigned char* buf, int len) {
+    buffer = new Buffer(buf, len);
+    parseFileName = NULL;
+    Init();
+}
+
+// Opens `fileName` for binary reading and scans it. Prints a message and
+// exits with status 1 when the file cannot be opened.
+Scanner::Scanner(const wchar_t* fileName) {
+    FILE* stream;
+    parseFileName = coco_string_create_char(fileName);
+    if ((stream = fopen(parseFileName, "rb")) == NULL) {
+        // "%" _SFMT is the specifier this file uses for wchar_t* arguments,
+        // so print fileName rather than the narrow parseFileName copy.
+        wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName);
+        exit(1);
+    }
+    buffer = new Buffer(stream, false);
+    Init();
+}
+
+// Scans a stream supplied by the caller; the Buffer is created with
+// isUserStream = true, so Buffer::Close() will not fclose it.
+Scanner::Scanner(FILE* s) {
+    buffer = new Buffer(s, true);
+    parseFileName = NULL;
+    Init();
+}
+
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAlts_Trace.txt b/src/TestSuite/TestAlts_Trace.txt new file mode 100644 index 0000000..ff9d720 --- /dev/null +++ b/src/TestSuite/TestAlts_Trace.txt @@ -0,0 +1,78 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 2 17 + 2 nt A 3 17 + 3 t b 0 17 + 4 t a -15 18 + 5 t b -15 18 + 6 t c -15 18 + 7 alt -15 8 5 18 + 8 alt -15 10 6 18 + 9 eps -15 0 + 10 alt -15 0 9 0 + 11 alt 15 12 4 18 + 12 alt -15 14 7 18 + 13 t d -15 18 + 14 alt -15 0 13 18 + 15 t e 0 18 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a b c d e +follow: b + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t 
false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 1 false 17 fixedToken + 1 A nt false 11 false 18 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestAlts_output.txt b/src/TestSuite/TestAlts_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestAlts_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny.ATG b/src/TestSuite/TestAny.ATG new file mode 100644 index 0000000..a88f39f --- /dev/null +++ b/src/TestSuite/TestAny.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of ANY symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C D. +A = {a | ANY | b} c. +B = {ANY d} e. +C = [ANY f] g. +D = {ANY} h | i. + +END Test. diff --git a/src/TestSuite/TestAny1.ATG b/src/TestSuite/TestAny1.ATG new file mode 100644 index 0000000..43e3379 --- /dev/null +++ b/src/TestSuite/TestAny1.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of ANY symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C D. +A = (ANY |) ANY. +B = ANY | ANY. +C = {ANY} ANY. +D = [ANY] ANY. + +END Test. 
diff --git a/src/TestSuite/TestAny1_Output.txt b/src/TestSuite/TestAny1_Output.txt new file mode 100644 index 0000000..acd511c --- /dev/null +++ b/src/TestSuite/TestAny1_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +checking + LL1 warning in A:21:0: an ANY node that matches no symbol + LL1 warning in B:22:0: an ANY node that matches no symbol + LL1 warning in C:23:0: an ANY node that matches no symbol + LL1 warning in D:24:0: an ANY node that matches no symbol +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny1_Parser.cpp b/src/TestSuite/TestAny1_Parser.cpp new file mode 100644 index 0000000..e739ede --- /dev/null +++ b/src/TestSuite/TestAny1_Parser.cpp @@ -0,0 +1,496 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. 
+ +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) 
{Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + B_NT(); + C_NT(); + D_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (false) { + SynErr(11); // ANY node that matches no symbol + } else if (StartOf(1 /* eps */)) { + } else SynErr(12); + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + if (false) { + SynErr(13); // ANY node that matches no symbol + } else if (StartOf(1 /* any */)) { + Get(); + } else SynErr(14); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + while (false) { + SynErr(15); // ANY node that matches no symbol + } + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (false) { + SynErr(16); // ANY node that matches no symbol + } + Get(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. 
+// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[2][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {x,T,T,T, T,T,T,T, T,T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + 
case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid A"); break; + case 13: s = _SC("invalid B"); break; + case 14: s = _SC("invalid B"); break; + case 15: s = _SC("invalid C"); break; + case 16: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestAny1_Scanner.cpp b/src/TestSuite/TestAny1_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestAny1_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAny1_Trace.txt b/src/TestSuite/TestAny1_Trace.txt new file mode 100644 index 0000000..139ea8d --- /dev/null +++ b/src/TestSuite/TestAny1_Trace.txt @@ -0,0 +1,117 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 any -9 0 + 6 eps -9 0 + 7 alt 9 8 5 0 + 8 alt -9 0 6 0 + 9 any 0 0 + 10 any 0 0 + 11 any 0 0 + 12 alt 0 13 10 0 + 13 alt 0 0 11 0 + 14 any -15 0 + 15 iter 16 0 14 0 + 16 any 0 0 + 17 any -19 0 + 18 opt 19 0 17 0 + 19 any 0 0 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g h i ??? +follow: EOF + +A +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +B +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? 
+ +C +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +D +first: a b c d e f g h i ??? +follow: EOF + + +ANY and SYNC sets: +----------------- + 5 any -- empty set -- + 9 any a b c d e f g h i ??? + 10 any -- empty set -- + 11 any a b c d e f g h i ??? + 14 any -- empty set -- + 16 any a b c d e f g h i ??? + 17 any -- empty set -- + 19 any a b c d e f g h i ??? + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 7 false 21 fixedToken + 2 B nt false 12 false 22 fixedToken + 3 C nt false 15 false 23 fixedToken + 4 D nt false 18 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestAny1_output.txt b/src/TestSuite/TestAny1_output.txt new file mode 100644 index 0000000..55fd063 --- /dev/null +++ b/src/TestSuite/TestAny1_output.txt @@ -0,0 +1,9 @@ +Coco/R (Sep 6, 2007) +checking + LL1 warning in A: an ANY node that matches no symbol + LL1 warning in B: an ANY node that matches no symbol + LL1 warning in C: an ANY node that matches no symbol + LL1 warning in D: an ANY node that matches no symbol +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny_Output.txt b/src/TestSuite/TestAny_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestAny_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestAny_Parser.cpp b/src/TestSuite/TestAny_Parser.cpp new file mode 100644 index 0000000..8c63422 --- /dev/null +++ b/src/TestSuite/TestAny_Parser.cpp @@ -0,0 +1,533 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + 
return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + B_NT(); + C_NT(); + D_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + while (StartOf(1 /* alt */)) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (StartOf(2 /* any */)) { + Get(); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (StartOf(3 /* any */)) { + Get(); + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (StartOf(4 /* any */)) { + Get(); + Expect(_f); +#ifdef PARSER_WITH_AST + 
AstAddTerminal(); +#endif + } + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (StartOf(5 /* iter */)) { + while (StartOf(6 /* any */)) { + Get(); + } + Expect(_h); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _i)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[7][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {x,T,T,x, T,T,T,T, T,T,T,x}, + {x,x,x,x, T,T,T,T, T,T,T,x}, + {x,T,T,T, T,x,T,T, T,T,T,x}, + {x,T,T,T, T,T,T,x, T,T,T,x}, + {x,T,T,T, T,T,T,T, T,x,T,x}, + {x,T,T,T, T,T,T,T, x,x,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t 
format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestAny_Scanner.cpp b/src/TestSuite/TestAny_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestAny_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestAny_Trace.txt b/src/TestSuite/TestAny_Trace.txt new file mode 100644 index 0000000..bceba58 --- /dev/null +++ b/src/TestSuite/TestAny_Trace.txt @@ -0,0 +1,120 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 t a -11 21 + 6 any -11 0 + 7 alt -11 8 5 21 + 8 alt -11 10 6 0 + 9 t b -11 21 + 10 alt -11 0 9 21 + 11 iter 12 0 7 21 + 12 t c 0 21 + 13 any 14 0 + 14 t d -15 22 + 15 iter 16 0 13 0 + 16 t e 0 22 + 17 any 18 0 + 18 t f -20 23 + 19 opt 20 0 17 0 + 20 t g 0 23 + 21 any -22 0 + 22 iter 23 0 21 0 + 23 t h 0 24 + 24 t i 0 24 + 25 alt 0 26 22 0 + 26 alt 0 0 24 24 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g h i ??? 
+follow: EOF + +A +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +B +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +C +first: a b c d e f g h i ??? +follow: a b c d e f g h i ??? + +D +first: a b c d e f g h i ??? +follow: EOF + + +ANY and SYNC sets: +----------------- + 6 any d e f g h i ??? + 13 any a b c d f g h i ??? + 17 any a b c d e f h i ??? + 21 any a b c d e f g ??? + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 11 false 21 fixedToken + 2 B nt false 15 false 22 fixedToken + 3 C nt false 19 false 23 fixedToken + 4 D nt false 25 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestAny_output.txt b/src/TestSuite/TestAny_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestAny_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCasing.ATG b/src/TestSuite/TestCasing.ATG new file mode 100644 index 0000000..7d66196 --- /dev/null +++ b/src/TestSuite/TestCasing.ATG @@ -0,0 +1,23 @@ +$01246 +/*------------------------------------------------------------------------- +Test of case insensitive scanners. +----------------------------------------------------------------------------*/ +COMPILER Test +IGNORECASE + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + suffix = 'F'. + +TOKENS + ident = letter {letter}. + float1 = digit {digit} suffix. + float2 = digit {digit} 'E' digit {digit}. + A = "AAA". + +PRODUCTIONS + +Test = A "AAA" "AAa" "AaA" "Aaa" "aAA" "aAa" "aaA" "aaa" "BBb" "bbB". + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestCasing_Output.txt b/src/TestSuite/TestCasing_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestCasing_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCasing_Parser.cpp b/src/TestSuite/TestCasing_Parser.cpp new file mode 100644 index 0000000..b92c10f --- /dev/null +++ b/src/TestSuite/TestCasing_Parser.cpp @@ -0,0 +1,471 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[7] = { + -1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); 
+ } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_A); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(5 /* "bbb" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(5 /* "bbb" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 

// Compile-time detector for a user supplied `void T::Init()` member.
//
// ParserInitExistsRecognizer<T>::InitExists is 1 when `&T::Init` is a valid
// pointer of type `void (T::*)()`, and 0 otherwise. The decision is made by
// overload resolution on is_here<T>(NULL); nothing is ever called at runtime.
template<typename T>
struct ParserInitExistsRecognizer {
	// Only instantiable when U::Init exists with signature void().
	template<typename U, void (U::*)() = &U::Init>
	struct ExistsIfInitIsDefinedMarker{};

	// The two result types differ in size so that sizeof can tell them apart.
	struct InitIsMissingType {
		char dummy1;
	};

	struct InitExistsType {
		char dummy1; char dummy2;
	};

	// exists always (lowest priority: matches through the ellipsis)
	template<typename U>
	static InitIsMissingType is_here(...);

	// exist only if ExistsIfInitIsDefinedMarker is defined
	// (preferred over the ellipsis overload when substitution succeeds)
	template<typename U>
	static InitExistsType is_here(ExistsIfInitIsDefinedMarker<U>*);

	enum { InitExists = (sizeof(is_here<T>(NULL)) == sizeof(InitExistsType)) };
};

// Compile-time detector for a user supplied `void T::Destroy()` member.
//
// Same technique as ParserInitExistsRecognizer: DestroyExists is 1 when
// `&T::Destroy` is a valid pointer of type `void (T::*)()`, and 0 otherwise.
template<typename T>
struct ParserDestroyExistsRecognizer {
	// Only instantiable when U::Destroy exists with signature void().
	template<typename U, void (U::*)() = &U::Destroy>
	struct ExistsIfDestroyIsDefinedMarker{};

	// The two result types differ in size so that sizeof can tell them apart.
	struct DestroyIsMissingType {
		char dummy1;
	};

	struct DestroyExistsType {
		char dummy1; char dummy2;
	};

	// exists always (lowest priority: matches through the ellipsis)
	template<typename U>
	static DestroyIsMissingType is_here(...);

	// exist only if ExistsIfDestroyIsDefinedMarker is defined
	// (preferred over the ellipsis overload when substitution succeeds)
	template<typename U>
	static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker<U>*);

	enum { DestroyExists = (sizeof(is_here<T>(NULL)) == sizeof(DestroyExistsType)) };
};

// The following templates are used to call the Init and Destroy methods if they exist.
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 6; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][8] = { + {T,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("float1 expected"); break; + case 3: s = _SC("float2 expected"); 
break; + case 4: s = _SC("A expected"); break; + case 5: s = _SC("\"bbb\" expected"); break; + case 6: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestCasing_Scanner.cpp b/src/TestSuite/TestCasing_Scanner.cpp new file mode 100644 index 0000000..27dc416 --- /dev/null +++ b/src/TestSuite/TestCasing_Scanner.cpp @@ -0,0 +1,695 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 6; + noSym = 6; + int i; + for (i = 97; i <= 122; ++i) start.set(i, 1); + for (i = 48; i <= 57; ++i) start.set(i, 5); + start.set(Buffer::EoF, -1); + keywords.set(_SC("aaa"), 4); + keywords.set(_SC("bbb"), 5); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + valCh = ch; + if ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower() +} + +void 
Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = valCh; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // 
NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, true); break;} + case 2: + case_2: + {t->kind = 2 /* float1 */; break;} + case 3: + case_3: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else {goto case_0;} + case 4: + case_4: + recEnd = pos; recKind = 3 /* float2 */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else {t->kind = 3 /* float2 */; break;} + case 5: + case_5: + if (ch == _SC('f')) {AddCh(); goto case_2;} + else if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_5;} + else if (ch == _SC('e')) {AddCh(); goto case_3;} + else {goto case_0;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestCasing_Trace.txt b/src/TestSuite/TestCasing_Trace.txt new file mode 100644 index 0000000..4f9c2b7 --- /dev/null +++ b/src/TestSuite/TestCasing_Trace.txt @@ -0,0 +1,63 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t A 2 21 + 2 t A 3 21 + 3 t A 4 21 + 4 t A 5 21 + 5 t 
A 6 21 + 6 t A 7 21 + 7 t A 8 21 + 8 t A 9 21 + 9 t A 10 21 + 10 t "bbb" 11 21 + 11 t "bbb" 0 21 + + +First & follow symbols: +---------------------- + +Test +first: A +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 + digit 5 +E(ident ) 1: letter 1 +E(float1 ) 2: + 3: digit 4 +E(float2 ) 4: digit 4 + 5: suffix 2 + digit 5 + _SC('e') 3 + +---------- character classes ---------- +letter : 'a' .. 'z' +digit : '0' .. '9' +suffix : 'f' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 14 classLitToken + 2 float1 t false 15 classToken + 3 float2 t false 16 classToken + 4 A t false 17 litToken + 5 "bbb" t false 21 litToken + 6 ??? t false 0 fixedToken + 0 Test nt false 1 false 21 fixedToken + +Literal Tokens: +-------------- +_A = "aaa". + diff --git a/src/TestSuite/TestChars.ATG b/src/TestSuite/TestChars.ATG new file mode 100644 index 0000000..4d1634b --- /dev/null +++ b/src/TestSuite/TestChars.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test of CHARACTERS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + lower = "abcdefghijklmnopqrstuvwxyz". + upper = 'A' ..'Z'. + letter = lower + upper. + noquote = ANY - '\''. + idchar = letter + '0' .. '9'. + LF = '\n'. + +TOKENS + ident = letter {idchar}. + +IGNORE + '\r' + LF + '\u0009' + +PRODUCTIONS + +Test = ident. + +END Test. 
diff --git a/src/TestSuite/TestChars_Output.txt b/src/TestSuite/TestChars_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestChars_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestChars_Parser.cpp b/src/TestSuite/TestChars_Parser.cpp new file mode 100644 index 0000000..4a3fd3c --- /dev/null +++ b/src/TestSuite/TestChars_Parser.cpp @@ -0,0 +1,427 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[3] = { + -1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + 
return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 2; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][4] = { + {T,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("??? 
expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestChars_Scanner.cpp b/src/TestSuite/TestChars_Scanner.cpp new file mode 100644 index 0000000..a2a6589 --- /dev/null +++ b/src/TestSuite/TestChars_Scanner.cpp @@ -0,0 +1,674 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 2; + noSym = 2; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 1); + for (i = 97; i <= 122; ++i) start.set(i, 1); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + (ch >= 9 && ch <= 10) || ch == 13 + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= 
_SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestChars_Trace.txt b/src/TestSuite/TestChars_Trace.txt new file mode 100644 index 0000000..c54d1ef --- /dev/null +++ b/src/TestSuite/TestChars_Trace.txt @@ -0,0 +1,44 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 23 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 +E(ident ) 1: idchar 1 + +---------- character classes ---------- +lower : 'a' .. 'z' +upper : 'A' .. 'Z' +letter : 'A' .. 'Z' 'a' .. 'z' +noquote : 0 .. '&' '(' .. 255 +idchar : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' +LF : 10 + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 16 classToken + 2 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 23 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestChars_output.txt b/src/TestSuite/TestChars_output.txt new file mode 100644 index 0000000..bca0a11 --- /dev/null +++ b/src/TestSuite/TestChars_output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestCircular.ATG b/src/TestSuite/TestCircular.ATG new file mode 100644 index 0000000..2d768fd --- /dev/null +++ b/src/TestSuite/TestCircular.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals can be reached. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A. +A = B D | a. +B = [b] C { c C}. +C = D A [d]. +D = [e f]. + +END Test. 
diff --git a/src/TestSuite/TestCircular_Output.txt b/src/TestSuite/TestCircular_Output.txt new file mode 100644 index 0000000..08079e1 --- /dev/null +++ b/src/TestSuite/TestCircular_Output.txt @@ -0,0 +1,8 @@ +Coco/R (Dec 01, 2018) +checking + D deletable + A:21 --> B:22 + B:22 --> C:23 + C:23 --> A:21 +trace output is in trace.txt +3 errors detected diff --git a/src/TestSuite/TestCircular_Parser.cpp b/src/TestSuite/TestCircular_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestCircular_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() 
{ + if (la->kind == _a) { + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; + case 6: s = coco_string_create(L"f expected"); break; 
+ case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestCircular_Scanner.cpp b/src/TestSuite/TestCircular_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestCircular_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + 
if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && 
(wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); 
+ fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestCircular_Trace.txt b/src/TestSuite/TestCircular_Trace.txt new file mode 100644 index 0000000..9d5f7b5 --- /dev/null +++ b/src/TestSuite/TestCircular_Trace.txt @@ -0,0 +1,86 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 0 20 + 2 nt B 3 21 + 3 nt D 0 21 + 4 t a 0 21 + 5 alt 0 6 2 21 + 6 alt 0 0 4 21 + 7 t b -9 22 + 8 opt 9 0 7 22 + 9 nt C 12 22 + 10 t c 11 22 + 11 nt C -12 22 + 12 iter 0 0 10 22 + 13 nt D 14 23 + 14 nt A 16 23 + 15 t d 0 23 + 16 opt 0 0 15 23 + 17 t e 18 24 + 18 t f 0 24 + 19 opt 0 0 17 24 + + +First & follow symbols: +---------------------- + +Test +first: a b e +follow: EOF + +A +first: a b e +follow: EOF c d e + +B +first: a b e +follow: EOF c d e + +D +first: e +follow: EOF a b c d e + +C +first: a b e +follow: EOF c d e + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken 
+ 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 5 false 21 fixedToken + 2 B nt false 8 false 22 fixedToken + 3 D nt false 19 true 24 fixedToken + 4 C nt false 13 false 23 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestComments.ATG b/src/TestSuite/TestComments.ATG new file mode 100644 index 0000000..83a0440 --- /dev/null +++ b/src/TestSuite/TestComments.ATG @@ -0,0 +1,22 @@ +$01246 +/*------------------------------------------------------------------------- +Test of COMMENTS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + +TOKENS + ident = letter {letter | digit}. + +COMMENTS FROM "//" TO "\r\n" +COMMENTS FROM "/*" TO "*/" +COMMENTS FROM "(*" TO "*)" NESTED + +PRODUCTIONS + +Test = ident. + +END Test. diff --git a/src/TestSuite/TestComments_Output.txt b/src/TestSuite/TestComments_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestComments_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestComments_Parser.cpp b/src/TestSuite/TestComments_Parser.cpp new file mode 100644 index 0000000..4a3fd3c --- /dev/null +++ b/src/TestSuite/TestComments_Parser.cpp @@ -0,0 +1,427 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[3] = { + -1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + 
return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 2; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][4] = { + {T,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("??? 
expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestComments_Scanner.cpp b/src/TestSuite/TestComments_Scanner.cpp new file mode 100644 index 0000000..2664361 --- /dev/null +++ b/src/TestSuite/TestComments_Scanner.cpp @@ -0,0 +1,742 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 2; + noSym = 2; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 1); + for (i = 97; i <= 122; ++i) start.set(i, 1); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + +bool Scanner::Comment0() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('*')) { + NextCh(); + for(;;) { + if (ch == _SC('*')) { + NextCh(); + if (ch == _SC(')')) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == _SC('(')) { + NextCh(); + if (ch == _SC('*')) { + level++; NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + +bool Scanner::Comment1() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('*')) { + NextCh(); + for(;;) { + if (ch == _SC('*')) { + NextCh(); + if (ch == _SC('/')) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + +bool Scanner::Comment2() { + int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; + NextCh(); + if (ch == _SC('/')) { + NextCh(); + for(;;) { + if (ch == 13) { + NextCh(); + if (ch == 10) { + level--; + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } + NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; + return false; +} + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte 
heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + if ((ch == _SC('(') && Comment0()) || (ch == _SC('/') && Comment1()) || (ch == _SC('/') && Comment2())) continue; + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; 
i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestComments_Trace.txt b/src/TestSuite/TestComments_Trace.txt new file mode 100644 index 0000000..d7a3f65 --- /dev/null +++ b/src/TestSuite/TestComments_Trace.txt @@ -0,0 +1,41 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 20 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: letter 1 +E(ident ) 1: #A 1 + +---------- character classes ---------- +letter : 'A' .. 'Z' 'a' .. 'z' +digit : '0' .. '9' +#A : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 12 classToken + 2 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestComplete.ATG b/src/TestSuite/TestComplete.ATG new file mode 100644 index 0000000..e608af1 --- /dev/null +++ b/src/TestSuite/TestComplete.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals have productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. 
+ b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A C D. +A = a B a. +C = c D c. +D = A d. + +END Test. diff --git a/src/TestSuite/TestComplete_Output.txt b/src/TestSuite/TestComplete_Output.txt new file mode 100644 index 0000000..92d7998 --- /dev/null +++ b/src/TestSuite/TestComplete_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking + No production for B +trace output is in trace.txt +1 errors detected diff --git a/src/TestSuite/TestComplete_Parser.cpp b/src/TestSuite/TestComplete_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestComplete_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + 
+void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing 
templates are used to call the Init and Destroy methods if they exist. + +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e 
expected"); break; + case 6: s = coco_string_create(L"f expected"); break; + case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestComplete_Scanner.cpp b/src/TestSuite/TestComplete_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestComplete_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + 
return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = 
wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = 
isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestComplete_Trace.txt b/src/TestSuite/TestComplete_Trace.txt new file mode 100644 index 0000000..f7057ea --- /dev/null +++ b/src/TestSuite/TestComplete_Trace.txt @@ -0,0 +1,78 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt C 3 20 + 3 nt D 0 20 + 4 t a 5 21 + 5 nt B 6 21 + 6 t a 0 21 + 7 t c 8 22 + 8 nt D 9 22 + 9 t c 0 22 + 10 nt A 11 23 + 11 t d 0 23 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: c d + +C +first: c +follow: a + +D +first: a +follow: EOF c + +B +first: -- empty set -- +follow: a + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t 
false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 4 false 21 fixedToken + 2 C nt false 7 false 22 fixedToken + 3 D nt false 10 false 23 fixedToken + 4 B nt false 0 false 0 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestDel.ATG b/src/TestSuite/TestDel.ATG new file mode 100644 index 0000000..6c242d8 --- /dev/null +++ b/src/TestSuite/TestDel.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test of deletable symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A B g C g D. +A = a | {e}[f]. +B = {b}[c](d|). +C = A B. +D = C | h. + +END Test. diff --git a/src/TestSuite/TestDel_Output.txt b/src/TestSuite/TestDel_Output.txt new file mode 100644 index 0000000..76ca672 --- /dev/null +++ b/src/TestSuite/TestDel_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +checking + A deletable + B deletable + C deletable + D deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestDel_Parser.cpp b/src/TestSuite/TestDel_Parser.cpp new file mode 100644 index 0000000..c276c11 --- /dev/null +++ b/src/TestSuite/TestDel_Parser.cpp @@ -0,0 +1,532 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || 
StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + B_NT(); + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + C_NT(); + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + D_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (StartOf(1 /* iter */)) { + while (IsKind(la, _e)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _f)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _c)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _EOF) || IsKind(la, _g)) { + } else SynErr(12); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + A_NT(); + B_NT(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + if (StartOf(2 /* nt 
*/)) { + C_NT(); + } else if (IsKind(la, _h)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(13); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = 
_SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid B"); break; + case 13: s = _SC("invalid D"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestDel_Scanner.cpp b/src/TestSuite/TestDel_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestDel_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; + isUserStream = false; // in-memory buffer: no stream, but Close() still reads this flag +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +// returns a new[]-allocated, 0-terminated copy of that range; the read position is restored afterwards +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg + 1]; // +1: room for the terminating 0 written below + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g.
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestDel_Trace.txt b/src/TestSuite/TestDel_Trace.txt new file mode 100644 index 0000000..9088d92 --- /dev/null +++ b/src/TestSuite/TestDel_Trace.txt @@ -0,0 +1,117 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 t g 4 20 + 4 nt C 5 20 + 5 t g 6 20 + 6 nt D 0 20 + 7 t a 0 21 + 8 t e -9 21 + 9 iter 11 0 8 21 + 10 t f 0 21 + 11 opt 0 0 10 21 + 12 alt 0 13 7 21 + 13 alt 0 0 9 21 + 14 t b -15 22 + 15 iter 17 0 14 22 + 16 t c -20 22 + 17 opt 20 0 16 22 + 18 t d 0 22 + 19 eps 0 0 + 20 alt 0 21 18 22 + 21 alt 0 0 19 0 + 22 nt A 23 23 + 23 nt B 0 23 + 24 nt C 0 24 + 25 t h 0 24 + 26 alt 0 27 24 24 + 27 alt 0 0 25 24 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f g +follow: 
EOF + +A +first: a e f +follow: EOF b c d g + +B +first: b c d +follow: EOF g + +C +first: a b c d e f +follow: EOF g + +D +first: a b c d e f h +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 12 true 21 fixedToken + 2 B nt false 15 true 22 fixedToken + 3 C nt false 22 true 23 fixedToken + 4 D nt false 26 true 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestEps.ATG b/src/TestSuite/TestEps.ATG new file mode 100644 index 0000000..28bac33 --- /dev/null +++ b/src/TestSuite/TestEps.ATG @@ -0,0 +1,20 @@ +$01246 +/*------------------------------------------------------------------------- +Test of eps symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = (a|) b + | (c | (. sem .) | d) e. + +END Test. 
\ No newline at end of file diff --git a/src/TestSuite/TestEps_Output.txt b/src/TestSuite/TestEps_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestEps_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestEps_Parser.cpp b/src/TestSuite/TestEps_Parser.cpp new file mode 100644 index 0000000..0f816e1 --- /dev/null +++ b/src/TestSuite/TestEps_Parser.cpp @@ -0,0 +1,460 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + 
Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (IsKind(la, _a) || IsKind(la, _b)) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + } + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _c) || IsKind(la, _d) || IsKind(la, _e)) { + if (IsKind(la, _c)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _e)) { + sem + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a method Destroy they should +// be called in the constructor and the destructor respectively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy.
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The following templates are used to call the Init and Destroy methods if they exist.
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestEps_Scanner.cpp b/src/TestSuite/TestEps_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestEps_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +// Creates an empty token: all numeric fields zero, no value, no successor. +Token::Token() { + kind = 0; + pos = 0; + charPos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +// Returns a heap-allocated copy of this token; val is duplicated, next is shared with the original. +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->charPos = charPos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +// Releases the token's value string. +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ?
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; + isUserStream = false; // in-memory buffer: no stream, but Close() still reads this flag +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +// returns a new[]-allocated, 0-terminated copy of that range; the read position is restored afterwards +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg + 1]; // +1: room for the terminating 0 written below + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g.
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestEps_Trace.txt b/src/TestSuite/TestEps_Trace.txt new file mode 100644 index 0000000..ad7edac --- /dev/null +++ b/src/TestSuite/TestEps_Trace.txt @@ -0,0 +1,72 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a -5 17 + 2 eps -5 0 + 3 alt 5 4 1 17 + 4 alt -5 0 2 0 + 5 t b 0 17 + 6 t c -12 18 + 7 sem -12 330 0 + 8 alt 12 9 6 18 + 9 alt -12 11 7 0 + 10 t d -12 18 + 11 alt -12 0 10 18 + 12 t e 0 18 + 13 alt 0 14 3 17 + 14 alt 0 0 8 18 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken 
+ 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 13 false 17 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestEps_output.txt b/src/TestSuite/TestEps_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestEps_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestIters.ATG b/src/TestSuite/TestIters.ATG new file mode 100644 index 0000000..39bd7b5 --- /dev/null +++ b/src/TestSuite/TestIters.ATG @@ -0,0 +1,22 @@ +$01246 +/*------------------------------------------------------------------------- +Test of iterations in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = (a | {b} c | {{d} e} | {f {g}} h) i. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestIters_Output.txt b/src/TestSuite/TestIters_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestIters_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestIters_Parser.cpp b/src/TestSuite/TestIters_Parser.cpp new file mode 100644 index 0000000..60dc43e --- /dev/null +++ b/src/TestSuite/TestIters_Parser.cpp @@ -0,0 +1,483 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || 
StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b) || IsKind(la, _c)) { + while (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _d) || IsKind(la, _e) || IsKind(la, _i)) { + while (IsKind(la, _d) || IsKind(la, _e)) { + while (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else if (IsKind(la, _f) || IsKind(la, _h)) { + while (IsKind(la, _f)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (IsKind(la, _g)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_h); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + Expect(_i); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); 
break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestIters_Scanner.cpp b/src/TestSuite/TestIters_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestIters_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestIters_Trace.txt b/src/TestSuite/TestIters_Trace.txt new file mode 100644 index 0000000..384965d --- /dev/null +++ b/src/TestSuite/TestIters_Trace.txt @@ -0,0 +1,88 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a -18 20 + 2 t b -3 20 + 3 iter 4 0 2 20 + 4 t c -18 20 + 5 alt 18 6 1 20 + 6 alt -18 11 3 20 + 7 t d -8 20 + 8 iter 9 0 7 20 + 9 t e -10 20 + 10 iter -18 0 8 20 + 11 alt -18 17 10 20 + 12 t f 14 20 + 13 t g -14 20 + 14 iter -15 0 13 20 + 15 iter 16 0 12 20 + 16 t h -18 20 + 17 alt -18 0 15 20 + 18 t i 0 20 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e f h i +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + 
_SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 5 false 20 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestIters_output.txt b/src/TestSuite/TestIters_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestIters_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestLL1.ATG b/src/TestSuite/TestLL1.ATG new file mode 100644 index 0000000..a174223 --- /dev/null +++ b/src/TestSuite/TestLL1.ATG @@ -0,0 +1,41 @@ +$01246 +/*------------------------------------------------------------------------- +Test if LL(1) warnings are reported. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A E C G H I J. +A = (a | B). +B = {b}(c|) a. +C = {a}[D] B. +D = d [b]. +E = (F|) e. +F = [f]. +G = [ ( IF (eee) (a | b) + | b + ) + ] a. +H = [a] [IF (hhh) a] [IF (hhh) a] a. +I = {a} ( IF (iii) (a | b) + | b + ). +J = + { IF (aaa) "a"} + { IF (eee) ("a" | "b") + | "b" + } "a". + +END Test. 
diff --git a/src/TestSuite/TestLL1_Output.txt b/src/TestSuite/TestLL1_Output.txt new file mode 100644 index 0000000..8b676cd --- /dev/null +++ b/src/TestSuite/TestLL1_Output.txt @@ -0,0 +1,39 @@ +Coco/R (Dec 01, 2018) +checking + F deletable + LL1 warning in A:21:0: a is start of several alternatives + = a:21:6: + => B:21:10: + -> B:22:0: + = a:22:13: + = a:22:13: + = a:22:13: + LL1 warning in E:25:0: e is start of several alternatives + = e:25:10: + = e:25:10: + = e:25:10: + = e:25:10: + LL1 warning in C:23:0: a is start & successor of deletable structure + = a:23:6: + => B:23:12: + -> B:22:0: + = a:22:13: + = a:22:13: + = a:22:13: + LL1 warning in G:27:0: a is start & successor of deletable structure + = a:27:19: + = a:30:5: + LL1 warning in H:31:0: a is start & successor of deletable structure + = a:31:6: + = a:31:35: + LL1 warning in I:32:0: a is start & successor of deletable structure + = a:32:6: + = a:32:21: + LL1 warning in J:35:0: a is start & successor of deletable structure + = a:37:14: + = a:39:4: + LL1 warning in D:24:0: b is start & successor of deletable structure + = b:24:8: +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestLL1_Parser.cpp b/src/TestSuite/TestLL1_Parser.cpp new file mode 100644 index 0000000..de3bed4 --- /dev/null +++ b/src/TestSuite/TestLL1_Parser.cpp @@ -0,0 +1,694 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + E_NT(); + C_NT(); + G_NT(); + H_NT(); + I_NT(); + J_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _a) || IsKind(la, _b) || IsKind(la, _c)) { + B_NT(); + } else SynErr(11); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::E_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); +#endif + if (IsKind(la, _e) || IsKind(la, _f)) { + F_NT(); + } else if (IsKind(la, _e)) { + } else SynErr(12); + Expect(_e); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + while (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _d)) { + D_NT(); + } + B_NT(); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::G_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); +#endif + if (IsKind(la, _a) || IsKind(la, _b)) { + if (eee) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(13); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::H_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (hhh) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (hhh) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::I_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); +#endif + while (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (iii) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(14); + } else if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(15); +#ifdef PARSER_WITH_AST + if(ntAdded) 
AstPopNonTerminal(); +#endif +} + +void Parser::J_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_J, _SC("J"), la->line); +#endif + while (aaa) { + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (IsKind(la, _a) || IsKind(la, _b)) { + if (eee) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(16); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + while (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _c)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _a)) { + } else SynErr(17); + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::F_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); +#endif + if (IsKind(la, _f)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor 
respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); 
break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid E"); break; + case 13: s = _SC("invalid G"); break; + case 14: s = _SC("invalid I"); break; + case 15: s = _SC("invalid I"); break; + case 16: s = _SC("invalid J"); break; + case 17: s = _SC("invalid B"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestLL1_Scanner.cpp b/src/TestSuite/TestLL1_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestLL1_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; charPos = 0; /* charPos is assigned by Scanner::NextToken; give heap-constructed tokens a defined value as well */ + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; tk->charPos = charPos; /* copy the character position too, so the clone carries every position field of the source token */ + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ?
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; isUserStream = false; /* no stream here; initialise the flag because Close() (called by the destructor) reads it */ +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg + 1]; /* +1: the loop can store end - beg characters and a terminating 0 is written after it */ + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g.
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), fileName); /* _SFMT is the conversion used for wchar_t strings in this file, so print fileName rather than the narrowed char* copy */ + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestLL1_Trace.txt b/src/TestSuite/TestLL1_Trace.txt new file mode 100644 index 0000000..0e403dc --- /dev/null +++ b/src/TestSuite/TestLL1_Trace.txt @@ -0,0 +1,195 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt E 3 20 + 3 nt C 4 20 + 4 nt G 5 20 + 5 nt H 6 20 + 6 nt I 7 20 + 7 nt J 0 20 + 8 t a 0 21 + 9 nt B 0 21 + 10 alt 0 11 8 21 + 11 alt 0 0 9 21 + 12 t b -13 22 + 13 iter 16 0 12 22 + 14 t c -18 22 + 15 eps -18 0 + 16 alt 18 17 14 22 + 17 alt -18 0 15 0 + 18 t a 0 22 + 19 t a -20 23 + 20 iter 22 0 19 23 + 21 nt D -23 23 + 22 opt 23 0 21 23 + 23 nt B 0 23 + 24 t d 26 24 + 25 t b 0 24 + 26 opt 0 0 25 24 + 27 nt F -31 25 + 28 eps -31 0 + 29 alt 31 30 27 25 + 30 alt -31 0 28 0 + 31 t e 0 25 + 32 t f 0 26 + 
33 opt 0 0 32 26 + 34 rslv 37 27 + 35 t a -43 27 + 36 t b -43 27 + 37 alt -43 38 35 27 + 38 alt -43 0 36 27 + 39 t b -43 28 + 40 alt -43 41 34 27 + 41 alt -43 0 39 28 + 42 opt 43 0 40 27 + 43 t a 0 30 + 44 t a -48 31 + 45 opt 48 0 44 31 + 46 rslv 47 31 + 47 t a -51 31 + 48 opt 51 0 46 31 + 49 rslv 50 31 + 50 t a -52 31 + 51 opt 52 0 49 31 + 52 t a 0 31 + 53 t a -54 32 + 54 iter 61 0 53 32 + 55 rslv 58 32 + 56 t a 0 32 + 57 t b 0 32 + 58 alt 0 59 56 32 + 59 alt 0 0 57 32 + 60 t b 0 33 + 61 alt 0 62 55 32 + 62 alt 0 0 60 33 + 63 rslv 64 36 + 64 t a -65 36 + 65 iter 74 0 63 36 + 66 rslv 69 37 + 67 t a -74 37 + 68 t b -74 37 + 69 alt -74 70 67 37 + 70 alt -74 0 68 37 + 71 t b -74 38 + 72 alt -74 73 66 37 + 73 alt -74 0 71 38 + 74 iter 75 0 72 37 + 75 t a 0 39 + + +First & follow symbols: +---------------------- + +Test +first: a b c +follow: EOF + +A +first: a b c +follow: e f + +E +first: e f +follow: a b c d + +C +first: a b c d +follow: a b + +G +first: a b +follow: a + +H +first: a +follow: a b + +I +first: a b +follow: a b + +J +first: a b +follow: EOF + +B +first: a b c +follow: a b e f + +D +first: d +follow: a b c + +F +first: f +follow: e + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 10 false 21 fixedToken + 2 E nt false 29 false 25 fixedToken + 3 C nt false 20 false 23 fixedToken + 4 G nt false 42 false 27 fixedToken + 5 H nt false 45 false 31 fixedToken + 6 I nt false 54 false 32 fixedToken + 7 J nt false 65 false 35 fixedToken + 8 B nt false 13 false 22 fixedToken + 9 D nt false 24 false 24 fixedToken + 10 F nt false 33 true 26 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts.ATG b/src/TestSuite/TestOpts.ATG new file mode 100644 index 0000000..867f323 --- /dev/null +++ b/src/TestSuite/TestOpts.ATG @@ -0,0 +1,20 @@ +$01246 +/*------------------------------------------------------------------------- +Test of options in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = a | [b] c | [Del] | d [[d][e]f]. +Del = [e]. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestOpts1.ATG b/src/TestSuite/TestOpts1.ATG new file mode 100644 index 0000000..cc47d7d --- /dev/null +++ b/src/TestSuite/TestOpts1.ATG @@ -0,0 +1,19 @@ +$01246 +/*------------------------------------------------------------------------- +Test of options in productions. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + +PRODUCTIONS + +Test = [[a]]. + +END Test. \ No newline at end of file diff --git a/src/TestSuite/TestOpts1_Output.txt b/src/TestSuite/TestOpts1_Output.txt new file mode 100644 index 0000000..6aef8af --- /dev/null +++ b/src/TestSuite/TestOpts1_Output.txt @@ -0,0 +1,7 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + LL1 warning in Test:17:1: contents of [...] 
or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts1_Parser.cpp b/src/TestSuite/TestOpts1_Parser.cpp new file mode 100644 index 0000000..bbc1811 --- /dev/null +++ b/src/TestSuite/TestOpts1_Parser.cpp @@ -0,0 +1,436 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + 
Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (IsKind(la, _a)) { + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestOpts1_Scanner.cpp b/src/TestSuite/TestOpts1_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestOpts1_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestOpts1_Trace.txt b/src/TestSuite/TestOpts1_Trace.txt new file mode 100644 index 0000000..5d62b0b --- /dev/null +++ b/src/TestSuite/TestOpts1_Trace.txt @@ -0,0 +1,61 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 0 17 + 2 opt 0 0 1 17 + 3 opt 0 0 2 17 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? 
t false 0 fixedToken + 0 Test nt false 3 true 17 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts1_output.txt b/src/TestSuite/TestOpts1_output.txt new file mode 100644 index 0000000..a8107e9 --- /dev/null +++ b/src/TestSuite/TestOpts1_output.txt @@ -0,0 +1,7 @@ +Coco/R (Sep 6, 2007) +checking + Test deletable + LL1 warning in Test: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts_Output.txt b/src/TestSuite/TestOpts_Output.txt new file mode 100644 index 0000000..7829c89 --- /dev/null +++ b/src/TestSuite/TestOpts_Output.txt @@ -0,0 +1,8 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + Del deletable + LL1 warning in Test:17:1: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestOpts_Parser.cpp b/src/TestSuite/TestOpts_Parser.cpp new file mode 100644 index 0000000..4a346f8 --- /dev/null +++ b/src/TestSuite/TestOpts_Parser.cpp @@ -0,0 +1,488 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[8] = { + -1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; 
+} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b) || IsKind(la, _c)) { + if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _EOF) || IsKind(la, _e)) { + if (IsKind(la, _e)) { + Del_NT(); + } + } else if (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + if (IsKind(la, _d) || IsKind(la, _e) || IsKind(la, _f)) { + if (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + if (IsKind(la, _e)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_f); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(8); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::Del_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_Del, _SC("Del"), la->line); +#endif + if (IsKind(la, _e)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they 
should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 7; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][9] = { + {T,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + 
case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("??? expected"); break; + case 8: s = _SC("invalid Test"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestOpts_Scanner.cpp b/src/TestSuite/TestOpts_Scanner.cpp new file mode 100644 index 0000000..0fb26f2 --- /dev/null +++ b/src/TestSuite/TestOpts_Scanner.cpp @@ -0,0 +1,684 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 7; + noSym = 7; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, 
tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; 
break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestOpts_Trace.txt b/src/TestSuite/TestOpts_Trace.txt new file mode 100644 index 0000000..eb33639 --- /dev/null +++ b/src/TestSuite/TestOpts_Trace.txt @@ -0,0 +1,82 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 0 17 + 2 t b -4 17 + 3 opt 4 0 2 17 + 4 t c 0 17 + 5 alt 0 6 1 17 + 6 alt 0 9 3 17 + 7 nt Del 0 17 + 8 opt 0 0 7 17 + 9 alt 0 17 8 17 + 10 t d 16 17 + 11 t d -14 17 + 12 opt 14 0 11 17 + 13 t e -15 17 + 14 opt 15 0 13 17 + 15 t f 0 17 + 16 opt 0 0 12 17 + 17 alt 0 0 10 17 + 18 t e 0 18 + 19 opt 0 0 18 18 + + +First & follow symbols: +---------------------- + +Test +first: a b c d e +follow: EOF + +Del +first: e +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: + +---------- character classes ---------- + +Symbol Table: 
+------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 ??? t false 0 fixedToken + 0 Test nt false 5 true 17 fixedToken + 1 Del nt false 19 true 18 fixedToken + +Literal Tokens: +-------------- +_f = "f". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestOpts_output.txt b/src/TestSuite/TestOpts_output.txt new file mode 100644 index 0000000..22c022e --- /dev/null +++ b/src/TestSuite/TestOpts_output.txt @@ -0,0 +1,8 @@ +Coco/R (Sep 6, 2007) +checking + Test deletable + Del deletable + LL1 warning in Test: contents of [...] or {...} must not be deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestReached.ATG b/src/TestSuite/TestReached.ATG new file mode 100644 index 0000000..685f13e --- /dev/null +++ b/src/TestSuite/TestReached.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if all nonterminals can be reached. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A C D. +A = a C a | b. +B = b. +C = c D c | b. +D = A d. + +END Test. 
diff --git a/src/TestSuite/TestReached_Output.txt b/src/TestSuite/TestReached_Output.txt new file mode 100644 index 0000000..89a33c9 --- /dev/null +++ b/src/TestSuite/TestReached_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking + B cannot be reached +trace output is in trace.txt +1 errors detected diff --git a/src/TestSuite/TestReached_Parser.cpp b/src/TestSuite/TestReached_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestReached_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else 
if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; + case 6: s = coco_string_create(L"f expected"); break; 
+ case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestReached_Scanner.cpp b/src/TestSuite/TestReached_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestReached_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if 
(!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && 
(wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); 
+ fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestReached_Trace.txt b/src/TestSuite/TestReached_Trace.txt new file mode 100644 index 0000000..a478c03 --- /dev/null +++ b/src/TestSuite/TestReached_Trace.txt @@ -0,0 +1,85 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt C 3 20 + 3 nt D 0 20 + 4 t a 5 21 + 5 nt C 6 21 + 6 t a 0 21 + 7 t b 0 21 + 8 alt 0 9 4 21 + 9 alt 0 0 7 21 + 10 t b 0 22 + 11 t c 12 23 + 12 nt D 13 23 + 13 t c 0 23 + 14 t b 0 23 + 15 alt 0 16 11 23 + 16 alt 0 0 14 23 + 17 nt A 18 24 + 18 t d 0 24 + + +First & follow symbols: +---------------------- + +Test +first: a b +follow: EOF + +A +first: a b +follow: b c d + +C +first: b c +follow: a b + +D +first: a b +follow: EOF c + +B +first: b +follow: -- empty set -- + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 
fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 8 false 21 fixedToken + 2 C nt false 15 false 23 fixedToken + 3 D nt false 17 false 24 fixedToken + 4 B nt false 10 false 22 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestResIllegal.ATG b/src/TestSuite/TestResIllegal.ATG new file mode 100644 index 0000000..ca51b2b --- /dev/null +++ b/src/TestSuite/TestResIllegal.ATG @@ -0,0 +1,27 @@ +$AFGJSX +COMPILER Test +PRODUCTIONS + Test = A B C D E. + + A = + "a" + ( "b" (IF (aaa) "c") "d" /* misplaced resolver */ + | IF (bbb) "b" "c" /* resolver not evaluated */ + | IF (ccc) "c" /* misplaced resolver */ + ). + + B = IF (ddd) "d". /* misplaced resolver */ + + C = {IF (eee) "d"} "e". /* misplaced resolver */ + + D = + "d" + { "d" + | IF (fff) "a" "b" /* misplaced resolver */ + } "a". + + E = + IF(ggg) "a" /* misplaced resolver */ + | ANY. + +END Test. diff --git a/src/TestSuite/TestResIllegal_Output.txt b/src/TestSuite/TestResIllegal_Output.txt new file mode 100644 index 0000000..00793ae --- /dev/null +++ b/src/TestSuite/TestResIllegal_Output.txt @@ -0,0 +1,15 @@ +Coco/R (Dec 01, 2018) +checking +TestResIllegal.ATG -- line 8 col 14: Warning: Misplaced resolver: no alternative. +TestResIllegal.ATG -- line 9 col 9: Warning: Resolver will never be evaluated. Place it at previous conflicting alternative. +TestResIllegal.ATG -- line 10 col 9: Warning: Misplaced resolver: no LL(1) conflict. +TestResIllegal.ATG -- line 13 col 11: Warning: Misplaced resolver: no alternative. +TestResIllegal.ATG -- line 15 col 12: Warning: Misplaced resolver: no LL(1) conflict. +TestResIllegal.ATG -- line 20 col 9: Warning: Misplaced resolver: no LL(1) conflict. 
+TestResIllegal.ATG -- line 24 col 8: Warning: Misplaced resolver: no LL(1) conflict. + LL1 warning in D:17:0: "a" is start & successor of deletable structure + = "a":20:14: + = "a":21:5: +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestResIllegal_Parser.cpp b/src/TestSuite/TestResIllegal_Parser.cpp new file mode 100644 index 0000000..56481ba --- /dev/null +++ b/src/TestSuite/TestResIllegal_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + A(); + B(); + C(); + D(); + E(); +} + +void Parser::A() { + Expect(1 /* "a" */,__FUNCTION__); + if (la->kind == 2 /* "b" */) { + Get(); + Expect(3 /* "c" */,__FUNCTION__); + Expect(4 
/* "d" */,__FUNCTION__); + } else if (bbb) { + Expect(2 /* "b" */,__FUNCTION__); + Expect(3 /* "c" */,__FUNCTION__); + } else if (ccc) { + Expect(3 /* "c" */,__FUNCTION__); + } else SynErr(7,__FUNCTION__); +} + +void Parser::B() { + Expect(4 /* "d" */,__FUNCTION__); +} + +void Parser::C() { + while (eee) { + Expect(4 /* "d" */,__FUNCTION__); + } + Expect(5 /* "e" */,__FUNCTION__); +} + +void Parser::D() { + Expect(4 /* "d" */,__FUNCTION__); + while (la->kind == 1 /* "a" */ || la->kind == 4 /* "d" */) { + if (la->kind == 4 /* "d" */) { + Get(); + } else { + Expect(1 /* "a" */,__FUNCTION__); + Expect(2 /* "b" */,__FUNCTION__); + } + } + Expect(1 /* "a" */,__FUNCTION__); +} + +void Parser::E() { + if (ggg) { + Expect(1 /* "a" */,__FUNCTION__); + } else if (StartOf(1)) { + Get(); + } else SynErr(8,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 6; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[2][8] = { + {T,x,x,x, x,x,x,x}, + {x,x,T,T, T,T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"\"a\" expected"); break; + case 2: s = coco_string_create(L"\"b\" expected"); break; + case 3: s = coco_string_create(L"\"c\" expected"); break; + case 4: s = coco_string_create(L"\"d\" expected"); break; + case 5: s = coco_string_create(L"\"e\" expected"); break; + case 6: s = coco_string_create(L"??? 
expected"); break; + case 7: s = coco_string_create(L"invalid A"); break; + case 8: s = coco_string_create(L"invalid E"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestResIllegal_Scanner.cpp b/src/TestSuite/TestResIllegal_Scanner.cpp new file mode 100644 index 0000000..768de1a --- /dev/null +++ b/src/TestSuite/TestResIllegal_Scanner.cpp @@ -0,0 +1,617 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - 
L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* 
data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 6; + noSym = 6; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + 
+void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = 
t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestResIllegal_Trace.txt b/src/TestSuite/TestResIllegal_Trace.txt new file mode 100644 index 0000000..6ac64dc --- /dev/null +++ b/src/TestSuite/TestResIllegal_Trace.txt @@ -0,0 +1,130 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 4 + 2 nt B 3 4 + 3 nt C 4 4 + 4 nt D 5 4 + 5 nt E 0 4 + 6 t "a" 14 7 + 7 t "b" 8 8 + 8 rslv 9 8 + 9 t "c" 10 8 + 10 t "d" 0 8 + 11 rslv 12 9 + 12 t "b" 13 9 + 13 t "c" 0 9 + 14 alt 0 15 7 8 + 15 alt 0 18 11 9 + 16 rslv 17 10 + 17 t "c" 0 10 + 18 alt 0 0 16 10 + 19 rslv 20 13 + 20 t "d" 0 13 + 21 rslv 22 15 + 22 t "d" -23 15 + 23 iter 24 0 21 15 + 24 t "e" 0 15 + 25 t "d" 32 18 + 26 t "d" -32 19 + 27 rslv 28 20 + 28 t "a" 29 20 + 29 t "b" -32 20 + 30 alt -32 31 26 19 + 31 alt -32 0 27 20 + 32 iter 33 0 30 19 + 33 t "a" 0 21 + 34 rslv 35 24 + 35 t "a" 0 24 + 36 any 0 0 + 37 alt 0 38 34 24 + 38 alt 0 0 36 0 + + +First & follow symbols: +---------------------- + +Test +first: "a" +follow: EOF + +A +first: "a" +follow: "d" + +B +first: "d" +follow: "d" "e" + +C +first: "d" "e" +follow: "d" + +D +first: "d" +follow: "a" "b" "c" "d" "e" ??? + +E +first: "a" "b" "c" "d" "e" ??? 
+follow: EOF + + +ANY and SYNC sets: +----------------- + 36 any "b" "c" "d" "e" ??? + +Cross reference list: +-------------------- + + "a" 7 20 21 24 + "b" 8 9 20 + "c" 8 9 10 + "d" 8 13 15 18 19 + "e" 15 + A -6 4 + B -13 4 + C -15 4 + D -17 4 + E -23 4 + Test -4 + + + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 +E("a" ) 1: +E("b" ) 2: +E("c" ) 3: +E("d" ) 4: +E("e" ) 5: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 "a" t false 7 fixedToken + 2 "b" t false 8 fixedToken + 3 "c" t false 8 fixedToken + 4 "d" t false 8 fixedToken + 5 "e" t false 15 fixedToken + 6 ??? t false 0 fixedToken + 0 Test nt false 1 false 4 fixedToken + 1 A nt false 6 false 6 fixedToken + 2 B nt false 19 false 13 fixedToken + 3 C nt false 23 false 15 fixedToken + 4 D nt false 25 false 17 fixedToken + 5 E nt false 37 false 23 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestResOK.ATG b/src/TestSuite/TestResOK.ATG new file mode 100644 index 0000000..7503297 --- /dev/null +++ b/src/TestSuite/TestResOK.ATG @@ -0,0 +1,55 @@ +$AFGJSX +COMPILER Test +PRODUCTIONS + Test = A B C D E F G H + | I. + + A = "a" {[IF (true) "b" "c"] "b"} "c". + + B = + ( "a" + | IF (eee) "b" + | + ) "b". + + C = (IF(true) "a" "b" | "a"). + + D = { + IF (true) "a" + | "a" "b" + } "c". + + E = + ( "a" + | [ IF(true) "c" + | "c" "b" + ] + | "b" + ) "d". + + F = + { IF(true) ["a"] "b" + | ANY + | "a" + } "c". + + G = + { IF (aaa) "a"} + { IF (bbb) + (IF (eee) ("a" | "b") + | "b" + ) + } "a". + + H = + { IF (aaa) "a"} + { IF (eee) ("a" | "b") + | "b" + } "c". + + I = // both alternatives can be selected with EOF as the next input symbol + ( IF (aaa) ["b"] + | {"c"} + ). + +END Test. 
diff --git a/src/TestSuite/TestResOK_Output.txt b/src/TestSuite/TestResOK_Output.txt new file mode 100644 index 0000000..2d5fda3 --- /dev/null +++ b/src/TestSuite/TestResOK_Output.txt @@ -0,0 +1,7 @@ +Coco/R (Dec 01, 2018) +checking + Test deletable + I deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestResOK_Parser.cpp b/src/TestSuite/TestResOK_Parser.cpp new file mode 100644 index 0000000..723aebb --- /dev/null +++ b/src/TestSuite/TestResOK_Parser.cpp @@ -0,0 +1,737 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[6] = { + -1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + 
} + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + if (IsKind(la, 1 /* "a" */)) { + A_NT(); + B_NT(); + C_NT(); + D_NT(); + E_NT(); + F_NT(); + G_NT(); + H_NT(); + } else if (IsKind(la, _EOF) || IsKind(la, 2 /* "b" */) || IsKind(la, 3 /* "c" */)) { + I_NT(); + } else SynErr(6); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (IsKind(la, 2 /* "b" */)) { + if (true) { + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (eee) { + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 2 /* "b" */)) { + } else SynErr(7); + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (true) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef 
PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(8); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::D_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_D, _SC("D"), la->line); +#endif + while (IsKind(la, 1 /* "a" */)) { + if (true) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::E_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_E, _SC("E"), la->line); +#endif + if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 3 /* "c" */) || IsKind(la, 4 /* "d" */)) { + if (IsKind(la, 3 /* "c" */)) { + if (true) { + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + } else if (IsKind(la, 2 /* "b" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(9); + Expect(4 /* "d" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::F_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_F, _SC("F"), la->line); +#endif + while (StartOf(1 /* alt */)) { + if (true) { + if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(2 /* "b" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if 
(IsKind(la, 4 /* "d" */) || IsKind(la, 5 /* ??? */)) { + Get(); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::G_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_G, _SC("G"), la->line); +#endif + while (aaa) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (bbb) { + if (eee) { + if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 2 /* "b" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(10); + } else if (IsKind(la, 2 /* "b" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + } + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::H_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_H, _SC("H"), la->line); +#endif + while (aaa) { + Expect(1 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + while (IsKind(la, 1 /* "a" */) || IsKind(la, 2 /* "b" */)) { + if (eee) { + if (IsKind(la, 1 /* "a" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, 2 /* "b" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(12); + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(3 /* "c" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::I_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_I, _SC("I"), la->line); +#endif + if (aaa) { + if (IsKind(la, 2 /* "b" */)) { + Get(); +#ifdef PARSER_WITH_AST + 
AstAddTerminal(); +#endif + } + } else if (IsKind(la, _EOF) || IsKind(la, 3 /* "c" */)) { + while (IsKind(la, 3 /* "c" */)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } else SynErr(13); +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 5; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[2][7] = { + {T,x,x,x, x,x,x}, + {x,T,T,x, T,T,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("\"a\" expected"); break; + case 2: s = _SC("\"b\" expected"); break; + case 3: s = 
_SC("\"c\" expected"); break; + case 4: s = _SC("\"d\" expected"); break; + case 5: s = _SC("??? expected"); break; + case 6: s = _SC("invalid Test"); break; + case 7: s = _SC("invalid B"); break; + case 8: s = _SC("invalid C"); break; + case 9: s = _SC("invalid E"); break; + case 10: s = _SC("invalid G"); break; + case 11: s = _SC("invalid G"); break; + case 12: s = _SC("invalid H"); break; + case 13: s = _SC("invalid I"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestResOK_Scanner.cpp b/src/TestSuite/TestResOK_Scanner.cpp new file mode 100644 index 0000000..77d7ca6 --- /dev/null +++ b/src/TestSuite/TestResOK_Scanner.cpp @@ -0,0 +1,678 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 5; + noSym = 5; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + 
tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* "a" */; break;} + case 2: + {t->kind = 2 /* "b" */; break;} + case 3: + {t->kind = 3 /* "c" */; break;} + case 4: + {t->kind = 4 /* "d" 
*/; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestResOK_Trace.txt b/src/TestSuite/TestResOK_Trace.txt new file mode 100644 index 0000000..c71f49e --- /dev/null +++ b/src/TestSuite/TestResOK_Trace.txt @@ -0,0 +1,213 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 4 + 2 nt B 3 4 + 3 nt C 4 4 + 4 nt D 5 4 + 5 nt E 6 4 + 6 nt F 7 4 + 7 nt G 8 4 + 8 nt H 0 4 + 9 nt I 0 5 + 10 alt 0 11 1 4 + 11 alt 0 0 9 5 + 12 t "a" 18 7 + 13 rslv 14 7 + 14 t "b" 15 7 + 15 t "c" -17 7 + 16 opt 17 0 13 7 + 17 t "b" -18 7 + 18 iter 19 0 16 7 + 19 t "c" 0 7 + 20 t "a" -27 10 + 21 rslv 22 11 + 22 t "b" -27 11 + 23 alt 27 24 20 10 + 24 alt -27 26 21 11 + 25 eps -27 0 + 26 alt -27 0 25 0 + 27 t "b" 0 13 + 28 rslv 29 15 + 29 t "a" 30 15 + 30 t "b" 0 15 + 31 t "a" 0 15 + 32 alt 0 33 28 15 + 33 alt 0 0 31 15 + 34 rslv 35 18 + 35 t "a" -40 18 + 36 t "a" 37 19 + 37 t "b" -40 19 + 38 alt -40 39 34 18 + 39 alt -40 0 36 19 + 40 iter 41 0 38 18 + 41 t "c" 0 20 + 42 t "a" -54 23 + 43 rslv 44 24 + 44 t "c" -54 24 + 45 t "c" 46 25 + 46 t "b" -54 25 + 47 alt -54 48 43 24 + 48 alt -54 0 45 25 + 49 opt -54 0 
47 24 + 50 alt 54 51 42 23 + 51 alt -54 53 49 24 + 52 t "b" -54 27 + 53 alt -54 0 52 27 + 54 t "d" 0 28 + 55 rslv 57 31 + 56 t "a" -58 31 + 57 opt 58 0 56 31 + 58 t "b" -64 31 + 59 any -64 0 + 60 alt -64 61 55 31 + 61 alt -64 63 59 0 + 62 t "a" -64 33 + 63 alt -64 0 62 33 + 64 iter 65 0 60 31 + 65 t "c" 0 34 + 66 rslv 67 37 + 67 t "a" -68 37 + 68 iter 78 0 66 37 + 69 rslv 76 38 + 70 rslv 73 39 + 71 t "a" -78 39 + 72 t "b" -78 39 + 73 alt -78 74 71 39 + 74 alt -78 0 72 39 + 75 t "b" -78 40 + 76 alt -78 77 70 39 + 77 alt -78 0 75 40 + 78 iter 79 0 69 38 + 79 t "a" 0 42 + 80 rslv 81 45 + 81 t "a" -82 45 + 82 iter 91 0 80 45 + 83 rslv 86 46 + 84 t "a" -91 46 + 85 t "b" -91 46 + 86 alt -91 87 84 46 + 87 alt -91 0 85 46 + 88 t "b" -91 47 + 89 alt -91 90 83 46 + 90 alt -91 0 88 47 + 91 iter 92 0 89 46 + 92 t "c" 0 48 + 93 rslv 95 51 + 94 t "b" 0 51 + 95 opt 0 0 94 51 + 96 t "c" -97 52 + 97 iter 0 0 96 52 + 98 alt 0 99 93 51 + 99 alt 0 0 97 52 + + +First & follow symbols: +---------------------- + +Test +first: "a" "b" "c" +follow: EOF + +A +first: "a" +follow: "a" "b" + +B +first: "a" "b" +follow: "a" + +C +first: "a" +follow: "a" "c" + +D +first: "a" "c" +follow: "a" "b" "c" "d" + +E +first: "a" "b" "c" "d" +follow: "a" "b" "c" "d" ??? + +F +first: "a" "b" "c" "d" ??? +follow: "a" "b" + +G +first: "a" "b" +follow: "a" "b" "c" + +H +first: "a" "b" "c" +follow: EOF + +I +first: "b" "c" +follow: EOF + + +ANY and SYNC sets: +----------------- + 59 any "d" ??? 
+ +Cross reference list: +-------------------- + + "a" 7 10 15 15 18 19 23 31 33 37 39 42 45 + 46 + "b" 7 7 11 13 15 19 25 27 31 39 40 46 47 + 51 + "c" 7 7 20 24 25 34 48 52 + "d" 28 + A -7 4 + B -9 4 + C -15 4 + D -17 4 + E -22 4 + F -30 4 + G -36 4 + H -44 4 + I -50 5 + Test -4 + + + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 +E("a" ) 1: +E("b" ) 2: +E("c" ) 3: +E("d" ) 4: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 "a" t false 7 fixedToken + 2 "b" t false 7 fixedToken + 3 "c" t false 7 fixedToken + 4 "d" t false 28 fixedToken + 5 ??? t false 0 fixedToken + 0 Test nt false 10 true 4 fixedToken + 1 A nt false 12 false 7 fixedToken + 2 B nt false 23 false 9 fixedToken + 3 C nt false 32 false 15 fixedToken + 4 D nt false 40 false 17 fixedToken + 5 E nt false 50 false 22 fixedToken + 6 F nt false 64 false 30 fixedToken + 7 G nt false 68 false 36 fixedToken + 8 H nt false 82 false 44 fixedToken + 9 I nt false 98 true 50 fixedToken + +Literal Tokens: +-------------- + diff --git a/src/TestSuite/TestSem.ATG b/src/TestSuite/TestSem.ATG new file mode 100644 index 0000000..b8333f9 --- /dev/null +++ b/src/TestSuite/TestSem.ATG @@ -0,0 +1,34 @@ +$01246 +/*------------------------------------------------------------------------- +Test of semantic actions +----------------------------------------------------------------------------*/ +using System.Collections; + +COMPILER Test + + static void Foo() { + Console.WriteLine("foo"); + } + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRAGMAS + option = '$' ('a' | 'b'). (. Console.WriteLine("pragma"); .) + +PRODUCTIONS + +Test (. decl .) = A B C. +A = ( (. aaa .) c | (. bbb .) | d) (. ccc .). +B = (. ddd .) { a (. eee .)} (. fff .) b (. !$%&/()=?`+*#'-_.:,;<>^{[]}| .). +C = (a | b) (. ggg .) c (..). + +END Test. 
diff --git a/src/TestSuite/TestSem_Output.txt b/src/TestSuite/TestSem_Output.txt new file mode 100644 index 0000000..ca245c9 --- /dev/null +++ b/src/TestSuite/TestSem_Output.txt @@ -0,0 +1,6 @@ +Coco/R (Dec 01, 2018) +checking + A deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSem_Parser.cpp b/src/TestSuite/TestSem_Parser.cpp new file mode 100644 index 0000000..ca16ffd --- /dev/null +++ b/src/TestSuite/TestSem_Parser.cpp @@ -0,0 +1,513 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + if (la->kind == _option) { + Console.WriteLine("pragma"); + } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + 
SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { + decl +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + B_NT(); + C_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + if (IsKind(la, _c)) { + aaa + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _a) || IsKind(la, _b)) { + bbb + } else if (IsKind(la, _d)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(11); + ccc +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + ddd + while (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + eee + } + fff + Expect(_b); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + !$%&/()=?`+*#'-_.:,;<>^{[]}| +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + if (IsKind(la, _a)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else SynErr(12); + ggg + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. 
+// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); 
break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("invalid A"); break; + case 12: s = _SC("invalid C"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestSem_Scanner.cpp b/src/TestSuite/TestSem_Scanner.cpp new file mode 100644 index 0000000..d6732ae --- /dev/null +++ b/src/TestSuite/TestSem_Scanner.cpp @@ -0,0 +1,700 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(36, 10); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + 
tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; 
break;} + case 2: + {t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + case 10: + if ((ch >= _SC('a') && ch <= _SC('b'))) {AddCh(); goto case_11;} + else {goto case_0;} + case 11: + case_11: + {t->kind = 11 /* option */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestSem_Trace.txt b/src/TestSuite/TestSem_Trace.txt new file mode 100644 index 0000000..66b8c15 --- /dev/null +++ b/src/TestSuite/TestSem_Trace.txt @@ -0,0 +1,115 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 29 + 2 nt B 3 29 + 3 nt C 0 29 + 4 sem 5 543 0 + 5 t c -11 30 + 6 sem -11 557 0 + 7 alt 11 8 4 0 + 8 alt -11 10 6 0 + 9 t d -11 30 + 10 alt -11 0 9 30 + 11 sem 0 572 0 + 12 sem 15 588 0 + 13 t a 14 31 + 14 sem -15 602 0 + 15 iter 16 0 13 31 + 16 sem 17 613 0 + 17 t b 18 31 + 18 sem 0 625 0 + 19 t a -23 32 + 20 t b -23 32 + 21 alt 23 
22 19 32 + 22 alt -23 0 20 32 + 23 sem 24 674 0 + 24 t c 25 32 + 25 sem 0 685 0 + + +First & follow symbols: +---------------------- + +Test +first: a b c d +follow: EOF + +A +first: c d +follow: a b + +B +first: a b +follow: a b + +C +first: a b +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 + _SC('$') 10 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + 10: #A 11 +E(option ) 11: + +---------- character classes ---------- +#A : 'a' .. 'b' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 14 fixedToken + 2 b t false 15 fixedToken + 3 c t false 16 fixedToken + 4 d t false 17 fixedToken + 5 e t false 18 fixedToken + 6 f t false 19 fixedToken + 7 g t false 20 fixedToken + 8 h t false 21 fixedToken + 9 i t false 22 fixedToken + 10 ??? t false 0 fixedToken + 11 option pr false 25 fixedToken + 0 Test nt false 1 false 29 fixedToken + 1 A nt false 7 true 30 fixedToken + 2 B nt false 12 false 31 fixedToken + 3 C nt false 21 false 32 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". 
+ diff --git a/src/TestSuite/TestSem_output.txt b/src/TestSuite/TestSem_output.txt new file mode 100644 index 0000000..f60cea1 --- /dev/null +++ b/src/TestSuite/TestSem_output.txt @@ -0,0 +1,6 @@ +Coco/R (Sep 6, 2007) +checking + A deletable +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSync.ATG b/src/TestSuite/TestSync.ATG new file mode 100644 index 0000000..cc3dfd5 --- /dev/null +++ b/src/TestSuite/TestSync.ATG @@ -0,0 +1,23 @@ +$01246 +/*------------------------------------------------------------------------- +Test of SYNC symbols +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = a SYNC {b | c} d A. +A = SYNC [e f] g. + +END Test. diff --git a/src/TestSuite/TestSync_Output.txt b/src/TestSuite/TestSync_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestSync_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestSync_Parser.cpp b/src/TestSuite/TestSync_Parser.cpp new file mode 100644 index 0000000..ef02a97 --- /dev/null +++ b/src/TestSuite/TestSync_Parser.cpp @@ -0,0 +1,481 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (!(StartOf(1 /* sync */))) {SynErr(11); Get();} + while (IsKind(la, _b) || IsKind(la, _c)) { + if (IsKind(la, _b)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } else { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + } + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + A_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + while (!(IsKind(la, _EOF) || IsKind(la, _e) || IsKind(la, _g))) {SynErr(12); Get();} + if (IsKind(la, _e)) { + Get(); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_f); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_g); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + 
// If the user declared a method Init and a method Destroy, they should
// be called in the constructor and the destructor respectively.
//
// The following templates are used to recognize whether the user declared
// the methods Init and Destroy.

// Compile-time detector for a member function `void T::Init()`.
// InitExists is non-zero when such a member can be named as &T::Init,
// and zero otherwise. Nothing is evaluated at run time: the result is
// derived purely from overload resolution inside sizeof().
template<typename T>
struct ParserInitExistsRecognizer {
	// Can only be instantiated when &U::Init is a valid `void (U::*)()`;
	// otherwise substitution fails and the overload using it is discarded.
	template<typename U, void (U::*)() = &U::Init>
	struct ExistsIfInitIsDefinedMarker{};

	// The two result types differ in size so that sizeof() can tell
	// which is_here overload was selected.
	struct InitIsMissingType {
		char dummy1;
	};

	struct InitExistsType {
		char dummy1; char dummy2;
	};

	// exists always (ellipsis is the worst possible match, so this is the fallback)
	template<typename U>
	static InitIsMissingType is_here(...);

	// exists only if ExistsIfInitIsDefinedMarker<U> is well-formed
	template<typename U>
	static InitExistsType is_here(ExistsIfInitIsDefinedMarker<U>*);

	enum { InitExists = (sizeof(is_here<T>(NULL)) == sizeof(InitExistsType)) };
};

// Compile-time detector for a member function `void T::Destroy()`.
// Mirrors ParserInitExistsRecognizer: DestroyExists is non-zero when
// &T::Destroy is a valid `void (T::*)()`, and zero otherwise.
template<typename T>
struct ParserDestroyExistsRecognizer {
	// Can only be instantiated when &U::Destroy is a valid `void (U::*)()`.
	template<typename U, void (U::*)() = &U::Destroy>
	struct ExistsIfDestroyIsDefinedMarker{};

	struct DestroyIsMissingType {
		char dummy1;
	};

	struct DestroyExistsType {
		char dummy1; char dummy2;
	};

	// exists always (ellipsis is the worst possible match, so this is the fallback)
	template<typename U>
	static DestroyIsMissingType is_here(...);

	// exists only if ExistsIfDestroyIsDefinedMarker<U> is well-formed
	template<typename U>
	static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker<U>*);

	enum { DestroyExists = (sizeof(is_here<T>(NULL)) == sizeof(DestroyExistsType)) };
};

// The following templates are used to call the Init and Destroy methods if they exist.
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[2][12] = { + {T,x,T,T, T,T,x,T, x,x,x,x}, + {T,x,T,T, T,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + 
case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + case 11: s = _SC("this symbol not expected in Test"); break; + case 12: s = _SC("this symbol not expected in A"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestSync_Scanner.cpp b/src/TestSuite/TestSync_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestSync_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestSync_Trace.txt b/src/TestSuite/TestSync_Trace.txt new file mode 100644 index 0000000..13a77d9 --- /dev/null +++ b/src/TestSuite/TestSync_Trace.txt @@ -0,0 +1,91 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t a 2 20 + 2 sync 7 0 + 3 t b -7 20 + 4 t c -7 20 + 5 alt -7 6 3 20 + 6 alt -7 0 4 20 + 7 iter 8 0 5 20 + 8 t d 9 20 + 9 nt A 0 20 + 10 sync 13 0 + 11 t e 12 21 + 12 t f -14 21 + 13 opt 14 0 11 21 + 14 t g 0 21 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: e g +follow: EOF + + +ANY and SYNC sets: +----------------- + 2 sync EOF b c d + 10 sync EOF e g + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + _SC('e') 5 + _SC('f') 6 + 
_SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 10 false 21 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestSync_output.txt b/src/TestSuite/TestSync_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestSync_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestTerminalizable.ATG b/src/TestSuite/TestTerminalizable.ATG new file mode 100644 index 0000000..42d01aa --- /dev/null +++ b/src/TestSuite/TestTerminalizable.ATG @@ -0,0 +1,26 @@ +$01246 +/*------------------------------------------------------------------------- +Test if nonterminals are terminalizable. +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = 'a'. + b = 'b'. + c = 'c'. + d = 'd'. + e = 'e'. + f = 'f'. + g = 'g'. + h = 'h'. + i = 'i'. + +PRODUCTIONS + +Test = A B C D. +A = a C a. +B = b. +C = c D c. +D = A d. + +END Test. 
diff --git a/src/TestSuite/TestTerminalizable_Output.txt b/src/TestSuite/TestTerminalizable_Output.txt new file mode 100644 index 0000000..e7e14af --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Output.txt @@ -0,0 +1,8 @@ +Coco/R (Dec 01, 2018) +checking + Test cannot be derived to terminals + A cannot be derived to terminals + C cannot be derived to terminals + D cannot be derived to terminals +trace output is in trace.txt +4 errors detected diff --git a/src/TestSuite/TestTerminalizable_Parser.cpp b/src/TestSuite/TestTerminalizable_Parser.cpp new file mode 100644 index 0000000..fe1e1ea --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Parser.cpp @@ -0,0 +1,298 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void 
Parser::Test() { + A(); + B(); + Expect(_g,__FUNCTION__); + C(); + Expect(_g,__FUNCTION__); + D(); +} + +void Parser::A() { + if (la->kind == _a) { + Get(); + } else if (StartOf(1)) { + while (la->kind == _e) { + Get(); + } + if (la->kind == _f) { + Get(); + } + } else SynErr(11,__FUNCTION__); +} + +void Parser::B() { + while (la->kind == _b) { + Get(); + } + if (la->kind == _c) { + Get(); + } + if (la->kind == _d) { + Get(); + } else if (la->kind == _EOF || la->kind == _g) { + } else SynErr(12,__FUNCTION__); +} + +void Parser::C() { + A(); + B(); +} + +void Parser::D() { + if (StartOf(2)) { + C(); + } else if (la->kind == _h) { + Get(); + } else SynErr(13,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are 
used to call the Init and Destroy methods if they exist. + +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[3][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,T,T, T,T,T,T, x,x,x,x}, + {T,T,T,T, T,T,T,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"a expected"); break; + case 2: s = coco_string_create(L"b expected"); break; + case 3: s = coco_string_create(L"c expected"); break; + case 4: s = coco_string_create(L"d expected"); break; + case 5: s = coco_string_create(L"e expected"); break; 
+ case 6: s = coco_string_create(L"f expected"); break; + case 7: s = coco_string_create(L"g expected"); break; + case 8: s = coco_string_create(L"h expected"); break; + case 9: s = coco_string_create(L"i expected"); break; + case 10: s = coco_string_create(L"??? expected"); break; + case 11: s = coco_string_create(L"invalid A"); break; + case 12: s = coco_string_create(L"invalid B"); break; + case 13: s = coco_string_create(L"invalid D"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestTerminalizable_Scanner.cpp b/src/TestSuite/TestTerminalizable_Scanner.cpp new file mode 100644 index 0000000..6db27ed --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Scanner.cpp @@ -0,0 +1,629 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + 
return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = 
wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = 
isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = 
newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + {t->kind = 3; break;} + case 4: + {t->kind = 4; break;} + case 5: + {t->kind = 5; break;} + case 6: + {t->kind = 6; break;} + case 
7: + {t->kind = 7; break;} + case 8: + {t->kind = 8; break;} + case 9: + {t->kind = 9; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTerminalizable_Trace.txt b/src/TestSuite/TestTerminalizable_Trace.txt new file mode 100644 index 0000000..77afd20 --- /dev/null +++ b/src/TestSuite/TestTerminalizable_Trace.txt @@ -0,0 +1,80 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 4 20 + 4 nt D 0 20 + 5 t a 6 21 + 6 nt C 7 21 + 7 t a 0 21 + 8 t b 0 22 + 9 t c 10 23 + 10 nt D 11 23 + 11 t c 0 23 + 12 nt A 13 24 + 13 t d 0 24 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: b d + +B +first: b +follow: c + +C +first: c +follow: a + +D +first: a +follow: EOF c + + +ANY and SYNC sets: +----------------- +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 
h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 5 false 21 fixedToken + 2 B nt false 8 false 22 fixedToken + 3 C nt false 9 false 23 fixedToken + 4 D nt false 12 false 24 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestTokens.ATG b/src/TestSuite/TestTokens.ATG new file mode 100644 index 0000000..5fc9766 --- /dev/null +++ b/src/TestSuite/TestTokens.ATG @@ -0,0 +1,28 @@ +$01246 +/*------------------------------------------------------------------------- +Test of TOKENS definition +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + plus = '+'. + +TOKENS + ident = letter {letter | digit}. + ident1 = letter {'_'} '*'. + ident2 = letter CONTEXT ({'_'} '+'). + number = digit {digit} + | digit {digit} '.' {digit} ['E' ['+'|'-'] digit {digit}] + | digit {digit} CONTEXT (".."). +// nul = '\0'. + nul = "nul". +// hasNul = "ab\0c". + hasNul = "ab0c". + +PRODUCTIONS + +Test = ident "abc" "abc+" "a" "a_" "a__**" nul hasNul "nul". + +END Test. diff --git a/src/TestSuite/TestTokens1.ATG b/src/TestSuite/TestTokens1.ATG new file mode 100644 index 0000000..1e5253b --- /dev/null +++ b/src/TestSuite/TestTokens1.ATG @@ -0,0 +1,28 @@ +$01246 +/*------------------------------------------------------------------------- +Test of TOKENS definition (error case) +----------------------------------------------------------------------------*/ +COMPILER Test + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z'. + digit = '0'..'9'. + +TOKENS + ident = letter {letter | digit}. + ident1 = letter {digit} letter. /* tokens ident and ident1 cannot be distinguished */ + number = digit {digit} | digit {digit} CONTEXT(".."). + B1 = '\a'. + B2 = '\u0007'. 
/* tokens B1 and B2 cannot be distinguished */ + C1 = "\u0008". + C2 = "\b". /* tokens C1 and C2 cannot be distinguished */ + S1 = "+" "+". + S2 = "++". /* tokens S1 and S2 cannot be distinguished */ + S3 = '+' '+'. /* tokens S1 and S3 cannot be distinguished */ + X1 = "123..". /* tokens number and X1 cannot be distinguished */ + +PRODUCTIONS + +Test = ident. + +END Test. diff --git a/src/TestSuite/TestTokens1_Output.txt b/src/TestSuite/TestTokens1_Output.txt new file mode 100644 index 0000000..1aa1a3b --- /dev/null +++ b/src/TestSuite/TestTokens1_Output.txt @@ -0,0 +1,9 @@ +Coco/R (Dec 01, 2018) +TestTokens1.ATG -- line 16 col 15: tokens B2 and B1 cannot be distinguished +TestTokens1.ATG -- line 18 col 11: tokens C2 and C1 cannot be distinguished +TestTokens1.ATG -- line 20 col 11: tokens S2 and S1 cannot be distinguished +Tokens ident and ident1 cannot be distinguished +Tokens S1 and S3 cannot be distinguished +Tokens number and X1 cannot be distinguished +trace output is in trace.txt +6 errors detected diff --git a/src/TestSuite/TestTokens1_Parser.cpp b/src/TestSuite/TestTokens1_Parser.cpp new file mode 100644 index 0000000..485fb90 --- /dev/null +++ b/src/TestSuite/TestTokens1_Parser.cpp @@ -0,0 +1,259 @@ + + +#include +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n, const char *func_name) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void 
Parser::Expect(int n, const char *func_name) { + if (la->kind==n) Get(); else { SynErr(n, func_name); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n, __FUNCTION__); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n, __FUNCTION__); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test() { + Expect(_ident,__FUNCTION__); + Expect(7 /* "abc" */,__FUNCTION__); + Expect(8 /* "abc+" */,__FUNCTION__); + Expect(9 /* "a" */,__FUNCTION__); + Expect(10 /* "a_" */,__FUNCTION__); + Expect(11 /* "a__**" */,__FUNCTION__); + Expect(_nul,__FUNCTION__); + Expect(_hasNul,__FUNCTION__); + Expect(_nul,__FUNCTION__); +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Test(); + Expect(0,__FUNCTION__); +} + +Parser::Parser(Scanner *scanner) { + maxT = 12; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][14] = { + {T,x,x,x, x,x,x,x, x,x,x,x, x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"ident expected"); break; + case 2: s = coco_string_create(L"ident1 expected"); break; + case 3: s = coco_string_create(L"ident2 expected"); break; + case 4: s = coco_string_create(L"number expected"); break; + case 5: s = coco_string_create(L"nul expected"); break; + case 6: s = coco_string_create(L"hasNul expected"); break; + case 7: s = 
coco_string_create(L"\"abc\" expected"); break; + case 8: s = coco_string_create(L"\"abc+\" expected"); break; + case 9: s = coco_string_create(L"\"a\" expected"); break; + case 10: s = coco_string_create(L"\"a_\" expected"); break; + case 11: s = coco_string_create(L"\"a__**\" expected"); break; + case 12: s = coco_string_create(L"??? expected"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(L"-- line %d col %d: %ls\n", line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(L"%ls\n", s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(L"%ls", s); + exit(1); +} + + diff --git a/src/TestSuite/TestTokens1_Scanner.cpp b/src/TestSuite/TestTokens1_Scanner.cpp new file mode 100644 index 0000000..ffae751 --- /dev/null +++ b/src/TestSuite/TestTokens1_Scanner.cpp @@ -0,0 +1,724 @@ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int 
dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((L'A' <= ch) && (ch <= L'Z')) { + newData[i] = ch - (L'A' - L'a'); + } + else { newData[i] = ch; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 
0); +} + +int coco_string_indexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(const wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = 
(fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. 
end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + wchar_t *res = coco_string_create(buf, 0, len); + coco_string_delete(buf); + return res; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(L"--- buffer out of bounds access, position: %d\n", value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + char *chFileName = coco_string_create_char(fileName); + if ((stream = fopen(chFileName, "rb")) == NULL) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) 
firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 12; + noSym = 12; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 9); + for (i = 98; i <= 122; ++i) start.set(i, 9); + for (i = 48; i <= 57; ++i) start.set(i, 10); + start.set(97, 15); + start.set(Buffer::EoF, -1); + keywords.set(L"nul", 5); + keywords.set(L"ab0c", 6); + keywords.set(L"abc", 7); + keywords.set(L"a", 9); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new 
wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + int apx = 0; + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 
'Z') || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_1;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 2: + case_2: + {t->kind = 2; break;} + case 3: + case_3: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 3; break;} + case 4: + case_4: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_4;} + else if (ch == 'E') {AddCh(); goto case_5;} + else {t->kind = 4; break;} + case 5: + case_5: + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else if (ch == '+' || ch == '-') {AddCh(); goto case_6;} + else {goto case_0;} + case 6: + case_6: + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else {goto case_0;} + case 7: + case_7: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_7;} + else {t->kind = 4; break;} + case 8: + case_8: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 4; break;} + case 9: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {apx = 0; AddCh(); goto case_1;} + else if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 10: + case_10: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {AddCh(); goto case_10;} + else if (ch == '.') {apx++; AddCh(); goto case_12;} + else {t->kind = 4; break;} + case 11: + case_11: + if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto 
case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 12: + case_12: + recEnd = pos; recKind = 4; + if ((ch >= '0' && ch <= '9')) {apx = 0; AddCh(); goto case_4;} + else if (ch == 'E') {apx = 0; AddCh(); goto case_5;} + else if (ch == '.') {apx++; AddCh(); goto case_8;} + else {t->kind = 4; break;} + case 13: + case_13: + {t->kind = 8; break;} + case 14: + case_14: + {t->kind = 11; break;} + case 15: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == 'a' || (ch >= 'c' && ch <= 'z')) {apx = 0; AddCh(); goto case_1;} + else if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_16;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else if (ch == 'b') {apx = 0; AddCh(); goto case_17;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 16: + case_16: + recEnd = pos; recKind = 10; + if (ch == '*') {apx = 0; AddCh(); goto case_2;} + else if (ch == '_') {apx++; AddCh(); goto case_18;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {t->kind = 10; break;} + case 17: + case_17: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'b') || (ch >= 'd' && ch <= 'z')) {AddCh(); goto case_1;} + else if (ch == 'c') {AddCh(); goto case_19;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 18: + case_18: + if (ch == '*') {apx = 0; AddCh(); goto case_20;} + else if (ch == '_') {apx++; AddCh(); goto case_11;} + else if (ch == '+') {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 19: + case_19: + recEnd = pos; recKind = 1; + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_1;} + else if (ch == '+') 
{AddCh(); goto case_13;} + else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} + case 20: + case_20: + recEnd = pos; recKind = 2; + if (ch == '*') {AddCh(); goto case_14;} + else {t->kind = 2; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTokens1_Trace.txt b/src/TestSuite/TestTokens1_Trace.txt new file mode 100644 index 0000000..32aea93 --- /dev/null +++ b/src/TestSuite/TestTokens1_Trace.txt @@ -0,0 +1,36 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 0 26 + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 12 classToken + 2 ident1 t false 13 classToken + 3 number t false 14 classToken + 4 B1 t false 15 fixedToken + 5 B2 t false 16 fixedToken + 6 C1 t false 17 fixedToken + 7 C2 t false 18 fixedToken + 8 S1 t false 19 fixedToken + 9 S2 t false 20 fixedToken + 10 S3 t false 21 fixedToken + 11 X1 t false 22 fixedToken + 12 ??? 
t false 0 fixedToken + 0 Test nt false 1 false 26 fixedToken + +Literal Tokens: +-------------- +_C2 = "\b". +_B1 = "\a". +_X1 = "123..". +_B2 = "\u0007". +_S2 = "++". +_C1 = "\u0008". + diff --git a/src/TestSuite/TestTokens_Output.txt b/src/TestSuite/TestTokens_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestTokens_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestTokens_Parser.cpp b/src/TestSuite/TestTokens_Parser.cpp new file mode 100644 index 0000000..e65023d --- /dev/null +++ b/src/TestSuite/TestTokens_Parser.cpp @@ -0,0 +1,469 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[13] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || 
StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + Expect(_ident); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(7 /* "abc" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(8 /* "abc+" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(9 /* "a" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(10 /* "a_" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(11 /* "a__**" */); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_nul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_hasNul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + Expect(_nul); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. 
+ +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 12; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[1][14] = { + {T,x,x,x, x,x,x,x, x,x,x,x, x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); break; + case 1: s = _SC("ident expected"); break; + case 2: s = _SC("ident1 expected"); break; + case 3: s = 
_SC("ident2 expected"); break; + case 4: s = _SC("number expected"); break; + case 5: s = _SC("nul expected"); break; + case 6: s = _SC("hasNul expected"); break; + case 7: s = _SC("\"abc\" expected"); break; + case 8: s = _SC("\"abc+\" expected"); break; + case 9: s = _SC("\"a\" expected"); break; + case 10: s = _SC("\"a_\" expected"); break; + case 11: s = _SC("\"a__**\" expected"); break; + case 12: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestTokens_Scanner.cpp b/src/TestSuite/TestTokens_Scanner.cpp new file mode 100644 index 0000000..100a6c2 --- /dev/null +++ b/src/TestSuite/TestTokens_Scanner.cpp @@ -0,0 +1,788 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 12; + noSym = 12; + int i; + for (i = 65; i <= 90; ++i) start.set(i, 9); + for (i = 98; i <= 122; ++i) start.set(i, 9); + for (i = 48; i <= 57; ++i) start.set(i, 10); + start.set(97, 15); + start.set(Buffer::EoF, -1); + keywords.set(_SC("nul"), 5); + keywords.set(_SC("ab0c"), 6); + keywords.set(_SC("abc"), 7); + keywords.set(_SC("a"), 9); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) 
{ line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int apx = 0; + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + 
SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 2: + case_2: + {t->kind = 2 /* ident1 */; break;} + case 3: + case_3: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 3 /* ident2 */; break;} + case 4: + case_4: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_4;} + else if (ch == _SC('E')) {AddCh(); goto case_5;} + else {t->kind = 4 /* number */; break;} + case 5: + case_5: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else if (ch == _SC('+') || ch == _SC('-')) {AddCh(); goto case_6;} + else {goto case_0;} + case 6: + case_6: + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else {goto case_0;} + case 7: + case_7: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_7;} + else {t->kind = 4 /* number */; break;} + case 8: + case_8: + { + tlen -= apx; + SetScannerBehindT(); buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); + t->kind = 4 /* number */; break;} + case 9: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {apx = 0; AddCh(); goto case_1;} + else if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 
10: + case_10: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {AddCh(); goto case_10;} + else if (ch == _SC('.')) {apx++; AddCh(); goto case_12;} + else {t->kind = 4 /* number */; break;} + case 11: + case_11: + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 12: + case_12: + recEnd = pos; recKind = 4 /* number */; + if ((ch >= _SC('0') && ch <= _SC('9'))) {apx = 0; AddCh(); goto case_4;} + else if (ch == _SC('E')) {apx = 0; AddCh(); goto case_5;} + else if (ch == _SC('.')) {apx++; AddCh(); goto case_8;} + else {t->kind = 4 /* number */; break;} + case 13: + case_13: + {t->kind = 8 /* "abc+" */; break;} + case 14: + case_14: + {t->kind = 11 /* "a__**" */; break;} + case 15: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || ch == _SC('a') || (ch >= _SC('c') && ch <= _SC('z'))) {apx = 0; AddCh(); goto case_1;} + else if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_16;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else if (ch == _SC('b')) {apx = 0; AddCh(); goto case_17;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 16: + case_16: + recEnd = pos; recKind = 10 /* "a_" */; + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_2;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_18;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {t->kind = 10 /* "a_" */; break;} + case 17: + case_17: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('b')) || (ch >= _SC('d') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else if (ch == _SC('c')) {AddCh(); goto case_19;} + else {t->kind 
= 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 18: + case_18: + if (ch == _SC('*')) {apx = 0; AddCh(); goto case_20;} + else if (ch == _SC('_')) {apx++; AddCh(); goto case_11;} + else if (ch == _SC('+')) {apx++; AddCh(); goto case_3;} + else {goto case_0;} + case 19: + case_19: + recEnd = pos; recKind = 1 /* ident */; + if ((ch >= _SC('0') && ch <= _SC('9')) || (ch >= _SC('A') && ch <= _SC('Z')) || (ch >= _SC('a') && ch <= _SC('z'))) {AddCh(); goto case_1;} + else if (ch == _SC('+')) {AddCh(); goto case_13;} + else {t->kind = 1 /* ident */; t->kind = keywords.get(tval, tlen, t->kind, false); break;} + case 20: + case_20: + recEnd = pos; recKind = 2 /* ident1 */; + if (ch == _SC('*')) {AddCh(); goto case_14;} + else {t->kind = 2 /* ident1 */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestTokens_Trace.txt b/src/TestSuite/TestTokens_Trace.txt new file mode 100644 index 0000000..b47feaa --- /dev/null +++ b/src/TestSuite/TestTokens_Trace.txt @@ -0,0 +1,107 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 t ident 2 26 + 2 t "abc" 3 26 + 3 t 
"abc+" 4 26 + 4 t "a" 5 26 + 5 t "a_" 6 26 + 6 t "a__**" 7 26 + 7 t nul 8 26 + 8 t hasNul 9 26 + 9 t nul 0 26 + + +First & follow symbols: +---------------------- + +Test +first: ident +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: #C 9 + digit 10 + _SC('a') 15 +E(ident ) 1: #A 1 +E(ident1 ) 2: +E(ident2 ) 3: +E(number ) 4: digit 4 + _SC('E') 5 + 5: digit 7 + #B 6 + 6: digit 7 +E(number ) 7: digit 7 +E(number ) 8: +E(ident ) 9: #A 1 + _SC('*') 2 + _SC('_') 11 context + _SC('+') 3 context +E(number ) 10: digit 10 + _SC('.') 12 context + 11: _SC('*') 2 + _SC('_') 11 context + _SC('+') 3 context +E(number ) 12: digit 4 + _SC('E') 5 + _SC('.') 8 context +E("abc+" ) 13: +E("a__**" ) 14: +E(ident ) 15: #D 1 + _SC('*') 2 + _SC('_') 16 context + _SC('+') 3 context + _SC('b') 17 +E("a_" ) 16: _SC('*') 2 + _SC('_') 18 context + _SC('+') 3 context +E(ident ) 17: #E 1 + _SC('c') 19 + 18: _SC('*') 20 + _SC('_') 11 context + _SC('+') 3 context +E(ident ) 19: #A 1 + _SC('+') 13 +E(ident1 ) 20: _SC('*') 14 + +---------- character classes ---------- +letter : 'A' .. 'Z' 'a' .. 'z' +digit : '0' .. '9' +plus : '+' +#A : '0' .. '9' 'A' .. 'Z' 'a' .. 'z' +#B : '+' '-' +#C : 'A' .. 'Z' 'b' .. 'z' +#D : '0' .. '9' 'A' .. 'Z' 'a' 'c' .. 'z' +#E : '0' .. '9' 'A' .. 'Z' 'a' .. 'b' 'd' .. 'z' + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 ident t false 13 classLitToken + 2 ident1 t false 14 classToken + 3 ident2 t false 15 classToken + 4 number t false 16 classToken + 5 nul t false 20 litToken + 6 hasNul t false 22 litToken + 7 "abc" t false 26 litToken + 8 "abc+" t false 26 fixedToken + 9 "a" t false 26 litToken + 10 "a_" t false 26 fixedToken + 11 "a__**" t false 26 fixedToken + 12 ??? t false 0 fixedToken + 0 Test nt false 1 false 26 fixedToken + +Literal Tokens: +-------------- +_hasNul = "ab0c". +_nul = "nul". 
+ diff --git a/src/TestSuite/TestTokens_output.txt b/src/TestSuite/TestTokens_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestTokens_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestWeak.ATG b/src/TestSuite/TestWeak.ATG new file mode 100644 index 0000000..bbfe26b --- /dev/null +++ b/src/TestSuite/TestWeak.ATG @@ -0,0 +1,25 @@ +$01246 +/*------------------------------------------------------------------------- +Test of WEAK symbol +----------------------------------------------------------------------------*/ +COMPILER Test + +TOKENS + a = "a". + b = "b". + c = "c". + d = "d". + e = "e". + f = "f". + g = "g". + h = "h". + i = "i". + +PRODUCTIONS + +Test = A B C. +A = a WEAK b c. +B = a {WEAK b c} d. +C = a {WEAK b} c. + +END Test. diff --git a/src/TestSuite/TestWeak_Output.txt b/src/TestSuite/TestWeak_Output.txt new file mode 100644 index 0000000..38dc67e --- /dev/null +++ b/src/TestSuite/TestWeak_Output.txt @@ -0,0 +1,5 @@ +Coco/R (Dec 01, 2018) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/TestWeak_Parser.cpp b/src/TestSuite/TestWeak_Parser.cpp new file mode 100644 index 0000000..95453e4 --- /dev/null +++ b/src/TestSuite/TestWeak_Parser.cpp @@ -0,0 +1,498 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ + + +#include "Scanner.h" +#include "Parser.h" + + + + +#ifdef PARSER_WITH_AST + +void Parser::AstAddTerminal() { + SynTree *st_t = new SynTree( t->Clone() ); + ast_stack.Top()->children.Add(st_t); +} + +bool Parser::AstAddNonTerminal(eNonTerminals kind, const wchar_t *nt_name, int line) { + Token *ntTok = new Token(); + ntTok->kind = kind; + ntTok->line = line; + ntTok->val = coco_string_create(nt_name); + SynTree *st = new SynTree( ntTok ); + ast_stack.Top()->children.Add(st); + ast_stack.Add(st); + return true; +} + +void Parser::AstPopNonTerminal() { + ast_stack.Pop(); +} + +#endif + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(const wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = 
dummyToken; + } + la = t; + } +} + +bool Parser::IsKind(Token *t, int n) { + static const int tBase[11] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + int k = t->kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; +} + +void Parser::Expect(int n) { + if (IsKind(la, n)) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (IsKind(la, n)) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (IsKind(la, n)) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::Test_NT() { +#ifdef PARSER_WITH_AST + Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_Test; ntTok->line = 0; ntTok->val = coco_string_create(_SC("Test"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root); +#endif + A_NT(); + B_NT(); + C_NT(); +#ifdef PARSER_WITH_AST + AstPopNonTerminal(); +#endif +} + +void Parser::A_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_A, _SC("A"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + ExpectWeak(_b, 1); + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::B_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = AstAddNonTerminal(eNonTerminals::_B, _SC("B"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (WeakSeparator(_b,3,2) ) { + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + } + Expect(_d); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + +void Parser::C_NT() { +#ifdef PARSER_WITH_AST + bool ntAdded = 
AstAddNonTerminal(eNonTerminals::_C, _SC("C"), la->line); +#endif + Expect(_a); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif + while (WeakSeparator(_b,4,3) ) { + } + Expect(_c); +#ifdef PARSER_WITH_AST + AstAddTerminal(); +#endif +#ifdef PARSER_WITH_AST + if(ntAdded) AstPopNonTerminal(); +#endif +} + + + + +// If the user declared a method Init and a mehtod Destroy they should +// be called in the contructur and the destructor respctively. +// +// The following templates are used to recognize if the user declared +// the methods Init and Destroy. + +template +struct ParserInitExistsRecognizer { + template + struct ExistsIfInitIsDefinedMarker{}; + + struct InitIsMissingType { + char dummy1; + }; + + struct InitExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static InitIsMissingType is_here(...); + + // exist only if ExistsIfInitIsDefinedMarker is defined + template + static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); + + enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; +}; + +template +struct ParserDestroyExistsRecognizer { + template + struct ExistsIfDestroyIsDefinedMarker{}; + + struct DestroyIsMissingType { + char dummy1; + }; + + struct DestroyExistsType { + char dummy1; char dummy2; + }; + + // exists always + template + static DestroyIsMissingType is_here(...); + + // exist only if ExistsIfDestroyIsDefinedMarker is defined + template + static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); + + enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; +}; + +// The folloing templates are used to call the Init and Destroy methods if they exist. 
+ +// Generic case of the ParserInitCaller, gets used if the Init method is missing +template::InitExists> +struct ParserInitCaller { + static void CallInit(T *t) { + // nothing to do + } +}; + +// True case of the ParserInitCaller, gets used if the Init method exists +template +struct ParserInitCaller { + static void CallInit(T *t) { + t->Init(); + } +}; + +// Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing +template::DestroyExists> +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + // nothing to do + } +}; + +// True case of the ParserDestroyCaller, gets used if the Destroy method exists +template +struct ParserDestroyCaller { + static void CallDestroy(T *t) { + t->Destroy(); + } +}; + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(_SC("Dummy Token")); + Get(); + Test_NT(); + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + maxT = 10; + + ParserInitCaller::CallInit(this); + dummyToken = NULL; + t = la = NULL; + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + this->errors = new Errors(scanner->GetParserFileName()); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static const bool set[5][12] = { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {T,x,x,T, x,x,x,x, x,x,x,x}, + {x,x,x,x, T,x,x,x, x,x,x,x}, + {x,x,x,T, x,x,x,x, x,x,x,x}, + {x,x,T,T, x,x,x,x, x,x,x,x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + ParserDestroyCaller::CallDestroy(this); + delete dummyToken; + delete errors; +#ifdef PARSER_WITH_AST + delete ast_root; +#endif + +#ifdef COCO_FRAME_PARSER + coco_string_delete(noString); + coco_string_delete(tokenString); +#endif +} + +Errors::Errors(const char * FileName) { + count = 0; + file = FileName; +} + +void Errors::SynErr(int line, int col, int n) { + const wchar_t* s; + const size_t format_size = 20; + wchar_t format[format_size]; + switch (n) { + case 0: s = _SC("EOF expected"); 
break; + case 1: s = _SC("a expected"); break; + case 2: s = _SC("b expected"); break; + case 3: s = _SC("c expected"); break; + case 4: s = _SC("d expected"); break; + case 5: s = _SC("e expected"); break; + case 6: s = _SC("f expected"); break; + case 7: s = _SC("g expected"); break; + case 8: s = _SC("h expected"); break; + case 9: s = _SC("i expected"); break; + case 10: s = _SC("??? expected"); break; + + default: + { + coco_swprintf(format, format_size, _SC("error %d"), n); + s = format; + } + break; + } + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Error(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); + count++; +} + +void Errors::Warning(int line, int col, const wchar_t *s) { + wprintf(_SC("%s -- line %d col %d: %") _SFMT _SC("\n"), file, line, col, s); +} + +void Errors::Warning(const wchar_t *s) { + wprintf(_SC("%") _SFMT _SC("\n"), s); +} + +void Errors::Exception(const wchar_t* s) { + wprintf(_SC("%") _SFMT _SC(""), s); + exit(1); +} + +#ifdef PARSER_WITH_AST + +static void printIndent(int n) { + for(int i=0; i < n; ++i) wprintf(_SC(" ")); +} + +SynTree::~SynTree() { + //wprintf(_SC("Token %") _SFMT _SC(" : %d : %d : %d : %d\n"), tok->val, tok->kind, tok->line, tok->col, children.Count); + delete tok; + for(int i=0; icol) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? 
"= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_all(indent+4, idx == last_idx); + } +} + +void SynTree::dump_pruned(int indent, bool isLast) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok->col) { + printIndent(indent); + wprintf(_SC("%s\t%d\t%d\t%d\t%") _SFMT _SC("\n"), ((isLast || (last_idx == 0)) ? "= " : " "), tok->line, tok->col, tok->kind, tok->val); + } + else { + if(last_idx == 1) { + if(((SynTree*)children[0])->children.Count == 0) { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + wprintf(_SC("%d\t%d\t%d\t%") _SFMT _SC("\n"), children.Count, tok->line, tok->kind, tok->val); + } + } + if(last_idx) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree*)children[idx])->dump_pruned(indent+indentPlus, idx == last_idx); + } +} + +#endif + + + +#ifndef WITH_STDCPP_LIB +/* +This code is to have an executable without libstd++ library dependency +g++ -g -Wall -fno-rtti -fno-exceptions *.cpp -o YourParser + */ + +// MSVC uses __cdecl calling convention for new/delete :-O +#ifdef _MSC_VER +# define NEWDECL_CALL __cdecl +#else +# define NEWDECL_CALL +#endif + +extern "C" void __cxa_pure_virtual () +{ + puts("__cxa_pure_virtual called\n"); + abort (); +} + +void * NEWDECL_CALL operator new (size_t size) +{ + void *p = malloc (size); + if(!p) + { + puts("not enough memory\n"); + abort (); + } + return p; +} + +void * NEWDECL_CALL operator new [] (size_t size) +{ + return ::operator new(size); +} + +void NEWDECL_CALL operator delete (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete [] (void *p) +{ + if (p) free (p); +} + +void NEWDECL_CALL operator delete (void *p, 
size_t) +{ + if (p) free (p); +} +#endif //WITH_STDCPP_LIB diff --git a/src/TestSuite/TestWeak_Scanner.cpp b/src/TestSuite/TestWeak_Scanner.cpp new file mode 100644 index 0000000..1aa2ba0 --- /dev/null +++ b/src/TestSuite/TestWeak_Scanner.cpp @@ -0,0 +1,693 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported to C++ by Csaba Balazs, University of Szeged +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ + + +#include +#include +#include "Scanner.h" + + + + +// string handling, wide character + + +wchar_t* coco_string_create(const wchar_t* value) { + return coco_string_create(value, 0); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex) { + int valueLen = 0; + int len = 0; + + if (value) { + valueLen = wcslen(value); + len = valueLen - startIndex; + } + + return coco_string_create(value, startIndex, len); +} + +wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(const wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((_SC('a') <= data[i]) && (data[i] <= _SC('z'))) { + newData[i] = data[i] + (_SC('A') - _SC('a')); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_lower(const wchar_t* data) { + if (!data) { return NULL; } + int dataLen = wcslen(data); + return coco_string_create_lower(data, 0, dataLen); +} + +wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) { + if (!data) { return NULL; } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + wchar_t ch = data[startIndex + i]; + if ((_SC('A') <= ch) && (ch <= _SC('Z'))) { + newData[i] = ch - (_SC('A') - _SC('a')); + } + else { newData[i] = ch; } + } + newData[dataLen] = _SC('\0'); + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if 
(data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const int appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(const wchar_t* data, const int value) { + const wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +bool coco_string_equal_nocase(const wchar_t* data1, const wchar_t* data2) { + return wcscasecmp( data1, data2 ) == 0; +} + +bool coco_string_equal_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncmp( data1, data2, size ) == 0; +} + +bool coco_string_equal_nocase_n(const wchar_t* data1, const wchar_t* data2, size_t size) { + return wcsncasecmp( data1, data2, size ) == 0; +} + +int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { + return wcscmp(data1, data2); +} + +unsigned int coco_string_hash(const wchar_t *data) { + unsigned int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + return h; +} + +unsigned int coco_string_hash(const wchar_t *data, size_t size) { + unsigned int h = 0; + if (!data) { return 0; } + for (size_t i=0; i < size; ++i) { + h = (h * 7) ^ data[i]; + } + return h; +} + +#ifndef WITHOUT_WCHAR +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} +#endif + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token *Token::Clone() { + Token *tk = new Token(); + tk->kind = kind; + tk->pos = pos; + tk->col = col; + tk->line = line; + tk->val = coco_string_create(val); + tk->next = next; + return tk; +} + +Token::~Token() { + coco_string_delete(val); +} + +Buffer::Buffer(FILE* s, bool isUserStream) { +// ensure binary read on windows +#if _MSC_VER >= 1300 + _setmode(_fileno(s), _O_BINARY); +#endif + stream = s; this->isUserStream = isUserStream; + if (CanSeek()) { + fseek(s, 0, SEEK_END); + fileLen = ftell(s); + fseek(s, 0, SEEK_SET); + bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } else { + fileLen = bufLen = bufStart = 0; + } + bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + bufCapacity = b->bufCapacity; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + bufPos = b->bufPos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::Buffer(const unsigned char* buf, int len) { + this->buf = new unsigned char[len]; + memcpy(this->buf, buf, len*sizeof(unsigned char)); + bufStart = 0; + bufCapacity = bufLen = len; + fileLen = len; + bufPos = 0; + stream = NULL; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + fclose(stream); + stream = NULL; + } +} + +int Buffer::Read() { + if (bufPos < bufLen) { + return buf[bufPos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +// beg .. begin, zero-based, inclusive, in byte +// end .. end, zero-based, exclusive, in byte +wchar_t* Buffer::GetString(int beg, int end) { + int len = 0; + wchar_t *buf = new wchar_t[end - beg]; + int oldPos = GetPos(); + SetPos(beg); + while (GetPos() < end) buf[len++] = (wchar_t) Read(); + SetPos(oldPos); + buf[len] = 0; + return buf; +} + +int Buffer::GetPos() { + return bufPos + bufStart; +} + +void Buffer::SetPos(int value) { + if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. 
network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); + } + + if ((value < 0) || (value > fileLen)) { + wprintf(_SC("--- buffer out of bounds access, position: %d\n"), value); + exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer + bufPos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + fseek(stream, value, SEEK_SET); + bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); + bufStart = value; bufPos = 0; + } else { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +int Buffer::ReadNextStreamChunk() { + int free = bufCapacity - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); + delete [] buf; + buf = newBuf; + free = bufLen; + } + int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + +bool Buffer::CanSeek() { + return (stream != NULL) && (ftell(stream) != -1); +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const unsigned char* buf, int len) { + buffer = new Buffer(buf, len); + parseFileName = NULL; + Init(); +} + +Scanner::Scanner(const wchar_t* fileName) { + FILE* stream; + parseFileName = coco_string_create_char(fileName); + if ((stream = fopen(parseFileName, "rb")) == NULL) { + wprintf(_SC("--- Cannot open file %") _SFMT _SC("\n"), parseFileName); + exit(1); + } + buffer = new Buffer(stream, false); + Init(); +} + +Scanner::Scanner(FILE* s) { + buffer = new Buffer(s, true); + parseFileName = NULL; + Init(); +} + 
+Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; + if(parseFileName) coco_string_delete(parseFileName); +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 10; + noSym = 10; + start.set(97, 1); + start.set(98, 2); + start.set(99, 3); + start.set(100, 4); + start.set(101, 5); + start.set(102, 6); + start.set(103, 7); + start.set(104, 8); + start.set(105, 9); + start.set(Buffer::EoF, -1); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too small COCO_HEAP_BLOCK_SIZE\n")); + exit(1); + } + + pos = -1; line = 1; col = 0; charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(_SC("Illegal byte order mark at start of file")); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer->Read(); col++; charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == _SC('\r') && buffer->Peek() != _SC('\n')) ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + 
wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete [] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) { + tval[tlen++] = ch; + NextCh(); + } +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > COCO_HEAP_BLOCK_SIZE) { + wprintf(_SC("--- Too long token value\n")); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = _SC('\0'); +} + +Token* Scanner::NextToken() { + for(;;) { + while (ch == _SC(' ') || + false + ) NextCh(); + + break; + } + + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + {t->kind = 1 /* a */; break;} + case 2: + 
{t->kind = 2 /* b */; break;} + case 3: + {t->kind = 3 /* c */; break;} + case 4: + {t->kind = 4 /* d */; break;} + case 5: + {t->kind = 5 /* e */; break;} + case 6: + {t->kind = 6 /* f */; break;} + case 7: + {t->kind = 7 /* g */; break;} + case 8: + {t->kind = 8 /* h */; break;} + case 9: + {t->kind = 9 /* i */; break;} + + } + AppendVal(t); + return t; +} + +void Scanner::SetScannerBehindT() { + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; charPos = t->charPos; + for (int i = 0; i < tlen; i++) NextCh(); +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + do { + if (pt->next == NULL) { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + diff --git a/src/TestSuite/TestWeak_Trace.txt b/src/TestSuite/TestWeak_Trace.txt new file mode 100644 index 0000000..5601707 --- /dev/null +++ b/src/TestSuite/TestWeak_Trace.txt @@ -0,0 +1,100 @@ +Graph nodes: +---------------------------------------------------- + n type name next down sub pos line + val code +---------------------------------------------------- + 0 eps 0 0 + 1 nt A 2 20 + 2 nt B 3 20 + 3 nt C 0 20 + 4 t a 5 21 + 5 wt b 6 21 + 6 t c 0 21 + 7 t a 10 22 + 8 wt b 9 22 + 9 t c -10 22 + 10 iter 11 0 8 22 + 11 t d 0 22 + 12 t a 14 23 + 13 wt b -14 23 + 14 iter 15 0 13 23 + 15 t c 0 23 + + +First & follow symbols: +---------------------- + +Test +first: a +follow: EOF + +A +first: a +follow: a + +B +first: a +follow: a + +C +first: a +follow: EOF + + +ANY and SYNC sets: +----------------- + +---------- states ---------- + 0: _SC('a') 1 + _SC('b') 2 + _SC('c') 3 + _SC('d') 4 + 
_SC('e') 5 + _SC('f') 6 + _SC('g') 7 + _SC('h') 8 + _SC('i') 9 +E(a ) 1: +E(b ) 2: +E(c ) 3: +E(d ) 4: +E(e ) 5: +E(f ) 6: +E(g ) 7: +E(h ) 8: +E(i ) 9: + +---------- character classes ---------- + +Symbol Table: +------------ + + nr name typ hasAt graph del line tokenKind + 0 EOF t false 0 fixedToken + 1 a t false 8 fixedToken + 2 b t false 9 fixedToken + 3 c t false 10 fixedToken + 4 d t false 11 fixedToken + 5 e t false 12 fixedToken + 6 f t false 13 fixedToken + 7 g t false 14 fixedToken + 8 h t false 15 fixedToken + 9 i t false 16 fixedToken + 10 ??? t false 0 fixedToken + 0 Test nt false 1 false 20 fixedToken + 1 A nt false 4 false 21 fixedToken + 2 B nt false 7 false 22 fixedToken + 3 C nt false 12 false 23 fixedToken + +Literal Tokens: +-------------- +_h = "h". +_i = "i". +_f = "f". +_g = "g". +_a = "a". +_d = "d". +_e = "e". +_b = "b". +_c = "c". + diff --git a/src/TestSuite/TestWeak_output.txt b/src/TestSuite/TestWeak_output.txt new file mode 100644 index 0000000..904c970 --- /dev/null +++ b/src/TestSuite/TestWeak_output.txt @@ -0,0 +1,5 @@ +Coco/R (Sep 6, 2007) +checking +parser + scanner generated +trace output is in trace.txt +0 errors detected diff --git a/src/TestSuite/check.bat b/src/TestSuite/check.bat new file mode 100644 index 0000000..15a11ef --- /dev/null +++ b/src/TestSuite/check.bat @@ -0,0 +1,5 @@ +@..\Coco -frames .. %1.ATG > output.txt +@Compare trace.txt %1_Trace.txt %1_Trace +@Compare output.txt %1_Output.txt %1_Compilation 22 +@Compare Parser.cs %1_Parser.cs %1_Parser +@Compare Scanner.cs %1_Scanner.cs %1_Scanner diff --git a/src/TestSuite/check.sh b/src/TestSuite/check.sh new file mode 100755 index 0000000..994457b --- /dev/null +++ b/src/TestSuite/check.sh @@ -0,0 +1,26 @@ +#!/bin/sh +#myvalgrind --leak-check=full +../Coco -frames .. 
$1.ATG > output.txt + +#cp trace.txt $1_Trace.txt +if cmp trace.txt $1_Trace.txt +then + echo $1_Trace passed +fi + +if cmp output.txt $1_Output.txt +then + echo $1_Compilation passed +fi + +#cp Parser.cpp $1_Parser.cpp +if cmp Parser.cpp $1_Parser.cpp +then + echo $1_Parser passed +fi + +#cp Scanner.cpp $1_Scanner.cpp +if cmp Scanner.cpp $1_Scanner.cpp +then + echo $1_Scanner passed +fi diff --git a/src/TestSuite/checkall.bat b/src/TestSuite/checkall.bat new file mode 100644 index 0000000..447e6f5 --- /dev/null +++ b/src/TestSuite/checkall.bat @@ -0,0 +1,24 @@ +echo off +call check TestAlts +call check TestOpts +call check TestOpts1 +call check TestIters +call check TestEps +call check TestAny +call check TestAny1 +call check TestSync +call check TestSem +call check TestWeak +call check TestChars +call check TestTokens +call checkerr TestTokens1 +call check TestComments +call check TestDel +call checkerr TestTerminalizable +call checkerr TestComplete +call checkerr TestReached +call checkerr TestCircular +call check TestLL1 +call check TestResOK +call checkerr TestResIllegal +call check TestCasing diff --git a/src/TestSuite/checkall.sh b/src/TestSuite/checkall.sh new file mode 100755 index 0000000..834b9f8 --- /dev/null +++ b/src/TestSuite/checkall.sh @@ -0,0 +1,24 @@ +#!/bin/sh +./check.sh TestAlts +./check.sh TestAny +./check.sh TestAny1 +./check.sh TestCasing +./check.sh TestChars +./check.sh TestComments +./check.sh TestDel +./check.sh TestEps +./check.sh TestIters +./check.sh TestLL1 +./check.sh TestOpts +./check.sh TestOpts1 +./check.sh TestResOK +./check.sh TestSem +./check.sh TestSync +./check.sh TestTokens +./check.sh TestWeak +./checkerr.sh TestCircular +./checkerr.sh TestComplete +./checkerr.sh TestReached +./checkerr.sh TestResIllegal +./checkerr.sh TestTerminalizable +./checkerr.sh TestTokens1 diff --git a/src/TestSuite/checkerr.bat b/src/TestSuite/checkerr.bat new file mode 100644 index 0000000..5675692 --- /dev/null +++ b/src/TestSuite/checkerr.bat 
@@ -0,0 +1,3 @@ +..\Coco -frames .. %1.ATG > output.txt +Compare trace.txt %1_Trace.txt %1_Trace +Compare output.txt %1_Output.txt %1_Compilation 22 diff --git a/src/TestSuite/checkerr.sh b/src/TestSuite/checkerr.sh new file mode 100755 index 0000000..e71ea9f --- /dev/null +++ b/src/TestSuite/checkerr.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# Usage: ./checkerr.sh TestName - runs Coco on TestName.ATG and compares trace.txt and the console output with the saved TestName_Trace.txt and TestName_Output.txt (the generated Parser/Scanner are not compared). +#myvalgrind --leak-check=full +../Coco -frames .. $1.ATG > output.txt + +#cp trace.txt $1_Trace.txt +if cmp trace.txt $1_Trace.txt +then + echo $1_Trace passed +fi + +if cmp output.txt $1_Output.txt +then + echo $1_Compilation passed +fi diff --git a/src/TestSuite/compile.bat b/src/TestSuite/compile.bat new file mode 100644 index 0000000..9f07508 --- /dev/null +++ b/src/TestSuite/compile.bat @@ -0,0 +1,4 @@ +..\Coco -frames .. %1.ATG > %1_Output.txt +copy trace.txt %1_Trace.txt +copy Parser.cpp %1_Parser.cpp +copy Scanner.cpp %1_Scanner.cpp diff --git a/src/TestSuite/compile.sh b/src/TestSuite/compile.sh new file mode 100755 index 0000000..74176a2 --- /dev/null +++ b/src/TestSuite/compile.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# Usage: ./compile.sh TestName - runs Coco on TestName.ATG and saves the console output, trace.txt, Parser.cpp and Scanner.cpp as the TestName_* baseline files. +../Coco -frames .. 
$1.ATG > $1_Output.txt +cp trace.txt $1_Trace.txt +cp Parser.cpp $1_Parser.cpp +cp Scanner.cpp $1_Scanner.cpp diff --git a/src/TestSuite/compileall.bat b/src/TestSuite/compileall.bat new file mode 100644 index 0000000..c6bdd48 --- /dev/null +++ b/src/TestSuite/compileall.bat @@ -0,0 +1,24 @@ +echo off +call compile TestAlts +call compile TestOpts +call compile TestOpts1 +call compile TestIters +call compile TestEps +call compile TestAny +call compile TestAny1 +call compile TestSync +call compile TestSem +call compile TestWeak +call compile TestChars +call compile TestTokens +call compile TestTokens1 +call compile TestComments +call compile TestDel +call compile TestTerminalizable +call compile TestComplete +call compile TestReached +call compile TestCircular +call compile TestLL1 +call compile TestResOK +call compile TestResIllegal +call compile TestCasing diff --git a/src/TestSuite/compileall.sh b/src/TestSuite/compileall.sh new file mode 100755 index 0000000..7693afd --- /dev/null +++ b/src/TestSuite/compileall.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# Regenerates the saved baseline files by running compile.sh for each test grammar listed below. +./compile.sh TestAlts +./compile.sh TestOpts +./compile.sh TestOpts1 +./compile.sh TestIters +./compile.sh TestEps +./compile.sh TestAny +./compile.sh TestAny1 +./compile.sh TestSync +./compile.sh TestSem +./compile.sh TestWeak +./compile.sh TestChars +./compile.sh TestTokens +./compile.sh TestTokens1 +./compile.sh TestComments +./compile.sh TestDel +./compile.sh TestTerminalizable +./compile.sh TestComplete +./compile.sh TestReached +./compile.sh TestCircular +./compile.sh TestLL1 +./compile.sh TestResOK +./compile.sh TestResIllegal +./compile.sh TestCasing diff --git a/src/TestSuite/readme.txt b/src/TestSuite/readme.txt new file mode 100644 index 0000000..937ff5d --- /dev/null +++ b/src/TestSuite/readme.txt @@ -0,0 +1,36 @@ + Test Suite for Coco/R + Hanspeter Mössenböck + +This directory contains a simple test suite, which checks if changes to Coco/R still +lead to the same results. 
It works as follows: + +compile.bat TestSample +runs Coco/R for the grammar TestSample.ATG, which has all trace switches enabled so that +Coco/R dumps its internal data structures to the file trace.txt. It then copies the +following files: + Scanner.cpp => TestSample_Scanner.cpp + Parser.cpp => TestSample_Parser.cpp + console output => TestSample_Output.txt + trace.txt => TestSample_Trace.txt + + +compileall.bat +runs compile.bat for all ATG files in this directory. You should run this command +once before you start making changes to Coco/R. + +check.bat TestSample +runs Coco/R again on the file TestSample.ATG and compares the generated scanner, parser, +trace.txt and console output to the previously saved files. If the files are identical the +command prints the message + ++ passed TestSample_Compilation + +otherwise it prints the message + -- failed TestSample_Compilation + + +checkall.bat +runs check.bat for all ATG files in this directory. You should run this command after +every modification to Coco/R. The generated ++ passed or -- failed messages +indicate if the modification led to any differences in the output or in the internal data +structures of Coco/R. 
+ \ No newline at end of file diff --git a/src/TestSuite/zipall.bat b/src/TestSuite/zipall.bat new file mode 100644 index 0000000..0e5b914 --- /dev/null +++ b/src/TestSuite/zipall.bat @@ -0,0 +1 @@ +jar -cfM TestSuite.zip readme.txt *.frame *.ATG *.cpp *.txt *.bat \ No newline at end of file diff --git a/src/TestSuite/zipall.sh b/src/TestSuite/zipall.sh new file mode 100644 index 0000000..3ccc9dd --- /dev/null +++ b/src/TestSuite/zipall.sh @@ -0,0 +1 @@ +jar -cfM TestSuite.zip readme.txt *.frame *.ATG *.cpp *.txt *.bat *.sh diff --git a/src/build-wasm.sh b/src/build-wasm.sh new file mode 100644 index 0000000..0981c68 --- /dev/null +++ b/src/build-wasm.sh @@ -0,0 +1,4 @@ +#!/bin/sh +#emsdk-env +em++ -Wall -O2 -m32 -fno-rtti -fno-exceptions *.cpp -o coco-release-emscripten-32.bc +[ -e coco-release-emscripten-32.bc ] && emcc coco-release-emscripten-32.bc -o coco-wasm.html diff --git a/src/mk-cocor-cpp-amalgamation.lua b/src/mk-cocor-cpp-amalgamation.lua new file mode 100644 index 0000000..acbfec8 --- /dev/null +++ b/src/mk-cocor-cpp-amalgamation.lua @@ -0,0 +1,109 @@ +-- Source directory (must end with "/"); the first command-line argument overrides the default. +local base_dir = (arg and arg[1]) or "/home/mingo/dev/c/A_grammars/CocoR-CPP/src/"; +local includes_base = {} +local sq_sources = [==[ +Action.cpp +BitArray.cpp +CharClass.cpp +CharSet.cpp +Comment.cpp +DFA.cpp +Generator.cpp +HashTable.cpp +Melted.cpp +Node.cpp +Parser.cpp +ParserGen.cpp +Position.cpp +Scanner.cpp +SortedList.cpp +State.cpp +StringBuilder.cpp +Symbol.cpp +Tab.cpp +Target.cpp +Coco.cpp +]==]; + +local included = {}; +local inc_sys = {}; +local inc_sys_count = 0; +local out = io.stdout + +-- Writes filename (looked up under prefix, then under each includes_base subdirectory) to stdout, +-- commenting out every #include line, inlining each quoted include once and recording the other includes in inc_sys. +function CopyWithInline(prefix, filename) + if included[filename] then return end + included[filename] = true + print('//--Start of', filename); + --if(filename:match("luac?.c")) + local inp = io.open(prefix .. filename, "r") + if not inp then + for idx in ipairs(includes_base) do + local sdir = includes_base[idx] + local fn = prefix .. sdir .. 
filename + --print(fn) + inp = io.open(fn, "r") + if inp then break end + end + end + if not inp then + if filename == "fzn_picat_sat_bc.h" then + print('//--End of', filename); + end + else + assert(inp) + for line in inp:lines() do + if line:match('#define LUA_USE_READLINE') then + out:write("//" .. line .. "\n") + else + local inc = line:match('#include%s+(["<].-)[">]') + if inc then + out:write("//" .. line .. "\n") + if inc:sub(1,1) == '"' or inc:match('[<"]sq') then + CopyWithInline(prefix, inc:sub(2)) + else + local fn = inc:sub(2) + if inc_sys[fn] == nil then + inc_sys_count = inc_sys_count +1 + inc_sys[fn] = inc_sys_count + end + end + else + out:write(line .. "\n") + end + end + end + inp:close() + print('//--End of', filename); + end +end + +print([==[ +#ifdef WITH_COSMOPOLITAN + +STATIC_STACK_SIZE(0x400000); + +#endif + +#ifndef __COSMOPOLITAN__ +//g++ -g -Wall -Wextra -DWITHOUT_WCHAR -fno-rtti -fno-exceptions cocor-am.cpp -o Coco +#include //3 +#include //7 +#include //5 +#include //1 +#include //10 +#include //9 +#include //4 +#include //8 +#include //2 +//#include //6 + +#endif +]==]) + +local prefix = base_dir; local src_files = sq_sources; +for filename in src_files:gmatch('([^\n]+)') do + CopyWithInline(prefix, filename); +end +--for k, v in pairs(inc_sys) do print("#include <" .. k .. "> //" .. v ) end diff --git a/src/mk-cocor.sh b/src/mk-cocor.sh new file mode 100644 index 0000000..bdd7a3e --- /dev/null +++ b/src/mk-cocor.sh @@ -0,0 +1,14 @@ +# run gcc compiler in freestanding mode +optim=-O2 +g++ -g $optim -static -fno-pie -nostdlib -nostdinc \ + -fno-omit-frame-pointer -pg -mnop-mcount -mno-tls-direct-seg-refs \ + -fno-exceptions -fno-rtti -Wall -fno-strict-aliasing \ + -o cocor.com.dbg cocor-am.cpp \ + -DWITHOUT_WCHAR \ + -DWITH_COSMOPOLITAN \ + -Wl,--gc-sections -fuse-ld=bfd \ + -Wl,-T,ape.lds crt.o ape-no-modify-self.o cosmopolitan.a \ + -include cosmopolitan.h + +objcopy -S -O binary cocor.com.dbg cocor.com +