diff --git a/contributors.txt b/contributors.txt index e38132548..3aef9c9ed 100644 --- a/contributors.txt +++ b/contributors.txt @@ -55,3 +55,4 @@ YYYY/MM/DD, github id, Full name, email 2012/09/17, ksgokul, Gokulakannan Somasundaram, gokul007@gmail.com 2012/11/22, sharwell, Sam Harwell, sam@tunnelvisionlabs.com 2013/02/19, murrayju, Justin Murray, murrayju@addpcs.com +2013/04/17, ibre5041, Ivan Brezina, ibre5041@ibrezina.net diff --git a/runtime/Cpp/include/antlr3.hpp b/runtime/Cpp/include/antlr3.hpp new file mode 100755 index 000000000..4e40ba48a --- /dev/null +++ b/runtime/Cpp/include/antlr3.hpp @@ -0,0 +1,60 @@ +#ifndef _ANTLR3_HPP +#define _ANTLR3_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include + +#include "antlr3defs.hpp" + +#include "antlr3errors.hpp" +#include "antlr3memory.hpp" + +#include "antlr3recognizersharedstate.hpp" +#include "antlr3baserecognizer.hpp" +#include "antlr3bitset.hpp" +#include "antlr3collections.hpp" +#include "antlr3commontoken.hpp" +#include "antlr3commontree.hpp" +#include "antlr3commontreeadaptor.hpp" +#include "antlr3cyclicdfa.hpp" +#include "antlr3debugeventlistener.hpp" +#include "antlr3exception.hpp" +#include "antlr3filestream.hpp" +#include "antlr3intstream.hpp" +#include "antlr3input.hpp" +#include "antlr3tokenstream.hpp" +#include "antlr3commontreenodestream.hpp" +#include "antlr3lexer.hpp" +#include "antlr3parser.hpp" +#include "antlr3rewritestreams.hpp" +#include "antlr3traits.hpp" +#include "antlr3treeparser.hpp" + +#endif diff --git a/runtime/Cpp/include/antlr3.inl b/runtime/Cpp/include/antlr3.inl new file mode 100755 index 000000000..79974afc1 --- /dev/null +++ b/runtime/Cpp/include/antlr3.inl @@ -0,0 +1,9 @@ +ANTLR_BEGIN_NAMESPACE() + +//static +ANTLR_INLINE void GenericStream::displayRecognitionError( const StringType& str ) +{ + fprintf(stderr, str.c_str() ); +} + +ANTLR_END_NAMESPACE() \ No newline at end of file diff --git a/runtime/Cpp/include/antlr3baserecognizer.hpp b/runtime/Cpp/include/antlr3baserecognizer.hpp new file mode 100755 index 000000000..f125400b9 --- /dev/null +++ b/runtime/Cpp/include/antlr3baserecognizer.hpp @@ -0,0 +1,512 @@ +/** \file + * Defines the basic 
structure to support recognizing by either a lexer, + * parser, or tree parser. + * \addtogroup BaseRecognizer + * @{ + */ +#ifndef _ANTLR3_BASERECOGNIZER_HPP +#define _ANTLR3_BASERECOGNIZER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" +#include "antlr3collections.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** \brief Base tracking context structure for all types of + * recognizers. 
+ */ +template< class ImplTraits, class StreamType > +class BaseRecognizer : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename StreamType::IntStreamType IntStreamType; + typedef typename ComponentTypeFinder::ComponentType SuperType; + typedef typename StreamType::UnitType UnitType; + typedef typename ImplTraits::template ExceptionBaseType ExceptionBaseType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::template RecognizerSharedStateType RecognizerSharedStateType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename ImplTraits::LexerType LexerType; + typedef typename ImplTraits::ParserType ParserType; + typedef typename ImplTraits::TreeParserType TreeParserType; + + typedef typename AllocPolicyType::template StackType StringStackType; + typedef typename AllocPolicyType::template ListType StringListType; + +private: + /// A pointer to the shared recognizer state, such that multiple + /// recognizers can use the same inputs streams and so on (in + /// the case of grammar inheritance for instance. + /// + RecognizerSharedStateType* m_state; + + /// If set to something other than NULL, then this structure is + /// points to an instance of the debugger interface. 
In general, the + /// debugger is only referenced internally in recovery/error operations + /// so that it does not cause overhead by having to check this pointer + /// in every function/method + /// + DebugEventListenerType* m_debugger; + + +public: + BaseRecognizer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); + + SuperType* get_super(); + RecognizerSharedStateType* get_state() const; + DebugEventListenerType* get_debugger() const; + void set_state( RecognizerSharedStateType* state ); + void set_debugger( DebugEventListenerType* debugger ); + + /// Match current input symbol against ttype. Upon error, do one token + /// insertion or deletion if possible. + /// To turn off single token insertion or deletion error + /// recovery, override mismatchRecover() and have it call + /// plain mismatch(), which does not recover. Then any error + /// in a rule will cause an exception and immediate exit from + /// rule. Rule would recover by resynchronizing to the set of + /// symbols that can follow rule ref. + /// + const UnitType* match(ANTLR_UINT32 ttype, BitsetListType* follow); + + /// Consumes the next token, whatever it is, and resets the recognizer state + /// so that it is not in error. + /// + /// \param recognizer + /// Recognizer context pointer + /// + void matchAny(); + + /// function that decides if the token ahead of the current one is the + /// one we were looking for, in which case the current one is very likely extraneous + /// and can be reported that way. + /// + bool mismatchIsUnwantedToken(IntStreamType* input, ANTLR_UINT32 ttype); + + /// function that decides if the current token is one that can logically + /// follow the one we were looking for, in which case the one we were looking for is + /// probably missing from the input. + /// + bool mismatchIsMissingToken(IntStreamType* input, BitsetListType* follow); + + /// Factor out what to do upon token mismatch so tree parsers can behave + /// differently. 
Override and call mismatchRecover(input, ttype, follow) + /// to get single token insertion and deletion. Use this to turn off + /// single token insertion and deletion. Override mismatchRecover + /// to call this instead. + /// + /// \remark mismatch only works for parsers and must be overridden for anything else. + /// + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); + + /// Report a recognition problem. + /// + /// This method sets errorRecovery to indicate the parser is recovering + /// not parsing. Once in recovery mode, no errors are generated. + /// To get out of recovery mode, the parser must successfully match + /// a token (after a resync). So it will go: + /// + /// 1. error occurs + /// 2. enter recovery mode, report error + /// 3. consume until token found in resynch set + /// 4. try to resume parsing + /// 5. next match() will reset errorRecovery mode + /// + /// If you override, make sure to update errorCount if you care about that. + /// + void reportError(); + void reportError( ClassForwarder ); + template + void reportError( ClassForwarder ); + + /** Function that is called to display a recognition error message. You may + * override this function independently of (*reportError)() above as that function calls + * this one to do the actual exception printing. + */ + void displayRecognitionError(ANTLR_UINT8** tokenNames); + + /// Get number of recognition errors (lexer, parser, tree parser). Each + /// recognizer tracks its own number. So parser and lexer each have + /// separate count. Does not count the spurious errors found between + /// an error and next valid token match + /// + /// \see reportError() + /// + ANTLR_UINT32 getNumberOfSyntaxErrors(); + + /** Function that recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. 
+ */ + void recover(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void beginResync(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void endResync(); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void beginBacktrack(ANTLR_UINT32 level); + + /** function that is a hook to listen to token consumption during error recovery. + * This is mainly used by the debug parser to send events to the listener. + */ + void endBacktrack(ANTLR_UINT32 level, bool successful); + + /// Compute the error recovery set for the current rule. + /// Documentation below is from the Java implementation. + /// + /// During rule invocation, the parser pushes the set of tokens that can + /// follow that rule reference on the stack; this amounts to + /// computing FIRST of what follows the rule reference in the + /// enclosing rule. This local follow set only includes tokens + /// from within the rule; i.e., the FIRST computation done by + /// ANTLR stops at the end of a rule. + // + /// EXAMPLE + // + /// When you find a "no viable alt exception", the input is not + /// consistent with any of the alternatives for rule r. The best + /// thing to do is to consume tokens until you see something that + /// can legally follow a call to r *or* any rule that called r. + /// You don't want the exact set of viable next tokens because the + /// input might just be missing a token--you might consume the + /// rest of the input looking for one of the missing tokens. 
+ /// + /// Consider grammar: + /// + /// a : '[' b ']' + /// | '(' b ')' + /// ; + /// b : c '^' INT ; + /// c : ID + /// | INT + /// ; + /// + /// At each rule invocation, the set of tokens that could follow + /// that rule is pushed on a stack. Here are the various "local" + /// follow sets: + /// + /// FOLLOW(b1_in_a) = FIRST(']') = ']' + /// FOLLOW(b2_in_a) = FIRST(')') = ')' + /// FOLLOW(c_in_b) = FIRST('^') = '^' + /// + /// Upon erroneous input "[]", the call chain is + /// + /// a -> b -> c + /// + /// and, hence, the follow context stack is: + /// + /// depth local follow set after call to rule + /// 0 a (from main()) + /// 1 ']' b + /// 3 '^' c + /// + /// Notice that ')' is not included, because b would have to have + /// been called from a different context in rule a for ')' to be + /// included. + /// + /// For error recovery, we cannot consider FOLLOW(c) + /// (context-sensitive or otherwise). We need the combined set of + /// all context-sensitive FOLLOW sets--the set of all tokens that + /// could follow any reference in the call chain. We need to + /// resync to one of those tokens. Note that FOLLOW(c)='^' and if + /// we resync'd to that token, we'd consume until EOF. We need to + /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + /// In this case, for input "[]", LA(1) is in this set so we would + /// not consume anything and after printing an error rule c would + /// return normally. It would not find the required '^' though. + /// At this point, it gets a mismatched token error and throws an + /// exception (since LA(1) is not in the viable following token + /// set). The rule exception handler tries to recover, but finds + /// the same recovery set and doesn't consume anything. Rule b + /// exits normally returning to rule a. Now it finds the ']' (and + /// with the successful match exits errorRecovery mode). 
+ /// + /// So, you can see that the parser walks up call chain looking + /// for the token that was a member of the recovery set. + /// + /// Errors are not generated in errorRecovery mode. + /// + /// ANTLR's error recovery mechanism is based upon original ideas: + /// + /// "Algorithms + Data Structures = Programs" by Niklaus Wirth + /// + /// and + /// + /// "A note on error recovery in recursive descent parsers": + /// http://portal.acm.org/citation.cfm?id=947902.947905 + /// + /// Later, Josef Grosch had some good ideas: + /// + /// "Efficient and Comfortable Error Recovery in Recursive Descent + /// Parsers": + /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + /// + /// Like Grosch I implemented local FOLLOW sets that are combined + /// at run-time upon error to avoid overhead during parsing. + /// + BitsetType* computeErrorRecoverySet(); + + /// Compute the context-sensitive FOLLOW set for current rule. + /// Documentation below is from the Java runtime. + /// + /// This is the set of token types that can follow a specific rule + /// reference given a specific call chain. You get the set of + /// viable tokens that can possibly come next (look ahead depth 1) + /// given the current call chain. Contrast this with the + /// definition of plain FOLLOW for rule r: + /// + /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + /// + /// where x in T* and alpha, beta in V*; T is set of terminals and + /// V is the set of terminals and non terminals. In other words, + /// FOLLOW(r) is the set of all tokens that can possibly follow + /// references to r in *any* sentential form (context). At + /// runtime, however, we know precisely which context applies as + /// we have the call chain. We may compute the exact (rather + /// than covering superset) set of following tokens. + /// + /// For example, consider grammar: + /// + /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + /// | "return" expr '.' 
+ /// ; + /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + /// atom : INT // FOLLOW(atom)=={'+',')',';','.'} + /// | '(' expr ')' + /// ; + /// + /// The FOLLOW sets are all inclusive whereas context-sensitive + /// FOLLOW sets are precisely what could follow a rule reference. + /// For input input "i=(3);", here is the derivation: + /// + /// stat => ID '=' expr ';' + /// => ID '=' atom ('+' atom)* ';' + /// => ID '=' '(' expr ')' ('+' atom)* ';' + /// => ID '=' '(' atom ')' ('+' atom)* ';' + /// => ID '=' '(' INT ')' ('+' atom)* ';' + /// => ID '=' '(' INT ')' ';' + /// + /// At the "3" token, you'd have a call chain of + /// + /// stat -> expr -> atom -> expr -> atom + /// + /// What can follow that specific nested ref to atom? Exactly ')' + /// as you can see by looking at the derivation of this specific + /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + /// + /// You want the exact viable token set when recovering from a + /// token mismatch. Upon token mismatch, if LA(1) is member of + /// the viable next token set, then you know there is most likely + /// a missing token in the input stream. "Insert" one by just not + /// throwing an exception. + /// + BitsetType* computeCSRuleFollow(); + + /// Compute the current followset for the input stream. + /// + BitsetType* combineFollows(bool exact); + + /// Attempt to recover from a single missing or extra token. + /// + /// EXTRA TOKEN + /// + /// LA(1) is not what we are looking for. If LA(2) has the right token, + /// however, then assume LA(1) is some extra spurious token. Delete it + /// and LA(2) as if we were doing a normal match(), which advances the + /// input. + /// + /// MISSING TOKEN + /// + /// If current token is consistent with what could come after + /// ttype then it is ok to "insert" the missing token, else throw + /// exception For example, Input "i=(3;" is clearly missing the + /// ')'. 
When the parser returns from the nested call to expr, it + /// will have call chain: + /// + /// stat -> expr -> atom + /// + /// and it will be trying to match the ')' at this point in the + /// derivation: + /// + /// => ID '=' '(' INT ')' ('+' atom)* ';' + /// ^ + /// match() will see that ';' doesn't match ')' and report a + /// mismatched token error. To recover, it sees that LA(1)==';' + /// is in the set of tokens that can follow the ')' token + /// reference in rule atom. It can assume that you forgot the ')'. + /// + /// The exception that was passed in, in the java implementation is + /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the + /// error flag and rules cascade back when this is set. + /// + const UnitType* recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow); + + /** Function that recovers from a mismatched set in the token stream, in a similar manner + * to (*recoverFromMismatchedToken) + */ + const UnitType* recoverFromMismatchedSet(BitsetListType* follow); + + /** common routine to handle single token insertion for recovery functions. + */ + /// This code is factored out from mismatched token and mismatched set + /// recovery. It handles "single token insertion" error recovery for + /// both. No tokens are consumed to recover from insertions. Return + /// true if recovery was possible else return false. + /// + bool recoverFromMismatchedElement(BitsetListType* follow); + + /** function that consumes input until the next token matches + * the given token. + */ + void consumeUntil(ANTLR_UINT32 tokenType); + + /** function that consumes input until the next token matches + * one in the given set. + */ + void consumeUntilSet(BitsetType* set); + + /** function that returns an ANTLR3_LIST of the strings that identify + * the rules in the parser that got you to this point. Can be overridden by installing your + * own function set. 
+ * + * \todo Document how to override invocation stack functions. + */ + StringStackType getRuleInvocationStack(); + StringStackType getRuleInvocationStackNamed(ANTLR_UINT8* name); + + /** function that converts an ANTLR3_LIST of tokens to an ANTLR3_LIST of + * string token names. As this is mostly used in string template processing it may not be useful + * in the C runtime. + */ + StringListType toStrings( const StringListType& ); + + /** function to return whether the rule has parsed input starting at the supplied + * start index before. If the rule has not parsed input starting from the supplied start index, + * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the supplied start point + * then it will return the point where it last stopped parsing after that start point. + */ + ANTLR_MARKER getRuleMemoization( ANTLR_INTKEY ruleIndex, + ANTLR_MARKER ruleParseStart); + + /** function that determines whether the rule has parsed input at the current index + * in the input stream + */ + bool alreadyParsedRule(ANTLR_MARKER ruleIndex); + + /** Function that records whether the rule has parsed the input at a + * current position successfully or not. + */ + void memoize(ANTLR_MARKER ruleIndex, + ANTLR_MARKER ruleParseStart); + + /// Function that returns the current input symbol. + /// It is placed into any label for the associated token ref; e.g., x=ID. Token + /// and tree parsers need to return different objects. Rather than test + /// for input stream type or change the IntStream interface, I use + /// a simple method to ask the recognizer to tell me what the current + /// input symbol is. + /// + /// This is ignored for lexers and the lexer implementation of this + /// function should return NULL. 
+ /// + const UnitType* getCurrentInputSymbol(IntStreamType* istream); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder); + const UnitType* getCurrentInputSymbol(IntStreamType* istream, ClassForwarder); + + /// Conjure up a missing token during error recovery. + /// + /// The recognizer attempts to recover from single missing + /// symbols. But, actions might refer to that missing symbol. + /// For example, x=ID {f($x);}. The action clearly assumes + /// that there has been an identifier matched previously and that + /// $x points at that token. If that token is missing, but + /// the next token in the stream is what we want we assume that + /// this token is missing and we keep going. Because we + /// have to return some token to replace the missing token, + /// we have to conjure one up. This method gives the user control + /// over the tokens returned for missing tokens. Mostly, + /// you will want to create something special for identifier + /// tokens. For literals such as '{' and ',', the default + /// action in the parser or tree parser works. It simply creates + /// a CommonToken of the appropriate type. The text will be the token. + /// If you change what tokens must be created by the lexer, + /// override this method to create the appropriate tokens. + /// + UnitType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, + BitsetListType* follow); + + /** Function that returns whether the supplied grammar function + * will parse the current input stream or not. This is the way that syntactic + * predicates are evaluated. Unlike java, C is perfectly happy to invoke code + * via a pointer to a function (hence that's what all the ANTLR3 C interfaces + * do. 
+ */ + template + bool synpred( ClassForwarder ); + + //In place of exConstruct, just directly instantiate the Exception Object + + /** Reset the recognizer + */ + void reset(); + void reset( ClassForwarder ); + template + void reset( ClassForwarder ); + + void exConstruct(); + + ~BaseRecognizer(); + +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3baserecognizer.inl" + +/// @} +/// + +#endif /* _ANTLR3_BASERECOGNIZER_H */ + diff --git a/runtime/Cpp/include/antlr3baserecognizer.inl b/runtime/Cpp/include/antlr3baserecognizer.inl new file mode 100755 index 000000000..5d5acbfc2 --- /dev/null +++ b/runtime/Cpp/include/antlr3baserecognizer.inl @@ -0,0 +1,919 @@ +ANTLR_BEGIN_NAMESPACE() + +template< class ImplTraits, class StreamType > +BaseRecognizer::BaseRecognizer(ANTLR_UINT32 sizeHint, + RecognizerSharedStateType* state) +{ + m_debugger = NULL; + + // If we have been supplied with a pre-existing recognizer state + // then we just install it, otherwise we must create one from scratch + // + if (state == NULL) + { + m_state = new RecognizerSharedStateType(); + m_state->set_sizeHint( sizeHint ); + } + else + { + // Install the one we were given, and do not reset it here + // as it will either already have been initialized or will + // be in a state that needs to be preserved. 
+ // + m_state = state; + } +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer::SuperType* BaseRecognizer::get_super() +{ + return static_cast(this); +} + +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer::RecognizerSharedStateType* BaseRecognizer::get_state() const +{ + return m_state; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE typename BaseRecognizer::DebugEventListenerType* BaseRecognizer::get_debugger() const +{ + return m_debugger; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer::set_state( RecognizerSharedStateType* state ) +{ + m_state = state; +} +template< class ImplTraits, class StreamType > +ANTLR_INLINE void BaseRecognizer::set_debugger( DebugEventListenerType* debugger ) +{ + m_debugger = debugger; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::match(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_istream(); + + // Pick up the current input token/node for assignment to labels + // + const UnitType* matchedSymbol = this->getCurrentInputSymbol(is); + + if (is->_LA(1) == ttype) + { + // The token was the one we were told to expect + // + is->consume(); // Consume that token from the stream + m_state->set_errorRecovery(false); // Not in error recovery now (if we were) + m_state->set_failed(false); // The match was a success + return matchedSymbol; // We are done + } + + // We did not find the expected token type, if we are backtracking then + // we just set the failed flag and return. + // + if ( m_state->get_backtracking() > 0) + { + // Backtracking is going on + // + m_state->set_failed(true); + return matchedSymbol; + } + + // We did not find the expected token and there is no backtracking + // going on, so we mismatch, which creates an exception in the recognizer exception + // stack. 
+ // + matchedSymbol = this->recoverFromMismatchedToken(ttype, follow); + return matchedSymbol; + +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::matchAny() +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_istream(); + + is->consume(); + m_state->set_errorRecovery(false); + m_state->set_failed(false); + return; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer::mismatchIsUnwantedToken(IntStreamType* is, ANTLR_UINT32 ttype) +{ + ANTLR_UINT32 nextt = is->_LA(2); + + if (nextt == ttype) + { + if(m_state->get_exception() != NULL) + m_state->get_exception()->set_expecting(nextt); + return true; // This token is unknown, but the next one is the one we wanted + } + else + return false; // Neither this token, nor the one following is the one we wanted +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer::mismatchIsMissingToken(IntStreamType* is, BitsetListType* follow) +{ + bool retcode; + BitsetType* followClone; + BitsetType* viableTokensFollowingThisRule; + + if (follow == NULL) + { + // There is no information about the tokens that can follow the last one + // hence we must say that the current one we found is not a member of the + // follow set and does not indicate a missing token. We will just consume this + // single token and see if the parser works it out from there. + // + return false; + } + + followClone = NULL; + viableTokensFollowingThisRule = NULL; + + // The C bitset maps are laid down at compile time by the + // C code generation. Hence we cannot remove things from them + // and so on. So, in order to remove EOR (if we need to) then + // we clone the static bitset. 
+ // + followClone = follow->bitsetLoad(); + if (followClone == NULL) + return false; + + // Compute what can follow this grammar reference + // + if (followClone->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE)) + { + // EOR can follow, but if we are not the start symbol, we + // need to remove it. + // + followClone->remove(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + + // Now compute the visiable tokens that can follow this rule, according to context + // and make them part of the follow set. + // + viableTokensFollowingThisRule = this->computeCSRuleFollow(); + followClone->borInPlace(viableTokensFollowingThisRule); + } + + /// if current token is consistent with what could come after set + /// then we know we're missing a token; error recovery is free to + /// "insert" the missing token + /// + /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR + /// in follow set to indicate that the fall of the start symbol is + /// in the set (EOF can follow). + /// + if ( followClone->isMember(is->_LA(1)) + || followClone->isMember(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) + ) + { + retcode = true; + } + else + { + retcode = false; + } + + if (viableTokensFollowingThisRule != NULL) + { + delete viableTokensFollowingThisRule; + } + if (followClone != NULL) + { + delete followClone; + } + + return retcode; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + this->get_super()->mismatch( ttype, follow ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::reportError() +{ + this->reportError( ClassForwarder() ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::reportError( ClassForwarder ) +{ + // Indicate this recognizer had an error while processing. 
+ // + m_state->inc_errorCount(); + + this->displayRecognitionError(m_state->get_tokenNames()); +} + +template< class ImplTraits, class StreamType > +template +void BaseRecognizer::reportError(ClassForwarder ) +{ + // Invoke the debugger event if there is a debugger listening to us + // + if ( m_debugger != NULL) + { + m_debugger->recognitionException( m_state->get_exception() ); + } + + if ( m_state->get_errorRecovery() == true) + { + // Already in error recovery so don't display another error while doing so + // + return; + } + + // Signal we are in error recovery now + // + m_state->set_errorRecovery(true); + + // Indicate this recognizer had an error while processing. + // + m_state->inc_errorCount(); + + // Call the error display routine + // + this->displayRecognitionError( m_state->get_tokenNames() ); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::displayRecognitionError(ANTLR_UINT8** tokenNames) +{ + // Retrieve some info for easy reading. + // + ExceptionBaseType* ex = m_state->get_exception(); + StringType ttext; + + // See if there is a 'filename' we can use + // + SuperType* super = static_cast(this); + super->displayRecognitionError(tokenNames, ex); +} + +template< class ImplTraits, class StreamType > +ANTLR_UINT32 BaseRecognizer::getNumberOfSyntaxErrors() +{ + return m_state->get_errorCount(); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::recover() +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + // Are we about to repeat the same error? + // + if ( m_state->get_lastErrorIndex() == is->index()) + { + // The last error was at the same token index point. This must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed. Consume a single token so at least to prevent + // an infinite loop; this is a failsafe. 
+ // + is->consume(); + } + + // Record error index position + // + m_state->set_lastErrorIndex( is->index() ); + + // Work out the follow set for error recovery + // + BitsetType* followSet = this->computeErrorRecoverySet(); + + // Call resync hook (notifies the debugger, if one is attached) + // + this->beginResync(); + + // Consume tokens until we have resynced to something in the follow set + // + this->consumeUntilSet(followSet); + + // End resync hook + // + this->endResync(); + + // Destroy the temporary bitset returned by computeErrorRecoverySet(); + // it is not used after this point. + delete followSet; + + // Reset the error and failed flags so we don't re-report the exception + // + m_state->set_error(false); + m_state->set_failed(false); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::beginResync() +{ + if (m_debugger != NULL) + { + m_debugger->beginResync(); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::endResync() +{ + if (m_debugger != NULL) + { + m_debugger->endResync(); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::beginBacktrack(ANTLR_UINT32 level) +{ + if (m_debugger != NULL) + { + m_debugger->beginBacktrack(level); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::endBacktrack(ANTLR_UINT32 level, bool successful) +{ + if (m_debugger != NULL) + { + m_debugger->endBacktrack(level); + } +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer::BitsetType* BaseRecognizer::computeErrorRecoverySet() +{ + return this->combineFollows(false); +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer::BitsetType* BaseRecognizer::computeCSRuleFollow() +{ + return this->combineFollows(false); +} + +template< class ImplTraits, class StreamType > +typename BaseRecognizer::BitsetType* BaseRecognizer::combineFollows(bool exact) +{ + BitsetType* followSet; + BitsetType* localFollowSet; + ANTLR_UINT32 top; + ANTLR_UINT32 i; + + top = static_cast( m_state->get_following().size() ); +
followSet = new BitsetType(0); + localFollowSet = NULL; + + for (i = top; i>0; i--) + { + localFollowSet = m_state->get_following().at(i-1).bitsetLoad(); + + if (localFollowSet != NULL) + { + followSet->borInPlace(localFollowSet); + + if (exact == true) + { + if (localFollowSet->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == false) + { + // Only leave EOR in the set if at top (start rule); this lets us know + // if we have to include the follow(start rule); I.E., EOF + // + if (i>1) + { + followSet->remove(ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + } + } + else + { + break; // Cannot see End Of Rule from here, just drop out + } + } + delete localFollowSet; + localFollowSet = NULL; + } + } + + if (localFollowSet != NULL) + { + delete localFollowSet; + } + return followSet; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::recoverFromMismatchedToken( ANTLR_UINT32 ttype, BitsetListType* follow) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + const UnitType* matchedSymbol; + + // If the next token after the one we are looking at in the input stream + // is what we are looking for then we remove the one we have discovered + // from the stream by consuming it, then consume this next one along too as + // if nothing had happened. + // + if ( this->mismatchIsUnwantedToken( is, ttype) == true) + { + // Create an exception if we need one + // + new ANTLR_Exception(this, ""); + + // Call resync hook (for debuggers and so on) + // + if (m_debugger != NULL) + { + m_debugger->beginResync(); + } + + // "delete" the extra token + // + this->beginResync(); + is->consume(); + this->endResync(); + // End resync hook + // + if (m_debugger != NULL) + { + m_debugger->endResync(); + } + + // Print out the error after we consume so that ANTLRWorks sees the + // token in the exception. 
+ // + this->reportError(); + + // Return the token we are actually matching + // + matchedSymbol = this->getCurrentInputSymbol(is); + + // Consume the token that the rule actually expected to get as if everything + // was hunky dory. + // + is->consume(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // Single token deletion (Unwanted above) did not work + // so we see if we can insert a token instead by calculating which + // token would be missing + // + if ( this->mismatchIsMissingToken(is, follow)) + { + // We can fake the missing token and proceed + // NOTE(review): the pointer returned by new is discarded; presumably the constructor records the exception in m_state, since it is read back through m_state->get_exception() just below - confirm + new ANTLR_Exception(this, ""); + matchedSymbol = this->getMissingSymbol( is, m_state->get_exception(), ttype, follow); + m_state->get_exception()->set_token( matchedSymbol ); + m_state->get_exception()->set_expecting(ttype); + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception. + // + this->reportError(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // Create an exception if we need one + // + new ANTLR_Exception(this, ""); + + // Neither deleting nor inserting tokens allows recovery; + // we must just report the exception. + // + m_state->set_error(true); + return NULL; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::recoverFromMismatchedSet(BitsetListType* follow) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + const UnitType* matchedSymbol; + + if (this->mismatchIsMissingToken(is, follow) == true) + { + // We can fake the missing token and proceed + // NOTE(review): getMissingSymbol() is defined in this file with four parameters (istream, e, expectedTokenType, follow); confirm that a three-argument overload exists + new ANTLR_Exception(this); + matchedSymbol = this->getMissingSymbol(is, m_state->get_exception(), follow); + m_state->get_exception()->set_token(matchedSymbol); + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception.
+ // + this->reportError(); + + m_state->set_error(false); // Exception is not outstanding any more + + return matchedSymbol; + } + + // TODO - Single token deletion like in recoverFromMismatchedToken() + // + m_state->set_error(true); + m_state->set_failed(true); + return NULL; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer::recoverFromMismatchedElement(BitsetListType* followBits) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + + /* BitsetList exposes bitsetLoad() (there is no load() member); it returns + * a freshly allocated copy that this function owns and deletes. + */ + BitsetType* follow = followBits->bitsetLoad(); + BitsetType* viableToksFollowingRule; + + if (follow == NULL) + { + /* The follow set is NULL, which means we don't know what can come + * next, so we "hit and hope" by just signifying that we cannot + * recover, which will just cause the next token to be consumed, + * which might dig us out. + */ + return false; + } + + /* We have a bitmap for the follow set, hence we can compute + * what can follow this grammar element reference. + */ + if (follow->isMember( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE) == true) + { + /* First we need to know which of the available tokens are viable + * to follow this reference, and make them part of the follow set. + */ + viableToksFollowingRule = this->computeCSRuleFollow(); + follow->borInPlace(viableToksFollowingRule); + + /* Remove the EOR token, which we do not wish to compute with + */ + follow->remove( ImplTraits::CommonTokenType::EOR_TOKEN_TYPE); + delete viableToksFollowingRule; + /* We now have the computed set of what can follow the current token + */ + } + + /* We can now see if the current token works with the set of tokens + * that could follow the current grammar reference. If it looks like it + * is consistent, then we can "insert" that token by not throwing + * an exception and assuming that we saw it.
+ */ + if ( follow->isMember(is->_LA(1)) == true) + { + /* report the error, but don't cause any rules to abort and stuff + */ + this->reportError(); + if (follow != NULL) + { + delete follow; + } + m_state->set_error(false); + m_state->set_failed(false); + return true; /* Success in recovery */ + } + + if (follow != NULL) + { + delete follow; + } + + /* We could not find anything viable to do, so this is going to + * cause an exception. + */ + return false; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::consumeUntil(ANTLR_UINT32 tokenType) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + + // What do we have at the moment? + // + ANTLR_UINT32 ttype = is->_LA(1); + + // Keep eating tokens until we get to the one we want (or reach EOF). + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && ttype != tokenType) + { + is->consume(); + ttype = is->_LA(1); + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::consumeUntilSet(BitsetType* set) +{ + ANTLR_UINT32 ttype; + SuperType* super = static_cast(this); + IntStreamType* is = super->get_parser_istream(); + + // What do we have at the moment? + // + ttype = is->_LA(1); + + // Keep eating tokens until we get to one that is in the set (or reach EOF). + // + while (ttype != ImplTraits::CommonTokenType::TOKEN_EOF && set->isMember(ttype) == false) + { + is->consume(); + ttype = is->_LA(1); + } + +} + +template< class ImplTraits, class StreamType > +ANTLR_MARKER BaseRecognizer::getRuleMemoization( ANTLR_INTKEY ruleIndex, ANTLR_MARKER ruleParseStart) +{ + /* The rule memos are a trie (keyed by rule index) of tries (keyed by parse start index). + */ + typedef IntTrie RuleListType; + typedef TrieEntry EntryType; + typedef TrieEntry SubEntryType; + ANTLR_MARKER stopIndex; + EntryType* entry; + + /* See if we have a list in the ruleMemos for this rule, and if not, then create one + * as we will need it eventually if we are being asked for the memo here.
+ */ + entry = m_state->get_ruleMemo()->get(ruleIndex); + + if (entry == NULL) + { + /* Did not find it, so create a new one for it, with a bit depth based on the + * size of the input stream. We need the bit depth to incorporate the number of + * bits required to represent the largest possible stop index in the input, which is the + * last character. An int stream is free to return the largest 64 bit offset if it has + * no idea of the size, but you should remember that this will cause the leftmost + * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) + * (A fixed depth of 63 is what is passed below.) + */ + m_state->get_ruleMemo()->add( ruleIndex, new RuleListType(63) ); + + /* We cannot have a stopIndex in a trie we have just created of course + */ + return MEMO_RULE_UNKNOWN; + } + + RuleListType* ruleList = entry->get_data(); + + /* See if there is a stop index associated with the supplied start index. + * A stored value of 0 cannot be told apart from "no entry" and is reported as MEMO_RULE_UNKNOWN. + */ + stopIndex = 0; + + SubEntryType* sub_entry = ruleList->get(ruleParseStart); + if (sub_entry != NULL) + { + stopIndex = sub_entry->get_data(); + } + + if (stopIndex == 0) + { + return MEMO_RULE_UNKNOWN; + } + + return stopIndex; +} + +template< class ImplTraits, class StreamType > +bool BaseRecognizer::alreadyParsedRule(ANTLR_MARKER ruleIndex) +{ + SuperType* super = static_cast(this); + IntStreamType* is = super->get_istream(); + + /* See if we have a memo marker for this rule at the current input index. + */ + ANTLR_MARKER stopIndex = this->getRuleMemoization( ruleIndex, is->index() ); + + if (stopIndex == MEMO_RULE_UNKNOWN) + { + return false; + } + + if (stopIndex == MEMO_RULE_FAILED) + { + m_state->set_failed(true); + } + else + { + is->seek(stopIndex+1); + } + + /* If here then the rule was executed for this input already + */ + return true; +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + /* The rule memos are a trie (keyed by rule index) of tries (keyed by parse start index).
+ */ + typedef IntTrie RuleListType; + typedef TrieEntry EntryType; + EntryType* entry; + ANTLR_MARKER stopIndex; + SuperType* super = static_cast(this); + IntStreamType* is = super->get_istream(); + + stopIndex = (m_state->get_failed() == true) ? MEMO_RULE_FAILED : is->index() - 1; + + entry = m_state->get_ruleMemo()->get(ruleIndex); + + if (entry != NULL) + { + RuleListType* ruleList = entry->get_data(); + + /* If we don't already have this entry, add it. The memoize trie does not + * accept duplicates so it won't add it if already there and we just ignore the + * return code as we don't care if it is there already. Nothing is recorded + * when no trie exists for this rule yet (entry == NULL). + */ + ruleList->add(ruleParseStart, stopIndex); + } +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::getCurrentInputSymbol( IntStreamType* istream ) +{ + return this->getCurrentInputSymbol( istream, ClassForwarder() ); +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder) +{ + return NULL; +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder) +{ + typedef typename ImplTraits::TokenStreamType TokenStreamType; + TokenStreamType* token_stream = static_cast(istream); + return token_stream->_LT(1); +} + +template< class ImplTraits, class StreamType > +const typename BaseRecognizer::UnitType* +BaseRecognizer::getCurrentInputSymbol(IntStreamType* istream, ClassForwarder) +{ + typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; + TreeNodeStreamType* ctns = static_cast(istream); + return ctns->_LT(1); +} + + +template< class ImplTraits, class StreamType > +typename BaseRecognizer::UnitType* BaseRecognizer::getMissingSymbol( IntStreamType* istream, + ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, + BitsetListType* follow) +{ + return
this->get_super()->getMissingSymbol( istream, e, expectedTokenType, follow ); +} + + +template< class ImplTraits, class StreamType > + template +bool BaseRecognizer::synpred(ClassForwarder pred) +{ + ANTLR_MARKER start; + SuperType* super = static_cast(this); + IntStreamType* is = super->get_istream(); + + /* Begin backtracking so we can get back to where we started after trying out + * the syntactic predicate. + */ + start = is->mark(); + m_state->inc_backtracking(); + + /* Try the syntactic predicate + */ + this->get_super()->synpred( pred ); + + /* Rewind the input to the mark and leave backtracking mode + */ + is->rewind(start); + m_state->dec_backtracking(); + + if ( m_state->get_failed() == true) + { + /* Predicate failed; the failed flag is cleared before returning + */ + m_state->set_failed(false); + return false; + } + else + { + /* Predicate was successful + */ + m_state->set_failed(false); + return true; + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::exConstruct() +{ + this->get_super()->exConstruct(); +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::reset() +{ + this->reset( ClassForwarder() ); +} + +template< class ImplTraits, class StreamType > +template< typename CompType > +void BaseRecognizer::reset( ClassForwarder ) +{ + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + m_state->get_following().clear(); + + // Reset the state flags and counters + // + m_state->set_errorRecovery(false); + m_state->set_lastErrorIndex(-1); + m_state->set_failed(false); + m_state->set_errorCount(0); + m_state->set_backtracking(0); + + if (m_state->get_ruleMemo() != NULL) + { + delete m_state->get_ruleMemo(); + m_state->set_ruleMemo( new RuleMemoType(15) ); /* 16 bit depth is enough for 32768 rules!
*/ + } +} + +template< class ImplTraits, class StreamType > +void BaseRecognizer::reset( ClassForwarder ) +{ + m_state->set_token_present( false ); + m_state->set_type( ImplTraits::CommonTokenType::TOKEN_INVALID ); + m_state->set_channel( TOKEN_DEFAULT_CHANNEL ); + m_state->set_tokenStartCharIndex( -1 ); + m_state->set_tokenStartCharPositionInLine(-1); + m_state->set_tokenStartLine( -1 ); + m_state->set_text(""); +} + +template< class ImplTraits, class StreamType > +BaseRecognizer::~BaseRecognizer() +{ + // Did we have a state allocated? + // + if (m_state != NULL) + { + // Free any rule memoization we set up + // + if (m_state->get_ruleMemo() != NULL) + { + delete m_state->get_ruleMemo(); + m_state->set_ruleMemo(NULL); + } + + + // Free any exception space we have left around + // + ExceptionBaseType* thisE = m_state->get_exception(); + if (thisE != NULL) + { + delete thisE; + } + + // Free the shared state memory + // + delete m_state; + } + + // No further cleanup is performed here. + // +} + + + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3bitset.hpp b/runtime/Cpp/include/antlr3bitset.hpp new file mode 100755 index 000000000..a711b8a48 --- /dev/null +++ b/runtime/Cpp/include/antlr3bitset.hpp @@ -0,0 +1,224 @@ +/** + * \file + * Defines the basic structures of an ANTLR3 bitset. This is a C++ version of the + * cut down Bitset class provided with the Java version of ANTLR 3. + * + * + */ +#ifndef _ANTLR3_BITSET_HPP +#define _ANTLR3_BITSET_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** How many bits in each bitset element (word) + */ +static const ANTLR_UINT32 ANTLR_BITSET_BITS = 64; + +/** How many bits in a nibble of bits + */ +static const ANTLR_UINT32 ANTLR_BITSET_NIBBLE = 4; + +/** log2 of ANTLR_BITSET_BITS, i.e. 2^ANTLR_BITSET_LOG_BITS == ANTLR_BITSET_BITS + */ +static const ANTLR_UINT32 ANTLR_BITSET_LOG_BITS = 6; + +/** We will often need to do a mod operator (i mod nbits). + * For powers of two, this mod operation is the + * same as: + * - (i & (nbits-1)). + * + * Since mod is relatively slow, we use an easily + * precomputed mod mask to do the mod instead.
+ */ +static const ANTLR_UINT32 ANTLR_BITSET_MOD_MASK = ANTLR_BITSET_BITS - 1; + +template +class BitsetList : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::BitsetType BitsetType; + +private: + /// Pointer to the allocated array of bits for this bit set, which + /// is an array of 64 bit elements (of the architecture). If we find a + /// machine/C compiler that does not know anything about 64 bit values + /// then it should be easy enough to produce a 32 bit (or less) version + /// of the bitset code. Note that the pointer here may be static if laid down + /// by the code generation, and it must be copied if it is to be manipulated + /// to perform followset calculations. + /// + ANTLR_BITWORD* m_bits; + + /// Length of the current bit set in ANTLR_BITWORD units. + /// + ANTLR_UINT32 m_length; + +public: + BitsetList(); + BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length ); + + ANTLR_BITWORD* get_bits() const; + ANTLR_UINT32 get_length() const; + void set_bits( ANTLR_BITWORD* bits ); + void set_length( ANTLR_UINT32 length ); + + /// + /// \brief + /// Creates a new bitset that holds a copy of the m_length words + /// stored in this list. + /// + /// \returns + /// A newly allocated bitset; callers in this runtime delete it + /// when they are done with it. + /// + /// \remarks + /// The C runtime equivalent was a stdargs function terminated by -1: + /// - pANTLR3_BITSET = antlrBitsetLoad(bset, bset11, ..., -1); + /// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset + /// This member takes no arguments and reads the words from the + /// list itself.
+ /// + /// + BitsetType* bitsetLoad(); + + BitsetType* bitsetCopy(); + +}; + +template +class Bitset : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename AllocPolicyType::template ListType IntListType; + typedef typename ImplTraits::BitsetListType BitsetListType; + +private: + /// The actual bits themselves + /// + BitsetListType m_blist; + +public: + Bitset( ANTLR_UINT32 nbits=0 ); + Bitset( const Bitset& bitset ); + Bitset* clone() const; + Bitset* bor(Bitset* bitset2); + + BitsetListType& get_blist(); + void borInPlace(Bitset* bitset2); + ANTLR_UINT32 size() const; + void add(ANTLR_INT32 bit); + void grow(ANTLR_INT32 newSize); + bool equals(Bitset* bitset2) const; + bool isMember(ANTLR_UINT32 bit) const; + ANTLR_UINT32 numBits() const; + void remove(ANTLR_UINT32 bit); + bool isNilNode() const; + + /** Produce an integer list of all the bits that are turned on + * in this bitset. Used for error processing in the main as the bitset + * represents a number of integer tokens which we use for follow sets + * and so on. + * + * The first entry is the number of elements following in the list. + * NOTE(review): the implementation stores size() + 1 in the first entry, i.e. the count includes the first entry itself - confirm which is intended. + */ + ANTLR_INT32* toIntList() const; + + /// + /// \brief + /// Creates a new bitset with at least one element, but as + /// many elements as are required. + /// + /// \param[in] bit + /// A variable number of bits to add to the set, ending in -1 (impossible bit). + /// + /// \returns + /// A new bit set with all of the specified elements added into it. + /// + /// Call as: + /// - pANTLR3_BITSET = antlrBitsetOf(n, n1, n2, -1); + /// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset + /// + /// \remarks + /// Stdargs function - must supply -1 as last parameter, which is NOT + /// added to the set. + /// + /// + //C++ doesn't like variable length arguments.
so use function overloading + static Bitset* BitsetOf(ANTLR_INT32 bit); + static Bitset* BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2); + + /// + /// \brief + /// Creates a new bitset and adds every entry of the given list to it + /// as a bit number. + /// + /// \param[in] list + /// The bit numbers to add to the set. + /// + /// \returns + /// A newly allocated bit set containing all of the listed bits. + /// + ///antlr3BitsetList + static Bitset* BitsetFromList(const IntListType& list); + ~Bitset(); + +private: + void growToInclude(ANTLR_INT32 bit); + static ANTLR_UINT64 BitMask(ANTLR_UINT32 bitNumber); + static ANTLR_UINT32 NumWordsToHold(ANTLR_UINT32 bit); + static ANTLR_UINT32 WordNumber(ANTLR_UINT32 bit); + void bitsetORInPlace(Bitset* bitset2); + +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3bitset.inl" + +#endif + diff --git a/runtime/Cpp/include/antlr3bitset.inl b/runtime/Cpp/include/antlr3bitset.inl new file mode 100755 index 000000000..4ba469e99 --- /dev/null +++ b/runtime/Cpp/include/antlr3bitset.inl @@ -0,0 +1,493 @@ +ANTLR_BEGIN_NAMESPACE() + +template +ANTLR_INLINE BitsetList::BitsetList() +{ + m_bits = NULL; + m_length = 0; +} + +template +ANTLR_INLINE BitsetList::BitsetList( ANTLR_BITWORD* bits, ANTLR_UINT32 length ) +{ + m_bits = bits; + m_length = length; +} + +template +ANTLR_INLINE ANTLR_BITWORD* BitsetList::get_bits() const +{ + return m_bits; +} + +template +ANTLR_INLINE ANTLR_UINT32 BitsetList::get_length() const +{ + return m_length; +} + +template +ANTLR_INLINE void BitsetList::set_bits( ANTLR_BITWORD* bits ) +{ + m_bits = bits; +} + +template +ANTLR_INLINE void BitsetList::set_length(
ANTLR_UINT32 length ) +{ + m_length = length; +} + +template +typename BitsetList::BitsetType* BitsetList::bitsetLoad() +{ + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 means default size) + // then Add() will take care + // of it. + // + BitsetType* bitset = new BitsetType(); + + if (this != NULL) + { + // Now we can add the element bits into the set + // + ANTLR_UINT32 count=0; + while (count < m_length) + { + if( bitset->get_blist().get_length() <= count) + bitset->grow(count+1); + + typename ImplTraits::BitsetListType& blist = bitset->get_blist(); + blist.m_bits[count] = *(m_bits+count); + count++; + } + } + + // return the new bitset + // + return bitset; +} + +template +typename BitsetList::BitsetType* BitsetList::bitsetCopy() +{ + BitsetType* bitset; + ANTLR_UINT32 numElements = m_length; + + // Avoid memory thrashing at the expense of a few more bytes + // + if (numElements < 8) + numElements = 8; + + // Allocate memory for the bitset structure itself + // + bitset = new Bitset(numElements); + memcpy(bitset->get_blist().get_bits(), m_bits, numElements * sizeof(ANTLR_BITWORD)); + + // All seems good + // + return bitset; +} + +template +Bitset::Bitset( ANTLR_UINT32 numBits ) +{ + // Avoid memory thrashing at the up front expense of a few bytes + if (numBits < (8 * ANTLR_BITSET_BITS)) + numBits = 8 * ANTLR_BITSET_BITS; + + // No we need to allocate the memory for the number of bits asked for + // in multiples of ANTLR3_UINT64. 
+ // + ANTLR_UINT32 numelements = ((numBits -1) >> ANTLR_BITSET_LOG_BITS) + 1; + + m_blist.set_bits( (ANTLR_BITWORD*) AllocPolicyType::alloc(numelements * sizeof(ANTLR_BITWORD))); + + memset( m_blist.get_bits(), 0, (numelements * sizeof(ANTLR_BITWORD))); + m_blist.set_length( numelements ); +} + +template +Bitset::Bitset( const Bitset& bitset ) + :m_blist() +{ + // Deep copy: the destructor frees m_blist's storage, so sharing the + // source's word array would free the same memory twice. + // + const ANTLR_UINT32 numelements = bitset.m_blist.get_length(); + + m_blist.set_bits( (ANTLR_BITWORD*) AllocPolicyType::alloc(numelements * sizeof(ANTLR_BITWORD))); + memcpy( m_blist.get_bits(), bitset.m_blist.get_bits(), numelements * sizeof(ANTLR_BITWORD) ); + m_blist.set_length( numelements ); +} + +template +ANTLR_INLINE Bitset* Bitset::clone() const +{ + Bitset* bitset; + + // Allocate memory for the bitset structure itself + // + bitset = new Bitset( ANTLR_BITSET_BITS * m_blist.get_length() ); + + // Install the actual bits in the source set + // + memcpy(bitset->m_blist.get_bits(), m_blist.get_bits(), + m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // All seems good + // + return bitset; +} + +template +Bitset* Bitset::bor(Bitset* bitset2) +{ + Bitset* bitset; + + if (this == NULL) + return bitset2->clone(); + + if (bitset2 == NULL) + return this->clone(); + + // Allocate memory for the newly ORed bitset structure itself. + // + bitset = this->clone(); + bitset->bitsetORInPlace(bitset2); + return bitset; +} + +template +void Bitset::borInPlace(Bitset* bitset2) +{ + ANTLR_UINT32 minimum; + + if (bitset2 == NULL) + return; + + // First make sure that the target bitset is big enough + // for the new bits to be ored in.
+ // grow() takes a length in words, which is exactly what get_length() reports. + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + this->grow( bitset2->m_blist.get_length() ); + + // Or the minimum number of words after any resizing went on + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + for (ANTLR_UINT32 i = minimum; i > 0; i--) + bits1[i-1] |= bits2[i-1]; +} + +template +ANTLR_UINT32 Bitset::size() const +{ + ANTLR_UINT32 degree; + ANTLR_INT32 i; + ANTLR_INT8 bit; + + // TODO: Come back to this, it may be faster to & with 0x01 + // then shift right a copy of the 4 bits, than shift left a constant of 1. + // But then again, the optimizer might just work this out + // anyway. + // + degree = 0; + ANTLR_BITWORD* bits = m_blist.get_bits(); + for (i = m_blist.get_length() - 1; i>= 0; i--) + { + if (bits[i] != 0) + { + for(bit = ANTLR_BITSET_BITS - 1; bit >= 0; bit--) + { + if((bits[i] & (((ANTLR_BITWORD)1) << bit)) != 0) + { + degree++; + } + } + } + } + return degree; +} + +template +ANTLR_INLINE void Bitset::add(ANTLR_INT32 bit) +{ + ANTLR_UINT32 word = Bitset::WordNumber(bit); + + if (word >= m_blist.get_length() ) + this->growToInclude(bit); + + ANTLR_BITWORD* bits = m_blist.get_bits(); + bits[word] |= Bitset::BitMask(bit); +} + +template +void Bitset::grow(ANTLR_INT32 newSize) +{ + ANTLR_BITWORD* newBits; + + // Space for newly sized bitset - TODO: come back to this and use realloc?, it may + // be more efficient... + // + newBits = (ANTLR_BITWORD*) AllocPolicyType::alloc0(newSize * sizeof(ANTLR_BITWORD) ); + if ( m_blist.get_bits() != NULL) + { + // Copy existing bits + // + memcpy( newBits, m_blist.get_bits(), m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // Out with the old bits...
de de de derrr + // + AllocPolicyType::free( m_blist.get_bits() ); + } + + // In with the new bits... keerrrang. + // + m_blist.set_bits(newBits); + m_blist.set_length(newSize); +} + +template +bool Bitset::equals(Bitset* bitset2) const +{ + ANTLR_UINT32 minimum; + ANTLR_UINT32 i; + + if (this == NULL || bitset2 == NULL) + return false; + + // Work out the minimum comparison set + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + // Make sure explicit in common bits are equal + // (i is unsigned: the loop ends when i wraps past zero) + for (i = minimum - 1; i < minimum ; i--) + { + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + if ( bits1[i] != bits2[i]) + return false; + } + + // Now make sure the bits of the larger set are all turned + // off. The extra words belong to whichever set is longer, so + // they must be read from that set's own storage. + if ( m_blist.get_length() > minimum) + { + for (i = minimum ; i < m_blist.get_length(); i++) + { + ANTLR_BITWORD* bits = m_blist.get_bits(); + if(bits[i] != 0) + return false; + } + } + else if (bitset2->m_blist.get_length() > minimum) + { + ANTLR_BITWORD* bits = bitset2->m_blist.get_bits(); + for (i = minimum; i < bitset2->m_blist.get_length(); i++) + { + if ( bits[i] != 0 ) + return false; + } + } + + return true; +} + +template +bool Bitset::isMember(ANTLR_UINT32 bit) const +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + + if (wordNo >= m_blist.get_length()) + return false; + + ANTLR_BITWORD* bits = m_blist.get_bits(); + if ( (bits[wordNo] & Bitset::BitMask(bit)) == 0) + return false; + else + return true; +} + +template +ANTLR_INLINE ANTLR_UINT32 Bitset::numBits() const +{ + return m_blist.get_length() << ANTLR_BITSET_LOG_BITS; +} + +template +ANTLR_INLINE typename ImplTraits::BitsetListType& Bitset::get_blist() +{ + return m_blist; +} + +template +ANTLR_INLINE void Bitset::remove(ANTLR_UINT32 bit) +{ + ANTLR_UINT32 wordNo = Bitset::WordNumber(bit); + + if (wordNo < m_blist.get_length()) + { + ANTLR_BITWORD* bits =
m_blist.get_bits(); + bits[wordNo] &= ~(Bitset::BitMask(bit)); + } +} + +template +ANTLR_INLINE bool Bitset::isNilNode() const +{ + ANTLR_UINT32 i; + ANTLR_BITWORD* bits = m_blist.get_bits(); + for (i = m_blist.get_length() -1 ; i < m_blist.get_length(); i--) + { + if(bits[i] != 0) + return false; + } + return true; +} + +template +ANTLR_INT32* Bitset::toIntList() const +{ + ANTLR_UINT32 numInts; // How many integers we will need + ANTLR_UINT32 numBits; // How many bits are in the set + ANTLR_UINT32 i; + ANTLR_UINT32 index; + + ANTLR_INT32* intList; + + numInts = this->size() + 1; + numBits = this->numBits(); + + intList = (ANTLR_INT32*) AllocPolicyType::alloc(numInts * sizeof(ANTLR_INT32)); + + intList[0] = numInts; + + // Enumerate the bits that are turned on; entry 0 is taken by the + // count written above, so the bit numbers start at index 1. + for (i = 0, index = 1; i < numBits; i++) + { + if (this->isMember(i) == true) + intList[index++] = i; + } + + // Result set + // + return intList; +} + +template +ANTLR_INLINE Bitset::~Bitset() +{ + if (m_blist.get_bits() != NULL) + AllocPolicyType::free(m_blist.get_bits()); + return; +} + +template +void Bitset::growToInclude(ANTLR_INT32 bit) +{ + ANTLR_UINT32 bl; + ANTLR_UINT32 nw; + + bl = (m_blist.get_length() << 1); + nw = Bitset::NumWordsToHold(bit); + + if (bl > nw) + this->grow(bl); + else + this->grow(nw); +} + +template +ANTLR_INLINE ANTLR_UINT64 Bitset::BitMask(ANTLR_UINT32 bitNumber) +{ + return ((ANTLR_UINT64)1) << (bitNumber & (ANTLR_BITSET_MOD_MASK)); +} + +template +ANTLR_INLINE ANTLR_UINT32 Bitset::NumWordsToHold(ANTLR_UINT32 bit) +{ + return (bit >> ANTLR_BITSET_LOG_BITS) + 1; +} + +template +ANTLR_INLINE ANTLR_UINT32 Bitset::WordNumber(ANTLR_UINT32 bit) +{ + return bit >> ANTLR_BITSET_LOG_BITS; +} + +template +void Bitset::bitsetORInPlace(Bitset* bitset2) +{ + ANTLR_UINT32 minimum; + ANTLR_UINT32 i; + + if (bitset2 == NULL) + return; + + // First make sure that the target bitset is big enough + // for the new bits to be ored in.
+ // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + this->growToInclude( bitset2->m_blist.get_length() * sizeof(ANTLR_BITWORD) ); + + // Or the miniimum number of bits after any resizing went on + // + if ( m_blist.get_length() < bitset2->m_blist.get_length() ) + minimum = m_blist.get_length(); + else + minimum = bitset2->m_blist.get_length(); + + ANTLR_BITWORD* bits1 = m_blist.get_bits(); + ANTLR_BITWORD* bits2 = bitset2->m_blist.get_bits(); + for (i = minimum; i > 0; i--) + bits1[i-1] |= bits2[i-1]; +} + +template +Bitset* Bitset::BitsetOf(ANTLR_INT32 bit) +{ + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 menas default size) + // then Add() will take care + // of it. + // + Bitset* bitset = new Bitset(0); + bitset->add(bit); + return bitset; +} + +template +Bitset* Bitset::BitsetOf(ANTLR_INT32 bit1, ANTLR_INT32 bit2) +{ + Bitset* bitset = Bitset::BitsetOf(bit1); + bitset->add(bit2); + return bitset; +} + +//static +template +Bitset* Bitset::BitsetFromList(const IntListType& list) +{ + // We have no idea what exactly is in the list + // so create a default bitset and then just add stuff + // as we enumerate. + // + Bitset* bitset = new Bitset(0); + for( int i = 0; i < list.size(); ++i ) + bitset->add( list[i] ); + + return bitset; +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3collections.hpp b/runtime/Cpp/include/antlr3collections.hpp new file mode 100755 index 000000000..21114039b --- /dev/null +++ b/runtime/Cpp/include/antlr3collections.hpp @@ -0,0 +1,285 @@ +#ifndef ANTLR3COLLECTIONS_HPP +#define ANTLR3COLLECTIONS_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/* -------------- TRIE Interfaces ---------------- */ + +/** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE + */ +template< class ImplTraits, class DataType > +class TrieEntry : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicy; + +private: + DataType m_data; + TrieEntry* m_next; /* Allows duplicate entries for same key in insertion order */ + +public: + TrieEntry(const DataType& data, TrieEntry* next); + DataType& get_data(); + const DataType& get_data() const; + TrieEntry* get_next() const; + void set_next( TrieEntry* next ); +}; + +/** Structure that defines an element/node in an ANTLR_INT_TRIE + */ +template< class ImplTraits, class DataType > +class IntTrieNode : public ImplTraits::AllocPolicyType +{ +public: + typedef TrieEntry TrieEntryType; + typedef TrieEntryType BucketsType; + +private: + ANTLR_UINT32 m_bitNum; /**< This is the left/right bit index for traversal along the nodes */ + ANTLR_INTKEY m_key; /**< This is the actual key that the entry represents if it is a terminal node */ + BucketsType* m_buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ + IntTrieNode* m_leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ + IntTrieNode* m_rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ + +public: + IntTrieNode(); + ~IntTrieNode(); + + ANTLR_UINT32 get_bitNum() const; + ANTLR_INTKEY get_key() const; + BucketsType* get_buckets() const; + IntTrieNode* get_leftN() const; + IntTrieNode* get_rightN() const; + void set_bitNum( ANTLR_UINT32 bitNum ); + void set_key( ANTLR_INTKEY key ); + void set_buckets( BucketsType* buckets ); + void set_leftN( IntTrieNode* leftN ); + void set_rightN( IntTrieNode* rightN ); +}; + +/** Structure that defines an ANTLR3_INT_TRIE. 
For this particular implementation, + * as you might expect, the key is turned into a "string" by looking at bit(key, depth) + * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) + * and potentially a huge trie. This is the algorithm for a Patricia Trie. + * Note also that this trie [can] accept multiple entries for the same key and is + * therefore a kind of elastic bucket patricia trie. + * + * If you find this code useful, please feel free to 'steal' it for any purpose + * as covered by the BSD license under which ANTLR is issued. You can cut the code + * but as the ANTLR library is only about 50K (Windows Vista), you might find it + * easier to just link the library. Please keep all comments and licenses and so on + * in any version of this you create of course. + * + * Jim Idle. + * + */ +class IntTrieBase +{ +public: + static const ANTLR_UINT8* get_bitIndex(); + static const ANTLR_UINT64* get_bitMask(); +}; + +template< class ImplTraits, class DataType > +class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase +{ +public: + typedef TrieEntry TrieEntryType; + typedef IntTrieNode IntTrieNodeType; + +private: + IntTrieNodeType* m_root; /* Root node of this integer trie */ + IntTrieNodeType* m_current; /* Used to traverse the TRIE with the next() method */ + ANTLR_UINT32 m_count; /* Current entry count */ + bool m_allowDups; /* Whether this trie accepts duplicate keys */ + +public: + /* INT TRIE Implementation of depth 64 bits, being the number of bits + * in a 64 bit integer. + */ + IntTrie( ANTLR_UINT32 depth ); + + /** Search the int Trie and return a pointer to the first bucket indexed + * by the key if it is contained in the trie, otherwise NULL. + */ + TrieEntryType* get( ANTLR_INTKEY key); + bool del( ANTLR_INTKEY key); + + /** Add an entry into the INT trie. 
+ * Basically we descend the trie as we do when searching it, which will + * locate the only node in the trie that can be reached by the bit pattern of the + * key. If the key is actually at that node, then if the trie accepts duplicates + * we add the supplied data in a new chained bucket to that data node. If it does + * not accept duplicates then we merely return FALSE in case the caller wants to know + * whether the key was already in the trie. + * If the node we locate is not the key we are looking to add, then we insert a new node + * into the trie with a bit index of the leftmost differing bit and the left or right + * node pointing to itself or the data node we are inserting 'before'. + */ + bool add( ANTLR_INTKEY key, const DataType& data ); + ~IntTrie(); +}; + +/** + * A topological sort system that given a set of dependencies of a node m on node n, + * can sort them in dependency order. This is a generally useful utility object + * that does not care what the things are it is sorting. Generally the set + * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. + * I have provided a sort method that given ANTLR3_VECTOR as an input will sort + * the vector entries in place, as well as a sort method that just returns an + * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but + * some set of your own device. + * + * Of the two main algorithms that could be used, I chose to use the depth first + * search for unvisited nodes as a) This runs in linear time, and b) it is what + * we used in the ANTLR Tool to perform a topological sort of the input grammar files + * based on their dependencies. 
+ */ +template +class Topo : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + +private: + /** + * A vector of vectors of edges, built by calling the addEdge method() + * to indicate that node number n depends on node number m. Each entry in the vector + * contains a bitset, which has a bit index set for each node upon which the + * entry node depends. + */ + BitsetType** m_edges; + + /** + * A vector used to build up the sorted output order. Note that + * as the vector contains UINT32 then the maximum node index is + * 'limited' to 2^32, as nodes should be zero based. + */ + ANTLR_UINT32* m_sorted; + + /** + * A vector used to detect cycles in the edge dependecies. It is used + * as a stack and each time we descend a node to one of its edges we + * add the node into this stack. If we find a node that we have already + * visited in the stack, then it means there wasa cycle such as 9->8->1->9 + * as the only way a node can be on the stack is if we are currently + * descnding from it as we remove it from the stack as we exit from + * descending its dependencies + */ + ANTLR_UINT32* m_cycle; + + /** + * A flag that indicates the algorithm found a cycle in the edges + * such as 9->8->1->9 + * If this flag is set after you have called one of the sort routines + * then the detected cycle will be contained in the cycle array and + * cycleLimit will point to the one after the last entry in the cycle. + */ + bool m_hasCycle; + + /** + * A watermark used to accumulate potential cycles in the cycle array. + * This should be zero when we are done. Check hasCycle after calling one + * of the sort methods and if it is true then you can find the cycle + * in cycle[0]...cycle[cycleMark-1] + */ + ANTLR_UINT32 m_cycleMark; + + /** + * One more than the largest node index that is contained in edges/sorted. 
+ */ + ANTLR_UINT32 m_limit; + + /** + * The set of visited nodes as determined by a set entry in + * the bitmap. + */ + BitsetType* m_visited; + +public: + Topo(); + /** + * A method that adds an edge from one node to another. An edge + * of n -> m indicates that node n is dependent on node m. Note that + * while building these edges, it is perfectly OK to add nodes out of + * sequence. So, if you have edges: + * + * 3 -> 0 + * 2 -> 1 + * 1 -> 3 + * + * The you can add them in that order and so add node 3 before nodes 2 and 1 + * + */ + void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency); + + + /** + * A method that returns a pointer to an array of sorted node indexes. + * The array is sorted in topological sorted order. Note that the array + * is only as large as the largest node index you created an edge for. This means + * that if you had an input of 32 nodes, but that largest node with an edge + * was 16, then the returned array will be the sorted order of the first 16 + * nodes and the last 16 nodes of your array are basically fine as they are + * as they had no dependencies and do not need any particular sort order. + * + * NB: If the structure that contains the array is freed, then the sorted + * array will be freed too so you should use the value of limit to + * make a long term copy of this array if you do not want to keep the topo + * structure around as well. + */ + ANTLR_UINT32* sortToArray(); + + /** + * A method that sorts the supplied ANTLR3_VECTOR in place based + * on the previously supplied edge data. + */ + template + void sortVector( typename ImplTraits::template VectorType& v); + + void DFS(ANTLR_UINT32 node); + + /** + * A method to free this structure and any associated memory. 
+ */ + ~Topo(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3collections.inl" + +#endif + + diff --git a/runtime/Cpp/include/antlr3collections.inl b/runtime/Cpp/include/antlr3collections.inl new file mode 100755 index 000000000..fb713c217 --- /dev/null +++ b/runtime/Cpp/include/antlr3collections.inl @@ -0,0 +1,995 @@ +ANTLR_BEGIN_NAMESPACE() + +template< class ImplTraits, class DataType > +ANTLR_INLINE TrieEntry::TrieEntry(const DataType& data, TrieEntry* next) + :m_data(data) +{ + m_next = next; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE DataType& TrieEntry::get_data() +{ + return m_data; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE const DataType& TrieEntry::get_data() const +{ + return m_data; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE TrieEntry* TrieEntry::get_next() const +{ + return m_next; +} + +template< class ImplTraits, class DataType > +ANTLR_INLINE void TrieEntry::set_next( TrieEntry* next ) +{ + m_next = next; +} + +template< class ImplTraits, class DataType > +ANTLR_INLINE ANTLR_UINT32 IntTrieNode::get_bitNum() const +{ + return m_bitNum; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE ANTLR_INTKEY IntTrieNode::get_key() const +{ + return m_key; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE typename IntTrieNode::BucketsType* IntTrieNode::get_buckets() const +{ + return m_buckets; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE IntTrieNode* IntTrieNode::get_leftN() const +{ + return m_leftN; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE IntTrieNode* IntTrieNode::get_rightN() const +{ + return m_rightN; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode::set_bitNum( ANTLR_UINT32 bitNum ) +{ + m_bitNum = bitNum; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode::set_key( ANTLR_INTKEY key ) +{ + m_key = key; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void 
IntTrieNode::set_buckets( BucketsType* buckets ) +{ + m_buckets = buckets; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode::set_leftN( IntTrieNode* leftN ) +{ + m_leftN = leftN; +} +template< class ImplTraits, class DataType > +ANTLR_INLINE void IntTrieNode::set_rightN( IntTrieNode* rightN ) +{ + m_rightN = rightN; +} + +ANTLR_INLINE const ANTLR_UINT8* IntTrieBase::get_bitIndex() +{ + static ANTLR_UINT8 bitIndex[256] = + { + 0, // 0 - Just for padding + 0, // 1 + 1, 1, // 2..3 + 2, 2, 2, 2, // 4..7 + 3, 3, 3, 3, 3, 3, 3, 3, // 8+ + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 16+ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 32+ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 64+ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 128+ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + return bitIndex; +} + +ANTLR_INLINE const ANTLR_UINT64* IntTrieBase::get_bitMask() +{ + static ANTLR_UINT64 bitMask[64] = + { + 0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000004ULL, 0x0000000000000008ULL, + 0x0000000000000010ULL, 0x0000000000000020ULL, 0x0000000000000040ULL, 0x0000000000000080ULL, + 0x0000000000000100ULL, 0x0000000000000200ULL, 0x0000000000000400ULL, 0x0000000000000800ULL, + 0x0000000000001000ULL, 0x0000000000002000ULL, 0x0000000000004000ULL, 0x0000000000008000ULL, + 0x0000000000010000ULL, 0x0000000000020000ULL, 0x0000000000040000ULL, 0x0000000000080000ULL, + 0x0000000000100000ULL, 0x0000000000200000ULL, 
0x0000000000400000ULL, 0x0000000000800000ULL, + 0x0000000001000000ULL, 0x0000000002000000ULL, 0x0000000004000000ULL, 0x0000000008000000ULL, + 0x0000000010000000ULL, 0x0000000020000000ULL, 0x0000000040000000ULL, 0x0000000080000000ULL, + 0x0000000100000000ULL, 0x0000000200000000ULL, 0x0000000400000000ULL, 0x0000000800000000ULL, + 0x0000001000000000ULL, 0x0000002000000000ULL, 0x0000004000000000ULL, 0x0000008000000000ULL, + 0x0000010000000000ULL, 0x0000020000000000ULL, 0x0000040000000000ULL, 0x0000080000000000ULL, + 0x0000100000000000ULL, 0x0000200000000000ULL, 0x0000400000000000ULL, 0x0000800000000000ULL, + 0x0001000000000000ULL, 0x0002000000000000ULL, 0x0004000000000000ULL, 0x0008000000000000ULL, + 0x0010000000000000ULL, 0x0020000000000000ULL, 0x0040000000000000ULL, 0x0080000000000000ULL, + 0x0100000000000000ULL, 0x0200000000000000ULL, 0x0400000000000000ULL, 0x0800000000000000ULL, + 0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL, 0x8000000000000000ULL + }; + + return bitMask; +} + +template< class ImplTraits, class DataType > +IntTrie::IntTrie( ANTLR_UINT32 depth ) +{ + /* Now we need to allocate the root node. This makes it easier + * to use the tree as we don't have to do anything special + * for the root node. + */ + m_root = new IntTrieNodeType; + + /* Now we seed the root node with the index being the + * highest left most bit we want to test, which limits the + * keys in the trie. This is the trie 'depth'. The limit for + * this implementation is 63 (bits 0..63). + */ + m_root->set_bitNum( depth ); + + /* And as we have nothing in here yet, we set both child pointers + * of the root node to point back to itself. + */ + m_root->set_leftN( m_root ); + m_root->set_rightN( m_root ); + m_count = 0; + + /* Finally, note that the key for this root node is 0 because + * we use calloc() to initialise it. 
+ */ + m_allowDups = false; + m_current = NULL; +} + +template< class ImplTraits, class DataType > +IntTrie::~IntTrie() +{ + /* Descend from the root and free all the nodes + */ + delete m_root; + + /* the nodes are all gone now, so we need only free the memory + * for the structure itself + */ +} + +template< class ImplTraits, class DataType > +typename IntTrie::TrieEntryType* IntTrie::get( ANTLR_INTKEY key) +{ + IntTrieNodeType* thisNode; + IntTrieNodeType* nextNode; + + if (m_count == 0) + return NULL; /* Nothing in this trie yet */ + + /* Starting at the root node in the trie, compare the bit index + * of the current node with its next child node (starts left from root). + * When the bit index of the child node is greater than the bit index of the current node + * then by definition (as the bit index decreases as we descent the trie) + * we have reached a 'backward' pointer. A backward pointer means we + * have reached the only node that can be reached by the bits given us so far + * and it must either be the key we are looking for, or if not then it + * means the entry was not in the trie, and we return NULL. A backward pointer + * points back in to the tree structure rather than down (deeper) within the + * tree branches. + */ + thisNode = m_root; /* Start at the root node */ + nextNode = thisNode->get_leftN(); /* Examine the left node from the root */ + + /* While we are descending the tree nodes... + */ + const ANTLR_UINT64* bitMask = this->get_bitMask(); + while( thisNode->get_bitNum() > nextNode->get_bitNum() ) + { + /* Next node now becomes the new 'current' node + */ + thisNode = nextNode; + + /* We now test the bit indicated by the bitmap in the next node + * in the key we are searching for. The new next node is the + * right node if that bit is set and the left node it is not. 
+ */ + if (key & bitMask[nextNode->get_bitNum()]) + { + nextNode = nextNode->get_rightN(); /* 1 is right */ + } + else + { + nextNode = nextNode->get_leftN(); /* 0 is left */ + } + } + + /* Here we have reached a node where the bitMap index is lower than + * its parent. This means it is pointing backward in the tree and + * must therefore be a terminal node, being the only point than can + * be reached with the bits seen so far. It is either the actual key + * we wanted, or if that key is not in the trie it is another key + * that is currently the only one that can be reached by those bits. + * That situation would obviously change if the key was to be added + * to the trie. + * + * Hence it only remains to test whether this is actually the key or not. + */ + if (nextNode->get_key() == key) + { + /* This was the key, so return the entry pointer + */ + return nextNode->get_buckets(); + } + else + { + return NULL; /* That key is not in the trie (note that we set the pointer to -1 if no payload) */ + } +} + +template< class ImplTraits, class DataType > +bool IntTrie::del( ANTLR_INTKEY key) +{ + IntTrieNodeType* p; + + p = m_root; + + return false; + +} + +template< class ImplTraits, class DataType > +bool IntTrie::add( ANTLR_INTKEY key, const DataType& data ) +{ + IntTrieNodeType* thisNode; + IntTrieNodeType* nextNode; + IntTrieNodeType* entNode; + ANTLR_UINT32 depth; + TrieEntryType* newEnt; + TrieEntryType* nextEnt; + ANTLR_INTKEY xorKey; + + /* Cache the bit depth of this trie, which is always the highest index, + * which is in the root node + */ + depth = m_root->get_bitNum(); + + thisNode = m_root; /* Start with the root node */ + nextNode = m_root->get_leftN(); /* And assume we start to the left */ + + /* Now find the only node that can be currently reached by the bits in the + * key we are being asked to insert. 
+ */ + const ANTLR_UINT64* bitMask = this->get_bitMask(); + while (thisNode->get_bitNum() > nextNode->get_bitNum() ) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = nextNode; + + if (key & bitMask[nextNode->get_bitNum()]) + { + /* Bit at the required index was 1, so travers the right node from here + */ + nextNode = nextNode->get_rightN(); + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + nextNode = nextNode->get_leftN(); + } + } + /* Here we have located the only node that can be reached by the + * bits in the requested key. It could in fact be that key or the node + * we need to use to insert the new key. + */ + if (nextNode->get_key() == key) + { + /* We have located an exact match, but we will only append to the bucket chain + * if this trie accepts duplicate keys. + */ + if (m_allowDups ==true) + { + /* Yes, we are accepting duplicates + */ + newEnt = new TrieEntryType(data, NULL); + + /* We want to be able to traverse the stored elements in the order that they were + * added as duplicate keys. We might need to revise this opinion if we end up having many duplicate keys + * as perhaps reverse order is just as good, so long as it is ordered. + */ + nextEnt = nextNode->get_buckets(); + while (nextEnt->get_next() != NULL) + { + nextEnt = nextEnt->get_next(); + } + nextEnt->set_next(newEnt); + + m_count++; + return true; + } + else + { + /* We found the key is already there and we are not allowed duplicates in this + * trie. + */ + return false; + } + } + + /* Here we have discovered the only node that can be reached by the bits in the key + * but we have found that this node is not the key we need to insert. We must find the + * the leftmost bit by which the current key for that node and the new key we are going + * to insert, differ. 
While this nested series of ifs may look a bit strange, experimentation + * showed that it allows a machine code path that works well with predicated execution + */ + xorKey = (key ^ nextNode->get_key() ); /* Gives 1 bits only where they differ then we find the left most 1 bit*/ + + /* Most common case is a 32 bit key really + */ + const ANTLR_UINT8* bitIndex = this->get_bitIndex(); +#ifdef ANTLR_USE_64BIT + if (xorKey & 0xFFFFFFFF00000000) + { + if (xorKey & 0xFFFF000000000000) + { + if (xorKey & 0xFF00000000000000) + { + depth = 56 + bitIndex[((xorKey & 0xFF00000000000000)>>56)]; + } + else + { + depth = 48 + bitIndex[((xorKey & 0x00FF000000000000)>>48)]; + } + } + else + { + if (xorKey & 0x0000FF0000000000) + { + depth = 40 + bitIndex[((xorKey & 0x0000FF0000000000)>>40)]; + } + else + { + depth = 32 + bitIndex[((xorKey & 0x000000FF00000000)>>32)]; + } + } + } + else +#endif + { + if (xorKey & 0x00000000FFFF0000) + { + if (xorKey & 0x00000000FF000000) + { + depth = 24 + bitIndex[((xorKey & 0x00000000FF000000)>>24)]; + } + else + { + depth = 16 + bitIndex[((xorKey & 0x0000000000FF0000)>>16)]; + } + } + else + { + if (xorKey & 0x000000000000FF00) + { + depth = 8 + bitIndex[((xorKey & 0x0000000000000FF00)>>8)]; + } + else + { + depth = bitIndex[xorKey & 0x00000000000000FF]; + } + } + } + + /* We have located the leftmost differing bit, indicated by the depth variable. So, we know what + * bit index we are to insert the new entry at. There are two cases, being where the two keys + * differ at a bit position that is not currently part of the bit testing, where they differ on a bit + * that is currently being skipped in the indexed comparisons, and where they differ on a bit + * that is merely lower down in the current bit search. If the bit index went bit 4, bit 2 and they differ + * at bit 3, then we have the "skipped" bit case. But if that chain was Bit 4, Bit 2 and they differ at bit 1 + * then we have the easy bit . 
+ * + * So, set up to descend the tree again, but this time looking for the insert point + * according to whether we skip the bit that differs or not. + */ + thisNode = m_root; + entNode = m_root->get_leftN(); + + /* Note the slight difference in the checks here to cover both cases + */ + while (thisNode->get_bitNum() > entNode->get_bitNum() && entNode->get_bitNum() > depth) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = entNode; + + if (key & bitMask[entNode->get_bitNum()]) + { + /* Bit at the required index was 1, so traverse the right node from here + */ + entNode = entNode->get_rightN(); + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + entNode = entNode->get_leftN(); + } + } + + /* We have located the correct insert point for this new key, so we need + * to allocate our entry and insert it etc. + */ + nextNode = new IntTrieNodeType(); + + /* Build a new entry block for the new node + */ + newEnt = new TrieEntryType(data, NULL); + + /* Install it + */ + nextNode->set_buckets(newEnt); + nextNode->set_key(key); + nextNode->set_bitNum( depth ); + + /* Work out the right and left pointers for this new node, which involve + * terminating with the current found node either right or left according + * to whether the current index bit is 1 or 0 + */ + if (key & bitMask[depth]) + { + nextNode->set_leftN(entNode); /* Terminates at previous position */ + nextNode->set_rightN(nextNode); /* Terminates with itself */ + } + else + { + nextNode->set_rightN(entNode); /* Terminates at previous position */ + nextNode->set_leftN(nextNode); /* Terminates with itself */ + } + + /* Finally, we need to change the pointers at the node we located + * for inserting. If the key bit at its index is set then the right + * pointer for that node becomes the newly created node, otherwise the left + * pointer does. 
+ */ + if (key & bitMask[thisNode->get_bitNum()] ) + { + thisNode->set_rightN( nextNode ); + } + else + { + thisNode->set_leftN(nextNode); + } + + /* Et voila + */ + m_count++; + return true; +} + +template< class ImplTraits, class DataType > +IntTrieNode::IntTrieNode() +{ + m_bitNum = 0; + m_key = 0; + m_buckets = NULL; + m_leftN = NULL; + m_rightN = NULL; +} + +template< class ImplTraits, class DataType > +IntTrieNode::~IntTrieNode() +{ + TrieEntryType* thisEntry; + TrieEntryType* nextEntry; + + /* If this node has a left pointer that is not a back pointer + * then recursively call to free this + */ + if ( m_bitNum > m_leftN->get_bitNum()) + { + /* We have a left node that needs descending, so do it. + */ + delete m_leftN; + } + + /* The left nodes from here should now be dealt with, so + * we need to descend any right nodes that are not back pointers + */ + if ( m_bitNum > m_rightN->get_bitNum() ) + { + /* There are some right nodes to descend and deal with. + */ + delete m_rightN; + } + + /* Now all the children are dealt with, we can destroy + * this node too + */ + thisEntry = m_buckets; + + while (thisEntry != NULL) + { + nextEntry = thisEntry->get_next(); + + /* Now free the data for this bucket entry + */ + delete thisEntry; + thisEntry = nextEntry; /* See if there are any more to free */ + } + + /* The bucket entry is now gone, so we can free the memory for + * the entry itself. + */ + + /* And that should be it for everything under this node and itself + */ +} + +/** + * Allocate and initialize a new ANTLR3 topological sorter, which can be + * used to define edges that identify numerical node indexes that depend on other + * numerical node indexes, which can then be sorted topologically such that + * any node is sorted after all its dependent nodes. 
+ * + * Use: + * + * /verbatim + + pANTLR3_TOPO topo; + topo = antlr3NewTopo(); + + if (topo == NULL) { out of memory } + + topo->addEdge(topo, 3, 0); // Node 3 depends on node 0 + topo->addEdge(topo, 0, 1); // Node - depends on node 1 + topo->sortVector(topo, myVector); // Sort the vector in place (node numbers are the vector entry numbers) + + * /verbatim + */ +template +Topo::Topo() +{ + // Initialize variables + // + m_visited = NULL; // Don't know how big it is yet + m_limit = 1; // No edges added yet + m_edges = NULL; // No edges added yet + m_sorted = NULL; // Nothing sorted at the start + m_cycle = NULL; // No cycles at the start + m_cycleMark = 0; // No cycles at the start + m_hasCycle = false; // No cycle at the start +} + +// Topological sorter +// +template +void Topo::addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency) +{ + ANTLR_UINT32 i; + ANTLR_UINT32 maxEdge; + BitsetType* edgeDeps; + + if (edge>dependency) + { + maxEdge = edge; + } + else + { + maxEdge = dependency; + } + // We need to add an edge to says that the node indexed by 'edge' is + // dependent on the node indexed by 'dependency' + // + + // First see if we have enough room in the edges array to add the edge? 
+ // + if ( m_edges == NULL) + { + // We don't have any edges yet, so create an array to hold them + // + m_edges = AllocPolicyType::alloc0(sizeof(BitsetType*) * (maxEdge + 1)); + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + else if (m_limit <= maxEdge) + { + // WE have some edges but not enough + // + m_edges = AllocPolicyType::realloc(m_edges, sizeof(BitsetType*) * (maxEdge + 1)); + + // Initialize the new bitmaps to ;indicate we have no edges defined yet + // + for (i = m_limit; i <= maxEdge; i++) + { + *((m_edges) + i) = NULL; + } + + // Set the limit to what we have now + // + m_limit = maxEdge + 1; + } + + // If the edge was flagged as depending on itself, then we just + // do nothing as it means this routine was just called to add it + // in to the list of nodes. + // + if (edge == dependency) + { + return; + } + + // Pick up the bit map for the requested edge + // + edgeDeps = *((m_edges) + edge); + + if (edgeDeps == NULL) + { + // No edges are defined yet for this node + // + edgeDeps = new BitsetType(0); + *((m_edges) + edge) = edgeDeps; + } + + // Set the bit in the bitmap that corresponds to the requested + // dependency. + // + edgeDeps->add(dependency); + + // And we are all set + // + return; + +} + +/** + * Given a starting node, descend its dependent nodes (ones that it has edges + * to) until we find one without edges. Having found a node without edges, we have + * discovered the bottom of a depth first search, which we can then ascend, adding + * the nodes in order from the bottom, which gives us the dependency order. + */ +template +void Topo::DFS(ANTLR_UINT32 node) +{ + BitsetType* edges; + + // Guard against a revisit and check for cycles + // + if (m_hasCycle == true) + { + return; // We don't do anything else if we found a cycle + } + + if ( m_visited->isMember(node)) + { + // Check to see if we found a cycle. To do this we search the + // current cycle stack and see if we find this node already in the stack. 
+ // + ANTLR_UINT32 i; + + for (i=0; i< m_cycleMark; i++) + { + if ( m_cycle[i] == node) + { + // Stop! We found a cycle in the input, so rejig the cycle + // stack so that it only contains the cycle and set the cycle flag + // which will tell the caller what happened + // + ANTLR_UINT32 l; + + for (l = i; l < m_cycleMark; l++) + { + m_cycle[l - i] = m_cycle[l]; // Move to zero base in the cycle list + } + + // Recalculate the limit + // + m_cycleMark -= i; + + // Signal disaster + // + m_hasCycle = true; + } + } + return; + } + + // So far, no cycles have been found and we have not visited this node yet, + // so this node needs to go into the cycle stack before we continue + // then we will take it out of the stack once we have descended all its + // dependencies. + // + m_cycle[m_cycleMark++] = node; + + // First flag that we have visited this node + // + m_visited->add(node); + + // Now, if this node has edges, then we want to ensure we visit + // them all before we drop through and add this node into the sorted + // list. + // + edges = *((m_edges) + node); + if (edges != NULL) + { + // We have some edges, so visit each of the edge nodes + // that have not already been visited. + // + ANTLR_UINT32 numBits; // How many bits are in the set + ANTLR_UINT32 i; + ANTLR_UINT32 range; + + numBits = edges->numBits(); + range = edges->size(); // Number of set bits + + // Stop if we exahust the bit list or have checked the + // number of edges that this node refers to (so we don't + // check bits at the end that cannot possibly be set). + // + for (i=0; i<= numBits && range > 0; i++) + { + if (edges->isMember(i)) + { + range--; // About to check another one + + // Found an edge, make sure we visit and descend it + // + this->DFS(i); + } + } + } + + // At this point we will have visited all the dependencies + // of this node and they will be ordered (even if there are cycles) + // So we just add the node into the sorted list at the + // current index position. 
+ // + m_sorted[m_limit++] = node; + + // Remove this node from the cycle list if we have not detected a cycle + // + if (m_hasCycle == false) + { + m_cycleMark--; + } + + return; +} + +template +ANTLR_UINT32* Topo::sortToArray() +{ + ANTLR_UINT32 v; + ANTLR_UINT32 oldLimit; + + // Guard against being called with no edges defined + // + if (m_edges == NULL) + { + return 0; + } + // First we need a vector to populate with enough + // entries to accomodate the sorted list and another to accomodate + // the maximum cycle we could detect which is all nodes such as 0->1->2->3->0 + // + m_sorted = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32) ); + m_cycle = AllocPolicyType::alloc( m_limit * sizeof(ANTLR_UINT32)); + + // Next we need an empty bitset to show whether we have visited a node + // or not. This is the bit that gives us linear time of course as we are essentially + // dropping through the nodes in depth first order and when we get to a node that + // has no edges, we pop back up the stack adding the nodes we traversed in reverse + // order. + // + m_visited = new BitsetType(0); + + // Now traverse the nodes as if we were just going left to right, but + // then descend each node unless it has already been visited. + // + oldLimit = m_limit; // Number of nodes to traverse linearly + m_limit = 0; // Next entry in the sorted table + + for (v = 0; v < oldLimit; v++) + { + // If we did not already visit this node, then descend it until we + // get a node without edges or arrive at a node we have already visited. 
+ // + if (m_visited->isMember(v) == false) + { + // We have not visited this one so descend it + // + this->DFS(v); + } + + // Break the loop if we detect a cycle as we have no need to go any + // further + // + if (m_hasCycle == true) + { + break; + } + } + + // Reset the limit to the number we recorded as if we hit a + // cycle, then limit will have stopped at the node where we + // discovered the cycle, but in order to free the edge bitmaps + // we need to know how many we may have allocated and traverse them all. + // + m_limit = oldLimit; + + // Having traversed all the nodes we were given, we + // are guaranteed to have ordered all the nodes or detected a + // cycle. + // + return m_sorted; +} + +template + template +void Topo::sortVector( typename ImplTraits::template VectorType& v ) +{ + // To sort a vector, we first perform the + // sort to an array, then use the results to reorder the vector + // we are given. This is just a convenience routine that allows you to + // sort the children of a tree node into topological order before or + // during an AST walk. This can be useful for optimizations that require + // dag reorders and also when the input stream defines thigns that are + // interdependent and you want to walk the list of the generated trees + // for those things in topological order so you can ignore the interdependencies + // at that point. + // + ANTLR_UINT32 i; + + // Used as a lookup index to find the current location in the vector of + // the vector entry that was originally at position [0], [1], [2] etc + // + ANTLR_UINT32* vIndex; + + // Sort into an array, then we can use the array that is + // stored in the topo + // + if (this->sortToArray() == 0) + { + return; // There were no edges + } + + if (m_hasCycle == true) + { + return; // Do nothing if we detected a cycle + } + + // Ensure that the vector we are sorting is at least as big as the + // the input sequence we were adsked to sort. 
It does not matter if it is + // bigger as thaat probably just means that nodes numbered higher than the + // limit had no dependencies and so can be left alone. + // + if (m_limit > v.size() ) + { + // We can only sort the entries that we have dude! The caller is + // responsible for ensuring the vector is the correct one and is the + // correct size etc. + // + m_limit = v.size(); + } + // We need to know the locations of each of the entries + // in the vector as we don't want to duplicate them in a new vector. We + // just use an indirection table to get the vector entry for a particular sequence + // acording to where we moved it last. Then we can just swap vector entries until + // we are done :-) + // + vIndex = AllocPolicyType::alloc(m_limit * sizeof(ANTLR_UINT32)); + + // Start index, each vector entry is located where you think it is + // + for (i = 0; i < m_limit; i++) + { + vIndex[i] = i; + } + + // Now we traverse the sorted array and moved the entries of + // the vector around according to the sort order and the indirection + // table we just created. The index telsl us where in the vector the + // original element entry n is now located via vIndex[n]. + // + for (i=0; i < m_limit; i++) + { + ANTLR_UINT32 ind; + + // If the vector entry at i is already the one that it + // should be, then we skip moving it of course. + // + if (vIndex[m_sorted[i]] == i) + { + continue; + } + + // The vector entry at i, should be replaced with the + // vector entry indicated by topo->sorted[i]. The vector entry + // at topo->sorted[i] may have already been swapped out though, so we + // find where it is now and move it from there to i. + // + ind = vIndex[m_sorted[i]]; + std::swap( v[i], v[ind] ); + + // Update our index. The element at i is now the one we wanted + // to be sorted here and the element we swapped out is now the + // element that was at i just before we swapped it. If you are lost now + // don't worry about it, we are just reindexing on the fly is all. 
+ // + vIndex[m_sorted[i]] = i; + vIndex[i] = ind; + } + + // Having traversed all the entries, we have sorted the vector in place. + // + AllocPolicyType::free(vIndex); + return; +} + +template +Topo::~Topo() +{ + ANTLR_UINT32 i; + + // Free the result vector + // + if (m_sorted != NULL) + { + AllocPolicyType::free(m_sorted); + } + + // Free the visited map + // + if (m_visited != NULL) + { + delete m_visited; + } + + // Free any edgemaps + // + if (m_edges != NULL) + { + Bitset* edgeList; + + for (i=0; i + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** The definition of an ANTLR3 common token structure, which all implementations + * of a token stream should provide, installing any further structures in the + * custom pointer element of this structure. + * + * \remark + * Token streams are in essence provided by lexers or other programs that serve + * as lexers. + */ + +template +class CommonToken : public ImplTraits::AllocPolicyType +{ +public: + /* Base token types, which all lexer/parser tokens come after in sequence. + */ + enum TOKEN_TYPE + { + /** Indicator of an invalid token + */ + TOKEN_INVALID = 0 + , EOR_TOKEN_TYPE + /** Imaginary token type to cause a traversal of child nodes in a tree parser + */ + , TOKEN_DOWN + /** Imaginary token type to signal the end of a stream of child nodes. + */ + , TOKEN_UP + /** First token that can be used by users/generated code + */ + , MIN_TOKEN_TYPE = TOKEN_UP + 1 + + /** End of file token + */ + , TOKEN_EOF = (ANTLR_CHARSTREAM_EOF & 0xFFFFFFFF) + }; + + typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::InputStreamType InputStreamType; + typedef typename ImplTraits::StreamDataType StreamDataType; + +private: + /** The actual type of this token + */ + ANTLR_UINT32 m_type; + + /** The virtual channel that this token exists in. 
+ */ + ANTLR_UINT32 m_channel; + + mutable StringType m_tokText; + + /** The offset into the input stream that the line in which this + * token resides starts. + */ + const StreamDataType* m_lineStart; + + /** The line number in the input stream where this token was derived from + */ + ANTLR_UINT32 m_line; + + /** The character position in the line that this token was derived from + */ + ANTLR_INT32 m_charPositionInLine; + + /** Pointer to the input stream that this token originated in. + */ + InputStreamType* m_input; + + /** What the index of this token is, 0, 1, .., n-2, n-1 tokens + */ + ANTLR_MARKER m_index; + + /** The character offset in the input stream where the text for this token + * starts. + */ + ANTLR_MARKER m_startIndex; + + /** The character offset in the input stream where the text for this token + * stops. + */ + ANTLR_MARKER m_stopIndex; + +public: + CommonToken(); + CommonToken(ANTLR_UINT32 type); + CommonToken(TOKEN_TYPE type); + CommonToken( const CommonToken& ctoken ); + + CommonToken& operator=( const CommonToken& ctoken ); + bool operator==( const CommonToken& ctoken ) const; + bool operator<( const CommonToken& ctoken ) const; + + InputStreamType* get_input() const; + ANTLR_MARKER get_index() const; + void set_index( ANTLR_MARKER index ); + void set_input( InputStreamType* input ); + + /* ============================== + * API + */ + + /** Function that returns the text pointer of a token, use + * toString() if you want a pANTLR3_STRING version of the token. + */ + StringType getText() const; + + /** Pointer to a function that 'might' be able to set the text associated + * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. 
+ */ + void set_tokText( const StringType& text ); + + /** Pointer to a function that 'might' be able to set the text associated + * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually + * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have + * strings associated with them but just point into the current input stream. These + * tokens will implement this function with a function that errors out (probably + * drastically. + */ + void setText(ANTLR_UINT8* text); + void setText(const char* text); + + /** Pointer to a function that returns the token type of this token + */ + ANTLR_UINT32 get_type() const; + ANTLR_UINT32 getType() const; + + /** Pointer to a function that sets the type of this token + */ + void set_type(ANTLR_UINT32 ttype); + + /** Pointer to a function that gets the 'line' number where this token resides + */ + ANTLR_UINT32 get_line() const; + + /** Pointer to a function that sets the 'line' number where this token reside + */ + void set_line(ANTLR_UINT32 line); + + /** Pointer to a function that gets the offset in the line where this token exists + */ + ANTLR_INT32 get_charPositionInLine() const; + ANTLR_INT32 getCharPositionInLine() const; + + /** Pointer to a function that sets the offset in the line where this token exists + */ + void set_charPositionInLine(ANTLR_INT32 pos); + + /** Pointer to a function that gets the channel that this token was placed in (parsers + * can 'tune' to these channels. + */ + ANTLR_UINT32 get_channel() const; + + /** Pointer to a function that sets the channel that this token should belong to + */ + void set_channel(ANTLR_UINT32 channel); + + /** Pointer to a function that returns an index 0...n-1 of the token in the token + * input stream. + */ + ANTLR_MARKER get_tokenIndex() const; + + /** Pointer to a function that can set the token index of this token in the token + * input stream. 
+ */ + void set_tokenIndex(ANTLR_MARKER tokenIndex); + + /** Pointer to a function that gets the start index in the input stream for this token. + */ + ANTLR_MARKER get_startIndex() const; + + /** Pointer to a function that sets the start index in the input stream for this token. + */ + void set_startIndex(ANTLR_MARKER index); + + /** Pointer to a function that gets the stop index in the input stream for this token. + */ + ANTLR_MARKER get_stopIndex() const; + + /** Pointer to a function that sets the stop index in the input stream for this token. + */ + void set_stopIndex(ANTLR_MARKER index); + const StreamDataType* get_lineStart() const; + void set_lineStart( const StreamDataType* lineStart ); + + /** Pointer to a function that returns this token as a text representation that can be + * printed with embedded control codes such as \n replaced with the printable sequence "\\n" + * This also yields a string structure that can be used more easily than the pointer to + * the input stream in certain situations. 
+ */ + StringType toString() const; + +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3commontoken.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3commontoken.inl b/runtime/Cpp/include/antlr3commontoken.inl new file mode 100755 index 000000000..87194dca1 --- /dev/null +++ b/runtime/Cpp/include/antlr3commontoken.inl @@ -0,0 +1,322 @@ +ANTLR_BEGIN_NAMESPACE() + +template +CommonToken::CommonToken() +{ + m_type = 0; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template +CommonToken::CommonToken(ANTLR_UINT32 type) +{ + m_type = type; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template +CommonToken::CommonToken(TOKEN_TYPE type) +{ + m_type = type; + m_channel = 0; + m_lineStart = NULL; + m_line = 0; + m_charPositionInLine = 0; + m_input = NULL; + m_index = 0; + m_startIndex = 0; + m_stopIndex = 0; +} + +template +CommonToken::CommonToken( const CommonToken& ctoken ) + :m_tokText( ctoken.m_tokText ) +{ + m_type = ctoken.m_type; + m_channel = ctoken.m_channel; + m_lineStart = ctoken.m_lineStart; + m_line = ctoken.m_line; + m_charPositionInLine = ctoken.m_charPositionInLine; + m_input = ctoken.m_input; + m_index = ctoken.m_index; + m_startIndex = ctoken.m_startIndex; + m_stopIndex = ctoken.m_stopIndex; +} + +template +CommonToken& CommonToken::operator=( const CommonToken& ctoken ) +{ + m_type = ctoken.m_type; + m_channel = ctoken.m_channel; + m_lineStart = ctoken.m_lineStart; + m_line = ctoken.m_line; + m_charPositionInLine = ctoken.m_charPositionInLine; + m_input = ctoken.m_input; + m_index = ctoken.m_index; + m_startIndex = ctoken.m_startIndex; + m_stopIndex = ctoken.m_stopIndex; + + m_tokText = ctoken.m_tokText; + return *this; +} + +template +ANTLR_INLINE bool CommonToken::operator<( const CommonToken& ctoken ) const +{ + return (m_index 
< ctoken.m_index); +} + +template +bool CommonToken::operator==( const CommonToken& ctoken ) const +{ + return ( (m_type == ctoken.m_type) && + (m_channel == ctoken.m_channel) && + (m_lineStart == ctoken.m_lineStart) && + (m_line == ctoken.m_line) && + (m_charPositionInLine == ctoken.m_charPositionInLine) && + (m_input == ctoken.m_input) && + (m_index == ctoken.m_index) && + (m_startIndex == ctoken.m_startIndex) && + (m_stopIndex == ctoken.m_stopIndex) ); +} + +template +ANTLR_INLINE typename CommonToken::InputStreamType* CommonToken::get_input() const +{ + return m_input; +} + +template +ANTLR_INLINE ANTLR_MARKER CommonToken::get_index() const +{ + return m_index; +} + +template +ANTLR_INLINE void CommonToken::set_index( ANTLR_MARKER index ) +{ + m_index = index; +} + +template +void CommonToken::set_input( InputStreamType* input ) +{ + m_input = input; +} + +template +typename CommonToken::StringType CommonToken::getText() const +{ + if ( !m_tokText.empty() ) + return m_tokText; + + // EOF is a special case + // + if ( m_type == TOKEN_EOF) + { + m_tokText = ""; + return m_tokText; + } + + // We had nothing installed in the token, create a new string + // from the input stream + // + if (m_input != NULL) + return m_input->substr( this->get_startIndex(), this->get_stopIndex() ); + + // Nothing to return, there is no input stream + // + return ""; +} + +template +ANTLR_INLINE void CommonToken::set_tokText( const StringType& text ) +{ + m_tokText = text; +} + +template +ANTLR_INLINE void CommonToken::setText(ANTLR_UINT8* text) +{ + if( text == NULL ) + m_tokText.clear(); + else + m_tokText = (const char*) text; +} + +template +ANTLR_INLINE void CommonToken::setText(const char* text) +{ + if( text == NULL ) + m_tokText.clear(); + else + m_tokText = (const char*) text; +} + +template +ANTLR_INLINE ANTLR_UINT32 CommonToken::get_type() const +{ + return m_type; +} + +template +ANTLR_INLINE ANTLR_UINT32 CommonToken::getType() const +{ + return m_type; +} + +template 
+ANTLR_INLINE void CommonToken::set_type(ANTLR_UINT32 ttype) +{ + m_type = ttype; +} + +template +ANTLR_INLINE ANTLR_UINT32 CommonToken::get_line() const +{ + return m_line; +} + +template +ANTLR_INLINE void CommonToken::set_line(ANTLR_UINT32 line) +{ + m_line = line; +} + +template +ANTLR_INLINE ANTLR_INT32 CommonToken::get_charPositionInLine() const +{ + return m_charPositionInLine; +} + +template +ANTLR_INLINE ANTLR_INT32 CommonToken::getCharPositionInLine() const +{ + return this->get_charPositionInLine(); +} + +template +ANTLR_INLINE void CommonToken::set_charPositionInLine(ANTLR_INT32 pos) +{ + m_charPositionInLine = pos; +} + +template +ANTLR_INLINE ANTLR_UINT32 CommonToken::get_channel() const +{ + return m_channel; +} + +template +ANTLR_INLINE void CommonToken::set_channel(ANTLR_UINT32 channel) +{ + m_channel = channel; +} + +template +ANTLR_INLINE ANTLR_MARKER CommonToken::get_tokenIndex() const +{ + return m_index; +} + +template +ANTLR_INLINE void CommonToken::set_tokenIndex(ANTLR_MARKER tokenIndex) +{ + m_index = tokenIndex; +} + +template +ANTLR_INLINE ANTLR_MARKER CommonToken::get_startIndex() const +{ + return (m_startIndex == -1) ? 
(ANTLR_MARKER)(m_input->get_data()) : m_startIndex; +} + +template +ANTLR_INLINE void CommonToken::set_startIndex(ANTLR_MARKER index) +{ + m_startIndex = index; +} + +template +ANTLR_INLINE ANTLR_MARKER CommonToken::get_stopIndex() const +{ + return m_stopIndex; +} + +template +ANTLR_INLINE void CommonToken::set_stopIndex(ANTLR_MARKER index) +{ + m_stopIndex = index; +} + +template +ANTLR_INLINE const typename CommonToken::StreamDataType* CommonToken::get_lineStart() const +{ + return m_lineStart; +} + +template +ANTLR_INLINE void CommonToken::set_lineStart( const StreamDataType* lineStart ) +{ + m_lineStart = lineStart; +} + +template +typename CommonToken::StringType CommonToken::toString() const +{ + StringType text; + typedef typename ImplTraits::StringStreamType StringStreamType; + StringStreamType outtext; + + text = this->getText(); + + if (text.empty()) + return ""; + + /* Now we use our handy dandy string utility to assemble the + * the reporting string + * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + */ + outtext << "[Index: "; + outtext << (int)this->get_tokenIndex(); + outtext << " (Start: "; + outtext << (int)this->get_startIndex(); + outtext << "-Stop: "; + outtext << (int)this->get_stopIndex(); + outtext << ") ='"; + outtext << text; + outtext << "', type<"; + outtext << (int)m_type; + outtext << "> "; + + if (this->get_channel() > TOKEN_DEFAULT_CHANNEL) + { + outtext << "(channel = "; + outtext << (int)this->get_channel(); + outtext << ") "; + } + + outtext << "Line: "; + outtext << (int)this->get_line(); + outtext << " LinePos:"; + outtext << (int)this->get_charPositionInLine(); + outtext << "]"; + + return outtext.str(); +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3commontree.hpp b/runtime/Cpp/include/antlr3commontree.hpp new file mode 100755 index 000000000..39096f8a0 --- /dev/null +++ b/runtime/Cpp/include/antlr3commontree.hpp @@ -0,0 +1,139 @@ 
+/** Interface for an ANTLR3 common tree which is what gets + * passed around by the AST producing parser. + */ + +#ifndef _ANTLR3_COMMON_TREE_HPP +#define _ANTLR3_COMMON_TREE_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +template +class CommonTree : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::TreeType TreeType; + typedef CommonTree TokenType; + typedef typename AllocPolicyType::template VectorType ChildrenType; + typedef typename AllocPolicyType::template ListType ChildListType; + +private: + /// The list of all the children that belong to this node. They are not part of the node + /// as they belong to the common tree node that implements this. + /// + ChildrenType m_children; + + /// This is used to store the current child index position while descending + /// and ascending trees as the tree walk progresses. + /// + ANTLR_MARKER m_savedIndex; + + /// Start token index that encases this tree + /// + ANTLR_MARKER m_startIndex; + + /// End token that encases this tree + /// + ANTLR_MARKER m_stopIndex; + + /// A single token, this is the payload for the tree + /// + CommonTokenType* m_token; + + /// Points to the node that has this node as a child. + /// If this is NULL, then this is the root node. + /// + CommonTree* m_parent; + + /// What index is this particular node in the child list it + /// belongs to? 
+ /// + ANTLR_INT32 m_childIndex; + +public: + CommonTree(); + CommonTree( CommonTokenType* token ); + CommonTree( CommonTree* token ); + CommonTree( const CommonTree& ctree ); + + TokenType* get_token() const; + ChildrenType& get_children(); + const ChildrenType& get_children() const; + ChildrenType* get_children_p(); + ANTLR_INT32 get_childIndex() const; + TreeType* get_parent() const; + + void set_parent( TreeType* parent); + void set_childIndex( ANTLR_INT32 ); + + void addChild(TreeType* child); + /// Add all elements of the supplied list as children of this node + /// + void addChildren(const ChildListType& kids); + void createChildrenList(); + TreeType* deleteChild(ANTLR_UINT32 i); + /// Delete children from start to stop and replace with t even if t is + /// a list (nil-root tree). Num of children can increase or decrease. + /// For huge child lists, inserting children can force walking rest of + /// children to set their child index; could be slow. + /// + void replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeType* t); + CommonTree* dupNode() const; + TreeType* dupTree(); + ANTLR_UINT32 getCharPositionInLine(); + TreeType* getChild(ANTLR_UINT32 i); + + ANTLR_UINT32 getChildCount() const; + ANTLR_UINT32 getType(); + TreeType* getFirstChildWithType(ANTLR_UINT32 type); + ANTLR_UINT32 getLine(); + StringType getText(); + bool isNilNode(); + void setChild(ANTLR_UINT32 i, TreeType* child); + StringType toStringTree(); + StringType toString(); + void freshenPACIndexesAll(); + void freshenPACIndexes(ANTLR_UINT32 offset); + void reuse(); + ~CommonTree(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3commontree.inl" + +#endif + + diff --git a/runtime/Cpp/include/antlr3commontree.inl b/runtime/Cpp/include/antlr3commontree.inl new file mode 100755 index 000000000..8a3111bb7 --- /dev/null +++ b/runtime/Cpp/include/antlr3commontree.inl @@ -0,0 +1,565 @@ +ANTLR_BEGIN_NAMESPACE() + +template +CommonTree::CommonTree() +{ + m_savedIndex = 0; + 
m_startIndex = 0; + m_stopIndex = 0; + m_token = NULL; + m_parent = NULL; + m_childIndex = 0; +} + +template +CommonTree::CommonTree( const CommonTree& ctree ) + :m_children( ctree.m_children) +{ + m_savedIndex = ctree.m_savedIndex; + m_startIndex = ctree.m_startIndex; + m_stopIndex = ctree.m_stopIndex; + m_token = ctree.m_token; + m_parent = ctree.m_parent; + m_childIndex = ctree.m_childIndex; +} + +template +CommonTree::CommonTree( CommonTokenType* token ) +{ + m_savedIndex = 0; + m_startIndex = 0; + m_stopIndex = 0; + m_token = token; + m_parent = NULL; + m_childIndex = 0; +} + +template +CommonTree::CommonTree( CommonTree* tree ) +{ + m_savedIndex = 0; + m_startIndex = 0; + m_stopIndex = 0; + m_token = tree->get_token(); + m_parent = NULL; + m_childIndex = 0; +} + +template +typename CommonTree::TokenType* CommonTree::get_token() const +{ + return m_token; +} + +template +typename CommonTree::ChildrenType& CommonTree::get_children() +{ + return m_children; +} + +template +const typename CommonTree::ChildrenType& CommonTree::get_children() const +{ + return m_children; +} + +template +typename CommonTree::ChildrenType* CommonTree::get_children_p() +{ + return &m_children; +} + +template +void CommonTree::addChild(TreeType* child) +{ + ANTLR_UINT32 n; + ANTLR_UINT32 i; + + if (child == NULL) + return; + + ChildrenType& child_children = child->get_children(); + ChildrenType& tree_children = this->get_children(); + + if (child->isNilNode() == true) + { + if ( !child_children.empty() && child_children == tree_children ) + { + // TODO: Change to exception rather than ANTLR3_FPRINTF? 
+ // + fprintf(stderr, "ANTLR3: An attempt was made to add a child list to itself!\n"); + return; + } + + // Add all of the children's children to this list + // + if ( !child_children.empty() ) + { + if (tree_children.empty()) + { + // We are build ing the tree structure here, so we need not + // worry about duplication of pointers as the tree node + // factory will only clean up each node once. So we just + // copy in the child's children pointer as the child is + // a nil node (has not root itself). + // + tree_children.swap( child_children ); + this->freshenPACIndexesAll(); + } + else + { + // Need to copy the children + // + n = child_children.size(); + + for (i = 0; i < n; i++) + { + TreeType* entry; + entry = child_children[i]; + + // ANTLR3 lists can be sparse, unlike Array Lists + // + if (entry != NULL) + { + tree_children.push_back(entry); + } + } + } + } + } + else + { + // Tree we are adding is not a Nil and might have children to copy + // + if (tree_children.empty()) + { + // No children in the tree we are adding to, so create a new list on + // the fly to hold them. 
+ // + this->createChildrenList(); + } + tree_children.push_back( child ); + } +} + +template +void CommonTree::addChildren(const ChildListType& kids) +{ + for( typename ChildListType::const_iterator iter = kids.begin(); + iter != kids.end(); ++iter ) + { + this->addChild( *iter ); + } +} + +//dummy one, as vector is always there +template +void CommonTree::createChildrenList() +{ +} + +template +typename CommonTree::TreeType* CommonTree::deleteChild(ANTLR_UINT32 i) +{ + if( m_children.empty() ) + return NULL; + + return m_children.erase( m_children.begin() + i); +} + +template +void CommonTree::replaceChildren(ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeType* newTree) +{ + ANTLR_INT32 replacingHowMany; // How many nodes will go away + ANTLR_INT32 replacingWithHowMany; // How many nodes will replace them + ANTLR_INT32 numNewChildren; // Tracking variable + ANTLR_INT32 delta; // Difference in new vs existing count + + ANTLR_INT32 i; + ANTLR_INT32 j; + + if ( m_children.empty() ) + { + fprintf(stderr, "replaceChildren call: Indexes are invalid; no children in list for %s", this->getText().c_str() ); + return; + } + + // Either use the existing list of children in the supplied nil node, or build a vector of the + // tree we were given if it is not a nil node, then we treat both situations exactly the same + // + ChildrenType newChildren_temp; + ChildrenType* newChildren; // Iterator for whatever we are going to add in + + if (newTree->isNilNode()) + { + newChildren = newTree->get_children_p(); + } + else + { + newChildren = &newChildren_temp; + newChildren->push_back(newTree); + } + + // Initialize + // + replacingHowMany = stopChildIndex - startChildIndex + 1; + replacingWithHowMany = newChildren->size(); + delta = replacingHowMany - replacingWithHowMany; + numNewChildren = newChildren->size(); + + // If it is the same number of nodes, then do a direct replacement + // + if (delta == 0) + { + TreeType* child; + + // Same number of nodes + // + j = 0; 
+ for (i = startChildIndex; i <= stopChildIndex; i++) + { + child = newChildren->at(j); + ChildrenType& parent_children = this->get_children(); + parent_children[i] = child; + child->setParent(this); + child->setChildIndex(i); + } + } + else if (delta > 0) + { + ANTLR_UINT32 indexToDelete; + + // Less nodes than there were before + // reuse what we have then delete the rest + // + ChildrenType& parent_children = this->get_children(); + for (j = 0; j < numNewChildren; j++) + { + parent_children[ startChildIndex + j ] = newChildren->at(j); + } + + // We just delete the same index position until done + // + indexToDelete = startChildIndex + numNewChildren; + + for (j = indexToDelete; j <= stopChildIndex; j++) + { + parent_children.erase( parent_children.begin() + indexToDelete); + } + + this->freshenPACIndexes(startChildIndex); + } + else + { + ChildrenType& parent_children = this->get_children(); + ANTLR_UINT32 numToInsert; + + // More nodes than there were before + // Use what we can, then start adding + // + for (j = 0; j < replacingHowMany; j++) + { + parent_children[ startChildIndex + j ] = newChildren->at(j); + } + + numToInsert = replacingWithHowMany - replacingHowMany; + + for (j = replacingHowMany; j < replacingWithHowMany; j++) + { + parent_children.push_back( newChildren->at(j) ); + } + + this->freshenPACIndexes(startChildIndex); + } +} + +template +CommonTree* CommonTree::dupNode() const +{ + // The node we are duplicating is in fact the common tree (that's why we are here) + // so we use the super pointer to duplicate. 
+ // + TreeType* clone = new TreeType(); + + // The pointer we return is the base implementation of course + // + clone->set_token( m_token ); + return clone; +} + +template +typename CommonTree::TreeType* CommonTree::dupTree() +{ + TreeType* newTree; + ANTLR_UINT32 i; + ANTLR_UINT32 s; + + newTree = this->dupNode(); + + if ( !m_children.empty() ) + { + s = m_children.size(); + + for (i = 0; i < s; i++) + { + TreeType* t; + TreeType* newNode; + + t = m_children[i]; + + if (t!= NULL) + { + newNode = t->dupTree(); + newTree->addChild(newNode); + } + } + } + + return newTree; +} + +template +ANTLR_UINT32 CommonTree::getCharPositionInLine() +{ + CommonTokenType* token; + token = m_token; + + if (token == NULL || (token->getCharPositionInLine() == -1) ) + { + if (this->getChildCount() > 0) + { + TreeType* child; + + child = this->getChild(0); + + return child->getCharPositionInLine(); + } + return 0; + } + return token->getCharPositionInLine(); +} + +template +typename CommonTree::TreeType* CommonTree::getChild(ANTLR_UINT32 i) +{ + if ( m_children.empty() + || i >= m_children.size() ) + { + return NULL; + } + return m_children[i]; + +} + +template +void CommonTree::set_childIndex( ANTLR_INT32 i) +{ + m_childIndex = i; +} + +template +ANTLR_INT32 CommonTree::get_childIndex() const +{ + return m_childIndex; +} + +template +ANTLR_UINT32 CommonTree::getChildCount() const +{ + return static_cast( m_children.size() ); +} + +template +typename CommonTree::TreeType* CommonTree::get_parent() const +{ + return m_parent; +} + +template +void CommonTree::set_parent( TreeType* parent) +{ + m_parent = parent; +} + +template +ANTLR_UINT32 CommonTree::getType() +{ + if (this == NULL) + { + return 0; + } + else + { + return m_token->getType(); + } +} + +template +typename CommonTree::TreeType* CommonTree::getFirstChildWithType(ANTLR_UINT32 type) +{ + ANTLR_UINT32 i; + std::size_t cs; + + TreeType* t; + if ( !m_children.empty() ) + { + cs = m_children.size(); + for (i = 0; i < cs; i++) + 
{ + t = m_children[i]; + if (t->getType() == type) + { + return t; + } + } + } + return NULL; +} + +template +ANTLR_UINT32 CommonTree::getLine() +{ + TreeType* cTree = this; + CommonTokenType* token; + token = cTree->get_token(); + + if (token == NULL || token->getLine() == 0) + { + if ( this->getChildCount() > 0) + { + TreeType* child; + child = this->getChild(0); + return child->getLine(); + } + return 0; + } + return token->getLine(); +} + +template +typename CommonTree::StringType CommonTree::getText() +{ + return this->toString(); +} + +template +bool CommonTree::isNilNode() +{ + // This is a Nil tree if it has no payload (Token in our case) + // + if(m_token == NULL) + { + return true; + } + else + { + return false; + } +} + +template +void CommonTree::setChild(ANTLR_UINT32 i, TreeType* child) +{ + if( m_children.size() >= i ) + m_children.resize(i+1); + m_children[i] = child; +} + +template +typename CommonTree::StringType CommonTree::toStringTree() +{ + StringType string; + ANTLR_UINT32 i; + ANTLR_UINT32 n; + TreeType* t; + + if( m_children.empty() ) + { + return this->toString(); + } + + /* Need a new string with nothing at all in it. 
+ */ + if (this->isNilNode() == false) + { + string.append("("); + string.append(this->toString()); + string.append(" "); + } + if ( m_children != NULL) + { + n = m_children.size(); + + for (i = 0; i < n; i++) + { + t = m_children[i]; + + if (i > 0) + { + string.append(" "); + } + string.append(t->toStringTree()); + } + } + if (this->isNilNode() == false) + { + string.append(")"); + } + + return string; +} + +template +typename CommonTree::StringType CommonTree::toString() +{ + if (this->isNilNode() ) + { + StringType nilNode; + + nilNode = "nil"; + + return nilNode; + } + + return m_token->getText(); +} + +template +void CommonTree::freshenPACIndexesAll() +{ + this->freshenPACIndexes(0); +} + +template +void CommonTree::freshenPACIndexes(ANTLR_UINT32 offset) +{ + ANTLR_UINT32 count; + ANTLR_UINT32 c; + + count = this->getChildCount(); // How many children do we have + + // Loop from the supplied index and set the indexes and parent + // + for (c = offset; c < count; c++) + { + TreeType* child; + + child = this->getChild(c); + + child->setChildIndex(c); + child->setParent(this); + } +} + +template +void CommonTree::reuse() +{ + delete this; //memory re-use should be taken by the library user +} + +template +CommonTree::~CommonTree() +{ +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3commontreeadaptor.hpp b/runtime/Cpp/include/antlr3commontreeadaptor.hpp new file mode 100755 index 000000000..8b40f1c6d --- /dev/null +++ b/runtime/Cpp/include/antlr3commontreeadaptor.hpp @@ -0,0 +1,163 @@ +/** \file + * Definition of the ANTLR3 common tree adaptor. + */ + +#ifndef _ANTLR3_COMMON_TREE_ADAPTOR_HPP +#define _ANTLR3_COMMON_TREE_ADAPTOR_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +template +class CommonTreeAdaptor : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TreeType TreeType; + typedef TreeType TokenType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::DebugEventListenerType DebuggerType; + +public: + //The parameter is there only to provide uniform constructor interface + CommonTreeAdaptor(DebuggerType* dbg = NULL); + TreeType* nilNode(); + TreeType* dupTree( TreeType* tree); + TreeType* dupTreeTT( TreeType* t, TreeType* tree); + + void addChild( TreeType* t, TreeType* child); + void addChildToken( TreeType* t, CommonTokenType* child); + void setParent( TreeType* child, TreeType* parent); + TreeType* getParent( TreeType* child); + + TreeType* errorNode( CommonTokenType* tnstream, CommonTokenType* startToken, CommonTokenType* stopToken); + bool isNilNode( TreeType* t); + + TreeType* becomeRoot( TreeType* newRoot, TreeType* oldRoot); + TreeType* rulePostProcessing( TreeType* root); + + TreeType* becomeRootToken(CommonTokenType* newRoot, TreeType* oldRoot); + + TreeType* create( CommonTokenType* payload); + TreeType* createTypeToken( ANTLR_UINT32 tokenType, CommonTokenType* fromToken); + TreeType* createTypeTokenText ( ANTLR_UINT32 tokenType, CommonTokenType* fromToken, const ANTLR_UINT8* text); + TreeType* createTypeText ( ANTLR_UINT32 tokenType, const ANTLR_UINT8* text); + + TreeType* dupNode( TreeType* treeNode); + ANTLR_UINT32 getType( TreeType* t); + StringType getText( TreeType* t); + + TreeType* getChild( TreeType* t, ANTLR_UINT32 i); + void setChild( TreeType* t, ANTLR_UINT32 i, TreeType* child); + void deleteChild( TreeType* t, ANTLR_UINT32 i); + void setChildIndex( TreeType* t, ANTLR_INT32 i); + ANTLR_INT32 getChildIndex( TreeType* t); + + ANTLR_UINT32 getChildCount( TreeType*); + ANTLR_UINT64 getUniqueID( TreeType*); + + CommonTokenType* 
createToken( ANTLR_UINT32 tokenType, const ANTLR_UINT8* text); + CommonTokenType* createTokenFromToken( CommonTokenType* fromToken); + CommonTokenType* getToken( TreeType* t); + + void setTokenBoundaries( TreeType* t, CommonTokenType* startToken, CommonTokenType* stopToken); + ANTLR_MARKER getTokenStartIndex( TreeType* t); + ANTLR_MARKER getTokenStopIndex( TreeType* t); + + /// Produce a DOT (see graphviz freeware suite) from a base tree + /// + StringType makeDot( TreeType* theTree); + + /// Replace from start to stop child index of parent with t, which might + /// be a list. Number of children may be different + /// after this call. + /// + /// If parent is null, don't do anything; must be at root of overall tree. + /// Can't replace whatever points to the parent externally. Do nothing. + /// + void replaceChildren( TreeType* parent, ANTLR_INT32 startChildIndex, + ANTLR_INT32 stopChildIndex, TreeType* t); + + ~CommonTreeAdaptor(); + +protected: + void defineDotNodes(TreeType* t, const StringType& dotSpec); + void defineDotEdges(TreeType* t, const StringType& dotSpec); +}; + +//If someone can override the CommonTreeAdaptor at the compile time, that will be +//inherited here. Still you can choose to override the DebugTreeAdaptor, if you wish to +//change the DebugTreeAdaptor +template +class DebugTreeAdaptor : public ImplTraits::CommonTreeAdaptorType +{ +public: + //DebugEventListener implements functionality through virtual functions + //the template parameter is required for pointing back at the adaptor + typedef typename ImplTraits::DebugEventListener DebuggerType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +private: + /// If set to something other than NULL, then this structure is + /// points to an instance of the debugger interface. 
In general, the + /// debugger is only referenced internally in recovery/error operations + /// so that it does not cause overhead by having to check this pointer + /// in every function/method + /// + DebuggerType* m_debugger; + +public: + DebugTreeAdaptor( DebuggerType* debugger ); + void setDebugEventListener( DebuggerType* debugger); + TreeType* nilNode(); + void addChild(TreeType* t, TreeType* child); + void addChildToken(TreeType* t, CommonTokenType* child); + TreeType* becomeRoot( TreeType* newRootTree, TreeType* oldRootTree ); + TreeType* becomeRootToken(TreeType* newRoot, TreeType* oldRoot); + TreeType* createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken); + TreeType* createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text); + TreeType* createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text); + TreeType* dupTree( TreeType* tree); + + /// Sends the required debugging events for duplicating a tree + /// to the debugger. + /// + void simulateTreeConstruction(TreeType* tree); +}; + + +ANTLR_END_NAMESPACE() + +#include "antlr3commontreeadaptor.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3commontreeadaptor.inl b/runtime/Cpp/include/antlr3commontreeadaptor.inl new file mode 100755 index 000000000..698c7d73e --- /dev/null +++ b/runtime/Cpp/include/antlr3commontreeadaptor.inl @@ -0,0 +1,801 @@ +ANTLR_BEGIN_NAMESPACE() + +template +ANTLR_INLINE CommonTreeAdaptor::CommonTreeAdaptor(DebuggerType*) +{ +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::nilNode() +{ + return this->create(NULL); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::dupTree( TreeType* tree) +{ + return this->dupTreeTT(tree, NULL); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::dupTreeTT( TreeType* t, TreeType* parent) +{ + TreeType* newTree; + TreeType* child; + TreeType* newSubTree; + ANTLR_UINT32 n; + ANTLR_UINT32 i; + + if (t == NULL) + return NULL; + + newTree 
= t->dupNode(); + + // Ensure new subtree root has parent/child index set + // + this->setChildIndex( newTree, t->getChildIndex() ); + this->setParent(newTree, parent); + n = this->getChildCount(t); + + for (i=0; i < n; i++) + { + child = this->getChild(t, i); + newSubTree = this->dupTreeTT(child, t); + this->addChild(newTree, newSubTree); + } + return newTree; +} + +template +void CommonTreeAdaptor::addChild( TreeType* t, TreeType* child) +{ + if (t != NULL && child != NULL) + { + t->addChild(child); + } +} + +template +void CommonTreeAdaptor::addChildToken( TreeType* t, CommonTokenType* child) +{ + if (t != NULL && child != NULL) + { + this->addChild(t, this->create(child)); + } +} + +template +void CommonTreeAdaptor::setParent( TreeType* child, TreeType* parent) +{ + child->setParent(parent); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::getParent( TreeType* child) +{ + return child->getParent(); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::errorNode( CommonTokenType* tnstream, CommonTokenType* startToken, CommonTokenType* stopToken) +{ + // Use the supplied common tree node stream to get another tree from the factory + // TODO: Look at creating the erronode as in Java, but this is complicated by the + // need to track and free the memory allocated to it, so for now, we just + // want something in the tree that isn't a NULL pointer. + // + return this->createTypeText( CommonTokenType::TOKEN_INVALID, "Tree Error Node"); + +} + +template +bool CommonTreeAdaptor::isNilNode( TreeType* t) +{ + return t->isNilNode(); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::becomeRoot( TreeType* newRootTree, TreeType* oldRootTree) +{ + TreeType* saveRoot; + + /* Protect against tree rewrites if we are in some sort of error + * state, but have tried to recover. In C we can end up with a null pointer + * for a tree that was not produced. 
+ */ + if (newRootTree == NULL) + { + return oldRootTree; + } + + /* root is just the new tree as is if there is no + * current root tree. + */ + if (oldRootTree == NULL) + { + return newRootTree; + } + + /* Produce ^(nil real-node) + */ + if (newRootTree->isNilNode()) + { + if (newRootTree->getChildCount() > 1) + { + /* TODO: Handle tree exceptions + */ + fprintf(stderr, "More than one node as root! TODO: Create tree exception handling\n"); + return newRootTree; + } + + /* The new root is the first child, keep track of the original newRoot + * because if it was a Nil Node, then we can reuse it now. + */ + saveRoot = newRootTree; + newRootTree = newRootTree->getChild(0); + + // Reclaim the old nilNode() + // + saveRoot->reuse(); + } + + /* Add old root into new root. addChild takes care of the case where oldRoot + * is a flat list (nill rooted tree). All children of oldroot are added to + * new root. + */ + newRootTree->addChild(oldRootTree); + + // If the oldroot tree was a nil node, then we know at this point + // it has become orphaned by the rewrite logic, so we tell it to do + // whatever it needs to do to be reused. + // + if (oldRootTree->isNilNode()) + { + // We have taken an old Root Tree and appended all its children to the new + // root. In addition though it was a nil node, which means the generated code + // will not reuse it again, so we will reclaim it here. First we want to zero out + // any pointers it was carrying around. We are just the baseTree handler so we + // don't know necessarilly know how to do this for the real node, we just ask the tree itself + // to do it. 
+ // + oldRootTree->reuse(); + } + /* Always returns new root structure + */ + return newRootTree; +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::becomeRootToken(CommonTokenType* newRoot, TreeType* oldRoot) +{ + return this->becomeRoot(this->create(newRoot), oldRoot); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::create( CommonTokenType* payload) +{ + return new TreeType(payload); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::createTypeToken( ANTLR_UINT32 tokenType, + CommonTokenType* fromToken) +{ + /* Create the new token + */ + fromToken = this->createTokenFromToken(fromToken); + + /* Set the type of the new token to that supplied + */ + fromToken->setType(tokenType); + + /* Return a new node based upon this token + */ + return this->create(fromToken); + +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::createTypeTokenText( ANTLR_UINT32 tokenType, CommonTokenType* fromToken, const ANTLR_UINT8* text) +{ + /* Create the new token + */ + fromToken = this->createTokenFromToken(fromToken); + + /* Set the type of the new token to that supplied + */ + fromToken->setType(tokenType); + + /* Set the text of the token accordingly + */ + fromToken->setText(text); + + /* Return a new node based upon this token + */ + return this->create(fromToken); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::createTypeText( ANTLR_UINT32 tokenType, const ANTLR_UINT8* text) +{ + CommonTokenType* fromToken; + + /* Create the new token + */ + fromToken = this->createToken(tokenType, text); + + /* Return a new node based upon this token + */ + return this->create(fromToken); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::dupNode( TreeType* treeNode) +{ + return (treeNode == NULL) ? 
NULL : treeNode->dupNode(); +} + +template +ANTLR_UINT32 CommonTreeAdaptor::getType( TreeType* t) +{ + return t->getType(); +} + +template +typename CommonTreeAdaptor::StringType CommonTreeAdaptor::getText( TreeType* t) +{ + return t->getText(); +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::getChild( TreeType* t, ANTLR_UINT32 i) +{ + return t->getChild(i); +} + +template +void CommonTreeAdaptor::setChild( TreeType* t, ANTLR_UINT32 i, TreeType* child) +{ + t->setChild(i, child); +} + +template +void CommonTreeAdaptor::deleteChild( TreeType* t, ANTLR_UINT32 i) +{ + t->deleteChild(i); +} + +template +void CommonTreeAdaptor::setChildIndex( TreeType* t, ANTLR_INT32 i) +{ + t->setChildIndex(i); +} + +template +ANTLR_INT32 CommonTreeAdaptor::getChildIndex( TreeType * t) +{ + return t->getChildIndex(); +} + +template +ANTLR_UINT32 CommonTreeAdaptor::getChildCount( TreeType* t) +{ + return t->getChildCount(); +} + +template +ANTLR_UINT64 CommonTreeAdaptor::getUniqueID( TreeType* node ) +{ + return reinterpret_cast(node); +} + +template +typename CommonTreeAdaptor::CommonTokenType* + CommonTreeAdaptor::createToken( ANTLR_UINT32 tokenType, const ANTLR_UINT8* text) +{ + CommonTokenType* newToken = new CommonTokenType; + + if (newToken != NULL) + { + newToken->set_tokText( (const char*) text ); + newToken->setType(tokenType); + } + return newToken; + +} + +template +typename CommonTreeAdaptor::CommonTokenType* + CommonTreeAdaptor::createTokenFromToken( CommonTokenType* fromToken) +{ + CommonTokenType* newToken; + + newToken = new CommonTokenType; + + if (newToken != NULL) + { + // Create the text using our own string factory to avoid complicating + // commontoken. 
+ // + StringType text = fromToken->getText(); + newToken->set_tokText( text ); + newToken->setLine( fromToken->getLine() ); + newToken->setTokenIndex( fromToken->getTokenIndex() ); + newToken->setCharPositionInLine( fromToken->getCharPositionInLine() ); + newToken->setChannel( fromToken->getChannel() ); + newToken->setType( fromToken->getType() ); + } + + return newToken; +} + +template +typename CommonTreeAdaptor::CommonTokenType* + CommonTreeAdaptor::getToken( TreeType* t) +{ + return t->getToken(); +} + +template +void CommonTreeAdaptor::setTokenBoundaries( TreeType* t, CommonTokenType* startToken, CommonTokenType* stopToken) +{ + ANTLR_MARKER start; + ANTLR_MARKER stop; + + TreeType* ct; + + if (t == NULL) + { + return; + } + + if ( startToken != NULL) + { + start = startToken->getTokenIndex(); + } + else + { + start = 0; + } + + if ( stopToken != NULL) + { + stop = stopToken->getTokenIndex(); + } + else + { + stop = 0; + } + + ct = t; + + ct->set_startIndex(start); + ct->set_stopIndex(stop); +} + +template +ANTLR_MARKER CommonTreeAdaptor::getTokenStartIndex( TreeType* t) +{ + return t->get_tokenStartIndex(); +} + +template +ANTLR_MARKER CommonTreeAdaptor::getTokenStopIndex( TreeType* t) +{ + return t->get_tokenStopIndex(); +} + +template +typename CommonTreeAdaptor::StringType CommonTreeAdaptor::makeDot( TreeType* theTree) +{ + // The string we are building up + // + StringType dotSpec; + char buff[64]; + StringType text; + + dotSpec = "digraph {\n\n" + "\tordering=out;\n" + "\tranksep=.4;\n" + "\tbgcolor=\"lightgrey\"; node [shape=box, fixedsize=false, fontsize=12, fontname=\"Helvetica-bold\", fontcolor=\"blue\"\n" + "\twidth=.25, height=.25, color=\"black\", fillcolor=\"white\", style=\"filled, solid, bold\"];\n\n" + "\tedge [arrowsize=.5, color=\"black\", style=\"bold\"]\n\n"; + + if (theTree == NULL) + { + // No tree, so create a blank spec + // + dotSpec->append("n0[label=\"EMPTY TREE\"]\n"); + return dotSpec; + } + + sprintf(buff, "\tn%p[label=\"", 
theTree); + dotSpec.append(buff); + text = this->getText(theTree); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec->append("\"]\n"); + + // First produce the node defintions + // + this->defineDotNodes(theTree, dotSpec); + dotSpec.append("\n"); + this->defineDotEdges(theTree, dotSpec); + + // Terminate the spec + // + dotSpec.append("\n}"); + + // Result + // + return dotSpec; +} + +template +void CommonTreeAdaptor::replaceChildren( TreeType* parent, ANTLR_INT32 startChildIndex, ANTLR_INT32 stopChildIndex, TreeType* t) +{ + if (parent != NULL) + parent->replaceChildren(startChildIndex, stopChildIndex, t); +} + +template +CommonTreeAdaptor::~CommonTreeAdaptor() +{ +} + +template +void CommonTreeAdaptor::defineDotNodes(TreeType* t, const StringType& dotSpec) +{ + // How many nodes are we talking about? + // + int nCount; + int i; + TreeType* child; + char buff[64]; + StringType text; + int j; + + // Count the nodes + // + nCount = this->getChildCount(t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. 
+ // + return; + } + + // For each child of the current tree, define a node using the + // memory address of the node to name it + // + for (i = 0; igetChild(t, i); + + // Name the node + // + sprintf(buff, "\tn%p[label=\"", child); + dotSpec->append(buff); + text = this->getText(child); + for (j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec.append("\"]\n"); + + // And now define the children of this child (if any) + // + this->defineDotNodes(child, dotSpec); + } + + // Done + // + return; +} + +template +void CommonTreeAdaptor::defineDotEdges(TreeType* t, const StringType& dotSpec) +{ + // How many nodes are we talking about? + // + int nCount; + if (t == NULL) + { + // No tree, so do nothing + // + return; + } + + // Count the nodes + // + nCount = this->getChildCount(t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. 
+ // + return; + } + + // For each child, define an edge from this parent, then process + // and children of this child in the same way + // + for (int i=0; igetChild(t, i); + + // Create the edge relation + // + sprintf(buff, "\t\tn%p -> n%p\t\t// ", t, child); + + dotSpec.append(buff); + + // Document the relationship + // + text = this->getText(t); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + + dotSpec.append(" -> "); + + text = this->getText(child); + for (std::size_t j = 0; j < text.size(); j++) + { + switch(text[j]) + { + case '"': + dotSpec.append("\\\""); + break; + + case '\n': + dotSpec.append("\\n"); + break; + + case '\r': + dotSpec.append("\\r"); + break; + + default: + dotSpec += text[j]; + break; + } + } + dotSpec.append("\n"); + + // Define edges for this child + // + this->defineDotEdges(child, dotSpec); + } + + // Done + // + return; +} + +template +typename CommonTreeAdaptor::TreeType* CommonTreeAdaptor::rulePostProcessing( TreeType* root) +{ + TreeType* saveRoot; + + // Keep track of the root we are given. If it is a nilNode, then we + // can reuse it rather than orphaning it! + // + saveRoot = root; + + if (root != NULL && root->isNilNode()) + { + if (root->getChildCount() == 0) + { + root = NULL; + } + else if (root->getChildCount() == 1) + { + root = root->getChild(0); + root->setParent(NULL); + root->setChildIndex(-1); + + // The root we were given was a nil node, wiht one child, which means it has + // been abandoned and would be lost in the node factory. 
However + // nodes can be flagged as resuable to prevent this terrible waste + // + saveRoot->reuse(); + } + } + return root; +} + +template +DebugTreeAdaptor::DebugTreeAdaptor( DebuggerType* debugger ) +{ + m_debugger = debugger; +} + +template +void DebugTreeAdaptor::setDebugEventListener( DebuggerType* debugger) +{ + m_debugger = debugger; +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::nilNode() +{ + TreeType* t = this->create(NULL); + m_debugger->createNode(t); + return t; +} + +template +void DebugTreeAdaptor::addChild(TreeType* t, TreeType* child) +{ + if (t != NULL && child != NULL) + { + t->addChild(child); + m_debugger->addChild(t, child); + } +} + +template +void DebugTreeAdaptor::addChildToken(TreeType* t, CommonTokenType* child) +{ + TreeType* tc; + if (t != NULL && child != NULL) + { + tc = this->create(child); + this->addChild(t, tc); + m_debugger->addChild(t, tc); + } +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::becomeRoot( TreeType* newRootTree, TreeType* oldRootTree ) +{ + TreeType* t; + t = this->becomeRoot(newRootTree, oldRootTree); + m_debugger->becomeRoot(newRootTree, oldRootTree); + return t; +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::becomeRootToken(TreeType* newRoot, TreeType* oldRoot) +{ + TreeType* t; + t = this->becomeRoot(this->create(newRoot), oldRoot); + m_debugger->becomeRoot(t, oldRoot); + return t; +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::createTypeToken(ANTLR_UINT32 tokenType, CommonTokenType* fromToken) +{ + TreeType* t; + t = this->createTypeToken(tokenType, fromToken); + m_debugger->createNode(t); + return t; +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::createTypeTokenText(ANTLR_UINT32 tokenType, CommonTokenType* fromToken, ANTLR_UINT8* text) +{ + TreeType* t; + t = this->createTypeTokenText(tokenType, fromToken, text); + m_debugger->createNode(t); + return t; +} + +template +typename 
DebugTreeAdaptor::TreeType* DebugTreeAdaptor::createTypeText( ANTLR_UINT32 tokenType, ANTLR_UINT8* text) +{ + TreeType* t; + t = this->createTypeText(tokenType, text); + m_debugger->createNode(t); + return t; +} + +template +typename DebugTreeAdaptor::TreeType* DebugTreeAdaptor::dupTree( TreeType* tree) +{ + TreeType* t; + + // Call the normal dup tree mechanism first + // + t = this->dupTreeTT(tree, NULL); + + // In order to tell the debugger what we have just done, we now + // simulate the tree building mechanism. THis will fire + // lots of debugging events to the client and look like we + // duped the tree.. + // + this->simulateTreeConstruction( t); + + return t; +} + +template +void DebugTreeAdaptor::simulateTreeConstruction(TreeType* tree) +{ + ANTLR_UINT32 n; + ANTLR_UINT32 i; + TreeType* child; + + // Send the create node event + // + m_debugger->createNode(tree); + + n = this->getChildCount(tree); + for (i = 0; i < n; i++) + { + child = this->getChild(tree, i); + this->simulateTreeConstruction(child); + m_debugger->addChild(tree, child); + } +} + + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3commontreenodestream.hpp b/runtime/Cpp/include/antlr3commontreenodestream.hpp new file mode 100755 index 000000000..962758f56 --- /dev/null +++ b/runtime/Cpp/include/antlr3commontreenodestream.hpp @@ -0,0 +1,317 @@ +/// \file +/// Definition of the ANTLR3 common tree node stream. +/// + +#ifndef _ANTLR_COMMON_TREE_NODE_STREAM__HPP +#define _ANTLR_COMMON_TREE_NODE_STREAM__HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +template +class CommonTreeNodeStream : public ImplTraits::TreeNodeIntStreamType +{ +public: + enum Constants + { + /// Token buffer initial size settings ( will auto increase) + /// + DEFAULT_INITIAL_BUFFER_SIZE = 100 + , INITIAL_CALL_STACK_SIZE = 10 + }; + + typedef typename ImplTraits::TreeType TreeType; + typedef TreeType UnitType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::StringStreamType StringStreamType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + typedef typename ImplTraits::TreeNodeIntStreamType IntStreamType; + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename AllocPolicyType::template VectorType NodesType; + typedef typename AllocPolicyType::template VectorType< TreeWalkState > MarkersType; + typedef typename AllocPolicyType::template StackType< ANTLR_INT32 > NodeStackType; + typedef typename ImplTraits::TreeParserType ComponentType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::TreeNodeIntStreamType BaseType; + +public: + /// Dummy tree node that indicates a descent into a child + /// tree. Initialized by a call to create a new interface. + /// + TreeType m_DOWN; + + /// Dummy tree node that indicates a descent up to a parent + /// tree. Initialized by a call to create a new interface. + /// + TreeType m_UP; + + /// Dummy tree node that indicates the termination point of the + /// tree. Initialized by a call to create a new interface. + /// + TreeType m_EOF_NODE; + + /// Dummy node that is returned if we need to indicate an invalid node + /// for any reason. + /// + TreeType m_INVALID_NODE; + + /// The complete mapping from stream index to tree node. + /// This buffer includes pointers to DOWN, UP, and EOF nodes. + /// It is built upon ctor invocation. The elements are type + /// Object as we don't what the trees look like. 
+ /// + /// Load upon first need of the buffer so we can set token types + /// of interest for reverseIndexing. Slows us down a wee bit to + /// do all of the if p==-1 testing everywhere though, though in C + /// you won't really be able to measure this. + /// + /// Must be freed when the tree node stream is torn down. + /// + NodesType m_nodes; + + /// Which tree are we navigating ? + /// + TreeType* m_root; + + /// Pointer to tree adaptor interface that manipulates/builds + /// the tree. + /// + TreeAdaptorType* m_adaptor; + + /// As we walk down the nodes, we must track parent nodes so we know + /// where to go after walking the last child of a node. When visiting + /// a child, push current node and current index (current index + /// is first stored in the tree node structure to avoid two stacks. + /// + NodeStackType m_nodeStack; + + /// The current index into the nodes vector of the current tree + /// we are parsing and possibly rewriting. + /// + ANTLR_INT32 m_p; + + /// Which node are we currently visiting? + /// + TreeType* m_currentNode; + + /// Which node did we last visit? Used for LT(-1) + /// + TreeType* m_previousNode; + + /// Which child are we currently visiting? If -1 we have not visited + /// this node yet; next consume() request will set currentIndex to 0. + /// + ANTLR_INT32 m_currentChildIndex; + + /// What node index did we just consume? i=0..n-1 for n node trees. + /// IntStream.next is hence 1 + this value. Size will be same. + /// + ANTLR_MARKER m_absoluteNodeIndex; + + /// Buffer tree node stream for use with LT(i). This list grows + /// to fit new lookahead depths, but consume() wraps like a circular + /// buffer. + /// + TreeType** m_lookAhead; + + /// Number of elements available in the lookahead buffer at any point in + /// time. This is the current size of the array. + /// + ANTLR_UINT32 m_lookAheadLength; + + /// lookAhead[head] is the first symbol of lookahead, LT(1). 
+ /// + ANTLR_UINT32 m_head; + + /// Add new lookahead at lookahead[tail]. tail wraps around at the + /// end of the lookahead buffer so tail could be less than head. + /// + ANTLR_UINT32 m_tail; + + /// Calls to mark() may be nested so we have to track a stack of + /// them. The marker is an index into this stack. Index 0 is + /// the first marker. This is a List + /// + MarkersType m_markers; + + /// Indicates whether this node stream was derived from a prior + /// node stream to be used by a rewriting tree parser for instance. + /// If this flag is set to ANTLR_TRUE, then when this stream is + /// closed it will not free the root tree as this tree always + /// belongs to the origniating node stream. + /// + bool m_isRewriter; + + /// If set to ANTLR_TRUE then the navigation nodes UP, DOWN are + /// duplicated rather than reused within the tree. + /// + bool m_uniqueNavigationNodes; + +public: + // INTERFACE + // + CommonTreeNodeStream( ANTLR_UINT32 hint ); + CommonTreeNodeStream( const CommonTreeNodeStream& ctn ); + CommonTreeNodeStream( TreeType* tree, ANTLR_UINT32 hint ); + + void init( ANTLR_UINT32 hint ); + ~CommonTreeNodeStream(); + + /// Get tree node at current input pointer + i ahead where i=1 is next node. + /// i<0 indicates nodes in the past. So LT(-1) is previous node, but + /// implementations are not required to provide results for k < -1. + /// LT(0) is undefined. For i>=n, return null. + /// Return NULL for LT(0) and any index that results in an absolute address + /// that is negative (beyond the start of the list). + /// + /// This is analogous to the LT() method of the TokenStream, but this + /// returns a tree node instead of a token. Makes code gen identical + /// for both parser and tree grammars. :) + /// + TreeType* _LT(ANTLR_INT32 k); + + /// Where is this stream pulling nodes from? This is not the name, but + /// the object that provides node objects. 
+ /// + TreeType* getTreeSource(); + + /// What adaptor can tell me how to interpret/navigate nodes and + /// trees. E.g., get text of a node. + /// + TreeAdaptorType* getTreeAdaptor(); + + /// As we flatten the tree, we use UP, DOWN nodes to represent + /// the tree structure. When debugging we need unique nodes + /// so we have to instantiate new ones. When doing normal tree + /// parsing, it's slow and a waste of memory to create unique + /// navigation nodes. Default should be false; + /// + void set_uniqueNavigationNodes(bool uniqueNavigationNodes); + + StringType toString(); + + /// Return the text of all nodes from start to stop, inclusive. + /// If the stream does not buffer all the nodes then it can still + /// walk recursively from start until stop. You can always return + /// null or "" too, but users should not access $ruleLabel.text in + /// an action of course in that case. + /// + StringType toStringSS(TreeType* start, TreeType* stop); + + /// Return the text of all nodes from start to stop, inclusive, into the + /// supplied buffer. + /// If the stream does not buffer all the nodes then it can still + /// walk recursively from start until stop. You can always return + /// null or "" too, but users should not access $ruleLabel.text in + /// an action of course in that case. + /// + void toStringWork(TreeType* start, TreeType* stop, StringType& buf); + + /// Get a tree node at an absolute index i; 0..n-1. + /// If you don't want to buffer up nodes, then this method makes no + /// sense for you. + /// + TreeType* get(ANTLR_INT32 i); + + // REWRITING TREES (used by tree parser) + + /// Replace from start to stop child index of parent with t, which might + /// be a list. Number of children may be different + /// after this call. The stream is notified because it is walking the + /// tree and might need to know you are monkeying with the underlying + /// tree. Also, it might be able to modify the node stream to avoid + /// restreaming for future phases. 
+ /// + /// If parent is null, don't do anything; must be at root of overall tree. + /// Can't replace whatever points to the parent externally. Do nothing. + /// + void replaceChildren(TreeType* parent, ANTLR_INT32 startChildIndex, + ANTLR_INT32 stopChildIndex, TreeType* t); + + TreeType* LB(ANTLR_INT32 k); + + /// As we flatten the tree, we use UP, DOWN nodes to represent + /// the tree structure. When debugging we need unique nodes + /// so instantiate new ones when uniqueNavigationNodes is true. + /// + void addNavigationNode(ANTLR_UINT32 ttype); + + TreeType* newDownNode(); + + TreeType* newUpNode(); + + bool hasUniqueNavigationNodes() const; + + ANTLR_UINT32 getLookaheadSize(); + + void push(ANTLR_INT32 index); + + ANTLR_INT32 pop(); + + void reset(); + + void fillBufferRoot(); + void fillBuffer(TreeType* t); + +}; + +/** This structure is used to save the state information in the treenodestream + * when walking ahead with cyclic DFA or for syntactic predicates, + * we need to record the state of the tree node stream. This + * class wraps up the current state of the CommonTreeNodeStream. + * Calling mark() will push another of these on the markers stack. 
+ */ +template +class TreeWalkState : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::TreeType TreeType; + +private: + ANTLR_UINT32 m_currentChildIndex; + ANTLR_MARKER m_absoluteNodeIndex; + TreeType* m_currentNode; + TreeType* m_previousNode; + ANTLR_UINT32 m_nodeStackSize; + TreeType* m_lookAhead; + ANTLR_UINT32 m_lookAheadLength; + ANTLR_UINT32 m_tail; + ANTLR_UINT32 m_head; + + +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3commontreenodestream.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3commontreenodestream.inl b/runtime/Cpp/include/antlr3commontreenodestream.inl new file mode 100755 index 000000000..4dce47cfb --- /dev/null +++ b/runtime/Cpp/include/antlr3commontreenodestream.inl @@ -0,0 +1,422 @@ +ANTLR_BEGIN_NAMESPACE() + +template +CommonTreeNodeStream::CommonTreeNodeStream(ANTLR_UINT32 hint) +{ + this->init(hint); +} + +template +void CommonTreeNodeStream::init( ANTLR_UINT32 hint ) +{ + m_root = NULL; + m_adaptor = new TreeAdaptorType; + // Create the node list map + // + if (hint == 0) + hint = DEFAULT_INITIAL_BUFFER_SIZE; + m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); + + m_p = -1; + m_currentNode = NULL; + m_previousNode = NULL; + m_currentChildIndex = 0; + m_absoluteNodeIndex = 0; + m_lookAhead = NULL; + m_lookAheadLength = 0; + m_head = 0; + m_tail = 0; + m_uniqueNavigationNodes = false; + m_isRewriter = false; + + CommonTokenType* token = new CommonTokenType(CommonTokenType::TOKEN_UP); + token->set_tokText( "UP" ); + m_UP.set_token( token ); + + token = new CommonTokenType(CommonTokenType::TOKEN_DOWN); + token->set_tokText( "DOWN" ); + m_DOWN.set_token( token ); + + token = new CommonTokenType(CommonTokenType::TOKEN_EOF); + token->set_tokText( "EOF" ); + m_EOF_NODE.set_token( token ); + + token = new CommonTokenType(CommonTokenType::TOKEN_INVALID); + token->set_tokText( "INVALID" ); + m_EOF_NODE.set_token( token ); +} + +template +CommonTreeNodeStream::CommonTreeNodeStream( const CommonTreeNodeStream& ctn ) 
+{ + m_root = ctn.m_root; + m_adaptor = ctn.m_adaptor; + m_nodes.reserve( DEFAULT_INITIAL_BUFFER_SIZE ); + m_nodeStack = ctn.m_nodeStack; + m_p = -1; + m_currentNode = NULL; + m_previousNode = NULL; + m_currentChildIndex = 0; + m_absoluteNodeIndex = 0; + m_lookAhead = NULL; + m_lookAheadLength = 0; + m_head = 0; + m_tail = 0; + m_uniqueNavigationNodes = false; + m_isRewriter = true; + + m_UP.set_token( ctn.m_UP.get_token() ); + m_DOWN.set_token( ctn.m_DOWN.get_token() ); + m_EOF_NODE.set_token( ctn.m_EOF_NODE.get_token() ); + m_INVALID_NODE.set_token( ctn.m_INVALID_NODE.get_token() ); +} + +template +CommonTreeNodeStream::CommonTreeNodeStream( TreeType* tree, ANTLR_UINT32 hint ) +{ + this->init(hint); + m_root = tree; +} + +template +CommonTreeNodeStream::~CommonTreeNodeStream() +{ + // If this is a rewrting stream, then certain resources + // belong to the originating node stream and we do not + // free them here. + // + if ( m_isRewriter != true) + { + delete m_adaptor; + + m_nodeStack.clear(); + + delete m_INVALID_NODE.get_token(); + delete m_EOF_NODE.get_token(); + delete m_DOWN.get_token(); + delete m_UP.get_token(); + } + + m_nodes.clear(); +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::_LT(ANTLR_INT32 k) +{ + if ( m_p == -1) + { + this->fillBufferRoot(); + } + + if (k < 0) + { + return this->LB(-k); + } + else if (k == 0) + { + return &(m_INVALID_NODE); + } + + // k was a legitimate request, + // + if (( m_p + k - 1) >= (ANTLR_INT32)(m_nodes.size())) + { + return &(m_EOF_NODE); + } + + return m_nodes[ m_p + k - 1 ]; +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::getTreeSource() +{ + return m_root; +} + +template +typename CommonTreeNodeStream::TreeAdaptorType* CommonTreeNodeStream::getTreeAdaptor() +{ + return m_adaptor; +} + +template +void CommonTreeNodeStream::set_uniqueNavigationNodes(bool uniqueNavigationNodes) +{ + m_uniqueNavigationNodes = uniqueNavigationNodes; +} + +template +typename 
CommonTreeNodeStream::StringType CommonTreeNodeStream::toString() +{ + return this->toStringSS(m_root, NULL); +} + +template +typename CommonTreeNodeStream::StringType CommonTreeNodeStream::toStringSS(TreeType* start, TreeType* stop) +{ + StringType buf; + this->toStringWork(start, stop, buf); + return buf; +} + +template +void CommonTreeNodeStream::toStringWork(TreeType* start, TreeType* stop, StringType& str) +{ + ANTLR_UINT32 n; + ANTLR_UINT32 c; + StringStreamType buf; + + if (!start->isNilNode() ) + { + StringType text; + + text = start->toString(); + + if (text.empty()) + { + buf << ' '; + buf << start->getType(); + } + else + buf << text; + } + + if (start == stop) + { + return; /* Finished */ + } + + n = start->getChildCount(); + + if (n > 0 && ! start->isNilNode() ) + { + buf << ' '; + buf << CommonTokenType::TOKEN_DOWN; + } + + for (c = 0; cgetChild(c); + this->toStringWork(child, stop, buf); + } + + if (n > 0 && ! start->isNilNode() ) + { + buf << ' '; + buf << CommonTokenType::TOKEN_UP; + } + str = buf.str(); +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::get(ANTLR_INT32 k) +{ + if( m_p == -1 ) + { + this->fillBufferRoot(); + } + + return m_nodes[k]; +} + +template +void CommonTreeNodeStream::replaceChildren(TreeType* parent, + ANTLR_INT32 startChildIndex, + ANTLR_INT32 stopChildIndex, + TreeType* t) +{ + if (parent != NULL) + { + TreeAdaptorType* adaptor; + adaptor = this->getTreeAdaptor(); + adaptor->replaceChildren(parent, startChildIndex, stopChildIndex, t); + } +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::LB(ANTLR_INT32 k) +{ + if ( k==0) + { + return &(m_INVALID_NODE); + } + + if ( (m_p - k) < 0) + { + return &(m_INVALID_NODE); + } + + return m_nodes[ m_p - k ]; +} + +template +void CommonTreeNodeStream::addNavigationNode(ANTLR_UINT32 ttype) +{ + TreeType* node; + + node = NULL; + + if (ttype == CommonTokenType::TOKEN_DOWN) + { + if (this->hasUniqueNavigationNodes() == true) + { + 
node = this->newDownNode(); + } + else + { + node = &m_DOWN; + } + } + else + { + if (this->hasUniqueNavigationNodes() == true) + { + node = this->newUpNode(); + } + else + { + node = &m_UP; + } + } + + // Now add the node we decided upon. + // + m_nodes.push_back(node); +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::newDownNode() +{ + TreeType* dNode; + CommonTokenType* token; + + token = new CommonTokenType(CommonTokenType::TOKEN_DOWN); + token->set_tokText("DOWN"); + dNode = new TreeType(token); + return &dNode; +} + +template +typename CommonTreeNodeStream::TreeType* CommonTreeNodeStream::newUpNode() +{ + TreeType* uNode; + CommonTokenType* token; + + token = new CommonTokenType(CommonTokenType::TOKEN_UP); + token->set_tokText("UP"); + uNode = new TreeType(token); + return &uNode; + +} + +template +bool CommonTreeNodeStream::hasUniqueNavigationNodes() const +{ + return m_uniqueNavigationNodes; +} + +template +ANTLR_UINT32 CommonTreeNodeStream::getLookaheadSize() +{ + return m_tail < m_head + ? 
(m_lookAheadLength - m_head + m_tail) + : (m_tail - m_head); +} + +template +void CommonTreeNodeStream::push(ANTLR_INT32 index) +{ + m_nodeStack.push(m_p); // Save current index + this->seek(index); +} + +template +ANTLR_INT32 CommonTreeNodeStream::pop() +{ + ANTLR_INT32 retVal; + + retVal = m_nodeStack.top(); + m_nodeStack.pop(); + this->seek(retVal); + return retVal; +} + +template +void CommonTreeNodeStream::reset() +{ + if ( m_p != -1) + { + m_p = 0; + } + BaseType::m_lastMarker = 0; + + + // Free and reset the node stack only if this is not + // a rewriter, which is going to reuse the originating + // node streams node stack + // + if (m_isRewriter != true) + m_nodeStack.clear(); +} + +template +void CommonTreeNodeStream::fillBufferRoot() +{ + // Call the generic buffer routine with the root as the + // argument + // + this->fillBuffer(m_root); + m_p = 0; // Indicate we are at buffer start +} + +template +void CommonTreeNodeStream::fillBuffer(TreeType* t) +{ + bool nilNode; + ANTLR_UINT32 nCount; + ANTLR_UINT32 c; + + nilNode = m_adaptor->isNilNode(t); + + // If the supplied node is not a nil (list) node then we + // add in the node itself to the vector + // + if (nilNode == false) + { + m_nodes.push_back(t); + } + + // Only add a DOWN node if the tree is not a nil tree and + // the tree does have children. 
+ // + nCount = t->getChildCount(); + + if (nilNode == false && nCount>0) + { + this->addNavigationNode( CommonTokenType::TOKEN_DOWN); + } + + // We always add any children the tree contains, which is + // a recursive call to this function, which will cause similar + // recursion and implement a depth first addition + // + for (c = 0; c < nCount; c++) + { + this->fillBuffer( m_adaptor->getChild(t, c)); + } + + // If the tree had children and was not a nil (list) node, then we + // we need to add an UP node here to match the DOWN node + // + if (nilNode == false && nCount > 0) + { + this->addNavigationNode(CommonTokenType::TOKEN_UP); + } +} + + + +ANTLR_END_NAMESPACE() + diff --git a/runtime/Cpp/include/antlr3convertutf.hpp b/runtime/Cpp/include/antlr3convertutf.hpp new file mode 100755 index 000000000..8085c2977 --- /dev/null +++ b/runtime/Cpp/include/antlr3convertutf.hpp @@ -0,0 +1,143 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. 

    Several functions are included here, forming a complete set of
    conversions between the three formats.  UTF-7 is not included
    here, but is handled in a separate source file.

    Each of these routines takes pointers to input buffers and output
    buffers.  The input buffers are const.

    Each routine converts the text between *sourceStart and sourceEnd,
    putting the result into the buffer between *targetStart and
    targetEnd. Note: the end pointers are *after* the last item: e.g.
    *(sourceEnd - 1) is the last item.

    The return result indicates whether the conversion was successful,
    and if not, whether the problem was in the source or target buffers.
    (Only the first encountered problem is indicated.)

    After the conversion, *sourceStart and *targetStart are both
    updated to point to the end of last text successfully converted in
    the respective buffers.

    Input parameters:
	sourceStart - pointer to a pointer to the source buffer.
		The contents of this are modified on return so that
		it points at the next thing to be converted.
	targetStart - similarly, pointer to pointer to the target buffer.
	sourceEnd, targetEnd - respectively pointers to the ends of the
		two buffers, for overflow checking only.

    These conversion functions take a ConversionFlags argument. When this
    flag is set to strict, both irregular sequences and isolated surrogates
    will cause an error.  When the flag is set to lenient, both irregular
    sequences and isolated surrogates are converted.

    Whether the flag is strict or lenient, all illegal sequences will cause
    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
    must check for illegal sequences.

    When the flag is set to lenient, characters over 0x10FFFF are converted
    to the replacement character; otherwise (when the flag is set to strict)
    they constitute an error.
+ + Output parameters: + The value "sourceIllegal" is returned from some routines if the input + sequence is malformed. When "sourceIllegal" is returned, the source + value will point to the illegal value that caused the problem. E.g., + in UTF-8 when a sequence is malformed, it points to the start of the + malformed sequence. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + + +// Changes for ANTLR3 - Jim Idle, January 2008. +// builtin types defined for Unicode types changed to +// aliases for the types that are system determined by +// ANTLR at compile time. 
+// +// typedef unsigned long UTF32; /* at least 32 bits */ +// typedef unsigned short UTF16; /* at least 16 bits */ +// typedef unsigned char UTF8; /* typically 8 bits */ +// typedef unsigned char Boolean; /* 0 or 1 */ + +#ifndef _ANTLR3_CONVERTUTF_H +#define _ANTLR3_CONVERTUTF_H + +ANTLR_BEGIN_NAMESPACE() + +typedef ANTLR_UINT32 UTF32; /* at least 32 bits */ +typedef ANTLR_UINT16 UTF16; /* at least 16 bits */ +typedef ANTLR_UINT8 UTF8; /* typically 8 bits */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define halfShift ((UTF32)10) +#define halfBase ((UTF32)0x0010000UL) +#define halfMask ((UTF32)0x3FFUL) + +enum ConversionResult { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. 
room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +}; + +enum ConversionFlags { + strictConversion = 0, + lenientConversion +} ; + + + +ANTLR_END_NAMESPACE() + +#endif + +/* --------------------------------------------------------------------- */ diff --git a/runtime/Cpp/include/antlr3cyclicdfa.hpp b/runtime/Cpp/include/antlr3cyclicdfa.hpp new file mode 100755 index 000000000..a0d66b925 --- /dev/null +++ b/runtime/Cpp/include/antlr3cyclicdfa.hpp @@ -0,0 +1,108 @@ +/// Definition of a cyclic dfa structure such that it can be +/// initialized at compile time and have only a single +/// runtime function that can deal with all cyclic dfa +/// structures and show Java how it is done ;-) +/// +#ifndef ANTLR3_CYCLICDFA_HPP +#define ANTLR3_CYCLICDFA_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +#ifdef ANTLR3_WINDOWS +#pragma warning (push) +#pragma warning (disable : 4510) +#pragma warning (disable : 4512) +#pragma warning (disable : 4610) +#endif + +ANTLR_BEGIN_NAMESPACE() + +template +class CyclicDFA : public ImplTraits::AllocPolicyType +{ +public: + typedef typename CtxType::StreamType StreamType; + typedef typename CtxType::ExceptionBaseType ExceptionBaseType; + typedef typename ImplTraits::template RecognizerType RecognizerType; + typedef typename StreamType::IntStreamType IntStreamType; + typedef typename StreamType::TokenType TokenType; + typedef TokenType CommonTokenType; + typedef CtxType ContextType; + +private: + /// Decision number that a particular static structure + /// represents. 
+ /// + const ANTLR_INT32 m_decisionNumber; + + /// What this decision represents + /// + const ANTLR_UCHAR* m_description; + const ANTLR_INT32* const m_eot; + const ANTLR_INT32* const m_eof; + const ANTLR_INT32* const m_min; + const ANTLR_INT32* const m_max; + const ANTLR_INT32* const m_accept; + const ANTLR_INT32* const m_special; + const ANTLR_INT32* const *const m_transition; + +public: + CyclicDFA( ANTLR_INT32 decisionNumber + , const ANTLR_UCHAR* description + , const ANTLR_INT32* const eot + , const ANTLR_INT32* const eof + , const ANTLR_INT32* const min + , const ANTLR_INT32* const max + , const ANTLR_INT32* const accept + , const ANTLR_INT32* const special + , const ANTLR_INT32* const *const transition ); + CyclicDFA( const CyclicDFA& cdfa ); + CyclicDFA& operator=( const CyclicDFA& dfa); + + ANTLR_INT32 specialStateTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); + ANTLR_INT32 specialTransition(CtxType * ctx, RecognizerType* recognizer, IntStreamType* is, ANTLR_INT32 s); + + template + ANTLR_INT32 predict(CtxType* ctx, RecognizerType* recognizer, IntStreamType* is, SuperType& super); + +private: + void noViableAlt(RecognizerType* rec, ANTLR_UINT32 s); +}; + +ANTLR_END_NAMESPACE() + +#ifdef ANTLR3_WINDOWS +#pragma warning (pop) +#endif + +#include "antlr3cyclicdfa.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3cyclicdfa.inl b/runtime/Cpp/include/antlr3cyclicdfa.inl new file mode 100755 index 000000000..b7b526f6d --- /dev/null +++ b/runtime/Cpp/include/antlr3cyclicdfa.inl @@ -0,0 +1,204 @@ +ANTLR_BEGIN_NAMESPACE() + +template +CyclicDFA::CyclicDFA( ANTLR_INT32 decisionNumber + , const ANTLR_UCHAR* description + , const ANTLR_INT32* const eot + , const ANTLR_INT32* const eof + , const ANTLR_INT32* const min + , const ANTLR_INT32* const max + , const ANTLR_INT32* const accept + , const ANTLR_INT32* const special + , const ANTLR_INT32* const *const transition ) + :m_decisionNumber(decisionNumber) + , m_eot(eot) + 
, m_eof(eof) + , m_min(min) + , m_max(max) + , m_accept(accept) + , m_special(special) + , m_transition(transition) +{ + m_description = description; +} + +template +CyclicDFA::CyclicDFA( const CyclicDFA& dfa ) +{ + m_decisionNumber = dfa.m_decisionNumber; + m_description = dfa.m_description; + m_eot = dfa.m_eot; + m_eof = dfa.m_eof; + m_min = dfa.m_min; + m_max = dfa.m_max; + m_accept = dfa.m_accept; + m_special = dfa.m_special; + m_transition = dfa.m_transition; +} + +template +CyclicDFA& CyclicDFA::operator=( const CyclicDFA& dfa) +{ + m_decisionNumber = dfa.m_decisionNumber; + m_description = dfa.m_description; + m_eot = dfa.m_eot; + m_eof = dfa.m_eof; + m_min = dfa.m_min; + m_max = dfa.m_max; + m_accept = dfa.m_accept; + m_special = dfa.m_special; + m_transition = dfa.m_transition; + return *this; +} + +template +ANTLR_INT32 CyclicDFA::specialStateTransition(CtxType * , + RecognizerType* , + IntStreamType* , ANTLR_INT32 ) +{ + return -1; +} + +template +ANTLR_INT32 CyclicDFA::specialTransition(CtxType * ctx, + RecognizerType* recognizer, + IntStreamType* is, ANTLR_INT32 s) +{ + return 0; +} + +template + template +ANTLR_INT32 CyclicDFA::predict(CtxType * ctx, + RecognizerType* recognizer, + IntStreamType* is, SuperType& super) +{ + ANTLR_MARKER mark; + ANTLR_INT32 s; + ANTLR_INT32 specialState; + ANTLR_INT32 c; + + mark = is->mark(); /* Store where we are right now */ + s = 0; /* Always start with state 0 */ + + for (;;) + { + /* Pick out any special state entry for this state + */ + specialState = m_special[s]; + + /* Transition the special state and consume an input token + */ + if (specialState >= 0) + { + s = super.specialStateTransition(ctx, recognizer, is, specialState); + + // Error? + // + if (s<0) + { + // If the predicate/rule raised an exception then we leave it + // in tact, else we have an NVA. 
+ // + if (recognizer->get_state()->get_error() != true) + { + this->noViableAlt(recognizer, s); + } + is->rewind(mark); + return 0; + } + is->consume(); + continue; + } + + /* Accept state? + */ + if (m_accept[s] >= 1) + { + is->rewind(mark); + return m_accept[s]; + } + + /* Look for a normal transition state based upon the input token element + */ + c = is->_LA(1); + + /* Check against min and max for this state + */ + if (c>= m_min[s] && c <= m_max[s]) + { + ANTLR_INT32 snext; + + /* What is the next state? + */ + snext = m_transition[s][c - m_min[s]]; + + if (snext < 0) + { + /* Was in range but not a normal transition + * must check EOT, which is like the else clause. + * eot[s]>=0 indicates that an EOT edge goes to another + * state. + */ + if ( m_eot[s] >= 0) + { + s = m_eot[s]; + is->consume(); + continue; + } + this->noViableAlt(recognizer, s); + is->rewind(mark); + return 0; + } + + /* New current state - move to it + */ + s = snext; + is->consume(); + continue; + } + /* EOT Transition? + */ + if ( m_eot[s] >= 0) + { + s = m_eot[s]; + is->consume(); + continue; + } + /* EOF transition to accept state? + */ + if ( c == ImplTraits::CommonTokenType::TOKEN_EOF && m_eof[s] >= 0) + { + is->rewind(mark); + return m_accept[m_eof[s]]; + } + + /* No alt, so bomb + */ + this->noViableAlt(recognizer, s); + is->rewind(mark); + return 0; + } +} + +template +void CyclicDFA::noViableAlt(RecognizerType* rec, ANTLR_UINT32 s) +{ + // In backtracking mode, we just set the failed flag so that the + // alt can just exit right now. If we are parsing though, then + // we want the exception to be raised. 
+ // + if (rec->get_state()->get_backtracking() > 0) + { + rec->get_state()->set_failed(true); + } + else + { + ANTLR_Exception* ex + = new ANTLR_Exception( rec, (const char*)m_description ); + ex->set_decisionNum( m_decisionNumber ); + ex->set_state(s); + } +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3debugeventlistener.hpp b/runtime/Cpp/include/antlr3debugeventlistener.hpp new file mode 100755 index 000000000..21fcf59a9 --- /dev/null +++ b/runtime/Cpp/include/antlr3debugeventlistener.hpp @@ -0,0 +1,400 @@ +/** + * \file + * The definition of all debugging events that a recognizer can trigger. + * + * \remark + * From the java implementation by Terence Parr... + * I did not create a separate AST debugging interface as it would create + * lots of extra classes and DebugParser has a dbg var defined, which makes + * it hard to change to ASTDebugEventListener. I looked hard at this issue + * and it is easier to understand as one monolithic event interface for all + * possible events. Hopefully, adding ST debugging stuff won't be bad. Leave + * for future. 4/26/2006. + */ + +#ifndef ANTLR3_DEBUG_EVENT_LISTENER_HPP +#define ANTLR3_DEBUG_EVENT_LISTENER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "antlr3defs.hpp"
+
+ANTLR_BEGIN_NAMESPACE()
+
+/// Default debugging port
+/// (no trailing semicolon, so the macro can be used inside any expression)
+#define DEFAULT_DEBUGGER_PORT 0xBFCC
+
+/** The ANTLR3 debugging interface for communicating with ANTLRWorks. Function comments
+ * mostly taken from the Java version.
+ */
+
+template
+class DebugEventListener : public ImplTraits::AllocPolicyType
+{
+public:
+    typedef typename ImplTraits::TreeType TreeType;
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename ImplTraits::CommonTokenType CommonTokenType;
+    typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
+
+private:
+    /// The port number which the debug listener should listen on for a connection
+    ///
+    ANTLR_UINT32 m_port;
+
+    /// The socket structure we receive after a successful accept on the serverSocket
+    ///
+    SOCKET m_socket;
+
+    /** The version of the debugging protocol supported by the providing
+     * instance of the debug event listener.
+     */
+    int m_PROTOCOL_VERSION;
+
+    /// The name of the grammar file that we are debugging
+    ///
+    StringType m_grammarFileName;
+
+    /// Indicates whether we have already connected or not
+    ///
+    bool m_initialized;
+
+    /// Used to serialize the values of any particular token we need to
+    /// send back to the debugger.
+    ///
+    StringType m_tokenString;
+
+
+    /// Allows the debug event system to access the adapter in use
+    /// by the recognizer, if this is a tree parser of some sort.
+    ///
+    TreeAdaptorType* m_adaptor;
+
+
+public:
+    /// Wait for a connection from the debugger and initiate the
+    /// debugging session.
+    ///
+    virtual bool handshake();
+
+    /** The parser has just entered a rule. No decision has been made about
+     * which alt is predicted. This is fired AFTER init actions have been
+     * executed. Attributes are defined and available etc...
+     */
+    virtual void enterRule( const char * grammarFileName, const char * ruleName);
+
+    /** Because rules can have lots of alternatives, it is very useful to
+     * know which alt you are entering. This is 1..n for n alts.
+     */
+    virtual void enterAlt( int alt);
+
+    /** This is the last thing executed before leaving a rule. It is
+     * executed even if an exception is thrown. This is triggered after
+     * error reporting and recovery have occurred (unless the exception is
+     * not caught in this rule). This implies an "exitAlt" event.
+     */
+    virtual void exitRule( const char * grammarFileName, const char * ruleName);
+
+    /** Track entry into any (...) subrule or other EBNF construct
+     */
+    virtual void enterSubRule( int decisionNumber);
+
+    virtual void exitSubRule( int decisionNumber);
+
+    /** Every decision, fixed k or arbitrary, has an enter/exit event
+     * so that a GUI can easily track what LT/consume events are
+     * associated with prediction. You will see a single enter/exit
+     * subrule but multiple enter/exit decision events, one for each
+     * loop iteration.
+     */
+    virtual void enterDecision( int decisionNumber);
+
+    virtual void exitDecision( int decisionNumber);
+
+    /** An input token was consumed; matched by any kind of element.
+     * Trigger after the token was matched by things like match(), matchAny().
+     */
+    virtual void consumeToken( CommonTokenType* t);
+
+    /** An off-channel input token was consumed.
+     * Trigger after the token was matched by things like match(), matchAny().
+     * (unless of course the hidden token is first stuff in the input stream).
+     */
+    virtual void consumeHiddenToken( CommonTokenType* t);
+
+    /** Somebody (anybody) looked ahead. Note that this actually gets
+     * triggered by both LA and LT calls. The debugger will want to know
+     * which Token object was examined. Like consumeToken, this indicates
+     * what token was seen at that depth. A remote debugger cannot look
+     * ahead into a file it doesn't have so LT events must pass the token
+     * even if the info is redundant.
+     */
+    virtual void LT( int i, CommonTokenType* t);
+
+    /** The parser is going to look arbitrarily ahead; mark this location,
+     * the token stream's marker is sent in case you need it.
+     */
+    virtual void mark( ANTLR_MARKER marker);
+
+    /** After an arbitrarily long lookahead as with a cyclic DFA (or with
+     * any backtrack), this informs the debugger that stream should be
+     * rewound to the position associated with marker.
+     */
+    virtual void rewind( ANTLR_MARKER marker);
+
+    /** Rewind to the input position of the last marker.
+     * Used currently only after a cyclic DFA and just
+     * before starting a sem/syn predicate to get the
+     * input position back to the start of the decision.
+     * Do not "pop" the marker off the state. mark(i)
+     * and rewind(i) should balance still.
+     */
+    virtual void rewindLast();
+
+    virtual void beginBacktrack( int level);
+
+    virtual void endBacktrack( int level, bool successful);
+
+    /** To watch a parser move through the grammar, the parser needs to
+     * inform the debugger what line/charPos it is passing in the grammar.
+     * For now, this does not know how to switch from one grammar to the
+     * other and back for island grammars etc...
+     *
+     * This should also allow breakpoints because the debugger can stop
+     * the parser whenever it hits this line/pos.
+     */
+    virtual void location( int line, int pos);
+
+    /** A recognition exception occurred such as NoViableAltException. I made
+     * this a generic event so that I can alter the exception hierarchy later
+     * without having to alter all the debug objects.
+     *
+     * Upon error, the stack of enter rule/subrule must be properly unwound.
+     * If no viable alt occurs it is within an enter/exit decision, which
+     * also must be rewound. Even the rewind for each mark must be unwound.
+     * In the Java target this is pretty easy using try/finally, if a bit
+     * ugly in the generated code. The rewind is generated in DFA.predict()
+     * actually so no code needs to be generated for that. For languages
+     * w/o this "finally" feature (C++?), the target implementor will have
+     * to build an event stack or something.
+     *
+     * Across a socket for remote debugging, only the RecognitionException
+     * data fields are transmitted. The token object or whatever that
+     * caused the problem was the last object referenced by LT. The
+     * immediately preceding LT event should hold the unexpected Token or
+     * char.
+     *
+     * Here is a sample event trace for grammar:
+     *
+     * b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
+     * | D
+     * ;
+     *
+     * The sequence for this rule (with no viable alt in the subrule) for
+     * input 'c c' (there are 3 tokens) is:
+     *
+     * commence
+     * LT(1)
+     * enterRule b
+     * location 7 1
+     * enter decision 3
+     * LT(1)
+     * exit decision 3
+     * enterAlt1
+     * location 7 5
+     * LT(1)
+     * consumeToken [c/<4>,1:0]
+     * location 7 7
+     * enterSubRule 2
+     * enter decision 2
+     * LT(1)
+     * LT(1)
+     * recognitionException NoViableAltException 2 1 2
+     * exit decision 2
+     * exitSubRule 2
+     * beginResync
+     * LT(1)
+     * consumeToken [c/<4>,1:1]
+     * LT(1)
+     * endResync
+     * LT(-1)
+     * exitRule b
+     * terminate
+     */
+    template
+    void recognitionException( ExceptionBaseType* ) {}
+
+    /** Indicates the recognizer is about to consume tokens to resynchronize
+     * the parser. Any consume events from here until the recovered event
+     * are not part of the parse--they are dead tokens.
+     */
+    virtual void beginResync();
+
+    /** Indicates that the recognizer has finished consuming tokens in order
+     * to resynchronize. There may be multiple beginResync/endResync pairs
+     * before the recognizer comes out of errorRecovery mode (in which
+     * multiple errors are suppressed). This will be useful
+     * in a gui where you want to probably grey out tokens that are consumed
+     * but not matched to anything in grammar. Anything between
+     * a beginResync/endResync pair was tossed out by the parser.
+     */
+    virtual void endResync();
+
+    /** A semantic predicate was evaluated with this result and action text
+     */
+    virtual void semanticPredicate( bool result, const char * predicate);
+
+    /** Announce that parsing has begun. Not technically useful except for
+     * sending events over a socket. A GUI for example will launch a thread
+     * to connect and communicate with a remote parser. The thread will want
+     * to notify the GUI when a connection is made. ANTLR parsers
+     * trigger this upon entry to the first rule (the ruleLevel is used to
+     * figure this out).
+     */
+    virtual void commence();
+
+    /** Parsing is over; successfully or not. Mostly useful for telling
+     * remote debugging listeners that it's time to quit. When the rule
+     * invocation level goes to zero at the end of a rule, we are done
+     * parsing.
+     */
+    virtual void terminate();
+
+    /// Retrieve acknowledge response from the debugger. in fact this
+    /// response is never used at the moment. So we just read whatever
+    /// is in the socket buffer and throw it away.
+    ///
+    virtual void ack();
+
+    // T r e e P a r s i n g
+
+    /** Input for a tree parser is an AST, but we know nothing for sure
+     * about a node except its type and text (obtained from the adaptor).
+     * This is the analog of the consumeToken method. The ID is usually
+     * the memory address of the node.
+     * If the type is UP or DOWN, then
+     * the ID is not really meaningful as it's fixed--there is
+     * just one UP node and one DOWN navigation node.
+     *
+     * Note that unlike the Java version, the node type of the C parsers
+     * is always fixed as pANTLR3_BASE_TREE because all such structures
+     * contain a super pointer to their parent, which is generally COMMON_TREE and within
+     * that there is a super pointer that can point to a user type that encapsulates it.
+     * Almost akin to saying that it is an interface pointer except we don't need to
+     * know what the interface is in full, just those bits that are the base.
+     * @param t
+     */
+    virtual void consumeNode( TreeType* t);
+
+    /** The tree parser looked ahead. If the type is UP or DOWN,
+     * then the ID is not really meaningful as it's fixed--there is
+     * just one UP node and one DOWN navigation node.
+     */
+    virtual void LTT( int i, TreeType* t);
+
+
+    // A S T E v e n t s
+
+    /** A nil was created (even nil nodes have a unique ID...
+     * they are not "null" per se). As of 4/28/2006, this
+     * seems to be uniquely triggered when starting a new subtree
+     * such as when entering a subrule in automatic mode and when
+     * building a tree in rewrite mode.
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only t.ID is set.
+     */
+    virtual void nilNode( TreeType* t);
+
+    /** If a syntax error occurs, recognizers bracket the error
+     * with an error node if they are building ASTs. This event
+     * notifies the listener that this is the case
+     */
+    virtual void errorNode( TreeType* t);
+
+    /** Announce a new node built from token elements such as type etc...
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only t.ID, type, text are
+     * set.
+     */
+    virtual void createNode( TreeType* t);
+
+    /** Announce a new node built from an existing token.
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only node.ID and token.tokenIndex
+     * are set.
+     */
+    virtual void createNodeTok( TreeType* node, CommonTokenType* token);
+
+    /** Make a node the new root of an existing root. See
+     *
+     * Note: the newRootID parameter is possibly different
+     * than the TreeAdaptor.becomeRoot() newRoot parameter.
+     * In our case, it will always be the result of calling
+     * TreeAdaptor.becomeRoot() and not root_n or whatever.
+     *
+     * The listener should assume that this event occurs
+     * only when the current subrule (or rule) subtree is
+     * being reset to newRootID.
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only IDs are set.
+     *
+     * @see org.antlr.runtime.tree.TreeAdaptor.becomeRoot()
+     */
+    virtual void becomeRoot( TreeType* newRoot, TreeType* oldRoot);
+
+    /** Make childID a child of rootID.
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only IDs are set.
+     *
+     * @see org.antlr.runtime.tree.TreeAdaptor.addChild()
+     */
+    virtual void addChild( TreeType* root, TreeType* child);
+
+    /** Set the token start/stop token index for a subtree root or node.
+     *
+     * If you are receiving this event over a socket via
+     * RemoteDebugEventSocketListener then only t.ID is set.
+     */
+    virtual void setTokenBoundaries( TreeType* t, ANTLR_MARKER tokenStartIndex, ANTLR_MARKER tokenStopIndex);
+
+    /// Free up the resources allocated to this structure
+    ///
+    virtual ~DebugEventListener();
+};
+
+ANTLR_END_NAMESPACE()
+
+#endif
+
diff --git a/runtime/Cpp/include/antlr3defs.hpp b/runtime/Cpp/include/antlr3defs.hpp
new file mode 100755
index 000000000..b27db5f87
--- /dev/null
+++ b/runtime/Cpp/include/antlr3defs.hpp
@@ -0,0 +1,321 @@
+/** \file
+ * Basic type and constant definitions for ANTLR3 Runtime.
+ */
+#ifndef _ANTLR3DEFS_HPP
+#define _ANTLR3DEFS_HPP
+
+// [The "BSD licence"]
+// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
+
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. The name of the author may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Following are for generated code, they are not referenced internally!!!
+ *
+ * One of ANTLR_HUGE / ANTLR_AVERAGE / ANTLR_SMALL selects the defaults for
+ * ANTLR_SIZE_HINT and ANTLR_LIST_SIZE_HINT; ANTLR_AVERAGE is assumed when
+ * none is defined. Each hint is guarded on its own so that a value defined
+ * before this header is included is kept, whichever profile is selected.
+ */
+#if !defined(ANTLR_HUGE) && !defined(ANTLR_AVERAGE) && !defined(ANTLR_SMALL)
+#define ANTLR_AVERAGE
+#endif
+
+#ifdef ANTLR_HUGE
+#ifndef ANTLR_SIZE_HINT
+#define ANTLR_SIZE_HINT 2049
+#endif
+#ifndef ANTLR_LIST_SIZE_HINT
+#define ANTLR_LIST_SIZE_HINT 127
+#endif
+#endif
+
+#ifdef ANTLR_AVERAGE
+#ifndef ANTLR_SIZE_HINT
+#define ANTLR_SIZE_HINT 1025
+#endif
+#ifndef ANTLR_LIST_SIZE_HINT
+#define ANTLR_LIST_SIZE_HINT 63
+#endif
+#endif
+
+#ifdef ANTLR_SMALL
+#ifndef ANTLR_SIZE_HINT
+#define ANTLR_SIZE_HINT 211
+#endif
+#ifndef ANTLR_LIST_SIZE_HINT
+#define ANTLR_LIST_SIZE_HINT 31
+#endif
+#endif
+
+// Definitions that indicate the encoding scheme character streams and strings etc
+//
+/// Indicates Big Endian for encodings where this makes sense
+///
+#define ANTLR_BE 1
+
+/// Indicates Little Endian for encodings where this makes sense
+///
+#define ANTLR_LE 2
+
+/// General latin-1 or other 8 bit encoding scheme such as straight ASCII
+///
+#define ANTLR_ENC_8BIT 4
+
+/// UTF-8 encoding scheme
+///
+#define ANTLR_ENC_UTF8 8
+
+/// UTF-16 encoding scheme (which also covers UCS2 as that does not have surrogates)
+/// The BE/LE variants are parenthesized so they expand safely inside larger expressions.
+#define ANTLR_ENC_UTF16 16
+#define ANTLR_ENC_UTF16BE (16 + ANTLR_BE)
+#define ANTLR_ENC_UTF16LE (16 + ANTLR_LE)
+
+/// UTF-32 encoding scheme (basically straight 32 bit)
+/// The BE/LE variants are parenthesized so they expand safely inside larger expressions.
+#define ANTLR_ENC_UTF32 32
+#define ANTLR_ENC_UTF32BE (32 + ANTLR_BE)
+#define ANTLR_ENC_UTF32LE (32 + ANTLR_LE)
+
+/// Input is 8 bit EBCDIC (which we convert to 8 bit
ASCII on the fly
+///
+#define ANTLR_ENC_EBCDIC 64
+
+// Expand to the opening and closing brace of namespace antlr3.
+#define ANTLR_BEGIN_NAMESPACE() namespace antlr3 {
+#define ANTLR_END_NAMESPACE() }
+
+// NOTE(review): defined unconditionally, so every later
+// "#ifdef ANTLR_USE_64BIT" in this header takes the 64-bit branch
+// regardless of platform -- confirm this is intended.
+#define ANTLR_USE_64BIT
+
+/* Common definitions come first
+ */
+#include
+
+/* Work out what operating system/compiler this is. We just do this once
+ * here and use an internal symbol after this.
+ * _WIN64 defines ANTLR_WINDOWS and ANTLR_WIN64; otherwise _WIN32 defines
+ * ANTLR_WINDOWS and ANTLR_WIN32.
+ */
+#ifdef _WIN64
+
+# ifndef ANTLR_WINDOWS
+# define ANTLR_WINDOWS
+# endif
+# define ANTLR_WIN64
+# define ANTLR_USE_64BIT
+
+#else
+
+#ifdef _WIN32
+# ifndef ANTLR_WINDOWS
+# define ANTLR_WINDOWS
+# endif
+
+#define ANTLR_WIN32
+#endif
+
+#endif
+
+#ifdef ANTLR_WINDOWS
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+
+/* Allow VC 8 (vs2005) and above to use 'secure' versions of various functions such as sprintf
+ */
+#ifndef _CRT_SECURE_NO_DEPRECATE
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+// Windows export and calling-convention keywords.
+#define ANTLR_API __declspec(dllexport)
+#define ANTLR_CDECL __cdecl
+#define ANTLR_FASTCALL __fastcall
+
+
+#ifndef __MINGW32__
+// Standard Windows types
+//
+typedef INT32 ANTLR_CHAR;
+typedef UINT32 ANTLR_UCHAR;
+
+typedef INT8 ANTLR_INT8;
+typedef INT16 ANTLR_INT16;
+typedef INT32 ANTLR_INT32;
+typedef INT64 ANTLR_INT64;
+typedef UINT8 ANTLR_UINT8;
+typedef UINT16 ANTLR_UINT16;
+typedef UINT32 ANTLR_UINT32;
+typedef UINT64 ANTLR_UINT64;
+typedef UINT64 ANTLR_BITWORD;
+
+#else
+// Mingw uses stdint.h and fails to define standard Microsoft typedefs
+// such as UINT16, hence we must use stdint.h for Mingw.
+//
+#include
+typedef int32_t ANTLR_CHAR;
+typedef uint32_t ANTLR_UCHAR;
+
+typedef int8_t ANTLR_INT8;
+typedef int16_t ANTLR_INT16;
+typedef int32_t ANTLR_INT32;
+typedef int64_t ANTLR_INT64;
+
+typedef uint8_t ANTLR_UINT8;
+typedef uint16_t ANTLR_UINT16;
+typedef uint32_t ANTLR_UINT32;
+typedef uint64_t ANTLR_UINT64;
+typedef uint64_t ANTLR_BITWORD;
+
+#endif
+
+
+
+// Appends the ULL suffix to an integer literal.
+#define ANTLR_UINT64_LIT(lit) lit##ULL
+
+#define ANTLR_INLINE __inline
+
+typedef FILE * ANTLR_FDSC;
+typedef struct stat ANTLR_FSTAT_STRUCT;
+
+
+
+// Casts to the fixed-width integer types, plus the marker/key typedefs:
+// 64-bit wide when ANTLR_USE_64BIT is defined, 32-bit wide otherwise.
+#ifdef ANTLR_USE_64BIT
+#define ANTLR_UINT64_CAST(ptr) ((ANTLR_UINT64)(ptr))
+#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)((ANTLR_UINT64)(ptr))
+typedef ANTLR_INT64 ANTLR_MARKER;
+typedef ANTLR_UINT64 ANTLR_INTKEY;
+#else
+#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)((ANTLR_UINT32)(ptr))
+#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)(ptr)
+typedef ANTLR_INT32 ANTLR_MARKER;
+typedef ANTLR_UINT32 ANTLR_INTKEY;
+#endif
+
+// The two conditionals below are currently empty: there are no
+// Win32-only or Win64-only definitions.
+#ifdef ANTLR_WIN32
+#endif
+
+#ifdef ANTLR_WIN64
+#endif
+
+
+typedef int ANTLR_SALENT; // Type used for size of accept structure
+typedef struct sockaddr_in ANTLR_SOCKADDRT, * pANTLR_SOCKADDRT; // Type used for socket address declaration
+typedef struct sockaddr ANTLR_SOCKADDRC, * pANTLR_SOCKADDRC; // Type used for cast on accept()
+
+#define ANTLR_CLOSESOCKET closesocket
+
+/* Warnings that are over-zealous such as complaining about strdup, we
+ * can turn off.
+ */
+
+/* Don't complain about "deprecated" functions such as strdup
+ */
+#pragma warning( disable : 4996 )
+
+#else
+
+#ifdef __LP64__
+#define ANTLR_USE_64BIT
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define _stat stat
+
+typedef int SOCKET;
+
+/* Inherit type definitions for autoconf
+ */
+typedef int32_t ANTLR_CHAR;
+typedef uint32_t ANTLR_UCHAR;
+
+typedef int8_t ANTLR_INT8;
+typedef int16_t ANTLR_INT16;
+typedef int32_t ANTLR_INT32;
+typedef int64_t ANTLR_INT64;
+
+typedef uint8_t ANTLR_UINT8;
+typedef uint16_t ANTLR_UINT16;
+typedef uint32_t ANTLR_UINT32;
+typedef uint64_t ANTLR_UINT64;
+typedef uint64_t ANTLR_BITWORD;
+
+#define ANTLR_INLINE inline
+#define ANTLR_API
+
+typedef FILE * ANTLR_FDSC;
+typedef struct stat ANTLR_FSTAT_STRUCT;
+
+/* Casts to the fixed-width integer types, plus the marker/key typedefs:
+ * 64-bit wide when ANTLR_USE_64BIT is defined, 32-bit wide otherwise.
+ * ANTLR_UINT64_CAST is written with balanced parentheses, matching the
+ * Windows branch of this header.
+ */
+#ifdef ANTLR_USE_64BIT
+#define ANTLR_FUNC_PTR(ptr) (void *)((ANTLR_UINT64)(ptr))
+#define ANTLR_UINT64_CAST(ptr) ((ANTLR_UINT64)(ptr))
+#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)((ANTLR_UINT64)(ptr))
+typedef ANTLR_INT64 ANTLR_MARKER;
+typedef ANTLR_UINT64 ANTLR_INTKEY;
+#else
+#define ANTLR_FUNC_PTR(ptr) (void *)((ANTLR_UINT32)(ptr))
+#define ANTLR_UINT64_CAST(ptr) (ANTLR_UINT64)((ANTLR_UINT32)(ptr))
+#define ANTLR_UINT32_CAST(ptr) (ANTLR_UINT32)(ptr)
+typedef ANTLR_INT32 ANTLR_MARKER;
+typedef ANTLR_UINT32 ANTLR_INTKEY;
+#endif
+#define ANTLR_UINT64_LIT(lit) lit##ULL
+
+#endif
+
+#ifdef ANTLR_USE_64BIT
+#define ANTLR_TRIE_DEPTH 63
+#else
+#define ANTLR_TRIE_DEPTH 31
+#endif
+/* Pre declare the typedefs for all the interfaces, then
+ * they can be inter-dependant and we will let the linker
+ * sort it out for us.
+ */
+#include
+
+// Include the unicode.org conversion library header.
+//
+#include
+
+enum ChannelType
+{
+    /** Default channel for a token
+     */
+    TOKEN_DEFAULT_CHANNEL = 0
+    /** Reserved channel number for a HIDDEN token - a token that
+     * is hidden from the parser.
+     */
+    , HIDDEN = 99
+};
+
+#endif /* _ANTLR3DEFS_HPP */
diff --git a/runtime/Cpp/include/antlr3errors.hpp b/runtime/Cpp/include/antlr3errors.hpp
new file mode 100755
index 000000000..876df4f4f
--- /dev/null
+++ b/runtime/Cpp/include/antlr3errors.hpp
@@ -0,0 +1,49 @@
+#ifndef _ANTLR3ERRORS_HPP
+#define _ANTLR3ERRORS_HPP
+
+// [The "BSD licence"]
+// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
+
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. The name of the author may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#define ANTLR_SUCCESS 0
+#define ANTLR_FAIL 1
+
+/** Indicates end of character stream and is an invalid Unicode code point.
*/
+#define ANTLR_CHARSTREAM_EOF 0xFFFFFFFF
+
+/** Indicates memoizing on a rule failed (MEMO_RULE_FAILED) or has no recorded
+ * result (MEMO_RULE_UNKNOWN, same value as ANTLR_CHARSTREAM_EOF).
+ */
+#define MEMO_RULE_FAILED 0xFFFFFFFE
+#define MEMO_RULE_UNKNOWN 0xFFFFFFFF
+
+
+// Error codes, expressed as offsets from ANTLR_ERR_BASE.
+#define ANTLR_ERR_BASE 0
+#define ANTLR_ERR_NOMEM (ANTLR_ERR_BASE + 1)
+#define ANTLR_ERR_NOFILE (ANTLR_ERR_BASE + 2)
+#define ANTLR_ERR_HASHDUP (ANTLR_ERR_BASE + 3)
+
+#endif /* _ANTLR3ERRORS_HPP */
diff --git a/runtime/Cpp/include/antlr3exception.hpp b/runtime/Cpp/include/antlr3exception.hpp
new file mode 100755
index 000000000..beca1fc5e
--- /dev/null
+++ b/runtime/Cpp/include/antlr3exception.hpp
@@ -0,0 +1,209 @@
+/** \file
+ * Contains the definition of a basic ANTLR3 exception structure created
+ * by a recognizer when errors are found/predicted.
+
+ * Two things to be noted for the C++ target:
+ a) This is not a C++ exception. Consider this just as yet another class. It
+ has to be like this because there is built-in recovery, which would need a try..catch
+ block for every new token. This is not how C++ exceptions work. Still, there is exception support, as we are handling things like out-of-memory by
+ throwing exceptions
+
+ b) There is no use in implementing templates here, as all the exceptions are grouped in
+ one container and hence need virtual functions. But this would occur only when there is
+ an exception or while deleting the base recognizer, so it shouldn't incur overhead in normal operation
+ */
+#ifndef _ANTLR3_EXCEPTION_HPP
+#define _ANTLR3_EXCEPTION_HPP
+
+// [The "BSD licence"]
+// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
+
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2.
Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. The name of the author may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "antlr3defs.hpp"
+
+ANTLR_BEGIN_NAMESPACE()
+
+/** Base structure for an ANTLR3 exception tracker
+ */
+
+template
+class ANTLR_ExceptionBase
+{
+public:
+    typedef typename StreamType::UnitType TokenType;
+    typedef typename StreamType::IntStreamType IntStreamType;
+    typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename ImplTraits::StringStreamType StringStreamType;
+    typedef typename ImplTraits::BitsetType BitsetType;
+    typedef typename ImplTraits::BitsetListType BitsetListType;
+    typedef typename ImplTraits::template ExceptionBaseType ExceptionBaseType;
+
+protected:
+    /** The printable message that goes with this exception, in your preferred
+     * encoding format. ANTLR just uses ASCII by default but you can ignore these
+     * messages or convert them to another format or whatever of course. They are
+     * really internal messages that you then decide how to print out in a form that
+     * the users of your product will understand, as they are unlikely to know what
+     * to do with "Recognition exception at: [[TOK_GERUND..... " ;-)
+     */
+    StringType m_message;
+
+    /** Name of the file/input source for reporting. Note that this may be empty!!
+     */
+    StringType m_streamName;
+
+    /** Indicates the index of the 'token' we were looking at when the
+     * exception occurred.
+     */
+    ANTLR_MARKER m_index;
+
+    /** Indicates what the current token/tree was when the error occurred. Since not
+     * all input streams will be able to retrieve the nth token, we track it here
+     * instead. This is for parsers, and even tree parsers may set this.
+     */
+    const TokenType* m_token;
+
+    /** Pointer to the next exception in the chain (if any)
+     */
+    ExceptionBaseType* m_nextException;
+
+    /** Indicates the token we were expecting to see next when the error occurred
+     */
+    ANTLR_UINT32 m_expecting;
+
+    /** Indicates a set of tokens that we were expecting to see one of when the
+     * error occurred. It is a following bitset list, so you can load it and use ->toIntList() on it
+     * to generate an array of integer tokens that it represents.
+     */
+    BitsetListType* m_expectingSet;
+
+    /** If this is a tree parser exception then the node is set to point to the node
+     * that caused the issue.
+     */
+    TokenType* m_node;
+
+    /** The current character when an error occurred - for lexers.
+     */
+    ANTLR_UCHAR m_c;
+
+    /** Track the line at which the error occurred in case this is
+     * generated from a lexer. We need to track this since the
+     * unexpected char doesn't carry the line info.
+     */
+    ANTLR_UINT32 m_line;
+
+    /** Character position in the line where the error occurred.
+     */
+    ANTLR_INT32 m_charPositionInLine;
+
+    /** decision number for NVE
+     */
+    ANTLR_UINT32 m_decisionNum;
+
+    /** State for NVE
+     */
+    ANTLR_UINT32 m_state;
+
+    /** Rule name for failed predicate exception
+     */
+    StringType m_ruleName;
+
+    /** Pointer to the input stream that this exception occurred in.
+     */
+    IntStreamType* m_input;
+
+public:
+    StringType& get_message();
+    StringType& get_streamName();
+    ANTLR_MARKER get_index() const;
+    const TokenType* get_token() const;
+    ExceptionBaseType* get_nextException() const;
+    ANTLR_UINT32 get_expecting() const;
+    BitsetListType* get_expectingSet() const;
+    TokenType* get_node() const;
+    ANTLR_UCHAR get_c() const;
+    ANTLR_UINT32 get_line() const;
+    ANTLR_INT32 get_charPositionInLine() const;
+    ANTLR_UINT32 get_decisionNum() const;
+    ANTLR_UINT32 get_state() const;
+    StringType& get_ruleName();
+    IntStreamType* get_input() const;
+    void set_message( const StringType& message );
+    void set_streamName( const StringType& streamName );
+    void set_index( ANTLR_MARKER index );
+    void set_token( const TokenType* token );
+    void set_nextException( ExceptionBaseType* nextException );
+    void set_expecting( ANTLR_UINT32 expecting );
+    void set_expectingSet( BitsetListType* expectingSet );
+    void set_node( TokenType* node );
+    void set_c( ANTLR_UCHAR c );
+    void set_line( ANTLR_UINT32 line );
+    void set_charPositionInLine( ANTLR_INT32 charPositionInLine );
+    void set_decisionNum( ANTLR_UINT32 decisionNum );
+    void set_state( ANTLR_UINT32 state );
+    void set_ruleName( const StringType& ruleName );
+    void set_input( IntStreamType* input );
+    StringType getDescription() const;
+
+    virtual StringType getName() const = 0;
+    virtual ANTLR_UINT32 getType() const = 0;
+    virtual void print() const = 0;
+    virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str ) const = 0;
+
+    virtual ~ANTLR_ExceptionBase();
+
+protected:
+    ANTLR_ExceptionBase(const StringType& message);
+};
+
+
+/** Concrete exception. Its constructor fills in the exception data from the
+ * recognizer and links the new object into the recognizer state's chain.
+ */
+template
+class ANTLR_Exception : public ImplTraits::template ExceptionBaseType
+{
+public:
+    typedef typename ImplTraits::StringType StringType;
+    typedef typename ImplTraits::StringStreamType StringStreamType;
+    typedef typename ImplTraits::BitsetType BitsetType;
+    typedef typename ImplTraits::template ExceptionBaseType BaseType;
+
+public:
+    template
+    ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message);
+
+    const StringType& get_name() const;
+    virtual StringType getName() const;
+    virtual ANTLR_UINT32 getType() const;
+    virtual void print() const;
+    virtual void displayRecognitionError( ANTLR_UINT8** tokenNames, StringStreamType& str_stream) const;
+};
+
+ANTLR_END_NAMESPACE()
+
+#include "antlr3exception.inl"
+
+#endif
diff --git a/runtime/Cpp/include/antlr3exception.inl b/runtime/Cpp/include/antlr3exception.inl
new file mode 100755
index 000000000..680bd455f
--- /dev/null
+++ b/runtime/Cpp/include/antlr3exception.inl
@@ -0,0 +1,378 @@
+ANTLR_BEGIN_NAMESPACE()
+
+/* Stores the message and gives every scalar/pointer member a defined value.
+ * m_input is cleared as well so get_input() never returns an indeterminate
+ * pointer before a derived class or set_input() assigns it.
+ */
+template
+ANTLR_ExceptionBase::ANTLR_ExceptionBase(const StringType& message)
+    :m_message(message)
+{
+    m_index = 0;
+    m_token = NULL;
+    m_expecting = 0;
+    m_expectingSet = NULL;
+    m_node = NULL;
+    m_c = 0;
+    m_line = 0;
+    m_charPositionInLine = 0;
+    m_decisionNum = 0;
+    m_state = 0;
+    m_nextException = NULL;
+    m_input = NULL;
+}
+
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::StringType& ANTLR_ExceptionBase::get_message()
+{
+    return m_message;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::StringType& ANTLR_ExceptionBase::get_streamName()
+{
+    return m_streamName;
+}
+template
+ANTLR_INLINE ANTLR_MARKER ANTLR_ExceptionBase::get_index() const
+{
+    return m_index;
+}
+template
+ANTLR_INLINE const typename ANTLR_ExceptionBase::TokenType* ANTLR_ExceptionBase::get_token() const
+{
+    return m_token;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::ExceptionBaseType* ANTLR_ExceptionBase::get_nextException() const
+{
+    return m_nextException;
+}
+template
+ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase::get_expecting() const
+{
+    return m_expecting;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::BitsetListType* ANTLR_ExceptionBase::get_expectingSet() const
+{
+    return m_expectingSet;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::TokenType* ANTLR_ExceptionBase::get_node() const
+{
+    return m_node;
+}
+template
+ANTLR_INLINE ANTLR_UCHAR ANTLR_ExceptionBase::get_c() const
+{
+    return m_c;
+}
+template
+ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase::get_line() const
+{
+    return m_line;
+}
+template
+ANTLR_INLINE ANTLR_INT32 ANTLR_ExceptionBase::get_charPositionInLine() const
+{
+    return m_charPositionInLine;
+}
+template
+ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase::get_decisionNum() const
+{
+    return m_decisionNum;
+}
+template
+ANTLR_INLINE ANTLR_UINT32 ANTLR_ExceptionBase::get_state() const
+{
+    return m_state;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::StringType& ANTLR_ExceptionBase::get_ruleName()
+{
+    return m_ruleName;
+}
+template
+ANTLR_INLINE typename ANTLR_ExceptionBase::IntStreamType* ANTLR_ExceptionBase::get_input() const
+{
+    return m_input;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_message( const StringType& message )
+{
+    m_message = message;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_streamName( const StringType& streamName )
+{
+    m_streamName = streamName;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_index( ANTLR_MARKER index )
+{
+    m_index = index;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_token( const TokenType* token )
+{
+    m_token = token;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_nextException( ExceptionBaseType* nextException )
+{
+    m_nextException = nextException;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_expecting( ANTLR_UINT32 expecting )
+{
+    m_expecting = expecting;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_expectingSet( BitsetListType* expectingSet )
+{
+    m_expectingSet = expectingSet;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_node( TokenType* node )
+{
+    m_node = node;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_c( ANTLR_UCHAR c )
+{
+    m_c = c;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_line( ANTLR_UINT32 line )
+{
+    m_line = line;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
+{
+    m_charPositionInLine = charPositionInLine;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_decisionNum( ANTLR_UINT32 decisionNum )
+{
+    m_decisionNum = decisionNum;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_state( ANTLR_UINT32 state )
+{
+    m_state = state;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_ruleName( const StringType& ruleName )
+{
+    m_ruleName = ruleName;
+}
+template
+ANTLR_INLINE void ANTLR_ExceptionBase::set_input( IntStreamType* input )
+{
+    m_input = input;
+}
+
+
+/* Builds the exception from the recognizer and pushes it onto the front of
+ * the recognizer state's exception chain, then flags the error.
+ */
+template
+    template
+ANTLR_Exception::ANTLR_Exception(BaseRecognizerType* recognizer, const StringType& message)
+    :BaseType( message )
+{
+    recognizer->get_super()->fillExceptionData( this );
+    BaseType::m_input = recognizer->get_super()->get_istream();
+    BaseType::m_nextException = recognizer->get_state()->get_exception(); /* So we don't leak the memory */
+    recognizer->get_state()->set_exception(this);
+    recognizer->get_state()->set_error( true ); /* Exception is outstanding */
+}
+
+template
+ANTLR_UINT32 ANTLR_Exception::getType() const
+{
+    return static_cast(Ex);
+}
+
+template
+void ANTLR_Exception::print() const
+{
+    /* Ensure valid pointer
+     */
+    /* Number if no message, else the message
+     */
+    if ( BaseType::m_message.empty() )
+    {
+        fprintf(stderr, "ANTLR3_EXCEPTION number %d (%08X).\n", Ex, Ex);
+    }
+    else
+    {
+        fprintf(stderr, "ANTLR3_EXCEPTION: %s\n", BaseType::m_message.c_str() );
+    }
+}
+
+template
+typename ANTLR_Exception::StringType
+    ANTLR_Exception::getName() const
+{
+    const char* exArray[] = {
"org.antlr.runtime.RecognitionException" + , "org.antlr.runtime.MismatchedTokenException" + , "org.antlr.runtime.NoViableAltException" + , "org.antlr.runtime.MismatchedSetException" + , "org.antlr.runtime.EarlyExitException" + , "org.antlr.runtime.FailedPredicateException" + , "org.antlr.runtime.MismatchedTreeNodeException" + , "org.antlr.runtime.tree.RewriteEarlyExitException" + , "org.antlr.runtime.UnwantedTokenException" + , "org.antlr.runtime.MissingTokenException" + }; + return StringType(exArray[Ex]); +} + +template +void ANTLR_Exception::displayRecognitionError( ANTLR_UINT8** tokenNames, + StringStreamType& str_stream ) const +{ + switch( Ex ) + { + case RECOGNITION_EXCEPTION: + // Indicates that the recognizer received a token + // in the input that was not predicted. This is the basic exception type + // from which all others are derived. So we assume it was a syntax error. + // You may get this if there are not more tokens and more are needed + // to complete a parse for instance. + // + str_stream << " : syntax error...\n"; + break; + case UNWANTED_TOKEN_EXCEPTION: + // Indicates that the recognizer was fed a token which seesm to be + // spurious input. We can detect this when the token that follows + // this unwanted token would normally be part of the syntactically + // correct stream. Then we can see that the token we are looking at + // is just something that should not be there and throw this exception. + // + if (tokenNames == NULL) + { + str_stream << " : Extraneous input..."; + } + else + { + if ( BaseType::m_expecting == ImplTraits::CommonTokenType::TOKEN_EOF) + { + str_stream << " : Extraneous input - expected \n"; + } + else + { + str_stream << " : Extraneous input - expected " + << tokenNames[ BaseType::m_expecting] << " ...\n"; + } + } + break; + case MISSING_TOKEN_EXCEPTION: + // Indicates that the recognizer detected that the token we just + // hit would be valid syntactically if preceeded by a particular + // token. 
Perhaps a missing ';' at line end or a missing ',' in an + // expression list, and such like. + // + if (tokenNames == NULL) + { + str_stream << " : Missing token (" + << BaseType::m_expecting << ")...\n"; + } + else + { + if ( BaseType::m_expecting == ImplTraits::CommonTokenType::TOKEN_EOF ) + { + str_stream <<" : Missing \n"; + } + else + { + str_stream << " : Missing " << tokenNames[BaseType::m_expecting] <<" \n"; + } + } + break; + case NO_VIABLE_ALT_EXCEPTION: + // We could not pick any alt decision from the input given + // so god knows what happened - however when you examine your grammar, + // you should. It means that at the point where the current token occurred + // that the DFA indicates nowhere to go from here. + // + str_stream << " : cannot match to any predicted input...\n"; + break; + case MISMATCHED_SET_EXCEPTION: + { + ANTLR_UINT32 count; + ANTLR_UINT32 bit; + ANTLR_UINT32 size; + ANTLR_UINT32 numbits; + BitsetType* errBits; + + // This means we were able to deal with one of a set of + // possible tokens at this point, but we did not see any + // member of that set. + // + str_stream << " : unexpected input...\n expected one of : "; + + // What tokens could we have accepted at this point in the + // parse? + // + count = 0; + errBits = BaseType::m_expectingSet->bitsetLoad(); + numbits = errBits->numBits(); + size = errBits->size(); + + if (size > 0) + { + // However many tokens we could have dealt with here, it is usually + // not useful to print ALL of the set here. I arbitrarily chose 8 + // here, but you should do whatever makes sense for you of course. + // No token number 0, so look for bit 1 and on. + // + for (bit = 1; bit < numbits && count < 8 && count < size; bit++) + { + // TODO: This doesn;t look right - should be asking if the bit is set!! + // + if (tokenNames[bit]) + { + str_stream << ( count > 0 ? 
", " : "" ) + << tokenNames[bit]; + count++; + } + } + str_stream << "\n"; + } + else + { + str_stream << "Actually dude, we didn't seem to be expecting anything here, or at least\n"; + str_stream << "I could not work out what I was expecting, like so many of us these days!\n"; + } + } + break; + case EARLY_EXIT_EXCEPTION: + str_stream << " : missing elements...\n"; + break; + default: + str_stream << " : syntax not recognized...\n"; + break; + } +} + +template +ANTLR_ExceptionBase::~ANTLR_ExceptionBase() +{ + ANTLR_ExceptionBase* next; + ANTLR_ExceptionBase* ex = m_nextException; + + /* Ensure valid pointer + */ + while (ex != NULL) + { + /* Pick up anythign following now, before we free the + * current memory block. + */ + next = ex->m_nextException; + ex->m_nextException = NULL; + + /* Free the actual structure itself + */ + delete ex; + + ex = next; + } +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3filestream.hpp b/runtime/Cpp/include/antlr3filestream.hpp new file mode 100755 index 000000000..9a46d3582 --- /dev/null +++ b/runtime/Cpp/include/antlr3filestream.hpp @@ -0,0 +1,75 @@ +#ifndef _ANTLR3_FILESTREAM_HPP +#define _ANTLR3_FILESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +template +class FileUtils +{ +public: + /** \brief Open an operating system file and return the descriptor + * We just use the common open() and related functions here. + * Later we might find better ways on systems + * such as Windows and OpenVMS for instance. But the idea is to read the + * while file at once anyway, so it may be irrelevant. + */ + static ANTLR_FDSC AntlrFopen(const ANTLR_UINT8* filename, const char * mode); + + /** \brief Close an operating system file and free any handles + * etc. 
+ */ + static void AntlrFclose (ANTLR_FDSC fd); + + static ANTLR_UINT32 AntlrFsize(const ANTLR_UINT8* filename); + template + static ANTLR_UINT32 AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName); + static ANTLR_UINT32 AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data); + +}; + +class ParseFileAbsentException : public std::exception +{ + virtual const char* what() const throw() + { + return " Parse File not Present"; + } +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3filestream.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3filestream.inl b/runtime/Cpp/include/antlr3filestream.inl new file mode 100755 index 000000000..59b4f0658 --- /dev/null +++ b/runtime/Cpp/include/antlr3filestream.inl @@ -0,0 +1,74 @@ +ANTLR_BEGIN_NAMESPACE() + +template +ANTLR_FDSC FileUtils::AntlrFopen(const ANTLR_UINT8* filename, const char * mode) +{ + return (ANTLR_FDSC)fopen((const char *)filename, mode); +} + +template +void FileUtils::AntlrFclose (ANTLR_FDSC fd) +{ + fclose(fd); +} + +template +ANTLR_UINT32 FileUtils::AntlrFsize(const ANTLR_UINT8* filename) +{ + struct _stat statbuf; + + _stat((const char *)filename, &statbuf); + + return (ANTLR_UINT32)statbuf.st_size; +} + +template +ANTLR_UINT32 FileUtils::AntlrFread(ANTLR_FDSC fdsc, ANTLR_UINT32 count, void* data) +{ + return (ANTLR_UINT32)fread(data, (size_t)count, 1, fdsc); +} + +template + template +ANTLR_UINT32 FileUtils::AntlrRead8Bit(InputStreamType* input, const ANTLR_UINT8* fileName) +{ + ANTLR_FDSC infile; + ANTLR_UINT32 fSize; + + /* Open the OS file in read binary mode + */ + infile = FileUtils::AntlrFopen(fileName, "rb"); + + /* Check that it was there + */ + if (infile == NULL) + { + ParseFileAbsentException ex; + throw ex; + } + + /* It was there, so we can read the bytes now + */ + fSize = FileUtils::AntlrFsize(fileName); /* Size of input file */ + + /* Allocate buffer for this input set + */ + void* data = ImplTraits::AllocPolicyType::alloc(fSize); + /* Now we read the file. 
Characters are not converted to + * the internal ANTLR encoding until they are read from the buffer + */ + FileUtils::AntlrFread(infile, fSize, data ); + + input->set_data( (unsigned char*) data ); + input->set_sizeBuf( fSize ); + + input->set_isAllocated(true); + + /* And close the file handle + */ + FileUtils::AntlrFclose(infile); + + return ANTLR_SUCCESS; +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3input.hpp b/runtime/Cpp/include/antlr3input.hpp new file mode 100755 index 000000000..175a5da6e --- /dev/null +++ b/runtime/Cpp/include/antlr3input.hpp @@ -0,0 +1,327 @@ +/** \file + * Defines the basic structures used to manipulate character + * streams from any input source. Any character size and encoding + * can in theory be used, so long as a set of functinos is provided that + * can return a 32 bit Integer representation of their characters amd efficiently mark and revert + * to specific offsets into their input streams. + */ +#ifndef _ANTLR_INPUT_HPP +#define _ANTLR_INPUT_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/// Master context structure for an ANTLR3 C runtime based input stream. +/// \ingroup apistructures. Calling _LT on this doesn't seem right. You would +/// call it only with parser / TreeParser, and their respective input streams +/// has that function. calling it from lexer will throw a compile time error +/// + +template +class InputStream : public ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType > +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::LexStateType LexStateType; + typedef typename ImplTraits::template IntStreamType< typename ImplTraits::InputStreamType > IntStreamType; + typedef IntStreamType BaseType; + typedef typename ImplTraits::StreamDataType UnitType; + typedef UnitType DataType; + typedef UnitType TokenType; + typedef typename AllocPolicyType::template VectorType MarkersType; + typedef typename ImplTraits::StringType StringType; + +private: + /** Pointer the start of the input string, characters may be + * taken as offsets from here and in original input format encoding. 
+ */ + const DataType* m_data; + + /** Pointer to the next character to be consumed from the input data + * This is cast to point at the encoding of the original file that + * was read by the functions installed as pointer in this input stream + * context instance at file/string/whatever load time. + */ + const DataType* m_nextChar; + + /** Number of characters that can be consumed at this point in time. + * Mostly this is just what is left in the pre-read buffer, but if the + * input source is a stream such as a socket or something then we may + * call special read code to wait for more input. + */ + ANTLR_UINT32 m_sizeBuf; + + /** The line number we are traversing in the input file. This gets incremented + * by a newline() call in the lexer grammar actions. + */ + ANTLR_UINT32 m_line; + + /** Pointer into the input buffer where the current line + * started. + */ + const DataType* m_currentLine; + + /** The offset within the current line of the current character + */ + ANTLR_INT32 m_charPositionInLine; + + /** Tracks how deep mark() calls are nested + */ + ANTLR_UINT32 m_markDepth; + + /** List of mark() points in the input stream + */ + MarkersType m_markers; + + /** File name string, set to pointer to memory if + * you set it manually as it will be free()d + */ + StringType m_fileName; + + /** File number, needs to be set manually to some file index of your devising. + */ + ANTLR_UINT32 m_fileNo; + + /// Character that automatically causes an internal line count + /// increment. + /// + ANTLR_UCHAR m_newlineChar; + + /// Indicates the size, in 8 bit units, of a single character. Note that + /// the C runtime does not deal with surrogates as this would be + /// slow and complicated. If this is a UTF-8 stream then this field + /// will be set to 0. Generally you are best working internally with 32 bit characters + /// as this is the most efficient. 
+ /// + ANTLR_UINT8 m_charByteSize; + + /** Indicates if the data pointer was allocated by us, and so should be freed + * when the stream dies. + */ + bool m_isAllocated; + + /// Indicates the encoding scheme used in this input stream + /// + ANTLR_UINT32 m_encoding; + + /* API */ +public: + InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding); + InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name); + ~InputStream(); + const DataType* get_data() const; + bool get_isAllocated() const; + const DataType* get_nextChar() const; + ANTLR_UINT32 get_sizeBuf() const; + ANTLR_UINT32 get_line() const; + const DataType* get_currentLine() const; + ANTLR_INT32 get_charPositionInLine() const; + ANTLR_UINT32 get_markDepth() const; + MarkersType& get_markers(); + const StringType& get_fileName() const; + ANTLR_UINT32 get_fileNo() const; + ANTLR_UCHAR get_newlineChar() const; + ANTLR_UINT8 get_charByteSize() const; + ANTLR_UINT32 get_encoding() const; + + void set_data( DataType* data ); + void set_isAllocated( bool isAllocated ); + void set_nextChar( const DataType* nextChar ); + void set_sizeBuf( ANTLR_UINT32 sizeBuf ); + void set_line( ANTLR_UINT32 line ); + void set_currentLine( const DataType* currentLine ); + void set_charPositionInLine( ANTLR_INT32 charPositionInLine ); + void set_markDepth( ANTLR_UINT32 markDepth ); + void set_markers( const MarkersType& markers ); + void set_fileName( const StringType& fileName ); + void set_fileNo( ANTLR_UINT32 fileNo ); + void set_newlineChar( ANTLR_UCHAR newlineChar ); + void set_charByteSize( ANTLR_UINT8 charByteSize ); + void set_encoding( ANTLR_UINT32 encoding ); + + void inc_charPositionInLine(); + void inc_line(); + void inc_markDepth(); + + IntStreamType* get_istream(); + + /** Function that resets the input stream + */ + void reset(); + + /** Pointer to a function that reuses and resets an input stream by + * supplying a new 'source' + */ + void reuse(ANTLR_UINT8* inString, 
ANTLR_UINT32 size, ANTLR_UINT8* name); + + + /** Function to return the total size of the input buffer. For streams + * this may be just the total we have available so far. This means of course that + * the input stream must be careful to accumulate enough input so that any backtracking + * can be satisfied. + */ + ANTLR_UINT32 size(); + + /** Function to return a substring of the input stream. String is returned in allocated + * memory and is in same encoding as the input stream itself, NOT internal ANTLR_UCHAR form. + */ + StringType substr(ANTLR_MARKER start, ANTLR_MARKER stop); + + /** Function to return the current line number in the input stream + */ + ANTLR_UINT32 get_line(); + + /** Function to return the current line buffer in the input stream + * The pointer returned is directly into the input stream so you must copy + * it if you wish to manipulate it without damaging the input stream. Encoding + * is obviously in the same form as the input stream. + * \remark + * - Note taht this function wil lbe inaccurate if setLine is called as there + * is no way at the moment to position the input stream at a particular line + * number offset. + */ + const DataType* getLineBuf(); + + /** Function to return the current offset in the current input stream line + */ + ANTLR_UINT32 get_charPositionInLine(); + + /** Function to set the current position in the current line. + */ + void set_charPositionInLine(ANTLR_UINT32 position); + + /** Function to override the default newline character that the input stream + * looks for to trigger the line/offset and line buffer recording information. + * \remark + * - By default the chracter '\n' will be installed as the newline trigger character. When this + * character is seen by the consume() function then the current line number is incremented and the + * current line offset is reset to 0. 
The Pointer for the line of input we are consuming + * is updated to point to the next character after this one in the input stream (which means it + * may become invalid if the last newline character in the file is seen (so watch out). + * - If for some reason you do not want the counters and pointers to be restee, you can set the + * chracter to some impossible character such as '\0' or whatever. + * - This is a single character only, so choose the last character in a sequence of two or more. + * - This is only a simple aid to error reporting - if you have a complicated binary input structure + * it may not be adequate, but you can always override every function in the input stream with your + * own of course, and can even write your own complete input stream set if you like. + * - It is your responsiblity to set a valid character for the input stream type. There is no point + * setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never + * trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF + */ + void set_newLineChar(ANTLR_UINT32 newlineChar); + + ANTLR_MARKER index_impl(); + +private: + /** \brief Use the contents of an operating system file as the input + * for an input stream. + * + * \param fileName Name of operating system file to read. + * \return + * - Pointer to new input stream context upon success + * - One of the ANTLR3_ERR_ defines on error. + */ + void createFileStream(const ANTLR_UINT8* fileName); + + /** \brief Use the supplied 'string' as input to the stream + * + * \param data Pointer to the input data + * \return + * - Pointer to new input stream context upon success + * - NULL defines on error. + */ + void createStringStream(const ANTLR_UINT8* data); + void genericSetupStream(); + + /// Determine endianess of the input stream and install the + /// API required for the encoding in that format. 
+ /// + void setupInputStream(); + +}; + +/** \brief Structure for track lex input states as part of mark() + * and rewind() of lexer. + */ +template +class LexState : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::StreamDataType DataType; + +private: + /** Pointer to the next character to be consumed from the input data + * This is cast to point at the encoding of the original file that + * was read by the functions installed as pointer in this input stream + * context instance at file/string/whatever load time. + */ + const DataType* m_nextChar; + + /** The line number we are traversing in the input file. This gets incremented + * by a newline() call in the lexer grammer actions. + */ + ANTLR_UINT32 m_line; + + /** Pointer into the input buffer where the current line + * started. + */ + const DataType* m_currentLine; + + /** The offset within the current line of the current character + */ + ANTLR_INT32 m_charPositionInLine; + +public: + LexState(); + const DataType* get_nextChar() const; + ANTLR_UINT32 get_line() const; + const DataType* get_currentLine() const; + ANTLR_INT32 get_charPositionInLine() const; + void set_nextChar( const DataType* nextChar ); + void set_line( ANTLR_UINT32 line ); + void set_currentLine( const DataType* currentLine ); + void set_charPositionInLine( ANTLR_INT32 charPositionInLine ); +}; + +class ParseNullStringException : public std::exception +{ + virtual const char* what() const throw() + { + return "Null String"; + } +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3input.inl" + +#endif /* _ANTLR_INPUT_H */ diff --git a/runtime/Cpp/include/antlr3input.inl b/runtime/Cpp/include/antlr3input.inl new file mode 100755 index 000000000..6c9b13611 --- /dev/null +++ b/runtime/Cpp/include/antlr3input.inl @@ -0,0 +1,619 @@ +ANTLR_BEGIN_NAMESPACE() + +template +InputStream::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding) +{ + // First order of business is to read the file into some buffer space + // 
as just straight 8 bit bytes. Then we will work out the encoding and + // byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + this->createFileStream(fileName); + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + m_encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name + // + BaseType::m_streamName = (const char* )fileName; + m_fileName = BaseType::m_streamName; +} + +template +InputStream::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ + // First order of business is to set up the stream and install the data pointer. + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + this->createStringStream(data); + + // Size (in bytes) of the given 'string' + // + m_sizeBuf = size; + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + m_encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + this->setupInputStream(); + + // Now we can set up the file name + // + BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name; + m_fileName = BaseType::m_streamName; + +} + +template +void InputStream::createStringStream(const ANTLR_UINT8* data) +{ + if (data == NULL) + { + ParseNullStringException ex; + throw ex; + } + + // Structure was allocated correctly, now we can install the pointer + // + m_data = data; + m_isAllocated = false; + + // Call the common 8 bit input stream handler + // initialization. 
+ // + this->genericSetupStream(); +} + +template +void InputStream::createFileStream(const ANTLR_UINT8* fileName) +{ + if (fileName == NULL) + { + ParseFileAbsentException ex; + throw ex; + } + + // Structure was allocated correctly, now we can read the file. + // + FileUtils::AntlrRead8Bit(this, fileName); + + // Call the common 8 bit input stream handler + // initialization. + // + this->genericSetupStream(); +} + +template +void InputStream::genericSetupStream() +{ + this->set_charByteSize(1); + + /* Set up the input stream brand new + */ + this->reset(); + + /* Install default line separator character (it can be replaced + * by the grammar programmer later) + */ + this->set_newLineChar((ANTLR_UCHAR)'\n'); +} + +template +InputStream::~InputStream() +{ + // Free the input stream buffer if we allocated it + // + if (m_isAllocated && (m_data != NULL)) + AllocPolicyType::free((void*)m_data); //const_cast is required +} + +template +ANTLR_INLINE const typename InputStream::DataType* InputStream::get_data() const +{ + return m_data; +} +template +ANTLR_INLINE bool InputStream::get_isAllocated() const +{ + return m_isAllocated; +} +template +ANTLR_INLINE const typename InputStream::DataType* InputStream::get_nextChar() const +{ + return m_nextChar; +} +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_sizeBuf() const +{ + return m_sizeBuf; +} +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_line() const +{ + return m_line; +} +template +ANTLR_INLINE const typename InputStream::DataType* InputStream::get_currentLine() const +{ + return m_currentLine; +} +template +ANTLR_INLINE ANTLR_INT32 InputStream::get_charPositionInLine() const +{ + return m_charPositionInLine; +} +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_markDepth() const +{ + return m_markDepth; +} +template +ANTLR_INLINE typename InputStream::MarkersType& InputStream::get_markers() +{ + return m_markers; +} +template +ANTLR_INLINE const typename InputStream::StringType& 
InputStream::get_fileName() const +{ + return m_fileName; +} +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_fileNo() const +{ + return m_fileNo; +} +template +ANTLR_INLINE ANTLR_UCHAR InputStream::get_newlineChar() const +{ + return m_newlineChar; +} +template +ANTLR_INLINE ANTLR_UINT8 InputStream::get_charByteSize() const +{ + return m_charByteSize; +} +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_encoding() const +{ + return m_encoding; +} +template +ANTLR_INLINE void InputStream::set_data( DataType* data ) +{ + m_data = data; +} +template +ANTLR_INLINE void InputStream::set_isAllocated( bool isAllocated ) +{ + m_isAllocated = isAllocated; +} +template +ANTLR_INLINE void InputStream::set_nextChar( const DataType* nextChar ) +{ + m_nextChar = nextChar; +} +template +ANTLR_INLINE void InputStream::set_sizeBuf( ANTLR_UINT32 sizeBuf ) +{ + m_sizeBuf = sizeBuf; +} +template +ANTLR_INLINE void InputStream::set_line( ANTLR_UINT32 line ) +{ + m_line = line; +} +template +ANTLR_INLINE void InputStream::set_currentLine( const DataType* currentLine ) +{ + m_currentLine = currentLine; +} +template +ANTLR_INLINE void InputStream::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ + m_charPositionInLine = charPositionInLine; +} +template +ANTLR_INLINE void InputStream::set_markDepth( ANTLR_UINT32 markDepth ) +{ + m_markDepth = markDepth; +} +template +ANTLR_INLINE void InputStream::set_markers( const MarkersType& markers ) +{ + m_markers = markers; +} +template +ANTLR_INLINE void InputStream::set_fileName( const StringType& fileName ) +{ + m_fileName = fileName; +} +template +ANTLR_INLINE void InputStream::set_fileNo( ANTLR_UINT32 fileNo ) +{ + m_fileNo = fileNo; +} +template +ANTLR_INLINE void InputStream::set_newlineChar( ANTLR_UCHAR newlineChar ) +{ + m_newlineChar = newlineChar; +} +template +ANTLR_INLINE void InputStream::set_charByteSize( ANTLR_UINT8 charByteSize ) +{ + m_charByteSize = charByteSize; +} +template +ANTLR_INLINE void 
InputStream::set_encoding( ANTLR_UINT32 encoding ) +{ + m_encoding = encoding; +} + +template +ANTLR_INLINE void InputStream::inc_charPositionInLine() +{ + ++m_charPositionInLine; +} + +template +ANTLR_INLINE void InputStream::inc_line() +{ + ++m_line; +} + +template +ANTLR_INLINE void InputStream::inc_markDepth() +{ + ++m_markDepth; +} + +template +ANTLR_INLINE void InputStream::reset() +{ + m_nextChar = m_data; /* Input at first character */ + m_line = 1; /* starts at line 1 */ + m_charPositionInLine = -1; + m_currentLine = m_data; + m_markDepth = 0; /* Reset markers */ + + /* Clear out up the markers table if it is there + */ + m_markers.clear(); +} + +template +void InputStream::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) +{ + m_isAllocated = false; + m_data = inString; + m_sizeBuf = size; + + // Now we can set up the file name. As we are reusing the stream, there may already + // be a string that we can reuse for holding the filename. + // + if ( BaseType::m_streamName.empty() ) + { + BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); + m_fileName = BaseType::m_streamName; + } + else + { + BaseType::m_streamName = ((name == NULL) ? 
"-memory-" : (const char *)name); + } + + this->reset(); +} + +/* +template +typename InputStream::DataType* InputStream::_LT(ANTLR_INT32 lt) +{ + return this->_LA(lt); +} +*/ + +template +ANTLR_UINT32 InputStream::size() +{ + return m_sizeBuf; +} + +template +ANTLR_MARKER InputStream::index_impl() +{ + return (ANTLR_MARKER)m_nextChar; +} + + +template +typename InputStream::StringType InputStream::substr(ANTLR_MARKER start, ANTLR_MARKER stop) +{ + std::size_t len = static_cast( (stop-start)/sizeof(DataType) + 1 ); + StringType str( (const char*)start, len ); + return str; +} + +template +ANTLR_UINT32 InputStream::get_line() +{ + return m_line; +} + +template +const typename InputStream::DataType* InputStream::getLineBuf() +{ + return m_currentLine; +} + +template +ANTLR_INLINE ANTLR_UINT32 InputStream::get_charPositionInLine() +{ + return m_charPositionInLine; +} + +template +ANTLR_INLINE void InputStream::set_charPositionInLine(ANTLR_UINT32 position) +{ + m_charPositionInLine = position; +} + +template +void InputStream::set_newLineChar(ANTLR_UINT32 newlineChar) +{ + m_newlineChar = newlineChar; +} + +template +ANTLR_INLINE LexState::LexState() +{ + m_nextChar = NULL; + m_line = 0; + m_currentLine = NULL; + m_charPositionInLine = 0; +} + +template +ANTLR_INLINE const typename LexState::DataType* LexState::get_nextChar() const +{ + return m_nextChar; +} + +template +ANTLR_INLINE ANTLR_UINT32 LexState::get_line() const +{ + return m_line; +} + +template +ANTLR_INLINE const typename LexState::DataType* LexState::get_currentLine() const +{ + return m_currentLine; +} + +template +ANTLR_INLINE ANTLR_INT32 LexState::get_charPositionInLine() const +{ + return m_charPositionInLine; +} + +template +ANTLR_INLINE void LexState::set_nextChar( const DataType* nextChar ) +{ + m_nextChar = nextChar; +} + +template +ANTLR_INLINE void LexState::set_line( ANTLR_UINT32 line ) +{ + m_line = line; +} + +template +ANTLR_INLINE void LexState::set_currentLine( const DataType* currentLine 
) +{ + m_currentLine = currentLine; +} + +template +ANTLR_INLINE void LexState::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) +{ + m_charPositionInLine = charPositionInLine; +} + +template +ANTLR_INLINE typename InputStream::IntStreamType* InputStream::get_istream() +{ + return this; +} + +template +void InputStream::setupInputStream() +{ + bool isBigEndian; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR_UINT16 bomTest = 0xFEFF; + + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. + // + if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) + { + isBigEndian = true; + } + else + { + isBigEndian = false; + } + + // What encoding did the user tell us {s}he thought it was? I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (m_encoding) + { + case ANTLR_ENC_UTF8: + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (*(m_nextChar)) == 0xEF + && (*(m_nextChar+1)) == 0xBB + && (*(m_nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + m_nextChar += 3; + } + + // Install the UTF8 input routines + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(0); + break; + + case ANTLR_ENC_UTF16: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. 
+ // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, true ); + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, false ); + } + else + { + // No BOM present, assume local computer byte order + // + this->setupIntStream(isBigEndian, isBigEndian); + } + this->set_charByteSize(2); + break; + + case ANTLR_ENC_UTF32: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + m_nextChar += 1; + + this->setupIntStream(isBigEndian, true); + } + else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 + ) + { + // BOM present, indicates Little Endian + // + m_nextChar += 1; + + this->setupIntStream( isBigEndian, false ); + } + else + { + // No BOM present, assume local computer byte order + // + this->setupIntStream( isBigEndian, isBigEndian ); + } + this->set_charByteSize(4); + break; + + case ANTLR_ENC_UTF16BE: + + // Encoding is definately Big Endian with no BOM + // + this->setupIntStream( isBigEndian, true ); + this->set_charByteSize(2); + break; + + case 
ANTLR_ENC_UTF16LE: + + // Encoding is definately Little Endian with no BOM + // + this->setupIntStream( isBigEndian, false ); + this->set_charByteSize(2); + break; + + case ANTLR_ENC_UTF32BE: + + // Encoding is definately Big Endian with no BOM + // + this->setupIntStream( isBigEndian, true ); + this->set_charByteSize(4); + break; + + case ANTLR_ENC_UTF32LE: + + // Encoding is definately Little Endian with no BOM + // + this->setupIntStream( isBigEndian, false ); + this->set_charByteSize(4); + break; + + case ANTLR_ENC_EBCDIC: + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(1); + break; + + case ANTLR_ENC_8BIT: + default: + + // Standard 8bit/ASCII + // + this->setupIntStream( isBigEndian, isBigEndian ); + this->set_charByteSize(1); + break; + } +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3interfaces.hpp b/runtime/Cpp/include/antlr3interfaces.hpp new file mode 100755 index 000000000..f6297844e --- /dev/null +++ b/runtime/Cpp/include/antlr3interfaces.hpp @@ -0,0 +1,301 @@ +/** \file + * Declarations for all the antlr3 C runtime interfaces/classes. This + * allows the structures that define the interfaces to contain pointers to + * each other without trying to sort out the cyclic interdependencies that + * would otherwise result. + */ +#ifndef _ANTLR3_INTERFACES_HPP +#define _ANTLR3_INTERFACES_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
/// Exception kinds. Values start at 0 and increase by one in declaration
/// order; they are spelled out so the numbering is visible at a glance.
enum ExceptionType
{
    /// The recognizer received a token in the input that was not predicted.
    RECOGNITION_EXCEPTION          = 0,

    /// The recognizer was expecting one token and found a different one.
    MISMATCHED_TOKEN_EXCEPTION     = 1,

    /// The recognizer could not find a valid alternative from the input.
    NO_VIABLE_ALT_EXCEPTION        = 2,

    /// A character in a set was not found.
    MISMATCHED_SET_EXCEPTION       = 3,

    /// A rule predicting at least n elements found fewer than that,
    /// such as: WS: " "+;
    EARLY_EXIT_EXCEPTION           = 4,

    FAILED_PREDICATE_EXCEPTION     = 5,

    MISMATCHED_TREE_NODE_EXCEPTION = 6,

    REWRITE_EARLY_EXCEPTION        = 7,

    UNWANTED_TOKEN_EXCEPTION       = 8,

    MISSING_TOKEN_EXCEPTION        = 9
};
#ANTLR3_BASE_TREE +/// \ingroup ANTLR3_BASE_TREE +/// +template +class BaseTree; +template +class BaseTreeFunctions; + + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE +/// \ingroup ANTLR3_COMMON_TREE +/// +template +class CommonTree; +template +class CommonTreeFunctions; + +/// Pointer to an instantiation of 'class' #ANTLR3_PARSE_TREE +/// \ingroup ANTLR3_PARSE_TREE +/// +template +class ParseTree; + +/// Pointer to an instantiation of 'class' #ANTLR3_TREE_NODE_STREAM +/// \ingroup ANTLR3_TREE_NODE_STREAM +/// +template +class TreeNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_NODE_STREAM +/// \ingroup ANTLR3_COMMON_TREE_NODE_STREAM +/// +template +class CommonTreeNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_TREE_WALK_STATE +/// \ingroup ANTLR3_TREE_WALK_STATE +/// +template +class TreeWalkState; + +/// Pointer to an instantiation of 'class' #ANTLR3_BASE_TREE_ADAPTOR +/// \ingroup ANTLR3_BASE_TREE_ADAPTOR +/// +template +class BaseTreeAdaptor; +template +class BaseTreeAdaptorFunctions; + + +/// Pointer to an instantiation of 'class' #ANTLR3_COMMON_TREE_ADAPTOR +/// \ingroup ANTLR3_COMMON_TREE_ADAPTOR +/// +template +class CommonTreeAdaptor; +template +class CommonTreeAdaptorFunctions; + + +/// Pointer to an instantiation of 'class' #ANTLR3_TREE_PARSER +/// \ingroup ANTLR3_TREE_PARSER +/// +template +class TreeParser; + +/// Pointer to an instantiation of 'class' #ANTLR3_INT_TRIE +/// \ingroup ANTLR3_INT_TRIE +/// +template< class DataType, class AllocPolicyType > +class IntTrie; + +/// Pointer to an instantiation of 'class' #ANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// \ingroup ANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// +template +class RewriteRuleElementStream; + +template +class RewriteRuleTokenStream; + +template +class RewriteRuleSubtreeStream; + +template +class RewriteRuleNodeStream; + +/// Pointer to an instantiation of 'class' #ANTLR3_DEBUG_EVENT_LISTENER +/// \ingroup ANTLR3_DEBUG_EVENT_LISTENER 
+/// +template +class DebugEventListener; + +//A Class just used for forwarding other classes for simplifying class forwarding +//Logic: constructor is made simple +template +class ClassForwarder {}; + +template +class BoolForwarder {}; +class Empty {}; + +template +class ComponentTypeFinder +{ +}; + +template +class ComponentTypeFinder< ImplTraits, typename ImplTraits::InputStreamType> +{ +public: + typedef typename ImplTraits::LexerType ComponentType; +}; + +template +class ComponentTypeFinder< ImplTraits, typename ImplTraits::TokenStreamType> +{ +public: + typedef typename ImplTraits::ParserType ComponentType; +}; + +template +class ComponentTypeFinder< ImplTraits, typename ImplTraits::TreeNodeStreamType> +{ +public: + typedef typename ImplTraits::TreeParserType ComponentType; +}; + + +ANTLR_END_NAMESPACE() + +#endif diff --git a/runtime/Cpp/include/antlr3intstream.hpp b/runtime/Cpp/include/antlr3intstream.hpp new file mode 100755 index 000000000..82c116bd2 --- /dev/null +++ b/runtime/Cpp/include/antlr3intstream.hpp @@ -0,0 +1,404 @@ +/** \file + * Defines the the class interface for an antlr3 INTSTREAM. + * + * Certain functionality (such as DFAs for instance) abstract the stream of tokens + * or characters in to a steam of integers. Hence this structure should be included + * in any stream that is able to provide the output as a stream of integers (which is anything + * basically. + * + * There are no specific implementations of the methods in this interface in general. Though + * for purposes of casting and so on, it may be necesssary to implement a function with + * the signature in this interface which abstracts the base immplementation. In essence though + * the base stream provides a pointer to this interface, within which it installs its + * normal match() functions and so on. Interaces such as DFA are then passed the pANTLR3_INT_STREAM + * and can treat any input as an int stream. 
+ * + * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM. + * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from + * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER + * when it is initialized with a pANTLR3_INPUT_STREAM. + * + * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the + * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM. + * + * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where + * the pANTLR3_INT_STREAM comes from? + * + * Note that because the context pointer points to the actual interface structure that is providing + * the ANTLR3_INT_STREAM it is defined as a (void *) in this interface. There is no direct implementation + * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P). + */ +#ifndef _ANTLR3_INTSTREAM_HPP +#define _ANTLR3_INTSTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
/// Stream type indicators. Each type is a distinct bit so that a custom
/// stream which can be treated as one of these may OR the value into its own
/// type indicator.
enum STREAM_TYPE
{
    /// Character stream
    CHARSTREAM     = 1 << 0,

    /// Token stream
    TOKENSTREAM    = 1 << 1,

    /// Common tree node stream
    COMMONTREENODE = 1 << 2,

    /// Mask covering the three types above, so they can be switched on.
    /// \remark DO NOT USE 0x0000 as a stream type!
    INPUT_MASK     = CHARSTREAM | TOKENSTREAM | COMMONTREENODE
};
+ */ + ANTLR_MARKER mark(); + + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. + */ + ANTLR_MARKER index(); + + /** Reset the stream so that next call to index would return marker. + * The marker will usually be index() but it doesn't have to be. It's + * just a marker to indicate what state the stream was in. This is + * essentially calling release() and seek(). If there are markers + * created after this marker argument, this routine must unroll them + * like a stack. Assume the state the stream was in when this marker + * was created. + */ + void rewind(ANTLR_MARKER marker); + + /** Reset the stream to the last marker position, witouh destryoing the + * last marker position. + */ + void rewindLast(); + + /** You may want to commit to a backtrack but don't want to force the + * stream to keep bookkeeping objects around for a marker that is + * no longer necessary. This will have the same behavior as + * rewind() except it releases resources without the backward seek. + */ + void release(ANTLR_MARKER mark); + + /** Set the input cursor to the position indicated by index. This is + * normally used to seek ahead in the input stream. No buffering is + * required to do this unless you know your stream will use seek to + * move backwards such as when backtracking. + * + * This is different from rewind in its multi-directional + * requirement and in that its argument is strictly an input cursor (index). + * + * For char streams, seeking forward must update the stream state such + * as line number. For seeking backwards, you will be presumably + * backtracking using the mark/rewind mechanism that restores state and + * so this method does not need to update state when seeking backwards. + * + * Currently, this method is only used for efficient backtracking, but + * in the future it may be used for incremental parsing. 
+ */ + void seek(ANTLR_MARKER index); + + /// Debug only method to flag consumption of initial off-channel + /// tokens in the input stream + /// + void consumeInitialHiddenTokens(); + + void rewindMark(ANTLR_MARKER marker); + ANTLR_MARKER tindex(); + + /** Frees any resources that were allocated for the implementation of this + * interface. Usually this is just releasing the memory allocated + * for the structure itself, but it may of course do anything it need to + * so long as it does not stamp on anything else. + */ + ~IntStream(); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + void findout_endian_spec(bool machineBigEndian, bool inputBigEndian); + + //If the user chooses this option, then we will be resolving stuffs at run-time + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + + //resolve into one of the three categories below at runtime + void consume( ClassForwarder ); +}; + +template +class EBCDIC_IntStream : public IntStream +{ +public: + ANTLR_UINT32 _LA( ANTLR_INT32 i); + +protected: + void setupIntStream(); +}; + +template +class UTF8_IntStream : public IntStream +{ +public: + ANTLR_UINT32 _LA( ANTLR_INT32 i); + void consume(); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + +private: + static const ANTLR_UINT32* TrailingBytesForUTF8(); + static const UTF32* OffsetsFromUTF8(); +}; + +template +class UTF16_IntStream : public IntStream +{ +public: + ANTLR_UINT32 _LA( ANTLR_INT32 i); + void consume(); + ANTLR_MARKER index(); + void seek(ANTLR_MARKER seekPoint); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + + /// \brief Return the input element assuming an 8 bit ascii input + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 encoding (UTF32) + /// + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + + /// \brief 
Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 encoding (UTF32) + /// + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + + /// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not + /// + /// \param[in] input Input stream context pointer + /// \param[in] la 1 based offset of next input stream element + /// + /// \return Next input character in internal ANTLR3 encoding (UTF32) + /// + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + + /// \brief Consume the next character in a UTF16 input stream + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder ); + + /// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not + /// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance + /// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we + /// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream + /// is fubar but we just ignore that. + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder ); + + /// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not + /// + /// \param input Input stream context pointer + /// + void consume( ClassForwarder ); +}; + + + +template +class UTF32_IntStream : public IntStream +{ +public: + ANTLR_UINT32 _LA( ANTLR_INT32 i); + void consume(); + + /// \brief Calculate the current index in the output stream. 
+ /// \param[in] input Input stream context pointer + /// + ANTLR_MARKER index(); + void seek(ANTLR_MARKER seekPoint); + +protected: + void setupIntStream(bool machineBigEndian, bool inputBigEndian); + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + ANTLR_UINT32 _LA( ANTLR_INT32 i, ClassForwarder ); + + void consume( ClassForwarder ); + void consume( ClassForwarder ); + void consume( ClassForwarder ); + void consume( ClassForwarder ); +}; + +template +class TokenIntStream : public IntStream +{ +public: + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef IntStream BaseType; + +private: + /** Because the indirect call, though small in individual cases can + * mount up if there are thousands of tokens (very large input streams), callers + * of size can optionally use this cached size field. 
+ */ + ANTLR_UINT32 m_cachedSize; + +public: + TokenIntStream(); + ANTLR_UINT32 get_cachedSize() const; + void set_cachedSize( ANTLR_UINT32 cachedSize ); + + void consume(); + void consumeInitialHiddenTokens(); + ANTLR_UINT32 _LA( ANTLR_INT32 i ); + ANTLR_MARKER mark(); + ANTLR_UINT32 size(); + void release(); + ANTLR_MARKER tindex(); + void rewindLast(); + void rewind(ANTLR_MARKER marker); + void seek(ANTLR_MARKER index); + StringType getSourceName(); + +}; + +template +class TreeNodeIntStream : public IntStream +{ +public: + typedef typename ImplTraits::CommonTreeNodeStreamType CommonTreeNodeStreamType; + typedef IntStream BaseType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +public: + void consume(); + ANTLR_MARKER tindex(); + ANTLR_UINT32 _LA(ANTLR_INT32 i); + ANTLR_MARKER mark(); + void release(ANTLR_MARKER marker); + void rewindMark(ANTLR_MARKER marker); + void rewindLast(); + void seek(ANTLR_MARKER index); + ANTLR_UINT32 size(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3intstream.inl" + +#endif + diff --git a/runtime/Cpp/include/antlr3intstream.inl b/runtime/Cpp/include/antlr3intstream.inl new file mode 100755 index 000000000..e4de290c3 --- /dev/null +++ b/runtime/Cpp/include/antlr3intstream.inl @@ -0,0 +1,1661 @@ +ANTLR_BEGIN_NAMESPACE() + +template +ANTLR_INLINE IntStream::IntStream() +{ + m_lastMarker = 0; + m_upper_case = false; +} + +template +ANTLR_INLINE typename IntStream::StringType IntStream::getSourceName() +{ + return m_streamName; +} + +template +ANTLR_INLINE typename IntStream::StringType& IntStream::get_streamName() +{ + return m_streamName; +} + +template +ANTLR_INLINE const typename IntStream::StringType& IntStream::get_streamName() const +{ + return m_streamName; +} + +template +ANTLR_INLINE ANTLR_MARKER IntStream::get_lastMarker() const +{ + return m_lastMarker; +} + +template +ANTLR_INLINE void IntStream::setUcaseLA(bool flag) +{ + m_upper_case = flag; +} + 
+template +ANTLR_INLINE SuperType* IntStream::get_super() +{ + return static_cast(this); +} + +template +void IntStream::consume() +{ + SuperType* input = this->get_super(); + + const ANTLR_UINT8* nextChar = input->get_nextChar(); + const ANTLR_UINT8* data = input->get_data(); + ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); + + if ( nextChar < ( data + sizeBuf ) ) + { + /* Indicate one more character in this line + */ + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() ) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(nextChar + 1); + } + + /* Increment to next character position + */ + input->set_nextChar( nextChar + 1 ); + } +} + +template +ANTLR_UINT32 IntStream::_LA( ANTLR_INT32 la ) +{ + SuperType* input = this->get_super(); + const ANTLR_UINT8* nextChar = input->get_nextChar(); + const ANTLR_UINT8* data = input->get_data(); + ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); + + if (( nextChar + la - 1) >= (data + sizeBuf)) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + if( !m_upper_case ) + return (ANTLR_UCHAR)(*(nextChar + la - 1)); + else + return (ANTLR_UCHAR)toupper(*(nextChar + la - 1)); + } +} + +template +ANTLR_MARKER IntStream::mark() +{ + LexState* state; + SuperType* input = this->get_super(); + + /* New mark point + */ + input->inc_markDepth(); + + /* See if we are revisiting a mark as we can just reuse the vector + * entry if we are, otherwise, we need a new one + */ + if (input->get_markDepth() > input->get_markers().size() ) + { + input->get_markers().push_back( LexState() ); + LexState& state_r = input->get_markers().back(); + state = &state_r; + } + else + { + LexState& state_r = input->get_markers().at( input->get_markDepth() - 1 ); + state = &state_r; + + /* Assume no errors for speed, it will just blow up if the table failed + * for some reasons, hence lots of unit tests on the tables ;-) + */ + } + + /* We 
have created or retrieved the state, so update it with the current + * elements of the lexer state. + */ + state->set_charPositionInLine( input->get_charPositionInLine() ); + state->set_currentLine( input->get_currentLine() ); + state->set_line( input->get_line() ); + state->set_nextChar( input->get_nextChar() ); + + m_lastMarker = input->get_markDepth(); + + /* And that's it + */ + return input->get_markDepth(); +} + +template +ANTLR_MARKER IntStream::index() +{ + SuperType* input = this->get_super(); + return input->index_impl(); +} + +template +void IntStream::rewind(ANTLR_MARKER mark) +{ + SuperType* input = this->get_super(); + + /* Perform any clean up of the marks + */ + this->release(mark); + + /* Find the supplied mark state + */ + ANTLR_UINT32 idx = static_cast( mark-1 ); + typename ImplTraits::LexStateType& state = input->get_markers().at( idx ); + + /* Seek input pointer to the requested point (note we supply the void *pointer + * to whatever is implementing the int stream to seek). + */ + this->seek( (ANTLR_MARKER)state.get_nextChar() ); + + /* Reset to the reset of the information in the mark + */ + input->set_charPositionInLine( state.get_charPositionInLine() ); + input->set_currentLine( state.get_currentLine() ); + input->set_line( state.get_line() ); + input->set_nextChar( state.get_nextChar() ); + + /* And we are done + */ +} + +template +void IntStream::rewindLast() +{ + this->rewind(m_lastMarker); +} + +template +void IntStream::release(ANTLR_MARKER mark) +{ + SuperType* input = this->get_super(); + + /* We don't do much here in fact as we never free any higher marks in + * the hashtable as we just resuse any memory allocated for them. 
+ */ + input->set_markDepth( (ANTLR_UINT32)(mark - 1) ); + +} + +template +void IntStream::setupIntStream(bool, bool) +{ +} + +template +void IntStream::seek(ANTLR_MARKER seekPoint) +{ + ANTLR_INT32 count; + SuperType* input = this->get_super(); + + ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar(); + /* If the requested seek point is less than the current + * input point, then we assume that we are resetting from a mark + * and do not need to scan, but can just set to there. + */ + if (seekPoint <= nextChar) + { + input->set_nextChar((ANTLR_UINT8*) seekPoint); + } + else + { + count = (ANTLR_UINT32)(seekPoint - nextChar); + + while (count--) + { + this->consume(); + } + } +} + +template +IntStream::~IntStream() +{ +} + +template +ANTLR_UINT32 EBCDIC_IntStream::_LA( ANTLR_INT32 la) +{ + // EBCDIC to ASCII conversion table + // + // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX + // translation and the character tables are published all over the interweb. 
+ // + const ANTLR_UCHAR e2a[256] = + { + 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f, + 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97, + 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, + 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, + 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, + 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, + 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, + 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, + 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, + 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae, + 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, + 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7, + 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, + 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff, + 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e + }; + + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Translate the required character via the constant conversion table + // + return e2a[(*(input->get_nextChar() + la - 1))]; + } +} + +template +void 
EBCDIC_IntStream::setupIntStream() +{ + SuperType* super = this->get_super(); + super->set_charByteSize(1); +} + +template +ANTLR_UINT32 UTF16_IntStream::_LA( ANTLR_INT32 i) +{ + return this->_LA(i, ClassForwarder< typename ImplTraits::Endianness >() ); +} + +template +void UTF16_IntStream::consume() +{ + this->consume( ClassForwarder< typename ImplTraits::Endianness >() ); +} + +template +ANTLR_MARKER UTF16_IntStream::index() +{ + SuperType* input = this->get_super(); + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template +void UTF16_IntStream::seek(ANTLR_MARKER seekPoint) +{ + SuperType* input = this->get_super(); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. + // + if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) + { + input->set_nextChar( seekPoint ); + } + else + { + // Call consume until we reach the asked for seek point or EOF + // (seekPoint is ahead of us here, so scan while nextChar is still short of it) + while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint > (ANTLR_MARKER)input->get_nextChar() ) ) + { + this->consume(); + } + } +} + +template +void IntStream::findout_endian_spec(bool machineBigEndian, bool inputBigEndian) +{ + // We must install different UTF16 routines according to whether the input + // is the same endianess as the machine we are executing upon or not. If it is not + // then we must install methods that can convert the endianess on the fly as they go + // + + if(machineBigEndian == true) + { + // Machine is Big Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. 
+ // + if (inputBigEndian == true) + { + // Input is machine compatible + // + m_endian_spec = 1; + } + else + { + // Need to use methods that know that the input is little endian + // + m_endian_spec = 2; + } + } + else + { + // Machine is Little Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. + // + if (inputBigEndian == false) + { + // Input is machine compatible + // + m_endian_spec = 1; + } + else + { + // Need to use methods that know that the input is Big Endian + // + m_endian_spec = 3; + } + } +} + +template +void UTF16_IntStream::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(2); + + this->findout_endian_spec( machineBigEndian, inputBigEndian ); +} + +template +ANTLR_UINT32 IntStream::_LA( ANTLR_INT32 i, ClassForwarder ) +{ + assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); + switch(m_endian_spec) + { + case 1: + return this->_LA(i, ClassForwarder() ); + break; + case 2: + return this->_LA(i, ClassForwarder() ); + break; + case 3: + return this->_LA(i, ClassForwarder() ); + break; + default: + break; + } + return 0; +} + +template +void IntStream::consume( ClassForwarder ) +{ + assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); + switch(m_endian_spec) + { + case 1: + this->consume( ClassForwarder() ); + break; + case 2: + this->consume( ClassForwarder() ); + break; + case 3: + this->consume( ClassForwarder() ); + break; + default: + break; + } +} + +template +ANTLR_UINT32 UTF16_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + UTF16* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // (get_super() must be called, not merely named) + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && 
(ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in natural machine byte order + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar++; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // + ch = *--nextChar; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + { + ch2 = *(nextChar-1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar--; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (native machine byte order) + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template +ANTLR_UINT32 UTF16_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Little Endian byte order + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // (read the two elements just before nextChar, low byte first) + ch = *(nextChar - 2) + (*(nextChar - 1) << 8); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // (only look behind while there is still input before us) + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data()) + { + ch2 = *(nextChar - 2) + (*(nextChar - 1) << 8); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (little endian byte order) + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template +ANTLR_UINT32 UTF16_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + ANTLR_UCHAR* nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = this->get_super(); + nextChar = input->get_nextChar(); + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) + { + // Advance our copy of the input pointer + // + // Next char in Big Endian byte order + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) + { + // Get the previous 16 bit character + // (read the two elements just before nextChar, high byte first) + ch = (*(nextChar - 2) << 8) + *(nextChar - 1); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // (only look behind while there is still input before us) + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data()) + { + ch2 = (*(nextChar - 2) << 8) + *(nextChar - 1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (big endian byte order) + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +template +void UTF16_IntStream::consume( ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine( input->get_nextChar() + 1 ); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in natural machine byte order + // + ch = *(input->get_nextChar()); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1 ); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Next character is in natural machine byte order + // + ch2 = *(input->get_nextChar()); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1 ); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + +} + +template +void UTF16_IntStream::consume( ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(input->get_nextChar() + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in litle endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +template +void UTF16_IntStream::consume( ClassForwarder ) +{ + SuperType* input; + UTF32 ch; + UTF32 ch2; + + input = this->get_super(); + + // Buffer size is always in bytes + // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + // Reset for start of a new line of input + // + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(input->get_nextChar() + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in big endian form + // + ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... 
+ // + if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) + { + // Big endian + // + ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->set_nextChar( input->get_nextChar() + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +template +ANTLR_UINT32 UTF32_IntStream::_LA( ANTLR_INT32 i) +{ + return this->_LA( i, ClassForwarder() ); +} + +template +ANTLR_MARKER UTF32_IntStream::index() +{ + SuperType* input = this->get_super(); + return (ANTLR_MARKER)(input->get_nextChar()); +} + +template +void UTF32_IntStream::seek(ANTLR_MARKER seekPoint) +{ + SuperType* input; + + input = this->get_super(); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. 
+ // + if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) + { + input->set_nextChar( static_cast(seekPoint) ); + } + else + { + // Call consume until we reach the asked for seek point or EOF + // (seekPoint is ahead of us here, so scan while nextChar is still short of it) + while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint > (ANTLR_MARKER)input->get_nextChar()) ) + { + this->consume(); + } + } + +} + +template +void UTF32_IntStream::setupIntStream(bool machineBigEndian, bool inputBigEndian) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(4); + + this->findout_endian_spec(machineBigEndian, inputBigEndian); +} + +template +ANTLR_UINT32 UTF32_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + return (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + } +} + +template +ANTLR_UINT32 UTF32_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + ANTLR_UCHAR c; + + c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Big Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template +ANTLR_UINT32 UTF32_IntStream::_LA( ANTLR_INT32 la, ClassForwarder ) +{ + SuperType* input = this->get_super(); + + if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) + { + return ANTLR_CHARSTREAM_EOF; + } + else + { + ANTLR_UCHAR c; + + c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); + + // Swap Endianess to Little Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +template +void UTF32_IntStream::consume() +{ + SuperType* input = this->get_super(); + + // SizeBuf is always in bytes + // + if ( input->get_nextChar() < 
(input->get_data() + input->get_sizeBuf()/4 )) + { + /* Indicate one more character in this line + */ + input->inc_charPositionInLine(); + + if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine( input->get_nextChar() + 1 ); + } + + /* Increment to next character position + */ + input->set_nextChar( input->get_nextChar() + 1 ); + } +} + +template +void UTF8_IntStream::setupIntStream(bool, bool) +{ + SuperType* super = this->get_super(); + super->set_charByteSize(0); +} + +// ------------------------------------------------------ +// Following is from Unicode.org (see antlr3convertutf.c) +// + +/// Index into the table below with the first byte of a UTF-8 sequence to +/// get the number of trailing bytes that are supposed to follow it. +/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is +/// left as-is for anyone who may want to do such conversion, which was +/// allowed in earlier algorithms. +/// +template +const ANTLR_UINT32* UTF8_IntStream::TrailingBytesForUTF8() +{ + static const ANTLR_UINT32 trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 + }; + + return trailingBytesForUTF8; +} + +/// Magic values subtracted from a buffer value during UTF8 conversion. +/// This table contains as many values as there might be trailing bytes +/// in a UTF-8 sequence. 
+/// +template +const UTF32* UTF8_IntStream::OffsetsFromUTF8() +{ + static const UTF32 offsetsFromUTF8[6] = + { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL + }; + return offsetsFromUTF8; +} + +// End of Unicode.org tables +// ------------------------- + + +/** \brief Consume the next character in a UTF8 input stream + * + * \param input Input stream context pointer + */ +template +void UTF8_IntStream::consume() +{ + SuperType* input = this->get_super(); + const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); + const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); + + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + + if (nextChar < (input->get_data() + input->get_sizeBuf())) + { + // Indicate one more character in this line + // + input->inc_charPositionInLine(); + + // Are there more bytes needed to make up the whole thing? + // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + + if ((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf())) + { + input->set_nextChar( input->get_data() + input->get_sizeBuf() ); + return; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so + // we allow it. 
+ // + ch = 0; + switch (extraBytesToRead) + { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + if (ch == input->get_newlineChar()) + { + /* Reset for start of a new line of input + */ + input->inc_line(); + input->set_charPositionInLine(0); + input->set_currentLine(nextChar); + } + + // Update input pointer + // + input->set_nextChar(nextChar); + } +} + +/** \brief Return the input element assuming a UTF8 input + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +template +ANTLR_UCHAR UTF8_IntStream::_LA(ANTLR_INT32 la) +{ + SuperType* input = this->get_super(); + const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); + const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); + ANTLR_UINT32 extraBytesToRead; + ANTLR_UCHAR ch; + ANTLR_UINT8* nextChar; + + nextChar = input->get_nextChar(); + + // Do we need to traverse forwards or backwards? + // - LA(0) is treated as LA(1) and we assume that the nextChar is + // already positioned. + // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding + // - LA(-n) means we must traverse backwards n chracters + // + if (la > 1) { + + // Make sure that we have at least one character left before trying to + // loop through the buffer. + // + if (nextChar < (input->get_data() + input->get_sizeBuf())) + { + // Now traverse n-1 characters forward + // + while (--la > 0) + { + // Does the next character require trailing bytes? 
+ // If so advance the pointer by that many bytes as well as advancing + // one position for what will be at least a single byte character. + // + nextChar += trailingBytesForUTF8[*nextChar] + 1; + + // Does that calculation take us past the byte length of the buffer? + // + if (nextChar >= (input->get_data() + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + } + } + else + { + return ANTLR_CHARSTREAM_EOF; + } + } + else + { + // la <= 1 here. For la of 0 or 1 the loop body never runs; for a negative + // la we move the pointer back that many characters, stopping at the start of the buffer + // + while (nextChar > input->get_data() && la++ < 0) + { + // Traversing backwards in UTF8 means decrementing by one + // then continuing to decrement for as long as the byte pattern + // is flagged as being a trailing byte of an encoded code point. + // Trailing UTF8 bytes always start with 10 in binary. We assume that + // the UTF8 is well formed and do not check boundary conditions + // + nextChar--; + while ((*nextChar & 0xC0) == 0x80) + { + nextChar--; + } + } + } + + // nextChar is now pointing at the UTF8 encoded character that we need to + // decode and return. + // + // Are there more bytes needed to make up the whole thing? 
+ // NOTE(review): *nextChar is read on the next line before the end-of-buffer test, and the la <= 1 path has no earlier bounds check - confirm the byte at data + sizeBuf is readable. + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf())) + { + return ANTLR_CHARSTREAM_EOF; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // + ch = 0; + switch (extraBytesToRead) + { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Subtract the table entry for this sequence length to get the final value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; +} + +/// Start with no cached token count. +template +TokenIntStream::TokenIntStream() +{ + m_cachedSize = 0; +} + +/// Return the cached token count (0 until something stores a value). +template +ANTLR_UINT32 TokenIntStream::get_cachedSize() const +{ + return m_cachedSize; +} + +/// Store a token count for later calls to get_cachedSize(). +template +void TokenIntStream::set_cachedSize( ANTLR_UINT32 cachedSize ) +{ + m_cachedSize = cachedSize; +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. 
+ */ +template +void TokenIntStream::consume() +{ + TokenStreamType* cts = static_cast(this); + + // Only advance while p is below the cached token count (m_cachedSize) + if((ANTLR_UINT32)cts->get_p() < m_cachedSize ) + { + cts->inc_p(); + cts->set_p( cts->skipOffTokenChannels(cts->get_p()) ); + } +} +/// Report the tokens that precede the current index to the debugger, then +/// clear the initial-stream-state flag. +template +void TokenIntStream::consumeInitialHiddenTokens() +{ + ANTLR_MARKER first; + ANTLR_INT32 i; + TokenStreamType* ts; + + ts = this->get_super(); + first = this->index(); + + // NOTE(review): the for statement below is damaged in this copy of the patch + // (its condition, increment and opening brace are missing) - restore it from the upstream file. + for (i=0; iget_debugger()->consumeHiddenToken(ts->get(i)); + } + + ts->set_initialStreamState(false); +} + + +/// Return the type of the token that _LT(i) yields, or +/// CommonTokenType::TOKEN_INVALID when _LT(i) returns NULL. +template +ANTLR_UINT32 TokenIntStream::_LA( ANTLR_INT32 i ) +{ + const CommonTokenType* tok; + TokenStreamType* ts = static_cast(this); + + tok = ts->_LT(i); + + if (tok != NULL) + { + return tok->get_type(); + } + else + { + return CommonTokenType::TOKEN_INVALID; + } + +} + +/// Remember the current index as the last marker and return it. +template +ANTLR_MARKER TokenIntStream::mark() +{ + BaseType::m_lastMarker = this->index(); + return BaseType::m_lastMarker; +} + +/// Return the number of tokens. A cached value of 0 counts as "not cached", +/// so the token container is consulted on every call until it is non-empty. +template +ANTLR_UINT32 TokenIntStream::size() +{ + if (this->get_cachedSize() > 0) + { + return this->get_cachedSize(); + } + TokenStreamType* cts = this->get_super(); + + this->set_cachedSize( static_cast(cts->get_tokens().size()) ); + return this->get_cachedSize(); +} + +/// Does nothing. 
+template +void TokenIntStream::release() +{ + return; +} + +/// Return the current value of p from the owning token stream. +template +ANTLR_MARKER TokenIntStream::tindex() +{ + return this->get_super()->get_p(); +} + +/// Rewind to the marker recorded by the most recent mark(). +template +void TokenIntStream::rewindLast() +{ + this->rewind( this->get_lastMarker() ); +} + +/// Rewinding is implemented as a seek to the marker. +template +void TokenIntStream::rewind(ANTLR_MARKER marker) +{ + return this->seek(marker); +} + +/// Set p of the token stream to the given index. +template +void TokenIntStream::seek(ANTLR_MARKER index) +{ + TokenStreamType* cts = static_cast(this); + + cts->set_p( static_cast(index) ); +} + + +/// Return a string that represents the name associated with the input source +/// +/// Takes no parameters; the name is looked up through this->get_super(). 
+/// +/// \returns The file name reported by the token source of the owning token stream. +/// \implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +template +typename TokenIntStream::StringType +TokenIntStream::getSourceName() +{ + // Slightly convoluted as we must trace back to the lexer's input source + // via the token source. The streamName that is here is not initialized + // because this is a token stream, not a file or string stream, which are the + // only things that have a context for a source name. + // + return this->get_super()->get_tokenSource()->get_fileName(); +} + +/// Advance p by one, filling the node buffer first when p is still -1. +template +void TreeNodeIntStream::consume() +{ + CommonTreeNodeStreamType* ctns = this->get_super(); + if( ctns->get_p() == -1 ) + ctns->fillBufferRoot(); + ctns->inc_p(); +} +/// Return the current value of p as a marker. +template +ANTLR_MARKER TreeNodeIntStream::tindex() +{ + CommonTreeNodeStreamType* ctns = this->get_super(); + return (ANTLR_MARKER)(ctns->get_p()); +} + +/// Return the type of the node that _LT(i) yields, or +/// CommonTokenType::TOKEN_INVALID when _LT(i) returns NULL. +template +ANTLR_UINT32 TreeNodeIntStream::_LA(ANTLR_INT32 i) +{ + CommonTreeNodeStreamType* tns = this->get_super(); + + // Ask LT for the 'token' at that position + // + TreeType* t = tns->_LT(i); + + if (t == NULL) + { + return CommonTokenType::TOKEN_INVALID; + } + + // Token node was there so return the type of it + // + return t->get_type(); +} + +/// Fill the node buffer if needed, record the current index as the last +/// marker and return it. +template +ANTLR_MARKER TreeNodeIntStream::mark() +{ + CommonTreeNodeStreamType* ctns = this->get_super(); + + if (ctns->get_p() == -1) + { + ctns->fillBufferRoot(); + } + + // Return the current mark point + // + this->set_lastMarker( this->index() ); + + return this->get_lastMarker(); + +} + +/// Does nothing; the marker is ignored. 
+template +void TreeNodeIntStream::release(ANTLR_MARKER marker) +{ + +} + +/// Rewinding to a marker is implemented as a seek to it. +template +void TreeNodeIntStream::rewindMark(ANTLR_MARKER marker) +{ + this->seek(marker); +} + +/// Seek back to the marker recorded by the most recent mark(). +template +void TreeNodeIntStream::rewindLast() +{ + this->seek( this->get_lastMarker() ); +} + +/// Set p to the given index. +/// NOTE(review): p is compared with -1 elsewhere in this class but is written +/// here through ANTLR_UINT32_CAST - confirm the intended signedness. +template +void TreeNodeIntStream::seek(ANTLR_MARKER index) +{ + CommonTreeNodeStreamType* ctns = this->get_super(); + ctns->set_p( ANTLR_UINT32_CAST(index) ); +} + +/// Return the number of buffered nodes, filling the buffer first when p is -1. +template +ANTLR_UINT32 TreeNodeIntStream::size() +{ + 
CommonTreeNodeStreamType* ctns = this->get_super(); + + if (ctns->get_p() == -1) + { + ctns->fillBufferRoot(); + } + + return ctns->get_nodes().size(); +} + + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3lexer.hpp b/runtime/Cpp/include/antlr3lexer.hpp new file mode 100755 index 000000000..abd7768ee --- /dev/null +++ b/runtime/Cpp/include/antlr3lexer.hpp @@ -0,0 +1,248 @@ +/** \file + * Base interface for any ANTLR3 lexer. + * + * An ANTLR3 lexer builds from two sets of components: + * + * - The runtime components that provide common functionality such as + * traversing character streams, building tokens for output and so on. + * - The generated rules and structure of the actual lexer, which call upon the + * runtime components. + * + * A lexer class contains a character input stream, a base recognizer interface + * (which it will normally implement) and a token source interface (which it also + * implements). The token source interface is called by a token consumer (such as + * a parser, but in theory it can be anything that wants a set of abstract + * tokens in place of a raw character stream). + * + * So then, we set up a lexer in a sequence akin to: + * + * - Create a character stream (something which implements ANTLR3_INPUT_STREAM) + * and initialize it. + * - Create a lexer interface and tell it where its input stream is. + * This will cause the creation of a base recognizer class, which it will + * override with its own implementations of some methods. The lexer creator + * can also then in turn override anything it likes. + * - The lexer token source interface is then passed to some interface that + * knows how to use it, by calling for a next token. + * - When a next token is called, let ze lexing begin. + * + */ +#ifndef _ANTLR3_LEXER_HPP +#define _ANTLR3_LEXER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +/* Definitions + */ +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** Value that terminates the ANTLR_UCHAR arrays passed to Lexer::matchs(). */ +static const ANTLR_UINT32 ANTLR_STRING_TERMINATOR = 0xFFFFFFFF; + +/** Base class for lexers. It derives from the recognizer type and the token + * source type supplied by ImplTraits and keeps a pointer to the character + * input stream it is currently reading from. + */ +template +class Lexer : public ImplTraits::template RecognizerType< typename ImplTraits::InputStreamType >, + public ImplTraits::TokenSourceType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::InputStreamType InputStreamType; + typedef InputStreamType StreamType; + typedef typename InputStreamType::IntStreamType IntStreamType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef typename ImplTraits::StreamDataType TokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::StringStreamType StringStreamType; + typedef typename ImplTraits::template RecognizerType< InputStreamType > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef typename ImplTraits::template ExceptionBaseType ExceptionBaseType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + typedef typename RecognizerType::DebugEventListenerType DebuggerType; + +private: + /** A pointer to the character stream whence this lexer is receiving + * characters. + * TODO: I may come back to this and implement charstream outside + * the input stream as per the java implementation. 
+ */ + InputStreamType* m_input; + +public: + Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state); + Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state); + + InputStreamType* get_input() const; + IntStreamType* get_istream() const; + RecognizerType* get_rec(); + const RecognizerType* get_rec() const; + TokenSourceType* get_tokSource(); + + //functions used in .stg file + const RecognizerType* get_recognizer() const; + RecognizerSharedStateType* get_lexstate() const; + void set_lexstate( RecognizerSharedStateType* lexstate ); + const TokenSourceType* get_tokSource() const; + CommonTokenType* get_ltoken() const; + void set_ltoken( const CommonTokenType* ltoken ); + bool hasFailed() const; + ANTLR_INT32 get_backtracking() const; + void inc_backtracking(); + void dec_backtracking(); + bool get_failedflag() const; + void set_failedflag( bool failed ); + InputStreamType* get_strstream() const; + ANTLR_MARKER index() const; + void seek(ANTLR_MARKER index); + const CommonTokenType* EOF_Token() const; + bool hasException() const; + ExceptionBaseType* get_exception() const; + void constructEx(); + void lrecover(); + ANTLR_MARKER mark(); + void rewind(ANTLR_MARKER marker); + void rewindLast(); + void setText( const StringType& text ); + void skip(); + RuleMemoType* getRuleMemo() const; + DebuggerType* get_debugger() const; + void setRuleMemo(RuleMemoType* rulememo); + ANTLR_UINT32 LA(ANTLR_INT32 i); + void consume(); + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); + bool haveParsedRule(ANTLR_MARKER ruleIndex); + + /** Sets the charstream source for the lexer and + * causes it to be reset. + */ + void setCharStream(InputStreamType* input); + + /*! + * \brief + * Change to a new input stream, remembering the old one. + * + * The lexer acted on is this object; the only + * argument is the stream to switch to. + * + * \param input + * New input stream to install as the current one. 
+ * + * Switches the current character input stream to + * a new one, saving the old one, which we will revert to at the end of this + * new one. + */ + void pushCharStream(InputStreamType* input); + + /*! + * \brief + * Stops using the current input stream and reverts to any prior + * input stream on the stack. + * + * Takes no parameters; it works on the stream stack + * held in the recognizer state. + * + * Abandons the current input stream, whether it + * is empty or not and reverts to the previous stacked input stream. + * + * \remark + * The function fails silently if there are no prior input streams. + */ + void popCharStream(); + + /** Copies the supplied token over the token held in the recognizer state, making it the next token in + * the stream. + */ + void emitNew(const CommonTokenType& token); + + /** Constructs a new token from the lexer stored information + */ + CommonTokenType* emit(); + + /** Attempts to match and consume the specified string from the input + * stream. Note that strings must be passed as terminated arrays of ANTLR_UCHAR. Strings are terminated + * with 0xFFFFFFFF, which is an invalid UTF32 character + */ + bool matchs(ANTLR_UCHAR* string); + + /** Matches and consumes the specified character from the input stream. + * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer + * implementation is source encoding agnostic and so input streams do not generally need to + * override the default implementation. + */ + bool matchc(ANTLR_UCHAR c); + + /** Matches any character in the supplied range (I suppose it could be a token range too + * but this would only be useful if the tokens were in some guaranteed order which is + * only going to happen with a hand crafted token set). 
+ */ + bool matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high); + + /** Consumes the next token/char in the input stream + * regardless of what it actually is. + */ + void matchAny(); + + /** Recovers from an error found in the input stream. + * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also + * be from a mismatched token that the (*match)() could not recover from. + */ + void recover(); + + /** Function to return the current line number in the input stream + */ + ANTLR_UINT32 getLine(); + ANTLR_MARKER getCharIndex(); + ANTLR_UINT32 getCharPositionInLine(); + + /** Function to return the text so far for the current token being generated + */ + StringType getText(); + + //Other utility functions + void fillExceptionData( ExceptionBaseType* ex ); + + /** Default lexer error handler (works for 8 bit streams only!!!) + */ + void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex); + void exConstruct(); + TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, BitsetListType* follow); + + /** Destructor: releases what the lexer holds in its shared state + */ + ~Lexer(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3lexer.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3lexer.inl b/runtime/Cpp/include/antlr3lexer.inl new file mode 100755 index 000000000..f00de404b --- /dev/null +++ b/runtime/Cpp/include/antlr3lexer.inl @@ -0,0 +1,594 @@ +ANTLR_BEGIN_NAMESPACE() + +// NOTE(review): this overload does not set m_input; it stays uninitialised until setCharStream() is called. +template +Lexer::Lexer(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) + :Lexer::RecognizerType(sizeHint, state) +{ +} + +// Construct the recognizer base, then install the given input stream. 
+template +Lexer::Lexer(ANTLR_UINT32 sizeHint, InputStreamType* input, RecognizerSharedStateType* state) + :Lexer::RecognizerType(sizeHint, state) +{ + this->setCharStream(input); +} + +template +typename Lexer::InputStreamType* Lexer::get_input() const +{ + return m_input; +} + +template +typename Lexer::IntStreamType* 
Lexer::get_istream() const +{ + return m_input; +} + +template +typename Lexer::RecognizerType* Lexer::get_rec() +{ + return this; +} + +template +typename Lexer::TokenSourceType* Lexer::get_tokSource() +{ + return this; +} + +// Build a one-off description of the lexer error held in ex and hand it to +// ImplTraits::displayRecognitionError(). The first argument is unused. +template +void Lexer::displayRecognitionError( ANTLR_UINT8** , ExceptionBaseType* ex) +{ + StringStreamType err_stream; + + // See if there is a 'filename' we can use + // NOTE(review): the test uses getName() but the value printed is get_streamName() - confirm which one is intended. + if( ex->getName().empty() ) + { + err_stream << "-unknown source-("; + } + else + { + err_stream << ex->get_streamName().c_str(); + err_stream << "("; + } + + err_stream << ex->get_line() << ")"; + err_stream << ": lexer error " << ex->getType() << " :\n\t" + << ex->get_message() << " at offset " + << ex->get_charPositionInLine()+1 << ", "; + + { + ANTLR_UINT32 width; + + // Distance from the error index to the end of the input + width = ANTLR_UINT32_CAST(( (ANTLR_UINT8*)(m_input->get_data()) + + (m_input->size() )) - (ANTLR_UINT8*)( ex->get_index() )); + + if (width >= 1) + { + if (isprint(ex->get_c() )) + { + err_stream << "near '" << ex->get_c() << "' :\n"; + } + else + { + char tmp[128]; + sprintf( tmp, "near char(%#02X) :\n", ex->get_c() ); + err_stream << tmp; + } + err_stream << "\t"; + err_stream.width( width > 20 ? 
20 : width ); + err_stream << ex->get_index() << "\n"; + } + else + { + err_stream << "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"; + err_stream << "\t The lexer was matching from line " + << this->get_state()->get_tokenStartLine() + << ", offset " << this->get_state()->get_tokenStartCharPositionInLine() + << ", which\n\t "; + width = ANTLR_UINT32_CAST(((ANTLR_UINT8*)(m_input->get_data() )+ + (m_input->size())) - + (ANTLR_UINT8*)(this->get_state()->get_tokenStartCharIndex() )); + + if (width >= 1) + { + err_stream << "looks like this:\n\t\t"; + err_stream.width( width > 20 ? 
20 : width ); + err_stream << this->get_state()->get_tokenStartCharIndex() << "\n"; + } + else + { + err_stream << "is also the end of the line, so you must check your lexer rules\n"; + } + } + } + ImplTraits::displayRecognitionError( err_stream.str() ); +} + +// Copy the current character, line, column, index and stream name from the +// input stream into ex and give it the message "Unexpected character". +template +void Lexer::fillExceptionData( ExceptionBaseType* ex ) +{ + ex->set_c( m_input->_LA(1) ); /* Current input character */ + ex->set_line( m_input->get_line() ); /* Line number comes from stream */ + ex->set_charPositionInLine( m_input->get_charPositionInLine() ); /* Line offset also comes from the stream */ + ex->set_index( m_input->index() ); + ex->set_streamName( m_input->get_fileName() ); + ex->set_message( "Unexpected character" ); +} + +template +void Lexer::setCharStream(InputStreamType* input) +{ + /* Install the input interface + */ + m_input = input; + + /* Set the current token to nothing + */ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + state->set_token_present( false ); + state->set_text(""); + state->set_tokenStartCharIndex(-1); + + /* Copy the name of the char stream to the token source + */ + this->get_tokSource()->set_fileName( input->get_fileName() ); +} + +template +void Lexer::pushCharStream(InputStreamType* input) +{ + // We have a stack, so we can save the current input stream + // into it. mark() records the current position first so that + // popCharStream() can return to it with rewindLast(). + this->get_istream()->mark(); + this->get_rec()->get_state()->get_streams().push(this->get_input()); + + // And now we can install this new one + // + this->setCharStream(input); +} + +template +void Lexer::popCharStream() +{ + InputStreamType* input; + + // If we do not have a stream stack or we are already at the + // stack bottom, then do nothing. 
+ // + typename RecognizerSharedStateType::StreamsType& streams = this->get_rec()->get_state()->get_streams(); + if ( streams.size() > 0) + { + // We just leave the current stream to its fate, we do not close + // it or anything as we do not know what the programmer intended + // for it. 
This method can always be overridden of course. + // So just find out what was currently saved on the stack and use + // that now, then pop it from the stack. + // + input = streams.top(); + streams.pop(); + + // Now install the stream as the current one. + // + this->setCharStream(input); + this->get_istream()->rewindLast(); + } + return; +} + +// Overwrite the token held in the recognizer state with a copy of the given token. +template +void Lexer::emitNew(const CommonTokenType& token) +{ + CommonTokenType* tok = this->get_rec()->get_state()->get_token(); /* Voila! */ + *tok = token; +} + +template +typename Lexer::CommonTokenType* Lexer::emit() +{ + /* We could check pointers to token factories and so on, but + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. + */ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + state->set_token_present(true); + CommonTokenType* token = state->get_token(); + token->set_input( this->get_input() ); + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + token->set_type( state->get_type() ); + token->set_channel( state->get_channel() ); + token->set_startIndex( state->get_tokenStartCharIndex() ); + token->set_stopIndex( this->getCharIndex() - 1 ); + token->set_line( state->get_tokenStartLine() ); + token->set_charPositionInLine( state->get_tokenStartCharPositionInLine() ); + + token->set_tokText( state->get_text() ); + token->set_lineStart( this->get_input()->get_currentLine() ); + + return token; +} + +template +Lexer::~Lexer() +{ + // This may have been a delegate or delegator lexer, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. 
+ // + RecognizerSharedStateType* state = this->get_rec()->get_state(); + + if ( state != NULL) + { + state->get_streams().clear(); + } +} + +// Match each element of str, up to ANTLR_STRING_TERMINATOR, against the input, +// consuming as it goes. Returns false at the first mismatch. +template +bool Lexer::matchs(ANTLR_UCHAR* str ) +{ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + while (*str != ANTLR_STRING_TERMINATOR) + { + if ( this->get_istream()->_LA(1) != (*str)) + { + // While backtracking only the failed flag is set: no exception, no recovery + if ( state->get_backtracking() > 0) + { + state->set_failed(true); + return false; + } + + this->exConstruct(); + state->set_failed( true ); + + /* TODO: Implement exception creation more fully perhaps + */ + this->recover(); + return false; + } + + /* Matched correctly, do consume it + */ + this->get_istream()->consume(); + str++; + + } + /* Reset any failed indicator + */ + state->set_failed( false ); + return true; +} + +template +bool Lexer::matchc(ANTLR_UCHAR c) +{ + if (this->get_istream()->_LA(1) == c) + { + /* Matched correctly, do consume it + */ + this->get_istream()->consume(); + + /* Reset any failed indicator + */ + this->get_rec()->get_state()->set_failed( false ); + + return true; + } + + /* Failed to match, exception and recovery time. + * NOTE(review): unlike matchs(), the failed flag is not set on the non-backtracking path below - confirm this is intended. + */ + if(this->get_rec()->get_state()->get_backtracking() > 0) + { + this->get_rec()->get_state()->set_failed( true ); + return false; + } + + this->exConstruct(); + + /* TODO: Implement exception creation more fully perhaps + */ + this->recover(); + + return false; +} + +template +bool Lexer::matchRange(ANTLR_UCHAR low, ANTLR_UCHAR high) +{ + ANTLR_UCHAR c; + + /* What is in the stream at the moment? 
+ */ + c = this->get_istream()->_LA(1); + if ( c >= low && c <= high) + { + /* Matched correctly, consume it + */ + this->get_istream()->consume(); + + /* Reset any failed indicator + */ + this->get_rec()->get_state()->set_failed( false ); + + return true; + } + + /* Failed to match, exception and recovery time. 
+ */ + + if (this->get_rec()->get_state()->get_backtracking() > 0) + { + this->get_rec()->get_state()->set_failed( true ); + return false; + } + + // NOTE(review): as in matchc(), the failed flag is not set on this path although matchs() does set it - confirm. + this->exConstruct(); + + /* TODO: Implement exception creation more fully + */ + this->recover(); + + return false; +} + +// Consume one input element without looking at it. +template +void Lexer::matchAny() +{ + this->get_istream()->consume(); +} + +// Recovery consists of consuming one input element. +template +void Lexer::recover() +{ + this->get_istream()->consume(); +} + +template +ANTLR_UINT32 Lexer::getLine() +{ + return this->get_input()->get_line(); +} + +template +ANTLR_MARKER Lexer::getCharIndex() +{ + return this->get_istream()->index(); +} + +template +ANTLR_UINT32 Lexer::getCharPositionInLine() +{ + return this->get_input()->get_charPositionInLine(); +} + +// Return the text stored in the state when it is non-empty; otherwise take the +// substring of the input from the token start index up to the current position. +template +typename Lexer::StringType Lexer::getText() +{ + RecognizerSharedStateType* state = this->get_rec()->get_state(); + if ( !state->get_text().empty() ) + { + return state->get_text(); + + } + return this->get_input()->substr( state->get_tokenStartCharIndex(), + this->getCharIndex() - this->get_input()->get_charByteSize() + ); +} + +template +void Lexer::exConstruct() +{ + // NOTE(review): the pointer returned by new is discarded here; presumably the + // exception constructor registers the object with the recognizer - confirm. + new ANTLR_Exception( this->get_rec(), "" ); +} + +// A lexer has no missing-symbol recovery: always returns NULL. 
+template< class ImplTraits> +typename Lexer::TokenType* Lexer::getMissingSymbol( IntStreamType*, + ExceptionBaseType*, + ANTLR_UINT32 , BitsetListType*) +{ + return NULL; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer::RecognizerType* Lexer::get_rec() const +{ + return this; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer::RecognizerType* Lexer::get_recognizer() const +{ + return this->get_rec(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer::RecognizerSharedStateType* Lexer::get_lexstate() const +{ + return this->get_rec()->get_state(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::set_lexstate( RecognizerSharedStateType* lexstate ) +{ + this->get_rec()->set_state(lexstate); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Lexer::TokenSourceType* 
Lexer::get_tokSource() const +{ + return this; +} + +// NOTE(review): this calls token() on the state while emit() and emitNew() call get_token() - confirm that both accessors exist. +template< class ImplTraits> +ANTLR_INLINE typename Lexer::CommonTokenType* Lexer::get_ltoken() const +{ + return this->get_lexstate()->token(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::set_ltoken( const CommonTokenType* ltoken ) +{ + this->get_lexstate()->set_token( ltoken ); +} + +// Same value as get_failedflag(): the failed flag of the shared state. +template< class ImplTraits> +ANTLR_INLINE bool Lexer::hasFailed() const +{ + return this->get_lexstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Lexer::get_backtracking() const +{ + return this->get_lexstate()->get_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::inc_backtracking() +{ + this->get_lexstate()->inc_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::dec_backtracking() +{ + this->get_lexstate()->dec_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer::get_failedflag() const +{ + return this->get_lexstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::set_failedflag( bool failed ) +{ + this->get_lexstate()->set_failed(failed); +} + +// Same pointer as get_input(). +template< class ImplTraits> +ANTLR_INLINE typename Lexer::InputStreamType* Lexer::get_strstream() const +{ + return this->get_input(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer::index() const +{ + return this->get_istream()->index(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::seek(ANTLR_MARKER index) +{ + this->get_istream()->seek(index); +} + +// Return the address of the EOF token owned by the token source. 
+template< class ImplTraits> +ANTLR_INLINE const typename Lexer::CommonTokenType* Lexer::EOF_Token() const +{ + const CommonTokenType& eof_token = this->get_tokSource()->get_eofToken(); + return &eof_token; +} + +template< class ImplTraits> +ANTLR_INLINE bool Lexer::hasException() const +{ + return this->get_lexstate()->get_error(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer::ExceptionBaseType* Lexer::get_exception() const +{ + return 
this->get_lexstate()->get_exception(); +} + +// Forwards to exConstruct() on the recognizer returned by get_rec(). +template< class ImplTraits> +ANTLR_INLINE void Lexer::constructEx() +{ + this->get_rec()->exConstruct(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Lexer::mark() +{ + return this->get_istream()->mark(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::rewind(ANTLR_MARKER marker) +{ + this->get_istream()->rewind(marker); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::rewindLast() +{ + this->get_istream()->rewindLast(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + this->get_rec()->memoize( ruleIndex, ruleParseStart ); +} + +// Forwards to alreadyParsedRule() on the recognizer. +template< class ImplTraits> +ANTLR_INLINE bool Lexer::haveParsedRule(ANTLR_MARKER ruleIndex) +{ + return this->get_rec()->alreadyParsedRule(ruleIndex); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::setText( const StringType& text ) +{ + this->get_lexstate()->set_text(text); +} + +// Make the token source's skip token the current token of the lexer state. 
+template< class ImplTraits> +ANTLR_INLINE void Lexer::skip() +{ + CommonTokenType& skipToken = this->get_tokSource()->get_skipToken(); + this->get_lexstate()->set_token( &skipToken ); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer::RuleMemoType* Lexer::getRuleMemo() const +{ + return this->get_lexstate()->get_rulememo(); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::setRuleMemo(RuleMemoType* rulememo) +{ + return this->get_lexstate()->set_rulememo(rulememo); +} + +template< class ImplTraits> +ANTLR_INLINE typename Lexer::DebuggerType* Lexer::get_debugger() const +{ + return this->get_rec()->get_debugger(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Lexer::LA(ANTLR_INT32 i) +{ + return this->get_istream()->_LA(i); +} + +template< class ImplTraits> +ANTLR_INLINE void Lexer::consume() +{ + return this->get_istream()->consume(); +} + +ANTLR_END_NAMESPACE() + diff --git a/runtime/Cpp/include/antlr3memory.hpp b/runtime/Cpp/include/antlr3memory.hpp new 
file mode 100755 index 000000000..2aa320c1a --- /dev/null +++ b/runtime/Cpp/include/antlr3memory.hpp @@ -0,0 +1,163 @@ +#ifndef _ANTLR3MEMORY_HPP +#define _ANTLR3MEMORY_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include + +#include +#include +#include +#include +#include + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +// Default allocation policy: malloc/free based helpers, class-level operator +// new/delete built on them, and container wrappers that use AllocatorType. +class DefaultAllocPolicy +{ +public: + //limitation of c++. 
unable to write a typedef + template + class AllocatorType : public std::allocator + { + public: + typedef TYPE value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template struct rebind { + typedef AllocatorType other; + }; + + AllocatorType() throw() {} + AllocatorType( const AllocatorType& alloc ) throw() {} + template AllocatorType(const AllocatorType& alloc) throw(){} + }; + + template + class VectorType : public std::vector< TYPE, AllocatorType > + { + }; + + template + class ListType : public std::deque< TYPE, AllocatorType > + { + }; + + template + class StackType : public std::deque< TYPE, AllocatorType > + { + public: + void push( const TYPE& elem ) { this->push_back(elem); } + void pop() { this->pop_back(); } + TYPE& peek() { return this->back(); } + TYPE& top() { return this->back(); } + const TYPE& peek() const { return this->back(); } + const TYPE& top() const { return this->back(); } + }; + + + template + class OrderedSetType : public std::set< TYPE, std::less, AllocatorType > + { + }; + + template + class UnOrderedSetType : public std::set< TYPE, std::less, AllocatorType > + { + }; + + template + class UnOrderedMapType : public std::map< KeyType, ValueType, std::less, + AllocatorType > > + { + }; + + template + class OrderedMapType : public std::map< KeyType, ValueType, std::less, + AllocatorType > > + { + }; + + static void* operator new (std::size_t bytes) + { + void* p = alloc(bytes); + return p; + } + static void* operator new (std::size_t , void* p) { return p; } + static void* operator new[]( std::size_t bytes) + { + void* p = alloc(bytes); + return p; + } + static void operator delete(void* p) + { + DefaultAllocPolicy::free(p); + } + static void operator delete(void* , void* ) {} //placement delete + + static void operator delete[](void* p) + { + 
DefaultAllocPolicy::free(p); + } + + static void* alloc( std::size_t bytes ) + { + void* p = malloc(bytes); + if( p== NULL ) + throw std::bad_alloc(); + return p; + } + + static void* alloc0( std::size_t bytes ) + { + void* p = DefaultAllocPolicy::alloc(bytes); + memset(p, 0, bytes ); + return p; + } + + static void free( void* p ) + { + return ::free(p); + } + + static void* realloc(void *ptr, size_t size) + { + return ::realloc( ptr, size ); + } +}; + +ANTLR_END_NAMESPACE() + +#endif /* _ANTLR3MEMORY_H */ diff --git a/runtime/Cpp/include/antlr3parser.hpp b/runtime/Cpp/include/antlr3parser.hpp new file mode 100755 index 000000000..cc9b47388 --- /dev/null +++ b/runtime/Cpp/include/antlr3parser.hpp @@ -0,0 +1,200 @@ +/** \file + * Base implementation of an ANTLR3 parser. + * + * + */ +#ifndef _ANTLR3_PARSER_HPP +#define _ANTLR3_PARSER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** This is the main interface for an ANTLR3 parser. + */ +template< class ImplTraits > +class Parser : public ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > +{ +public: + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef typename TokenStreamType::IntStreamType IntStreamType; + typedef TokenStreamType StreamType; + + typedef typename ImplTraits::template RecognizerType< typename ImplTraits::TokenStreamType > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + + typedef DebugEventListener DebugEventListenerType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + typedef CommonTokenType TokenType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef ANTLR_ExceptionBase ExceptionBaseType; + typedef Empty TokenSourceType; + + typedef typename RecognizerSharedStateType::FollowingType FollowingType; + typedef typename RecognizerSharedStateType::RuleMemoType RuleMemoType; + typedef typename ImplTraits::DebugEventListenerType DebuggerType; + +private: + /** A provider of a tokenstream interface, for the parser to consume + * tokens from. 
+ */ + TokenStreamType* m_tstream; + +public: + Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ); + Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, RecognizerSharedStateType* state ); + Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, DebugEventListenerType* dbg, + RecognizerSharedStateType* state ); + TokenStreamType* get_tstream() const; + TokenStreamType* get_input() const; + IntStreamType* get_istream() const; + RecognizerType* get_rec(); + + //same as above. Just that get_istream exists for lexer, parser, treeparser + //get_parser_istream exists only for parser, treeparser. So use it accordingly + IntStreamType* get_parser_istream() const; + + /** A pointer to a function that installs a debugger object (it also + * installs the debugging versions of the parser methods. This means that + * a non debug parser incurs no overhead because of the debugging stuff. + */ + void setDebugListener(DebugEventListenerType* dbg); + + /** A pointer to a function that installs a token stream + * for the parser. + */ + void setTokenStream(TokenStreamType*); + + /** A pointer to a function that returns the token stream for this + * parser. + */ + TokenStreamType* getTokenStream(); + + void exConstruct(); + TokenType* getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, BitsetListType* follow); + + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); + + /** Pointer to a function that knows how to free resources of an ANTLR3 parser. 
+ */ + ~Parser(); + + void fillExceptionData( ExceptionBaseType* ex ); + void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ); + + //convenience functions exposed in .stg + const RecognizerType* get_recognizer() const; + RecognizerSharedStateType* get_psrstate() const; + void set_psrstate(RecognizerSharedStateType* state); + bool haveParsedRule(ANTLR_MARKER ruleIndex); + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); + ANTLR_MARKER index() const; + bool hasException() const; + ExceptionBaseType* get_exception() const; + const CommonTokenType* matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ); + void matchAnyToken(); + const FollowingType& get_follow_stack() const; + void followPush( const BitsetListType& follow ); + void followPop(); + void precover(); + void preporterror(); + ANTLR_UINT32 LA(ANTLR_INT32 i); + const CommonTokenType* LT(ANTLR_INT32 k); + void constructEx(); + void consume(); + ANTLR_MARKER mark(); + void rewind(ANTLR_MARKER marker); + void rewindLast(); + void seek(ANTLR_MARKER index); + bool get_perror_recovery() const; + void set_perror_recovery( bool val ); + bool hasFailed() const; + bool get_failedflag() const; + void set_failedflag( bool failed ); + ANTLR_INT32 get_backtracking() const; + void inc_backtracking(); + void dec_backtracking(); + CommonTokenType* recoverFromMismatchedSet(BitsetListType* follow); + bool recoverFromMismatchedElement(BitsetListType* follow); + RuleMemoType* getRuleMemo() const; + DebuggerType* get_debugger() const; + TokenStreamType* get_strstream() const; + void setRuleMemo(RuleMemoType* rulememo); + +}; + +//Generic rule return value. Unlike the general ANTLR, this gets generated for +//every rule in the target. 
Handle rule exit here +template +class RuleReturnValue +{ +public: + typedef typename ImplTraits::BaseParserType BaseParserType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +public: + const CommonTokenType* start; + const CommonTokenType* stop; + BaseParserType* parser; + + RuleReturnValue(BaseParserType* psr = NULL ); + RuleReturnValue( const RuleReturnValue& val ); + RuleReturnValue& operator=( const RuleReturnValue& val ); + void call_start_placeholder(); + void call_stop_placeholder(); + RuleReturnValue& get_struct(); + ~RuleReturnValue(); +}; + +//This kind makes sure that whenever tokens are condensed into a rule, +//all the tokens except the start and stop tokens are deleted +template +class RuleReturnValue_1 : public RuleReturnValue +{ +public: + typedef RuleReturnValue BaseType; + typedef typename BaseType::BaseParserType BaseParserType; + +public: + RuleReturnValue_1(); + RuleReturnValue_1( BaseParserType* psr); + RuleReturnValue_1( const RuleReturnValue_1& val ); + void call_start_placeholder(); //its dummy here + ~RuleReturnValue_1(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3parser.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3parser.inl b/runtime/Cpp/include/antlr3parser.inl new file mode 100755 index 000000000..6f4d15261 --- /dev/null +++ b/runtime/Cpp/include/antlr3parser.inl @@ -0,0 +1,585 @@ +ANTLR_BEGIN_NAMESPACE() + +template< class ImplTraits > +Parser::Parser( ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state ) + :RecognizerType( sizeHint, state ) +{ + m_tstream = NULL; +} + +template< class ImplTraits > +Parser::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, + RecognizerSharedStateType* state ) + :RecognizerType( sizeHint, state ) +{ + this->setTokenStream( tstream ); +} + +template< class ImplTraits > +Parser::Parser( ANTLR_UINT32 sizeHint, TokenStreamType* tstream, + DebugEventListenerType* dbg, + RecognizerSharedStateType* state ) + :RecognizerType( sizeHint, state ) +{ + 
this->setTokenStream( tstream ); + this->setDebugListener( dbg ); +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::TokenStreamType* Parser::get_tstream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::IntStreamType* Parser::get_istream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::IntStreamType* Parser::get_parser_istream() const +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::TokenStreamType* Parser::get_input() const +{ + return m_tstream; +} + +template< class ImplTraits > +void Parser::fillExceptionData( ExceptionBaseType* ex ) +{ + ex->set_token( m_tstream->_LT(1) ); /* Current input token */ + ex->set_line( ex->get_token()->get_line() ); + ex->set_charPositionInLine( ex->get_token()->get_charPositionInLine() ); + ex->set_index( this->get_istream()->index() ); + if( ex->get_token()->get_type() == CommonTokenType::TOKEN_EOF) + { + ex->set_streamName(""); + } + else + { + ex->set_streamName( ex->get_token()->get_input()->get_fileName() ); + } + ex->set_message("Unexpected token"); +} + +template< class ImplTraits > +void Parser::displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ) +{ + typename ImplTraits::StringStreamType errtext; + // See if there is a 'filename' we can use + // + if( ex->get_streamName().empty() ) + { + if(ex->get_token()->get_type() == CommonTokenType::TOKEN_EOF) + { + errtext << "-end of input-("; + } + else + { + errtext << "-unknown source-("; + } + } + else + { + errtext << ex->get_streamName() << "("; + } + + // Next comes the line number + // + errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") "; + errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType() + << " : " + << this->get_rec()->get_state()->get_exception()->get_message(); + + // Prepare the knowledge we know we have + // + const CommonTokenType* 
theToken = this->get_rec()->get_state()->get_exception()->get_token(); + StringType ttext = theToken->toString(); + + errtext << ", at offset , " + << this->get_rec()->get_state()->get_exception()->get_charPositionInLine(); + if (theToken != NULL) + { + if (theToken->get_type() == CommonTokenType::TOKEN_EOF) + { + errtext << ", at "; + } + else + { + // Guard against null text in a token + // + errtext << "\n near " << ( ttext.empty() + ? "" : ttext ) << "\n"; + } + } + + ex->displayRecognitionError( tokenNames, errtext ); + ImplTraits::displayRecognitionError( errtext.str() ); +} + +template< class ImplTraits > +Parser::~Parser() +{ + if (this->get_rec() != NULL) + { + // This may have ben a delegate or delegator parser, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. + // + RecognizerSharedStateType* state = this->get_rec()->get_state(); + if (state != NULL) + { + state->get_following().clear(); + } + } +} + +template< class ImplTraits > +void Parser::setDebugListener(DebugEventListenerType* dbg) +{ + // Set the debug listener. There are no methods to override + // because currently the only ones that notify the debugger + // are error reporting and recovery. Hence we can afford to + // check and see if the debugger interface is null or not + // there. If there is ever an occasion for a performance + // sensitive function to use the debugger interface, then + // a replacement function for debug mode should be supplied + // and installed here. 
+ // + this->get_rec()->set_debugger(dbg); + + // If there was a tokenstream installed already + // then we need to tell it about the debug interface + // + if (this->get_tstream() != NULL) + { + this->get_tstream()->setDebugListener(dbg); + } +} + +template< class ImplTraits > +ANTLR_INLINE void Parser::setTokenStream(TokenStreamType* tstream) +{ + m_tstream = tstream; + this->get_rec()->reset(); +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::TokenStreamType* Parser::getTokenStream() +{ + return m_tstream; +} + +template< class ImplTraits > +ANTLR_INLINE typename Parser::RecognizerType* Parser::get_rec() +{ + return this; +} + +template< class ImplTraits > +ANTLR_INLINE void Parser::exConstruct() +{ + new ANTLR_Exception( this->get_rec(), "" ); +} + +template< class ImplTraits > +typename Parser::TokenType* Parser::getMissingSymbol( IntStreamType* istream, + ExceptionBaseType*, + ANTLR_UINT32 expectedTokenType, + BitsetListType* ) +{ + TokenStreamType* cts; + CommonTokenType* token; + const CommonTokenType* current; + StringType text; + + // Dereference the standard pointers + // + cts = static_cast<TokenStreamType*>(istream); + + // Work out what to use as the current symbol to make a line and offset etc + // If we are at EOF, we use the token before EOF + // + current = cts->_LT(1); + if (current->get_type() == CommonTokenType::TOKEN_EOF) + { + current = cts->_LT(-1); + } + + token = new CommonTokenType; + + // Set some of the token properties based on the current token + // + token->set_line(current->get_line()); + token->set_charPositionInLine( current->get_charPositionInLine()); + token->set_channel( TOKEN_DEFAULT_CHANNEL ); + token->set_type(expectedTokenType); + token->set_lineStart( current->get_lineStart() ); + + // Create the token text that shows it has been inserted + // + token->setText("<missing "); + text = token->getText(); + if (!text.empty()) + { + text.append((const char *) this->get_rec()->get_state()->get_tokenName(expectedTokenType) ); + text.append(">"); + token->setText(text); // write the completed text back: 'text' is only a local copy + } + + // 
Finally return the pointer to our new token + // + return token; +} + +template< class ImplTraits > +void Parser::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + // Install a mismatched token exception in the exception stack + // + new ANTLR_Exception(this, ""); + + //With the statement below, only the parsers are allowed to compile fine + IntStreamType* is = this->get_istream(); + + + if (this->mismatchIsUnwantedToken(is, ttype)) + { + // Now update it to indicate this is an unwanted token exception + // + new ANTLR_Exception(this, ""); + return; + } + + if ( this->mismatchIsMissingToken(is, follow)) + { + // Now update it to indicate this is an unwanted token exception + // + new ANTLR_Exception(this, ""); + return; + } + + // Just a mismatched token is all we can dtermine + // + new ANTLR_Exception(this, ""); + + return; +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser::RecognizerType* Parser::get_recognizer() const +{ + return this; +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser::RecognizerSharedStateType* Parser::get_psrstate() const +{ + return this->get_recognizer()->get_state(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::set_psrstate(RecognizerSharedStateType* state) +{ + this->get_rec()->set_state( state ); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::haveParsedRule(ANTLR_MARKER ruleIndex) +{ + return this->get_rec()->alreadyParsedRule(ruleIndex); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart) +{ + return this->get_rec()->memoize( ruleIndex, ruleParseStart ); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser::index() const +{ + return this->get_istream()->index(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::hasException() const +{ + return this->get_psrstate()->get_error(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser::ExceptionBaseType* 
Parser::get_exception() const +{ + return this->get_psrstate()->get_exception(); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser::CommonTokenType* Parser::matchToken( ANTLR_UINT32 ttype, BitsetListType* follow ) +{ + return this->get_rec()->match( ttype, follow ); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::matchAnyToken() +{ + return this->get_rec()->matchAny(); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser::FollowingType& Parser::get_follow_stack() const +{ + return this->get_psrstate()->get_following(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::followPush(const BitsetListType& follow) +{ +#ifndef SKIP_FOLLOW_SETS + this->get_rec()->get_state()->get_following().push(follow); +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::followPop() +{ +#ifndef SKIP_FOLLOW_SETS + this->get_rec()->get_state()->get_following().pop(); +#endif +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::precover() +{ + return this->get_rec()->recover(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::preporterror() +{ + return this->get_rec()->reportError(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_UINT32 Parser::LA(ANTLR_INT32 i) +{ + return this->get_istream()->_LA(i); +} + +template< class ImplTraits> +ANTLR_INLINE const typename Parser::CommonTokenType* Parser::LT(ANTLR_INT32 k) +{ + return this->get_input()->_LT(k); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::constructEx() +{ + this->get_rec()->constructEx(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::consume() +{ + this->get_istream()->consume(); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_MARKER Parser::mark() +{ + return this->get_istream()->mark(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::rewind(ANTLR_MARKER marker) +{ + this->get_istream()->rewind(marker); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::rewindLast() +{ 
+ this->get_istream()->rewindLast(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::seek(ANTLR_MARKER index) +{ + this->get_istream()->seek(index); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::get_perror_recovery() const +{ + return this->get_psrstate()->get_errorRecovery(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::set_perror_recovery( bool val ) +{ + this->get_psrstate()->set_errorRecovery(val); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::hasFailed() const +{ + return this->get_psrstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::get_failedflag() const +{ + return this->get_psrstate()->get_failed(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::set_failedflag( bool failed ) +{ + this->get_psrstate()->set_failed(failed); +} + +template< class ImplTraits> +ANTLR_INLINE ANTLR_INT32 Parser::get_backtracking() const +{ + return this->get_psrstate()->get_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::inc_backtracking() +{ + this->get_psrstate()->inc_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::dec_backtracking() +{ + this->get_psrstate()->dec_backtracking(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser::CommonTokenType* Parser::recoverFromMismatchedSet(BitsetListType* follow) +{ + return this->get_rec()->recoverFromMismatchedSet(follow); +} + +template< class ImplTraits> +ANTLR_INLINE bool Parser::recoverFromMismatchedElement(BitsetListType* follow) +{ + return this->get_rec()->recoverFromMismatchedElement(follow); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser::RuleMemoType* Parser::getRuleMemo() const +{ + return this->get_psrstate()->get_ruleMemo(); +} + +template< class ImplTraits> +ANTLR_INLINE void Parser::setRuleMemo(RuleMemoType* rulememo) +{ + this->get_psrstate()->set_ruleMemo(rulememo); +} + +template< class ImplTraits> +ANTLR_INLINE typename 
Parser::DebuggerType* Parser::get_debugger() const +{ + return this->get_rec()->get_debugger(); +} + +template< class ImplTraits> +ANTLR_INLINE typename Parser::TokenStreamType* Parser::get_strstream() const +{ + return this->get_tstream(); +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue::RuleReturnValue(BaseParserType* psr) +{ + parser = psr; + start = NULL; + stop = NULL; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue::RuleReturnValue( const RuleReturnValue& val ) +{ + parser = val.parser; + start = val.start; + stop = val.stop; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue& RuleReturnValue::operator=( const RuleReturnValue& val ) +{ + parser = val.parser; + start = val.start; + stop = val.stop; + return *this; +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue::~RuleReturnValue() +{ +} + +template< class ImplTraits> +ANTLR_INLINE void RuleReturnValue::call_start_placeholder() +{ + start = parser->LT(1); + stop = start; +} + +template< class ImplTraits> +ANTLR_INLINE void RuleReturnValue::call_stop_placeholder() +{ + stop = parser->LT(-1); +} + +template< class ImplTraits> +ANTLR_INLINE RuleReturnValue_1::RuleReturnValue_1() +{ +} + +template< class ImplTraits> +RuleReturnValue_1::RuleReturnValue_1( BaseParserType* psr ) + :RuleReturnValue_1::BaseType(psr) +{ + BaseType::start = psr->LT(1); + BaseType::stop = BaseType::start; +} + +template< class ImplTraits> +RuleReturnValue_1::RuleReturnValue_1( const RuleReturnValue_1& val ) + :BaseType(val) +{ +} + +template< class ImplTraits> +void RuleReturnValue_1::call_start_placeholder() +{ +} + +template< class ImplTraits> +RuleReturnValue_1::~RuleReturnValue_1() +{ + if( BaseType::parser && ( BaseType::parser->get_backtracking() == 0 ) ) + { + if( BaseType::stop == NULL ) + BaseType::stop = BaseType::parser->LT(-1); + if( BaseType::stop != NULL ) + { + ANTLR_MARKER start_token_idx = BaseType::start->get_index() + 1; + ANTLR_MARKER stop_token_idx = 
BaseType::stop->get_index() - 1; + if( start_token_idx > stop_token_idx ) + return; + BaseType::parser->getTokenStream()->discardTokens( start_token_idx, stop_token_idx); + } + } +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3recognizersharedstate.hpp b/runtime/Cpp/include/antlr3recognizersharedstate.hpp new file mode 100755 index 000000000..05543963d --- /dev/null +++ b/runtime/Cpp/include/antlr3recognizersharedstate.hpp @@ -0,0 +1,265 @@ +/** \file + * While the C runtime does not need to model the state of + * multiple lexers and parsers in the same way as the Java runtime does + * it is no overhead to reflect that model. In fact the + * C runtime has always been able to share recognizer state. + * + * This 'class' therefore defines all the elements of a recognizer + * (either lexer, parser or tree parser) that are need to + * track the current recognition state. Multiple recognizers + * may then share this state, for instance when one grammar + * imports another. + */ + +#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP +#define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +ANTLR_BEGIN_NAMESPACE() + +/** All the data elements required to track the current state + * of any recognizer (lexer, parser, tree parser). + * May be shared between multiple recognizers such that + * grammar inheritance is easily supported. 
+ */ +template +class RecognizerSharedState : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename StreamType::UnitType TokenType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + + typedef typename ComponentTypeFinder::ComponentType ComponentType; + typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::template ExceptionBaseType ExceptionBaseType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::BitsetListType BitsetListType; + + typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType; + typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType; + typedef InputStreamsType StreamsType; + typedef typename AllocPolicyType::template VectorType RewriteStreamsType; + + typedef IntTrie RuleListType; + typedef IntTrie RuleMemoType; + +private: + /** Points to the first in a possible chain of exceptions that the + * recognizer has discovered. + */ + ExceptionBaseType* m_exception; + + + /** Track the set of token types that can follow any rule invocation. + * Stack structure, to support: List. + */ + FollowingType m_following; + + /** Track around a hint from the creator of the recognizer as to how big this + * thing is going to get, as the actress said to the bishop. This allows us + * to tune hash tables accordingly. This might not be the best place for this + * in the end but we will see. + */ + ANTLR_UINT32 m_sizeHint; + + + /** If set to true then the recognizer has an exception + * condition (this is tested by the generated code for the rules of + * the grammar). 
+ */ + bool m_error; + + + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ + bool m_errorRecovery; + + /** In lieu of a return value, this indicates that a rule or token + * has failed to match. Reset to false upon valid token match. + */ + bool m_failed; + + /* + Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator + */ + bool m_token_present; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + ANTLR_MARKER m_lastErrorIndex; + + /** When the recognizer terminates, the error handling functions + * will have incremented this value if any error occurred (that was displayed). It can then be + * used by the grammar programmer without having to use static globals. + */ + ANTLR_UINT32 m_errorCount; + + /** If 0, no backtracking is going on. Safe to exec actions etc... + * If >0 then it's the level of backtracking. + */ + ANTLR_INT32 m_backtracking; + + /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. + * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is + * the memoization table for ruleIndex. For key ruleStartIndex, you + * get back the stop token for associated rule or MEMO_RULE_FAILED. + * + * This is only used if rule memoization is on. + */ + RuleMemoType* m_ruleMemo; + + /** Pointer to an array of token names + * that are generally useful in error reporting. The generated parsers install + * this pointer. The table it points to is statically allocated as 8 bit ascii + * at parser compile time - grammar token names are thus restricted in character + * sets, which does not seem to terrible. 
+ */ + ANTLR_UINT8** m_tokenNames; + + /** The goal of all lexer rules/methods is to create a token object. + * This is an instance variable as multiple rules may collaborate to + * create a single token. For example, NUM : INT | FLOAT ; + * In this case, you want the INT or FLOAT rule to set token and not + * have it reset to a NUM token in rule NUM. + */ + CommonTokenType m_token; + + /** A lexer is a source of tokens, produced by all the generated (or + * hand crafted if you like) matching rules. As such it needs to provide + * a token source interface implementation. For others, this will become a empty class + */ + TokenSourceType* m_tokSource; + + /** The channel number for the current token + */ + ANTLR_UINT32 m_channel; + + /** The token type for the current token + */ + ANTLR_UINT32 m_type; + + /** The input line (where it makes sense) on which the first character of the current + * token resides. + */ + ANTLR_INT32 m_tokenStartLine; + + /** The character position of the first character of the current token + * within the line specified by tokenStartLine + */ + ANTLR_INT32 m_tokenStartCharPositionInLine; + + /** What character index in the stream did the current token start at? + * Needed, for example, to get the text for current token. Set at + * the start of nextToken. + */ + ANTLR_MARKER m_tokenStartCharIndex; + + /** Text for the current token. This can be overridden by setting this + * variable directly or by using the SETTEXT() macro (preferred) in your + * lexer rules. + */ + StringType m_text; + + /** Input stream stack, which allows the C programmer to switch input streams + * easily and allow the standard nextToken() implementation to deal with it + * as this is a common requirement. 
+ */ + InputStreamsType m_streams; + +public: + RecognizerSharedState(); + ExceptionBaseType* get_exception() const; + FollowingType& get_following(); + ANTLR_UINT32 get_sizeHint() const; + bool get_error() const; + bool get_errorRecovery() const; + bool get_failed() const; + bool get_token_present() const; + ANTLR_MARKER get_lastErrorIndex() const; + ANTLR_UINT32 get_errorCount() const; + ANTLR_INT32 get_backtracking() const; + RuleMemoType* get_ruleMemo() const; + ANTLR_UINT8** get_tokenNames() const; + ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const; + CommonTokenType* get_token(); + TokenSourceType* get_tokSource() const; + ANTLR_UINT32& get_channel(); + ANTLR_UINT32 get_type() const; + ANTLR_INT32 get_tokenStartLine() const; + ANTLR_INT32 get_tokenStartCharPositionInLine() const; + ANTLR_MARKER get_tokenStartCharIndex() const; + StringType& get_text(); + InputStreamsType& get_streams(); + + void set_following( const FollowingType& following ); + void set_sizeHint( ANTLR_UINT32 sizeHint ); + void set_error( bool error ); + void set_errorRecovery( bool errorRecovery ); + void set_failed( bool failed ); + void set_token_present(bool token_present); + void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ); + void set_errorCount( ANTLR_UINT32 errorCount ); + void set_backtracking( ANTLR_INT32 backtracking ); + void set_ruleMemo( RuleMemoType* ruleMemo ); + void set_tokenNames( ANTLR_UINT8** tokenNames ); + void set_tokSource( TokenSourceType* tokSource ); + void set_channel( ANTLR_UINT32 channel ); + void set_exception( ExceptionBaseType* exception ); + void set_type( ANTLR_UINT32 type ); + void set_token( const CommonTokenType* tok); + void set_tokenStartLine( ANTLR_INT32 tokenStartLine ); + void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ); + void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ); + void set_text( const StringType& text ); + void set_streams( const InputStreamsType& streams ); + + void 
inc_errorCount(); + void inc_backtracking(); + void dec_backtracking(); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3recognizersharedstate.inl" + +#endif + + diff --git a/runtime/Cpp/include/antlr3recognizersharedstate.inl b/runtime/Cpp/include/antlr3recognizersharedstate.inl new file mode 100755 index 000000000..8d9cc7fd6 --- /dev/null +++ b/runtime/Cpp/include/antlr3recognizersharedstate.inl @@ -0,0 +1,267 @@ +ANTLR_BEGIN_NAMESPACE() + +template +RecognizerSharedState::RecognizerSharedState() +{ + m_exception = NULL; + m_sizeHint = 0; + m_error = false; + m_errorRecovery = false; + m_failed = false; + m_lastErrorIndex = 0; + m_errorCount = 0; + m_backtracking = false; + m_ruleMemo = NULL; + m_tokenNames = NULL; + m_tokSource = NULL; + m_channel = 0; + m_type = 0; + m_tokenStartLine = 0; + m_tokenStartCharPositionInLine = 0; + m_tokenStartCharIndex = 0; +} + +template +ANTLR_INLINE typename RecognizerSharedState::FollowingType& RecognizerSharedState::get_following() +{ + return m_following; +} +template +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState::get_sizeHint() const +{ + return m_sizeHint; +} +template +ANTLR_INLINE bool RecognizerSharedState::get_error() const +{ + return m_error; +} +template +ANTLR_INLINE typename RecognizerSharedState::ExceptionBaseType* +RecognizerSharedState::get_exception() const +{ + return m_exception; +} + +template +ANTLR_INLINE bool RecognizerSharedState::get_errorRecovery() const +{ + return m_errorRecovery; +} +template +ANTLR_INLINE bool RecognizerSharedState::get_failed() const +{ + return m_failed; +} + +template +ANTLR_INLINE bool RecognizerSharedState::get_token_present() const +{ + return m_token_present; +} + +template +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState::get_lastErrorIndex() const +{ + return m_lastErrorIndex; +} +template +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState::get_errorCount() const +{ + return m_errorCount; +} +template +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState::get_backtracking() 
const +{ + return m_backtracking; +} +template +ANTLR_INLINE typename RecognizerSharedState::RuleMemoType* RecognizerSharedState::get_ruleMemo() const +{ + return m_ruleMemo; +} +template +ANTLR_INLINE ANTLR_UINT8** RecognizerSharedState::get_tokenNames() const +{ + return m_tokenNames; +} +template +ANTLR_INLINE ANTLR_UINT8* RecognizerSharedState::get_tokenName( ANTLR_UINT32 i ) const +{ + return m_tokenNames[i]; +} +template +ANTLR_INLINE typename RecognizerSharedState::CommonTokenType* RecognizerSharedState::get_token() +{ + return &m_token; +} +template +ANTLR_INLINE typename RecognizerSharedState::TokenSourceType* RecognizerSharedState::get_tokSource() const +{ + return m_tokSource; +} +template +ANTLR_INLINE ANTLR_UINT32& RecognizerSharedState::get_channel() +{ + return m_channel; +} +template +ANTLR_INLINE ANTLR_UINT32 RecognizerSharedState::get_type() const +{ + return m_type; +} +template +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState::get_tokenStartLine() const +{ + return m_tokenStartLine; +} +template +ANTLR_INLINE ANTLR_INT32 RecognizerSharedState::get_tokenStartCharPositionInLine() const +{ + return m_tokenStartCharPositionInLine; +} +template +ANTLR_INLINE ANTLR_MARKER RecognizerSharedState::get_tokenStartCharIndex() const +{ + return m_tokenStartCharIndex; +} +template +ANTLR_INLINE typename RecognizerSharedState::StringType& RecognizerSharedState::get_text() +{ + return m_text; +} +template +ANTLR_INLINE typename RecognizerSharedState::StreamsType& RecognizerSharedState::get_streams() +{ + return m_streams; +} +template +ANTLR_INLINE void RecognizerSharedState::set_exception( ExceptionBaseType* exception ) +{ + m_exception = exception; +} +template +ANTLR_INLINE void RecognizerSharedState::set_following( const FollowingType& following ) +{ + m_following = following; +} +template +ANTLR_INLINE void RecognizerSharedState::set_sizeHint( ANTLR_UINT32 sizeHint ) +{ + m_sizeHint = sizeHint; +} +template +ANTLR_INLINE void 
RecognizerSharedState::set_error( bool error ) +{ + m_error = error; +} +template +ANTLR_INLINE void RecognizerSharedState::set_errorRecovery( bool errorRecovery ) +{ + m_errorRecovery = errorRecovery; +} +template +ANTLR_INLINE void RecognizerSharedState::set_failed( bool failed ) +{ + m_failed = failed; +} +template +ANTLR_INLINE void RecognizerSharedState::set_token_present(bool token_present) +{ + m_token_present = token_present; +} +template +ANTLR_INLINE void RecognizerSharedState::set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ) +{ + m_lastErrorIndex = lastErrorIndex; +} +template +ANTLR_INLINE void RecognizerSharedState::set_errorCount( ANTLR_UINT32 errorCount ) +{ + m_errorCount = errorCount; +} +template +ANTLR_INLINE void RecognizerSharedState::set_backtracking( ANTLR_INT32 backtracking ) +{ + m_backtracking = backtracking; +} +template +ANTLR_INLINE void RecognizerSharedState::set_ruleMemo( RuleMemoType* ruleMemo ) +{ + m_ruleMemo = ruleMemo; +} +template +ANTLR_INLINE void RecognizerSharedState::set_tokenNames( ANTLR_UINT8** tokenNames ) +{ + m_tokenNames = tokenNames; +} + +template +ANTLR_INLINE void RecognizerSharedState::set_tokSource( TokenSourceType* tokSource ) +{ + m_tokSource = tokSource; +} +template +ANTLR_INLINE void RecognizerSharedState::set_channel( ANTLR_UINT32 channel ) +{ + m_channel = channel; +} + +template +ANTLR_INLINE void RecognizerSharedState::set_token(const CommonTokenType* tok) +{ + this->set_token_present( tok != NULL ); + if( tok != NULL ) + m_token = *tok; +} + +template +ANTLR_INLINE void RecognizerSharedState::set_type( ANTLR_UINT32 type ) +{ + m_type = type; +} +template +ANTLR_INLINE void RecognizerSharedState::set_tokenStartLine( ANTLR_INT32 tokenStartLine ) +{ + m_tokenStartLine = tokenStartLine; +} +template +ANTLR_INLINE void RecognizerSharedState::set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ) +{ + m_tokenStartCharPositionInLine = tokenStartCharPositionInLine; +} +template 
+ANTLR_INLINE void RecognizerSharedState::set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ) +{ + m_tokenStartCharIndex = tokenStartCharIndex; +} +template +ANTLR_INLINE void RecognizerSharedState::set_text( const StringType& text ) +{ + m_text = text; +} +template +ANTLR_INLINE void RecognizerSharedState::set_streams( const InputStreamsType& streams ) +{ + m_streams = streams; +} + +template +ANTLR_INLINE void RecognizerSharedState::inc_errorCount() +{ + ++m_errorCount; +} + +template +ANTLR_INLINE void RecognizerSharedState::inc_backtracking() +{ + ++m_backtracking; +} + +template +ANTLR_INLINE void RecognizerSharedState::dec_backtracking() +{ + --m_backtracking; +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3rewritestreams.hpp b/runtime/Cpp/include/antlr3rewritestreams.hpp new file mode 100755 index 000000000..e89d70a44 --- /dev/null +++ b/runtime/Cpp/include/antlr3rewritestreams.hpp @@ -0,0 +1,254 @@ +#ifndef ANTLR3REWRITESTREAM_HPP +#define ANTLR3REWRITESTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +/// A generic list of elements tracked in an alternative to be used in +/// a -> rewrite rule. +/// +/// In the C implementation, all tree oriented streams return a pointer to +/// the same type: pANTLR3_BASE_TREE. Anything that has subclassed from this +/// still passes this type, within which there is a super pointer, which points +/// to its own data and methods. Hence we do not need to implement this as +/// the equivalent of an abstract class, but just fill in the appropriate interface +/// as usual with this model. +/// +/// Once you start next()ing, do not try to add more elements. It will +/// break the cursor tracking I believe. 
+/// +/// +/// \see #pANTLR3_REWRITE_RULE_NODE_STREAM +/// \see #pANTLR3_REWRITE_RULE_ELEMENT_STREAM +/// \see #pANTLR3_REWRITE_RULE_SUBTREE_STREAM +/// +/// TODO: add mechanism to detect/puke on modification after reading from stream +/// +ANTLR_BEGIN_NAMESPACE() + +template +class RewriteRuleElementStream : public ImplTraits::AllocPolicyType +{ +public: + typedef typename ImplTraits::TreeType TreeType; + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + + typedef typename ImplTraits::template RecognizerType< typename SuperType::StreamType > RecognizerType; + typedef typename ImplTraits::StringType StringType; + typedef typename SuperType::TokenType TokenType; + typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; + +protected: + /// Track single elements w/o creating a list. Upon 2nd add, alloc list + /// + TokenType* m_singleElement; + + /// The list of tokens or subtrees we are tracking + /// + ElementsType m_elements; + + /// The element or stream description; usually has name of the token or + /// rule reference that this list tracks. Can include rulename too, but + /// the exception would track that info. + /// + StringType m_elementDescription; + + /// Pointer to the tree adaptor in use for this stream + /// + TreeAdaptorType* m_adaptor; + + // Pointer to the recognizer shared state to which this stream belongs + // + RecognizerType* m_rec; + + /// Cursor 0..n-1. If singleElement!=NULL, cursor is 0 until you next(), + /// which bumps it to 1 meaning no more elements. + /// + ANTLR_UINT32 m_cursor; + + /// Once a node / subtree has been used in a stream, it must be dup'ed + /// from then on. Streams are reset after sub rules so that the streams + /// can be reused in future sub rules. So, reset must set a dirty bit. + /// If dirty, then next() always returns a dup. 
+ /// + bool m_dirty; + +public: + RewriteRuleElementStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description); + RewriteRuleElementStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, TokenType* oneElement); + RewriteRuleElementStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements); + + ~RewriteRuleElementStream(); + // Methods + + /// Reset the condition of this stream so that it appears we have + /// not consumed any of its elements. Elements themselves are untouched. + /// + void reset(); + + /// Add a new pANTLR3_BASE_TREE to this stream + /// + void add(TokenType* el); + + /// Return the next element in the stream. If out of elements, throw + /// an exception unless size()==1. If size is 1, then return elements[0]. + /// + TokenType* next(); + TreeType* nextTree(); + TokenType* nextToken(); + TokenType* _next(); + + /// When constructing trees, sometimes we need to dup a token or AST + /// subtree. Dup'ing a token means just creating another AST node + /// around it. For trees, you must call the adaptor.dupTree(). + /// + TokenType* dup( TokenType* el ); + + /// Ensure stream emits trees; tokens must be converted to AST nodes. + /// AST nodes can be passed through unmolested. + /// + TreeType* toTree(TreeType* el); + + /// Returns true if there is a next element available + /// + bool hasNext(); + + /// Treat next element as a single node even if it's a subtree. + /// This is used instead of next() when the result has to be a + /// tree root node. Also prevents us from duplicating recently-added + /// children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration + /// must dup the type node, but ID has been added. + /// + /// Referencing to a rule result twice is ok; dup entire tree as + /// we can't be adding trees; e.g., expr expr. 
+ /// + TreeType* nextNode(); + + /// Number of elements available in the stream + /// + ANTLR_UINT32 size(); + + /// Returns the description string if there is one available (check for NULL). + /// + StringType getDescription(); + +protected: + void init(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description); +}; + +/// This is an implementation of a token stream, which is basically an element +/// stream that deals with tokens only. +/// +template +class RewriteRuleTokenStream : public ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::ParserType> +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + typedef typename ImplTraits::ParserType ComponentType; + typedef typename ComponentType::StreamType StreamType; + typedef typename ImplTraits::CommonTokenType TokenType; + typedef typename ImplTraits::TreeType TreeType; + typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; + typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; + typedef typename ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::ParserType> BaseType; + +public: + RewriteRuleTokenStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description); + RewriteRuleTokenStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, TokenType* oneElement); + RewriteRuleTokenStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements); + TreeType* nextNode(); + +private: + TreeType* nextNodeToken(); +}; + +/// This is an implementation of a subtree stream which is a set of trees +/// modelled as an element stream. 
+/// +template +class RewriteRuleSubtreeStream : public ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeParserType> +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + typedef typename ImplTraits::TreeParserType ComponentType; + typedef typename ComponentType::StreamType StreamType; + typedef typename ImplTraits::TreeType TreeType; + typedef TreeType TokenType; + typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; + typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; + typedef typename ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeParserType> BaseType; + +public: + RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description); + RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, TokenType* oneElement); + RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements); + + TreeType* dup( TreeType* el ); + +private: + TreeType* dupTree( TreeType* el ); +}; + +/// This is an implementation of a node stream, which is basically an element +/// stream that deals with tree nodes only. 
+/// +template +class RewriteRuleNodeStream : public ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeParserType> +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; + typedef typename ImplTraits::TreeParserType ComponentType; + typedef typename ComponentType::StreamType StreamType; + typedef typename ImplTraits::TreeType TreeType; + typedef TreeType TokenType; + typedef typename ImplTraits::template RecognizerType< StreamType > RecognizerType; + typedef typename AllocPolicyType::template VectorType< TokenType* > ElementsType; + typedef typename ImplTraits::template RewriteRuleElementStreamType< typename ImplTraits::TreeParserType> BaseType; + +public: + RewriteRuleNodeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description); + RewriteRuleNodeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, TokenType* oneElement); + RewriteRuleNodeStream(TreeAdaptorType* adaptor, RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements); + + TreeType* toTree(TreeType* element); + +private: + TreeType* toTreeNode(TreeType* element); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3rewritestreams.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3rewritestreams.inl b/runtime/Cpp/include/antlr3rewritestreams.inl new file mode 100755 index 000000000..3303e8c41 --- /dev/null +++ b/runtime/Cpp/include/antlr3rewritestreams.inl @@ -0,0 +1,374 @@ +ANTLR_BEGIN_NAMESPACE() + +template +RewriteRuleElementStream::RewriteRuleElementStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description) +{ + this->init(adaptor, rec, description); +} + +template +RewriteRuleElementStream::RewriteRuleElementStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description, TokenType* oneElement) +{ + this->init(adaptor, rec, description); + if( oneElement != NULL ) + this->add( 
oneElement ); +} + +template +RewriteRuleElementStream::RewriteRuleElementStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements) + :m_elements(elements) +{ + this->init(adaptor, rec, description); +} + +template +void RewriteRuleElementStream::init(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description) +{ + m_rec = rec; + m_adaptor = adaptor; + m_cursor = 0; + m_dirty = false; + m_singleElement = NULL; +} + +template +RewriteRuleTokenStream::RewriteRuleTokenStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description) + :BaseType(adaptor, rec, description) +{ +} + +template +RewriteRuleTokenStream::RewriteRuleTokenStream(TreeAdaptorType* adaptor, RecognizerType* rec, + ANTLR_UINT8* description, TokenType* oneElement) + :BaseType(adaptor, rec, description, oneElement) +{ +} + +template +RewriteRuleTokenStream::RewriteRuleTokenStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements) + :BaseType(adaptor, rec, description, elements) +{ +} + +template +RewriteRuleSubtreeStream::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description) + :BaseType(adaptor, rec, description) +{ +} + +template +RewriteRuleSubtreeStream::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, RecognizerType* rec, + ANTLR_UINT8* description, TokenType* oneElement) + :BaseType(adaptor, rec, description, oneElement) +{ +} + +template +RewriteRuleSubtreeStream::RewriteRuleSubtreeStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements) + :BaseType(adaptor, rec, description, elements) +{ +} + +template +RewriteRuleNodeStream::RewriteRuleNodeStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description) + :BaseType(adaptor, rec, description) +{ +} + +template +RewriteRuleNodeStream::RewriteRuleNodeStream(TreeAdaptorType* adaptor, 
RecognizerType* rec, + ANTLR_UINT8* description, TokenType* oneElement) + :BaseType(adaptor, rec, description, oneElement) +{ +} + +template +RewriteRuleNodeStream::RewriteRuleNodeStream(TreeAdaptorType* adaptor, + RecognizerType* rec, ANTLR_UINT8* description, const ElementsType& elements) + :BaseType(adaptor, rec, description, elements) +{ +} + +template +void RewriteRuleElementStream::reset() +{ + m_dirty = true; + m_cursor = 0; +} + +template +void RewriteRuleElementStream::add(TokenType* el) +{ + if ( el== NULL ) + return; + + if ( !m_elements.empty() ) + { + // if in list, just add + m_elements.push_back(el); + return; + } + + if ( m_singleElement == NULL ) + { + // no elements yet, track w/o list + m_singleElement = el; + return; + } + + // adding 2nd element, move to list + m_elements.push_back(m_singleElement); + m_singleElement = NULL; + m_elements.push_back(el); +} + +template +typename RewriteRuleElementStream::TokenType* +RewriteRuleElementStream::_next() +{ + ANTLR_UINT32 n; + TreeType* t; + + n = this->size(); + + if (n == 0) + { + // This means that the stream is empty + // + return NULL; // Caller must cope with this + } + + // Traversed all the available elements already? + // + if ( m_cursor >= n) + { + if (n == 1) + { + // Special case when size is single element, it will just dup a lot + // NOTE(review): m_singleElement is NULL here when the one element sits in m_elements (stream built from a one-entry list) - confirm callers never reach this + return this->toTree(m_singleElement); + } + + // Out of elements and the size is not 1, so we cannot assume + // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+) + // This means we ran out of elements earlier than was expected. + // + return NULL; // Caller must cope with this + } + + // Elements available either for duping or just available + // + if ( m_singleElement != NULL) + { + m_cursor++; // Cursor advances even for single element as this tells us to dup() + return this->toTree(m_singleElement); + } + + // More than just a single element so we extract it from the + // vector. 
+ // + t = this->toTree( m_elements.at(m_cursor)); + m_cursor++; + return t; +} + +template +typename RewriteRuleElementStream::TreeType* +RewriteRuleElementStream::nextTree() +{ + ANTLR_UINT32 n; + TreeType* el; + + n = this->size(); + + if ( m_dirty || ( (m_cursor >=n) && (n==1)) ) + { + // if out of elements and size is 1, dup + // + el = this->_next(); + return this->dup(el); + } + + // test size above then fetch + // + el = this->_next(); + return el; +} + +template +typename RewriteRuleElementStream::TokenType* +RewriteRuleElementStream::nextToken() +{ + return this->_next(); +} + +template +typename RewriteRuleElementStream::TokenType* +RewriteRuleElementStream::next() +{ + ANTLR_UINT32 s; + s = this->size(); + if ( (m_cursor >= s) && (s == 1) ) + { + TreeType* el; + el = this->_next(); + return this->dup(el); + } + return this->_next(); +} + +template +typename RewriteRuleSubtreeStream::TreeType* +RewriteRuleSubtreeStream::dup(TreeType* element) +{ + return this->dupTree(element); +} + +template +typename RewriteRuleSubtreeStream::TreeType* +RewriteRuleSubtreeStream::dupTree(TreeType* element) +{ + return BaseType::m_adaptor->dupNode(element); +} + +template +typename RewriteRuleElementStream::TreeType* +RewriteRuleElementStream::toTree( TreeType* element) +{ + return element; +} + +template +typename RewriteRuleNodeStream::TreeType* +RewriteRuleNodeStream::toTree(TreeType* element) +{ + return this->toTreeNode(element); +} + +template +typename RewriteRuleNodeStream::TreeType* +RewriteRuleNodeStream::toTreeNode(TreeType* element) +{ + return BaseType::m_adaptor->dupNode(element); +} + +template +bool RewriteRuleElementStream::hasNext() +{ + if ( ((m_singleElement != NULL) && (m_cursor < 1)) + || ( !m_elements.empty() && m_cursor < m_elements.size())) + { + return true; + } + else + { + return false; + } +} + +template +typename RewriteRuleTokenStream::TreeType* +RewriteRuleTokenStream::nextNode() +{ + return this->nextNodeToken(); +} + +template +typename 
RewriteRuleTokenStream::TreeType* +RewriteRuleTokenStream::nextNodeToken() +{ + return BaseType::m_adaptor->create(this->_next()); +} + +/// Number of elements available in the stream +/// +template +ANTLR_UINT32 RewriteRuleElementStream::size() +{ + ANTLR_UINT32 n = 0; + + /// Should be a count of one if singleElement is set. I copied this + /// logic from the Java implementation, which I suspect is just guarding + /// against someone setting singleElement and forgetting to NULL it out + /// + if ( m_singleElement != NULL) + { + n = 1; + } + else + { + if ( !m_elements.empty() ) + { + return (ANTLR_UINT32)(m_elements.size()); + } + } + return n; + +} + +template +typename RewriteRuleElementStream::StringType +RewriteRuleElementStream::getDescription() +{ + if ( m_elementDescription.empty() ) + { + m_elementDescription = ""; + } + return m_elementDescription; +} + +template +RewriteRuleElementStream::~RewriteRuleElementStream() +{ + TreeType* tree; + + // Before placing the stream back in the pool, we + // need to clear any vector it has. This is so any + // free pointers that are associated with the + // entries are called. However, if this particular function is called + // then we know that the entries in the stream are definitely + // tree nodes. Hence we check to see if any of them were nilNodes as + // if they were, we can reuse them. 
+ // + if ( !m_elements.empty() ) + { + // We have some elements to traverse + // + ANTLR_UINT32 i; + + for (i = 1; i<= m_elements.size(); i++) + { + tree = m_elements.at(i-1); + if ( (tree != NULL) && tree->isNilNode() ) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + } + m_elements.clear(); + } + else + { + if (m_singleElement != NULL) + { + tree = m_singleElement; + if (tree->isNilNode()) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + } + m_singleElement = NULL; + } +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3tokenstream.hpp b/runtime/Cpp/include/antlr3tokenstream.hpp new file mode 100755 index 000000000..421ba7f7f --- /dev/null +++ b/runtime/Cpp/include/antlr3tokenstream.hpp @@ -0,0 +1,408 @@ +/** \file + * Defines the interface for an ANTLR3 common token stream. Custom token streams should create + * one of these and then override any functions by installing their own pointers + * to implement the various functions. + */ +#ifndef _ANTLR3_TOKENSTREAM_HPP +#define _ANTLR3_TOKENSTREAM_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +/** Definition of a token source, which has a pointer to a function that + * returns the next token (using a token factory if it is going to be + * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly + * different to the Java interface because we have no way to implement + * multiple interfaces without defining them in the interface structure + * or casting (void *), which is too convoluted. + */ +ANTLR_BEGIN_NAMESPACE() + +//We are not making it subclass AllocPolicy, as this will always be a base class +template +class TokenSource +{ +public: + typedef typename ImplTraits::CommonTokenType TokenType; + typedef TokenType CommonTokenType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::LexerType LexerType; + +private: + /** A special pre-allocated token, which signifies End Of Tokens. Because this must + * be set up with the current input index and so on, we embed the structure and + * return the address of it. It is marked as factoryMade, so that it is never + * attempted to be freed. 
+ */ + TokenType m_eofToken; + + /// A special pre-allocated token, which is returned by mTokens() if the + /// lexer rule said to just skip the generated token altogether. + /// Having this single token stops us wasting memory by having the token factory + /// actually create something that we are going to SKIP(); anyway. + /// + TokenType m_skipToken; + + /** When the token source is constructed, it is populated with the file + * name from whence the tokens were produced by the lexer. This pointer is a + * copy of the one supplied by the CharStream (and may be NULL) so should + * not be manipulated other than to copy or print it. + */ + StringType m_fileName; + +public: + TokenType& get_eofToken(); + const TokenType& get_eofToken() const; + TokenType& get_skipToken(); + StringType& get_fileName(); + LexerType* get_super(); + + void set_fileName( const StringType& fileName ); + + /** + * \brief + * Default implementation of the nextToken() call for a lexer. + * + * \param toksource + * Points to the implementation of a token source. The lexer is + * addressed by the super structure pointer. + * + * \returns + * The next token in the current input stream or the EOF token + * if there are no more tokens in any input stream in the stack. + * + * Write detailed description for nextToken here. + * + * \remarks + * Write remarks for nextToken here. + * + * \see nextTokenStr + */ + TokenType* nextToken(); + CommonTokenType* nextToken( BoolForwarder /*isFiltered*/ ); + CommonTokenType* nextToken( BoolForwarder /*isFiltered*/ ); + + /// + /// \brief + /// Returns the next available token from the current input stream. + /// + /// \param toksource + /// Points to the implementation of a token source. The lexer is + /// addressed by the super structure pointer. + /// + /// \returns + /// The next token in the current input stream or the EOF token + /// if there are no more tokens. + /// + /// \remarks + /// Write remarks for nextToken here. 
+ /// + /// \see nextToken + /// + TokenType* nextTokenStr(); + +protected: + TokenSource(); +}; + +/** Definition of the ANTLR3 common token stream interface. + * \remark + * Much of the documentation for this interface is stolen from Ter's Java implementation. + */ +template +class TokenStream : public ImplTraits::TokenIntStreamType +{ +public: + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::TokenIntStreamType IntStreamType; + typedef typename ImplTraits::CommonTokenType TokenType; + typedef TokenType UnitType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename ImplTraits::TokenStreamType TokenStreamType; + typedef typename ImplTraits::ParserType ComponentType; + +protected: + /** Pointer to the token source for this stream + */ + TokenSourceType* m_tokenSource; + + /// Debugger interface, if this is a debugging token stream + /// + DebugEventListenerType* m_debugger; + + /// Indicates the initial stream state for dbgConsume() + /// + bool m_initialStreamState; + +public: + TokenStream(TokenSourceType* source, DebugEventListenerType* debugger); + IntStreamType* get_istream(); + TokenSourceType* get_tokenSource() const; + void set_tokenSource( TokenSourceType* tokenSource ); + + /** Get Token at current input pointer + k ahead where k=1 is next Token. + * k<0 indicates tokens in the past. So -1 is previous token and -2 is + * two tokens ago. LT(0) is undefined. For k>=n, return Token.EOFToken. + * Return null for LT(0) and any index that results in an absolute address + * that is negative. + */ + const TokenType* _LT(ANTLR_INT32 k); + + /** Where is this stream pulling tokens from? This is not the name, but + * a pointer into an interface that contains an ANTLR3_TOKEN_SOURCE interface. + * The Token Source interface contains a pointer to the input stream and a pointer + * to a function that returns the next token. 
+ */ + TokenSourceType* getTokenSource(); + + /** Function that installs a token source for the stream + */ + void setTokenSource(TokenSourceType* tokenSource); + + /** Return the text of all the tokens in the stream, as the old tramp in + * Leeds market used to say; "Get the lot!" + */ + StringType toString(); + + /** Return the text of all tokens from start to stop, inclusive. + * If the stream does not buffer all the tokens then it can just + * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in + * an action in that case. + */ + StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop); + + /** Because the user is not required to use a token with an index stored + * in it, we must provide a means for two token objects themselves to + * indicate the start/end location. Most often this will just delegate + * to the index-based toStringSS(). This is also parallel with + * the pTREENODE_STREAM->toString(Object,Object). + */ + StringType toStringTT(const TokenType* start, const TokenType* stop); + + + /** Function that sets the token stream into debugging mode + */ + void setDebugListener(DebugEventListenerType* debugger); + + TokenStream(); + +}; + +/** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default + * parsers and recognizers. You may of course build your own implementation if + * you are so inclined. 
+ */ +template +class TokenStoreSelector +{ +public: + typedef ListType TokensType; +}; + +template +class TokenStoreSelector +{ +public: + typedef MapType TokensType; +}; + +template +class CommonTokenStream : public TokenStream +{ +public: + typedef typename ImplTraits::AllocPolicyType AllocPolicyType; + typedef typename ImplTraits::BitsetType BitsetType; + typedef typename ImplTraits::CommonTokenType TokenType; + typedef typename ImplTraits::TokenSourceType TokenSourceType; + typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; + typedef typename AllocPolicyType::template ListType TokensListType; + typedef typename AllocPolicyType::template OrderedMapType TokensMapType; + typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE, + TokensListType, TokensMapType >::TokensType TokensType; + + typedef typename AllocPolicyType::template UnOrderedMapType ChannelOverridesType; + typedef typename AllocPolicyType::template OrderedSetType DiscardSetType; + typedef typename AllocPolicyType::template ListType IntListType; + typedef TokenStream BaseType; + +private: + /** Records every single token pulled from the source indexed by the token index. + * There might be more efficient ways to do this, such as referencing directly in to + * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not + * a huge overhead as it only stores pointers anyway, but allows for iterations and + * so on. + */ + TokensType m_tokens; + + /** Override map of tokens. If a token type has an entry in here, then + * the pointer in the table points to an int, being the override channel number + * that should always be used for this token type. + */ + ChannelOverridesType m_channelOverrides; + + /** Discared set. If a token has an entry in this table, then it is thrown + * away (data pointer is always NULL). + */ + DiscardSetType m_discardSet; + + /* The channel number that this token stream is tuned to. 
For instance, whitespace + * is usually tuned to channel 99, which no token stream would normally tune to and + * so it is thrown away. + */ + ANTLR_UINT32 m_channel; + + /** The index into the tokens list of the current token (the next one that will be + * consumed. p = -1 indicates that the token list is empty. + */ + ANTLR_INT32 m_p; + + /* The total number of tokens issued till now. For streams that delete tokens, + this helps in issuing the index + */ + ANTLR_UINT32 m_nissued; + + /** If this flag is set to true, then tokens that the stream sees that are not + * in the channel that this stream is tuned to, are not tracked in the + * tokens table. When set to false, ALL tokens are added to the tracking. + */ + bool m_discardOffChannel; + +public: + CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL, + DebugEventListenerType* debugger = NULL); + ~CommonTokenStream(); + TokensType& get_tokens(); + const TokensType& get_tokens() const; + DiscardSetType& get_discardSet(); + const DiscardSetType& get_discardSet() const; + ANTLR_INT32 get_p() const; + void set_p( ANTLR_INT32 p ); + void inc_p(); + void dec_p(); + + /** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot exec actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored + * channel. + */ + void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); + + /** Add a particular token type to the discard set. 
If a token is found to belong + * to this set, then it is skipped/thrown away + */ + void discardTokenType(ANTLR_INT32 ttype); + + //This will discard tokens of a particular rule after the rule execution completion + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ); + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder tokens_accessed_from_owning_rule ); + void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder tokens_accessed_from_owning_rule ); + + void insertToken( const TokenType& tok ); + void insertToken( const TokenType& tok, BoolForwarder tokens_accessed_from_owning_rule ); + void insertToken( const TokenType& tok, BoolForwarder tokens_accessed_from_owning_rule ); + + /** Get a token at an absolute index i; 0..n-1. This is really only + * needed for profiling and debugging and token stream rewriting. + * If you don't want to buffer up tokens, then this method makes no + * sense for you. Naturally you can't use the rewrite stream feature. + * I believe DebugTokenStream can easily be altered to not use + * this method, removing the dependency. + */ + const TokenType* get(ANTLR_MARKER i); + const TokenType* getToken(ANTLR_MARKER i); + const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder tokens_accessed_from_owning_rule ); + const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder tokens_accessed_from_owning_rule ); + + /** Signal to discard off channel tokens from here on in. + */ + void discardOffChannelToks(bool discard); + + /** Function that returns a pointer to the ANTLR3_LIST of all tokens + * in the stream (this causes the buffer to fill if we have not get any yet) + */ + TokensType* getTokens(); + + /** Function that returns all the tokens between a start and a stop index. 
+ */ + void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); + + /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens + */ + void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); + + /** Function that returns all the tokens indicated by being a member of the supplied List + */ + void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, + const IntListType& list, TokensListType& tokenList); + + /** Function that returns all tokens of a certain type within a range. + */ + void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); + + /** Function that resets the token stream so that it can be reused, but + * but that does not free up any resources, such as the token factory + * the factory pool and so on. This prevents the need to keep freeing + * and reallocating the token pools if the thing you are building is + * a multi-shot dameon or somethign like that. It is much faster to + * just reuse all the vectors. 
+ */ + void reset(); + + const TokenType* LB(ANTLR_INT32 k); + + + void fillBufferExt(); + void fillBuffer(); + + bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder tokens_accessed_from_owning_rule ); + bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder tokens_accessed_from_owning_rule ); + + ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i); + ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x); + ANTLR_MARKER index_impl(); +}; + +class TokenAccessException : public std::exception +{ + virtual const char* what() const throw() + { + return " Attempted access on Deleted Token"; + } +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3tokenstream.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3tokenstream.inl b/runtime/Cpp/include/antlr3tokenstream.inl new file mode 100755 index 000000000..9c1c5a92a --- /dev/null +++ b/runtime/Cpp/include/antlr3tokenstream.inl @@ -0,0 +1,937 @@ +ANTLR_BEGIN_NAMESPACE() + +template +TokenSource::TokenSource() + :m_eofToken( ImplTraits::CommonTokenType::TOKEN_EOF), + m_skipToken( ImplTraits::CommonTokenType::TOKEN_INVALID) +{ +} + +template +ANTLR_INLINE typename TokenSource::CommonTokenType& TokenSource::get_eofToken() +{ + return m_eofToken; +} + +template +ANTLR_INLINE const typename TokenSource::TokenType& TokenSource::get_eofToken() const +{ + return m_eofToken; +} + +template +ANTLR_INLINE typename TokenSource::CommonTokenType& TokenSource::get_skipToken() +{ + return m_skipToken; +} + +template +ANTLR_INLINE typename TokenSource::StringType& TokenSource::get_fileName() +{ + return m_fileName; +} + +template +ANTLR_INLINE void TokenSource::set_fileName( const StringType& fileName ) +{ + m_fileName = fileName; +} + +template +typename TokenSource::LexerType* TokenSource::get_super() +{ + return static_cast(this); +} + +template +typename TokenSource::TokenType* TokenSource::nextTokenStr() +{ + typedef typename LexerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef typename 
LexerType::InputStreamType InputStreamType; + typedef typename LexerType::IntStreamType IntStreamType; + LexerType* lexer; + RecognizerSharedStateType* state; + InputStreamType* input; + IntStreamType* istream; + + lexer = this->get_super(); + state = lexer->get_rec()->get_state(); + input = lexer->get_input(); + istream = input->get_istream(); + + /// Loop until we get a non skipped token or EOF + /// + for (;;) + { + // Get rid of any previous token (token factory takes care of + // any de-allocation when this token is finally used up. + // + state->set_token_present(false); + state->set_error(false); // Start out without an exception + state->set_failed(false); + + // Now call the matching rules and see if we can generate a new token + // + for (;;) + { + // Record the start of the token in our input stream. + // + state->set_channel( TOKEN_DEFAULT_CHANNEL ); + state->set_tokenStartCharIndex( (ANTLR_MARKER)input->get_nextChar() ); + state->set_tokenStartCharPositionInLine( input->get_charPositionInLine() ); + state->set_tokenStartLine( input->get_line() ); + state->set_text(""); + + if (istream->_LA(1) == ANTLR_CHARSTREAM_EOF) + { + // Reached the end of the current stream, nothing more to do if this is + // the last in the stack. + // + TokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.set_line(lexer->getLine()); + return &teof; + } + + state->set_token_present( false ); + state->set_error(false); // Start out without an exception + state->set_failed(false); + + // Call the generated lexer, see if it can get a new token together. + // + lexer->mTokens(); + + if (state->get_error() == true) + { + // Recognition exception, report it and try to recover. 
+ // + state->set_failed(true); + lexer->get_rec()->reportError(); + lexer->recover(); + } + else + { + if ( !state->get_token_present() ) + { + // Emit the real token, which adds it in to the token stream basically + // + lexer->emit(); + } + else if ( *(state->get_token()) == m_skipToken ) + { + // A real token could have been generated, but "Computer say's naaaaah" and it + // it is just something we need to skip altogether. + // + continue; + } + + // Good token, not skipped, not EOF token + // + return state->get_token(); + } + } + } +} + +template +typename TokenSource::TokenType* TokenSource::nextToken() +{ + return this->nextToken( BoolForwarder() ); +} + +template +typename TokenSource::CommonTokenType* TokenSource::nextToken( BoolForwarder /*isFiltered*/ ) +{ + LexerType* lexer; + typename LexerType::RecognizerSharedStateType* state; + + lexer = this->get_super(); + state = lexer->get_lexstate(); + + /* Get rid of any previous token (token factory takes care of + * any deallocation when this token is finally used up. + */ + state->set_token_present( false ); + state->set_error( false ); /* Start out without an exception */ + state->set_failed(false); + + /* Record the start of the token in our input stream. + */ + state->set_tokenStartCharIndex( lexer->index() ); + state->set_tokenStartCharPositionInLine( lexer->getCharPositionInLine() ); + state->set_tokenStartLine( lexer->getLine() ); + state->set_text(""); + + /* Now call the matching rules and see if we can generate a new token + */ + for (;;) + { + if (lexer->LA(1) == ANTLR_CHARSTREAM_EOF) + { + /* Reached the end of the stream, nothing more to do. 
+ */ + CommonTokenType& teof = m_eofToken; + + teof.set_startIndex(lexer->getCharIndex()); + teof.set_stopIndex(lexer->getCharIndex()); + teof.set_line(lexer->getLine()); + return &teof; + } + + state->set_token_present(false); + state->set_error(false); /* Start out without an exception */ + + { + ANTLR_MARKER m; + + m = lexer->get_istream()->mark(); + state->set_backtracking(1); /* No exceptions */ + state->set_failed(false); + + /* Call the generated lexer, see if it can get a new token together. + */ + lexer->mTokens(); + state->set_backtracking(0); + + /* mTokens backtracks with synpred at BACKTRACKING==2 + and we set the synpredgate to allow actions at level 1. */ + + if(state->get_failed()) + { + lexer->rewind(m); + lexer->consume(); // + } + else + { + lexer->emit(); /* Assemble the token and emit it to the stream */ + TokenType* tok = state->get_token(); + return tok; + } + } + } +} + +template +typename TokenSource::CommonTokenType* TokenSource::nextToken( BoolForwarder /*isFiltered*/ ) +{ + // Find the next token in the current stream + // + CommonTokenType* tok = this->nextTokenStr(); + + // If we got to the EOF token then switch to the previous + // input stream if there were any and just return the + // EOF if there are none. We must check the next token + // in any outstanding input stream we pop into the active + // role to see if it was sitting at EOF after PUSHing the + // stream we just consumed, otherwise we will return EOF + // on the reinstalled input stream, when in actual fact + // there might be more input streams to POP before the + // real EOF of the whole logical inptu stream. Hence we + // use a while loop here until we find somethign in the stream + // that isn't EOF or we reach the actual end of the last input + // stream on the stack. 
+ // + while(tok->get_type() == CommonTokenType::TOKEN_EOF) + { + typename ImplTraits::LexerType* lexer; + lexer = static_cast( this->get_super() ); + + if ( lexer->get_rec()->get_state()->get_streams().size() > 0) + { + // We have another input stream in the stack so we + // need to revert to it, then resume the loop to check + // it wasn't sitting at EOF itself. + // + lexer->popCharStream(); + tok = this->nextTokenStr(); + } + else + { + // There were no more streams on the input stack + // so this EOF is the 'real' logical EOF for + // the input stream. So we just exit the loop and + // return the EOF we have found. + // + break; + } + + } + + // return whatever token we have, which may be EOF + // + return tok; +} + +template +TokenStream::TokenStream() +{ + m_tokenSource = NULL; + m_debugger = NULL; + m_initialStreamState = false; +} + +template +typename TokenStream::IntStreamType* TokenStream::get_istream() +{ + return this; +} + +template +TokenStream::TokenStream(TokenSourceType* source, DebugEventListenerType* debugger) +{ + m_initialStreamState = false; + m_tokenSource = source; + m_debugger = debugger; +} + +template +CommonTokenStream::CommonTokenStream(ANTLR_UINT32 , TokenSourceType* source, + DebugEventListenerType* debugger) + : CommonTokenStream::BaseType( source, debugger ) +{ + m_p = -1; + m_channel = TOKEN_DEFAULT_CHANNEL; + m_discardOffChannel = false; + m_nissued = 0; +} + +template +typename CommonTokenStream::TokensType& CommonTokenStream::get_tokens() +{ + return m_tokens; +} + +template +const typename CommonTokenStream::TokensType& CommonTokenStream::get_tokens() const +{ + return m_tokens; +} + +template +typename CommonTokenStream::DiscardSetType& CommonTokenStream::get_discardSet() +{ + return m_discardSet; +} + +template +const typename CommonTokenStream::DiscardSetType& CommonTokenStream::get_discardSet() const +{ + return m_discardSet; +} + +template +ANTLR_INLINE ANTLR_INT32 CommonTokenStream::get_p() const +{ + return m_p; +} + 
+template
+ANTLR_INLINE void CommonTokenStream::set_p( ANTLR_INT32 p )
+{
+    m_p = p;
+}
+
+template
+ANTLR_INLINE void CommonTokenStream::inc_p()
+{
+    ++m_p;
+}
+
+template
+ANTLR_INLINE void CommonTokenStream::dec_p()
+{
+    --m_p;
+}
+
+/// Current position of the consume pointer (same value as get_p()).
+template
+ANTLR_INLINE ANTLR_MARKER CommonTokenStream::index_impl()
+{
+    return m_p;
+}
+
+// Reset a token stream so it can be used again and can reuse its
+// resources.
+//
+template
+void CommonTokenStream::reset()
+{
+    // Free any resources that are most likely specific to the
+    // run we just did.
+    //
+    m_discardSet.clear();
+    m_channelOverrides.clear();
+
+    // Drop every token buffered by the previous run.
+    //
+    m_tokens.clear();
+
+    // Reset to defaults
+    //
+    m_discardOffChannel = false;
+    m_channel = ImplTraits::CommonTokenType::TOKEN_DEFAULT_CHANNEL;
+    m_p = -1;
+
+    // The issue counter doubles as the index given to the next token and as
+    // the cached stream size (see fillBuffer()). It must restart together
+    // with the now-empty token store, otherwise the next fill numbers tokens
+    // from the old count and index-based lookups miss.
+    //
+    m_nissued = 0;
+    this->get_istream()->set_cachedSize( 0 );
+}
+
+/// Installs (or replaces) the debug listener and clears the initial-stream-state flag.
+template
+void TokenStream::setDebugListener(DebugEventListenerType* debugger)
+{
+    m_debugger = debugger;
+    m_initialStreamState = false;
+}
+
+/// Look ahead k on-channel tokens (k=1 is the next token). Negative k is
+/// forwarded to LB(-k). Returns the token source's EOF token when the
+/// requested position lies beyond the buffered tokens.
+template
+const typename TokenStream::TokenType* TokenStream::_LT(ANTLR_INT32 k)
+{
+    ANTLR_INT32 i;
+    ANTLR_INT32 n;
+    TokenStreamType* cts;
+
+    cts = this->get_super();
+
+    if(k < 0)
+    {
+        return cts->LB(-k);
+    }
+
+    ANTLR_INT32 req_idx = cts->get_p() + k - 1;
+    ANTLR_INT32 cached_size = static_cast(this->get_istream()->get_cachedSize());
+
+    // Fill when nothing has been buffered yet, or when the request is past the
+    // buffered tokens and the buffered count is a whole multiple of the fill
+    // increment.
+    //
+    if( (cts->get_p() == -1) ||
+        ( ( req_idx >= cached_size ) && ( (cached_size % ImplTraits::TOKEN_FILL_BUFFER_INCREMENT) == 0 ) )
+      )
+    {
+        cts->fillBuffer();
+    }
+
+    // Here we used to check for k == 0 and return 0, but this seems
+    // a superfluous check to me. LT(k=0) is therefore just undefined
+    // and we won't waste the clock cycles on the check
+    //
+    cached_size = static_cast(this->get_istream()->get_cachedSize());
+    if ( req_idx >= cached_size )
+    {
+        TokenType& teof = cts->get_tokenSource()->get_eofToken();
+
+        teof.set_startIndex( this->get_istream()->index());
+        teof.set_stopIndex( this->get_istream()->index());
+        return &teof;
+    }
+
+    i = cts->get_p();
+    n = 1;
+
+    /* Need to find k good tokens, skipping ones that are off channel
+     */
+    while( n < k)
+    {
+        /* Skip off-channel tokens */
+        i = cts->skipOffTokenChannels(i+1); /* leave p on valid token */
+        n++;
+    }
+
+    if( ( i >= cached_size ) && ( (cached_size % ImplTraits::TOKEN_FILL_BUFFER_INCREMENT) == 0 ) )
+    {
+        cts->fillBuffer();
+    }
+    if ( (ANTLR_UINT32) i >= this->get_istream()->get_cachedSize() )
+    {
+        TokenType& teof = cts->get_tokenSource()->get_eofToken();
+
+        teof.set_startIndex(this->get_istream()->index());
+        teof.set_stopIndex(this->get_istream()->index());
+        return &teof;
+    }
+
+    // Here the token must be in the token store, so fetch it by index.
+    //
+    return cts->getToken(i);
+}
+
+/// Look back k on-channel tokens from the consume pointer. Returns NULL for
+/// k == 0 and when the walk would run off the front of the buffer.
+template
+const typename CommonTokenStream::TokenType* CommonTokenStream::LB(ANTLR_INT32 k)
+{
+    ANTLR_INT32 i;
+    ANTLR_INT32 n;
+
+    if (m_p == -1)
+    {
+        this->fillBuffer();
+    }
+    if (k == 0)
+    {
+        return NULL;
+    }
+    if ((m_p - k) < 0)
+    {
+        return NULL;
+    }
+
+    i = m_p;
+    n = 1;
+
+    /* Need to find k good tokens, going backwards, skipping ones that are off channel
+     */
+    while (n <= k)
+    {
+        /* Skip off-channel tokens
+         */
+
+        i = this->skipOffTokenChannelsReverse(i - 1); /* leave p on valid token */
+        n++;
+    }
+    if (i < 0)
+    {
+        return NULL;
+    }
+
+    // Here the token must be in the token store, so fetch it by index.
+    //
+    return this->getToken(i);
+}
+
+/// Same as get(i).
+template
+const typename CommonTokenStream::TokenType* CommonTokenStream::getToken(ANTLR_MARKER i)
+{
+    return this->get(i);
+}
+
+
+/// Fetch the token at absolute index i through the storage-specific overload.
+template
+const typename CommonTokenStream::TokenType* CommonTokenStream::get(ANTLR_MARKER i)
+{
+    return this->getToken( static_cast(i),
+                    BoolForwarder() );
+}
+
+// NOTE(review): the two getToken overloads below differ only in their
+// BoolForwarder argument, whose template argument list appears to have been
+// lost in this patch text. The first works on a keyed store (find/second),
+// the second on an indexable store (at) - confirm against the original file.
+
+/// Keyed-store lookup: throws TokenAccessException when no token is stored
+/// under tok_idx.
+template
+const typename CommonTokenStream::TokenType* CommonTokenStream::getToken( ANTLR_MARKER tok_idx,
+                            BoolForwarder /*tokens_accessed_from_owning_rule*/ )
+{
+    typename TokensType::iterator iter = m_tokens.find(tok_idx);
+    if( iter == m_tokens.end() )
+    {
+        TokenAccessException ex;
+        throw ex;
+    }
+    const TokenType& tok = iter->second;
+    return &tok;
+}
+
+/// Indexed-store lookup via at(), which performs its own range check.
+template
+const typename CommonTokenStream::TokenType* CommonTokenStream::getToken( ANTLR_MARKER tok_idx, BoolForwarder /*tokens_accessed_from_owning_rule*/ )
+{
+    TokenType& tok = m_tokens.at( static_cast(tok_idx) );
+    return &tok;
+}
+
+template
+typename TokenStream::TokenSourceType* TokenStream::get_tokenSource() const
+{
+    return m_tokenSource;
+}
+
+template
+void TokenStream::set_tokenSource( TokenSourceType* tokenSource )
+{
+    m_tokenSource = tokenSource;
+}
+
+/// Text of every buffered token; fills the buffer first if nothing has been
+/// read yet.
+template
+typename TokenStream::StringType TokenStream::toString()
+{
+    TokenStreamType* cts = static_cast(this);
+
+    if (cts->get_p() == -1)
+    {
+        cts->fillBuffer();
+    }
+
+    // toStringSS clamps stop to the last valid index.
+    return this->toStringSS(0, this->get_istream()->size());
+}
+
+/// Concatenated text of the tokens with indexes start..stop, inclusive.
+/// Returns an empty string when there is no token source or no buffered tokens.
+template
+typename TokenStream::StringType
+TokenStream::toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop)
+{
+    StringType string;
+    TokenSourceType* tsource;
+    const TokenType* tok;
+    TokenStreamType* cts;
+
+    cts = this->get_super();
+
+    if (cts->get_p() == -1)
+    {
+        cts->fillBuffer();
+    }
+    if (stop >= this->get_istream()->size())
+    {
+        stop = this->get_istream()->size() - 1;
+    }
+
+    /* Who is giving us these tokens?
+     */
+    tsource = cts->get_tokenSource();
+
+    if (tsource != NULL && !cts->get_tokens().empty() )
+    {
+        /* Finally, let's get a string
+         */
+        for (ANTLR_MARKER i = start; i <= stop; i++)
+        {
+            tok = cts->get(i);
+            if (tok != NULL)
+            {
+                string.append( tok->getText() );
+            }
+        }
+
+        return string;
+    }
+    return "";
+}
+
+/// Text between two tokens, located by their stored token indexes.
+/// Returns an empty string if either pointer is NULL.
+template
+typename TokenStream::StringType
+TokenStream::toStringTT(const TokenType* start, const TokenType* stop)
+{
+    if (start != NULL && stop != NULL)
+    {
+        return this->toStringSS( start->get_tokenIndex(),
+                        stop->get_tokenIndex());
+    }
+    else
+    {
+        return "";
+    }
+}
+
+/** A simple filter mechanism whereby you can tell this token stream
+ * to force all tokens of type ttype to be on channel. For example,
+ * when interpreting, we cannot execute actions so we need to tell
+ * the stream to force all WS and NEWLINE to be a different, ignored,
+ * channel.
+ */
+template
+void CommonTokenStream::setTokenTypeChannel ( ANTLR_UINT32 ttype, ANTLR_UINT32 channel)
+{
+    /* The entry is keyed by the plain token type; the stored value is the
+     * channel plus one (fillBuffer() subtracts the one again when it applies
+     * the override).
+     */
+    m_channelOverrides[ttype] = (ANTLR_UINT32)channel + 1;
+
+}
+
+/// Add ttype to the discard set; fillBuffer() drops tokens of these types.
+template
+void CommonTokenStream::discardTokenType(ANTLR_INT32 ttype)
+{
+    /* Membership in the set is all that matters; no value is stored.
+     */
+    m_discardSet.insert(ttype);
+}
+
+template
+void CommonTokenStream::discardOffChannelToks(bool discard)
+{
+    m_discardOffChannel = discard;
+}
+
+/// Pointer to the token store; fills the buffer first if nothing has been
+/// read yet.
+template
+typename CommonTokenStream::TokensType* CommonTokenStream::getTokens()
+{
+    if (m_p == -1)
+    {
+        this->fillBuffer();
+    }
+
+    return &m_tokens;
+}
+
+/// All tokens in start..stop regardless of type (getTokensSet with no bitset).
+template
+void CommonTokenStream::getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop,
+                                    TokensListType& tokenRange)
+{
+    return this->getTokensSet(start, stop, NULL, tokenRange);
+}
+
+/** Given a start and stop index (inclusive), append to filteredList every
+ * token whose type is a member of the BitSet, or every token when types is
+ * NULL. filteredList is left untouched if the range is empty. This
+ * method looks at both on and off channel tokens.
+ */
+template
+void
+CommonTokenStream::getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types,
+                                TokensListType& filteredList )
+{
+    ANTLR_UINT32 i;
+    TokenType* tok;
+
+    if ( m_p == -1)
+    {
+        this->fillBuffer();
+    }
+
+    // stop is inclusive, so it has to be clamped to the LAST valid index
+    // (size - 1), not to size. An empty stream has no valid index at all.
+    //
+    ANTLR_UINT32 count = this->get_istream()->size();
+    if (count == 0)
+    {
+        return;
+    }
+    if (stop >= count)
+    {
+        stop = count - 1;
+    }
+    if (start > stop)
+    {
+        return;
+    }
+
+    /* We have the range set, now we need to iterate through the
+     * installed tokens and create a new list with just the ones we want
+     * in it. We are just moving pointers about really.
+     */
+    for(i = start; i <= stop; i++)
+    {
+        tok = this->get(i);
+
+        if ( types == NULL
+            || (types->isMember( tok->get_type() ) == true )
+            )
+        {
+            filteredList.push_back(tok);
+        }
+    }
+
+    return ;
+}
+
+/// Tokens in start..stop whose type appears in list. A temporary bitset is
+/// built from the list and deleted before returning.
+template
+void
+CommonTokenStream::getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop,
+                                    const IntListType& list, TokensListType& newlist)
+{
+    BitsetType* bitSet;
+
+    bitSet = Bitset::BitsetFromList(list);
+    this->getTokensSet(start, stop, bitSet, newlist);
+    delete bitSet;
+}
+
+/// Tokens in start..stop of exactly one type. A temporary single-member
+/// bitset is built and deleted before returning.
+template
+void
+CommonTokenStream::getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type,
+                                  TokensListType& newlist )
+{
+    BitsetType* bitSet;
+
+    bitSet = BitsetType::BitsetOf(type, -1);
+    this->getTokensSet(start, stop, bitSet, newlist);
+
+    delete bitSet;
+}
+
+template
+void CommonTokenStream::fillBufferExt()
+{
+    this->fillBuffer();
+}
+
+// NOTE(review): as with getToken, the two overloads below are told apart only
+// by a BoolForwarder argument whose template arguments are missing from this
+// patch text - confirm which storage mode each one belongs to.
+
+/// Incremental mode: stop filling once a full increment has been buffered.
+template
+bool CommonTokenStream::hasReachedFillbufferTarget( ANTLR_UINT32 cnt,
+                                                    BoolForwarder )
+{
+    return ( cnt >= ImplTraits::TOKEN_FILL_BUFFER_INCREMENT );
+}
+
+/// Unbounded mode: never stop early, fillBuffer() reads until EOF.
+template
+bool CommonTokenStream::hasReachedFillbufferTarget( ANTLR_UINT32,
+                                                    BoolForwarder )
+{
+    return false;
+}
+
+
+/// Pull tokens from the token source into the token store until EOF or until
+/// hasReachedFillbufferTarget() says enough were read, applying the discard
+/// set, the off-channel discard flag and the per-type channel overrides.
+template
+void CommonTokenStream::fillBuffer()
+{
+    ANTLR_UINT32 index;
+    TokenType* tok;
+    bool discard;
+
+    /* Remember where the consume pointer was (0 if nothing was read yet) and
+     * continue numbering tokens from the count already issued.
+     */
+    ANTLR_UINT32 cached_p = (m_p < 0) ? 0 : m_p;
+    index = m_nissued;
+    ANTLR_UINT32 cnt = 0;
+
+    /* Pick out the next token from the token source
+     * Remember we just get a pointer (reference if you like) here
+     * and so if we store it anywhere, we don't set any pointers to auto free it.
+     */
+    tok = this->get_tokenSource()->nextToken();
+
+    while ( tok->get_type() != TokenType::TOKEN_EOF )
+    {
+        discard = false; /* Assume we are not discarding */
+
+        if ( m_discardSet.find(tok->get_type()) != m_discardSet.end() )
+        {
+            discard = true;
+        }
+        else if ( m_discardOffChannel == true
+                && tok->get_channel() != m_channel
+                )
+        {
+            discard = true;
+        }
+        else if (!m_channelOverrides.empty())
+        {
+            /* See if this type is in the override map. setTokenTypeChannel()
+             * keys the map by the plain token type, so look it up the same
+             * way; only the stored channel value carries the +1 offset.
+             */
+            typename ChannelOverridesType::iterator iter = m_channelOverrides.find( tok->get_type() );
+
+            if (iter != m_channelOverrides.end())
+            {
+                /* Override found
+                 */
+                tok->set_channel( ANTLR_UINT32_CAST(iter->second) - 1);
+            }
+        }
+
+        /* If not discarding it, add it to the list at the current index
+         */
+        if (discard == false)
+        {
+            /* Add it, indicating that we will delete it and the table should not
+             */
+            tok->set_tokenIndex(index);
+            ++m_p;
+            this->insertToken(*tok);
+            index++;
+            m_nissued++;
+            cnt++;
+        }
+
+        if( !this->hasReachedFillbufferTarget( cnt,
+                    BoolForwarder() ) )
+            tok = this->get_tokenSource()->nextToken();
+        else
+            break;
+    }
+
+    /* Cache the size so we don't keep doing indirect method calls. We do this as
+     * early as possible so that anything after this may utilize the cached value.
+     */
+    this->get_istream()->set_cachedSize( m_nissued );
+
+    /* Set the consume pointer to the first token that is on our channel, we just read
+     */
+    m_p = cached_p;
+    m_p = this->skipOffTokenChannels( m_p );
+
+}
+/// Given a starting index, return the index of the first on-channel
+/// token.
+/// +template +ANTLR_UINT32 CommonTokenStream::skipOffTokenChannels(ANTLR_INT32 i) +{ + ANTLR_INT32 n; + n = this->get_istream()->get_cachedSize(); + + while (i < n) + { + const TokenType* tok = this->getToken(i); + + if (tok->get_channel() != m_channel ) + { + i++; + } + else + { + return i; + } + } + return i; +} + +template +ANTLR_UINT32 CommonTokenStream::skipOffTokenChannelsReverse(ANTLR_INT32 x) +{ + while (x >= 0) + { + const TokenType* tok = this->getToken(x); + + if( tok->get_channel() != m_channel ) + { + x--; + } + else + { + return x; + } + } + return x; +} + +template +void CommonTokenStream::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ) +{ + this->discardTokens( start, stop, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); +} + +template +void CommonTokenStream::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder /*tokens_accessed_from_owning_rule */ ) +{ + typename TokensType::iterator iter1 = m_tokens.lower_bound(start); + typename TokensType::iterator iter2 = m_tokens.upper_bound(stop); + m_tokens.erase( iter1, iter2 ); +} + +template +void CommonTokenStream::discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, + BoolForwarder /*tokens_accessed_from_owning_rule*/ ) +{ + m_tokens.erase( m_tokens.begin() + start, m_tokens.begin() + stop ); +} + +template +void CommonTokenStream::insertToken( const TokenType& tok ) +{ + this->insertToken( tok, BoolForwarder< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE >() ); +} + +template +void CommonTokenStream::insertToken( const TokenType& tok, BoolForwarder /*tokens_accessed_from_owning_rule*/ ) +{ + assert( m_tokens.find( tok.get_index() ) == m_tokens.end() ); + assert( tok.get_index() == m_nissued ); + m_tokens[ tok.get_index() ] = tok; +} + +template +void CommonTokenStream::insertToken( const TokenType& tok, BoolForwarder /*tokens_accessed_from_owning_rule*/ ) +{ + m_tokens.push_back( tok ); +} + +template +CommonTokenStream::~CommonTokenStream() +{ + 
m_tokens.clear(); +} + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/include/antlr3traits.hpp b/runtime/Cpp/include/antlr3traits.hpp new file mode 100755 index 000000000..c87604412 --- /dev/null +++ b/runtime/Cpp/include/antlr3traits.hpp @@ -0,0 +1,320 @@ +#ifndef _ANTLR3_TRAITS_HPP +#define _ANTLR3_TRAITS_HPP + +#include "antlr3defs.hpp" + + +ANTLR_BEGIN_NAMESPACE() + +//Users implementing overrides should inherit from this +template +class CustomTraitsBase +{ +public: + typedef Empty AllocPolicyType; + typedef Empty StringType; + typedef Empty StringStreamType; + typedef Empty StreamDataType; + typedef Empty Endianness; + + //collections + typedef Empty BitsetType; + typedef Empty BitsetListType; + + typedef Empty InputStreamType; + + template + class IntStreamType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty LexStateType; + + typedef Empty CommonTokenType; + typedef Empty TokenIntStreamType; + + typedef Empty TokenStreamType; + typedef Empty TreeNodeStreamType; + + + typedef Empty DebugEventListenerType; + template + class RecognizerSharedStateType : public Empty + { + public: + typedef Empty BaseType; + }; + + template + class RecognizerType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty TreeType; + typedef Empty TreeAdaptorType; + + template + class ExceptionBaseType : public Empty + { + public: + typedef Empty BaseType; + }; + + //this should be overridden with generated lexer + typedef Empty BaseLexerType; + + typedef Empty TokenSourceType; + typedef Empty BaseParserType;//this should be overridden with generated lexer + typedef Empty BaseTreeParserType; + + template + class RewriteStreamType : public Empty + { + public: + typedef Empty BaseType; + }; + + typedef Empty RuleReturnValueType; + + //If we want to change the way tokens are stored + static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = false; + static const int TOKEN_FILL_BUFFER_INCREMENT = 100; //used only if the above val is true + 
+ static void displayRecognitionError( const std::string& str ) { printf("%s", str.c_str() ); } +}; + +template +class TraitsSelector +{ +public: + typedef A selected; +}; + +template +class TraitsSelector +{ +public: + typedef B selected; +}; + +template +class TraitsOneArgSelector +{ +public: + typedef A selected; +}; + +template +class TraitsOneArgSelector +{ +public: + typedef B selected; +}; + +template +class BoolSelector +{ +public: + typedef A selected; +}; + +template +class BoolSelector +{ +public: + typedef B selected; +}; + +template< template class UserTraits > +class TraitsBase +{ +public: + typedef TraitsBase TraitsType; + + typedef typename TraitsSelector< typename UserTraits::AllocPolicyType, DefaultAllocPolicy >::selected AllocPolicyType; + + typedef typename TraitsSelector< typename UserTraits::StringType, + std::string >::selected StringType; + + typedef typename TraitsSelector< typename UserTraits::StringStreamType, + std::stringstream >::selected StringStreamType; + + typedef typename TraitsSelector< typename UserTraits::StreamDataType, + ANTLR_UINT8 >::selected StreamDataType; + + typedef typename TraitsSelector< typename UserTraits::Endianness, + RESOLVE_ENDIAN_AT_RUNTIME >::selected Endianness; + + typedef typename TraitsSelector< typename UserTraits::BitsetType, + Bitset >::selected BitsetType; + typedef typename TraitsSelector< typename UserTraits::BitsetListType, + BitsetList >::selected BitsetListType; + + typedef typename TraitsSelector< typename UserTraits::InputStreamType, + InputStream >::selected InputStreamType; + + template + class IntStreamType + : public TraitsOneArgSelector< + typename UserTraits::template IntStreamType, + IntStream, + typename UserTraits::template IntStreamType::BaseType + >::selected + { }; + + typedef typename TraitsSelector< typename UserTraits::LexStateType, + LexState >::selected LexStateType; + + static const bool TOKENS_ACCESSED_FROM_OWNING_RULE = UserTraits::TOKENS_ACCESSED_FROM_OWNING_RULE; + static 
const int TOKEN_FILL_BUFFER_INCREMENT = UserTraits::TOKEN_FILL_BUFFER_INCREMENT; //used only if the above val is true + + static void displayRecognitionError( const StringType& str ) { UserTraits::displayRecognitionError(str); } +}; + +template< + class LxrType, + class PsrType, + template class UserTraits = CustomTraitsBase, + class TreePsrType = antlr3::Empty + > +class Traits : public TraitsBase +{ +public: + typedef Traits TraitsType; + typedef TraitsBase BaseTraitsType; + + typedef typename TraitsSelector< typename UserTraits::CommonTokenType, + CommonToken >::selected CommonTokenType; + typedef typename TraitsSelector< typename UserTraits::TokenIntStreamType, + TokenIntStream >::selected TokenIntStreamType; + + typedef typename TraitsSelector< typename UserTraits::TokenStreamType, + CommonTokenStream >::selected TokenStreamType; + typedef typename TraitsSelector< typename UserTraits::TreeNodeStreamType, + CommonTreeNodeStream >::selected TreeNodeStreamType; + + typedef typename TraitsSelector< typename UserTraits::DebugEventListenerType, + DebugEventListener >::selected DebugEventListenerType; + + template + class RecognizerSharedStateType + : public TraitsOneArgSelector< + typename UserTraits::template RecognizerSharedStateType, + RecognizerSharedState, + typename UserTraits::template RecognizerSharedStateType::BaseType + >::selected + {}; + + template + class RecognizerType + : public TraitsOneArgSelector< + typename UserTraits::template RecognizerType, + BaseRecognizer, + typename UserTraits::template RecognizerType::BaseType + >::selected + { + public: + typedef typename TraitsOneArgSelector< + typename UserTraits::template RecognizerType, + BaseRecognizer, + typename UserTraits::template RecognizerType::BaseType + >::selected BaseType; + typedef typename BaseType::RecognizerSharedStateType RecognizerSharedStateType; + + public: + RecognizerType(ANTLR_UINT32 sizeHint, RecognizerSharedStateType* state) + : BaseType( sizeHint, state ) + { + } + }; + + 
typedef typename TraitsSelector< typename UserTraits::TreeType, + CommonTree >::selected TreeType; + typedef typename TraitsSelector< typename UserTraits::TreeAdaptorType, + CommonTreeAdaptor >::selected TreeAdaptorType; + + template + class ExceptionBaseType : public TraitsOneArgSelector< + typename UserTraits::template ExceptionBaseType, + ANTLR_ExceptionBase, + typename UserTraits::template ExceptionBaseType::BaseType + >::selected + { + public: + typedef typename TraitsOneArgSelector< + typename UserTraits::template ExceptionBaseType, + ANTLR_ExceptionBase, + typename UserTraits::template ExceptionBaseType::BaseType + >::selected BaseType; + + protected: + ExceptionBaseType( const typename BaseTraitsType::StringType& message ) + :BaseType(message) + { + } + }; + + //this should be overridden with generated lexer + typedef typename TraitsSelector< typename UserTraits::BaseLexerType, + Lexer >::selected BaseLexerType; + typedef LxrType LexerType; + + typedef typename TraitsSelector< typename UserTraits::TokenSourceType, + TokenSource >::selected TokenSourceType; + typedef typename TraitsSelector< typename UserTraits::BaseParserType, + Parser >::selected BaseParserType; + + typedef PsrType ParserType; + + typedef typename TraitsSelector< typename UserTraits::BaseTreeParserType, + TreeParser >::selected BaseTreeParserType; + typedef TreePsrType TreeParserType; + + template + class RewriteStreamType : public TraitsOneArgSelector< + typename UserTraits::template RewriteStreamType, + RewriteRuleElementStream, + typename UserTraits::template RewriteStreamType::BaseType + >::selected + { + public: + typedef typename TraitsOneArgSelector< + typename UserTraits::template RewriteStreamType, + RewriteRuleElementStream, + typename UserTraits::template RewriteStreamType::BaseType + >::selected BaseType; + + typedef typename SuperType::StreamType StreamType; + typedef typename BaseType::RecognizerType Recognizer_Type; + typedef typename BaseType::TokenType TokenType; + typedef 
typename BaseType::ElementsType ElementsType; + + public: + RewriteStreamType(TreeAdaptorType* adaptor = NULL, Recognizer_Type* rec=NULL, ANTLR_UINT8* description = NULL) + :BaseType(adaptor, rec, description) + { + } + RewriteStreamType(TreeAdaptorType* adaptor, Recognizer_Type* rec, ANTLR_UINT8* description, TokenType* oneElement) + :BaseType(adaptor, rec, description, oneElement) + { + } + RewriteStreamType(TreeAdaptorType* adaptor, Recognizer_Type* rec, ANTLR_UINT8* description, const ElementsType& elements) + :BaseType(adaptor, rec, description, elements) + { + } + }; + + typedef typename TraitsSelector< typename UserTraits::RuleReturnValueType, + typename BoolSelector< TraitsType::TOKENS_ACCESSED_FROM_OWNING_RULE, + RuleReturnValue_1, RuleReturnValue >::selected + >::selected RuleReturnValueType; +}; + + +ANTLR_END_NAMESPACE() + +#endif //_ANTLR3_TRAITS_HPP diff --git a/runtime/Cpp/include/antlr3treeparser.hpp b/runtime/Cpp/include/antlr3treeparser.hpp new file mode 100755 index 000000000..8f83ff8ab --- /dev/null +++ b/runtime/Cpp/include/antlr3treeparser.hpp @@ -0,0 +1,101 @@ +#ifndef ANTLR3TREEPARSER_HPP +#define ANTLR3TREEPARSER_HPP + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB + +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "antlr3defs.hpp" + +/** Internal structure representing an element in a hash bucket. + * Stores the original key so that duplicate keys can be rejected + * if necessary, and contains function can be supported If the hash key + * could be unique I would have invented the perfect compression algorithm ;-) + */ +ANTLR_BEGIN_NAMESPACE() + +template +class TreeParser : public ImplTraits::template RecognizerType< TreeParser > +{ +public: + typedef typename ImplTraits::TreeNodeStreamType TreeNodeStreamType; + typedef TreeNodeStreamType StreamType; + typedef typename TreeNodeStreamType::IntStreamType IntStreamType; + typedef typename ImplTraits::TreeType TreeType; + typedef TreeType TokenType; + typedef typename ImplTraits::template ExceptionBase ExceptionBaseType; + typedef typename ImplTraits::template RecognizerType< TreeParser > RecognizerType; + typedef typename RecognizerType::RecognizerSharedStateType RecognizerSharedStateType; + typedef Empty TokenSourceType; + typedef typename ImplTraits::BitsetListType BitsetListType; + typedef typename ImplTraits::StringType StringType; + typedef typename ImplTraits::CommonTokenType CommonTokenType; + +private: + /** Pointer to the common tree node stream for the parser + */ + 
TreeNodeStreamType* m_ctnstream; + +public: + TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, + RecognizerSharedStateType* state); + TreeNodeStreamType* get_ctnstream() const; + IntStreamType* get_istream() const; + RecognizerType* get_rec(); + + //same as above. Just that get_istream exists for lexer, parser, treeparser + //get_parser_istream exists only for parser, treeparser. So use it accordingly + IntStreamType* get_parser_istream() const; + + /** Set the input stream and reset the parser + */ + void setTreeNodeStream(TreeNodeStreamType* input); + + /** Return a pointer to the input stream + */ + TreeNodeStreamType* getTreeNodeStream(); + + TokenType* getMissingSymbol( IntStreamType* istream, + ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, + BitsetListType* follow); + + /** Pointer to a function that knows how to free resources of an ANTLR3 tree parser. + */ + ~TreeParser(); + + void fillExceptionData( ExceptionBaseType* ex ); + void displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ); + void exConstruct(); + void mismatch(ANTLR_UINT32 ttype, BitsetListType* follow); +}; + +ANTLR_END_NAMESPACE() + +#include "antlr3treeparser.inl" + +#endif diff --git a/runtime/Cpp/include/antlr3treeparser.inl b/runtime/Cpp/include/antlr3treeparser.inl new file mode 100755 index 000000000..f08e3efe8 --- /dev/null +++ b/runtime/Cpp/include/antlr3treeparser.inl @@ -0,0 +1,198 @@ +ANTLR_BEGIN_NAMESPACE() + +template< class ImplTraits > +TreeParser::TreeParser( ANTLR_UINT32 sizeHint, TreeNodeStreamType* ctnstream, + RecognizerSharedStateType* state) + :RecognizerType( sizeHint, state ) +{ + /* Install the tree node stream + */ + this->setTreeNodeStream(ctnstream); + +} + +template< class ImplTraits > +TreeParser::~TreeParser() +{ + this->get_rec()->get_state()->get_following().clear(); +} + +template< class ImplTraits > +typename TreeParser::TreeNodeStreamType* TreeParser::get_ctnstream() const +{ + return m_ctnstream; +} + 
+template< class ImplTraits > +typename TreeParser::IntStreamType* TreeParser::get_istream() const +{ + return m_ctnstream; +} + +template< class ImplTraits > +typename TreeParser::IntStreamType* TreeParser::get_parser_istream() const +{ + return m_ctnstream; +} + +template< class ImplTraits > +typename TreeParser::RecognizerType* TreeParser::get_rec() +{ + return this; +} + +template< class ImplTraits > +void TreeParser::fillExceptionData( ExceptionBaseType* ex ) +{ + ex->set_token( m_ctnstream->_LT(1) ); /* Current input tree node */ + ex->set_line( ex->get_token()->getLine() ); + ex->set_charPositionInLine( ex->get_token()->getCharPositionInLine() ); + ex->set_index( m_ctnstream->index() ); + + // Are you ready for this? Deep breath now... + // + { + TreeType* tnode; + + tnode = ex->get_token(); + + if (tnode->get_token() == NULL) + { + ex->set_streamName("-unknown source-" ); + } + else + { + if ( tnode->get_token()->get_input() == NULL) + { + ex->set_streamName(""); + } + else + { + ex->set_streamName( tnode->get_token()->get_input()->get_fileName() ); + } + } + ex->set_message("Unexpected node"); + } +} + +template< class ImplTraits > +void TreeParser::displayRecognitionError( ANTLR_UINT8** tokenNames, ExceptionBaseType* ex ) +{ + typename ImplTraits::StringStreamType errtext; + // See if there is a 'filename' we can use + // + if( ex->get_streamName().empty() ) + { + if(ex->get_token()->get_type() == ImplTraits::CommonTokenType::TOKEN_EOF) + { + errtext << "-end of input-("; + } + else + { + errtext << "-unknown source-("; + } + } + else + { + errtext << ex->get_streamName() << "("; + } + + // Next comes the line number + // + errtext << this->get_rec()->get_state()->get_exception()->get_line() << ") "; + errtext << " : error " << this->get_rec()->get_state()->get_exception()->getType() + << " : " + << this->get_rec()->get_state()->get_exception()->get_message(); + + IntStreamType* is = this->get_istream(); + TreeType* theBaseTree = 
this->get_rec()->get_state()->get_exception()->get_token(); + StringType ttext = theBaseTree->toStringTree(); + + if (theBaseTree != NULL) + { + TreeType* theCommonTree = static_cast(theBaseTree); + if (theCommonTree != NULL) + { + CommonTokenType* theToken = theBaseTree->getToken(); + } + errtext << ", at offset " + << theBaseTree->getCharPositionInLine(); + errtext << ", near " << ttext; + } + ex->displayRecognitionError( errtext ); + ImplTraits::displayRecognitionError( errtext.str() ); +} + +template< class ImplTraits > +void TreeParser::setTreeNodeStream(TreeNodeStreamType* input) +{ + m_ctnstream = input; + this->get_rec()->reset(); + m_ctnstream->reset(); +} + +template< class ImplTraits > +typename TreeParser::TreeNodeStreamType* TreeParser::getTreeNodeStream() +{ + return m_ctnstream; +} + +template< class ImplTraits > +void TreeParser::exConstruct() +{ + new ANTLR_Exception( this->get_rec(), "" ); +} + +template< class ImplTraits > +void TreeParser::mismatch(ANTLR_UINT32 ttype, BitsetListType* follow) +{ + this->exConstruct(); + this->recoverFromMismatchedToken(ttype, follow); +} + +template< class ImplTraits > +typename TreeParser::TokenType* +TreeParser::getMissingSymbol( IntStreamType* istream, ExceptionBaseType* e, + ANTLR_UINT32 expectedTokenType, BitsetListType* follow) +{ + TreeNodeStreamType* tns; + TreeType* node; + TreeType* current; + CommonTokenType* token; + StringType text; + ANTLR_INT32 i; + + // Dereference the standard pointers + // + tns = static_cast(istream); + + // Create a new empty node, by stealing the current one, or the previous one if the current one is EOF + // + current = tns->_LT(1); + i = -1; + + if (current == tns->get_EOF_NODE_p()) + { + current = tns->_LT(-1); + i--; + } + node = current->dupNode(); + + // Find the newly dupicated token + // + token = node->getToken(); + + // Create the token text that shows it has been inserted + // + token->setText("getText(); + text.append((const char 
*)this->get_rec()->get_state()->get_tokenName(expectedTokenType)); + text.append((const char *)">"); + + // Finally return the pointer to our new node + // + return node; +} + + +ANTLR_END_NAMESPACE() diff --git a/runtime/Cpp/tests/.gitignore b/runtime/Cpp/tests/.gitignore new file mode 100644 index 000000000..b3a82e28b --- /dev/null +++ b/runtime/Cpp/tests/.gitignore @@ -0,0 +1,22 @@ +# Windows test files +t[0-9][0-9][0-9].exe + +# Unix test files +t[0-9][0-9][0-9] + +# ANTLR generated files +t[0-9][0-9][0-9]?*.cpp +t[0-9][0-9][0-9]?*.hpp +t[0-9][0-9][0-9]*.tokens + +# MSVC files +*.vcxproj +*.vcxproj.user +Debug/ +Release/ + +# Eclipse CDT files +.cproject + +# tests output files +t012.lxr.output diff --git a/runtime/Cpp/tests/Makefile b/runtime/Cpp/tests/Makefile new file mode 100644 index 000000000..0c3b35d6b --- /dev/null +++ b/runtime/Cpp/tests/Makefile @@ -0,0 +1,95 @@ +.PRECIOUS=.o + +ANTLRGRAMMARS ?= $(wildcard t0[01]*.g) +HEADERS = $(wildcard *.hpp) $(wildcard *.inl) +RUNTIME_HEADERS = $(wildcard ../include/*.hpp) $(wildcard ../include/*.inl) +SOURCES = $(wildcard *.cpp) +POBJS = $(PSOURCES:.cpp=.o) +TOKENS = $(ANTLRGRAMMARS:.g=.tokens) + +GRAMMAROPTIONS= #-report -Xconversiontimeout 1500000 -Xmultithreaded -Xwatchconversion +ANTLR = ../../../antlr-complete/target/antlr-complete-3.5.1-SNAPSHOT.jar +STGS = $(wildcard ../../../tool/src/main/resources/org/antlr/codegen/templates/Cpp/*.stg) + +.SUFFIXES: + +INCLUDES= -I. -I../include/ + +CFLAGS=-ggdb3 -O0 -fno-inline -Wall -Wno-unused-variable +#CFLAGS=-ggdb3 -O3 + +TOBJS= utils.o + +all: $(ANTLR) $(TOKENS) t001 t002 t003 t004 t005 t006 t006 t007 t008 t009 t010 t011 t012 + +# For devel only. 
This will replace .stg files in the tool in a fast way +# +$(ANTLR): $(STGS) + jar uvf $(ANTLR) -C ../../../tool/src/main/resources \ + org/antlr/codegen/templates/Cpp/Cpp.stg + +t001: t001.cpp t001lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t002: t002.cpp t002lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t003: t003.cpp t003lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t004: t004.cpp t004lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t005: t005.cpp t005lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t006: t006.cpp t006lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t007: t007.cpp t007lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t008: t008.cpp t008lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t009: t009.cpp t009lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t010: t010.cpp t010lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t011: t011.cpp t011lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +t012: t012.cpp t012lexerXMLLexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) + g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + + +# AST commented out +# t039: t039.cpp t039labels.tokens 
UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) +# g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ +# +# t042: t042.cpp t005lexer.tokens UserTestTraits.hpp Makefile $(ANTLR) $(RUNTIME_HEADERS) +# g++ $(CFLAGS) -DUSESTL $(INCLUDES) $< $@?*.cpp -o $@ + +clean: + rm -f *.o t0[0-9][0-9]??*.[ch]pp *.tokens t[0-9][0-9][0-9] t0[0-9][0-9].exe + +# %.u: %.g +# @echo "Bulding dependencies for "$< +# java -jar $(ANTLR) -depend $< > $@ +# @grep ":" $@ |awk 'BEGIN {printf "ANTLRGENLIST := " }{printf " " $$1}END {print ""}' >> $@.tmp +# @cat $@.tmp >> $@ +# $(RM) $@.tmp + +%.tokens %.cpp %Lexer.c %Parser.c %Lexer.h %Parser.h %.hpp: %.g $(ANTLR) + java -jar $(ANTLR) $(GRAMMAROPTIONS) $< + +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(strip $(ANTLRGRAMMARS)),) +#-include $(ANTLRGRAMMARS:.g=.u) +endif +endif + +%.o: %.cpp $(HEADERS) utils.hpp + g++ $(CFLAGS) -DUSESTL $(INCLUDES) -c $< -o $@ diff --git a/runtime/Cpp/tests/UserTestTraits.hpp b/runtime/Cpp/tests/UserTestTraits.hpp new file mode 100644 index 000000000..f73faa2a1 --- /dev/null +++ b/runtime/Cpp/tests/UserTestTraits.hpp @@ -0,0 +1,66 @@ +#ifndef _T_TEST_TRAITS_H +#define _T_TEST_TRAITS_H + +// First thing we always do is include the ANTLR3 generated files, which +// will automatically include the antlr3 runtime header files. +// The compiler must use -I (or set the project settings in VS2005) +// to locate the antlr3 runtime files and -I. 
to find this file +#include + +// Forward declaration for Lexer&Parser class(es) +namespace Antlr3Test { + class S1Lexer; + class S1Parser; + + class t001lexer; + class t002lexer; + class t003lexer; + class t004lexer; + class t005lexer; + class t006lexer; + class t007lexer; + class t008lexer; + class t009lexer; + class t010lexer; + class t011lexer; + class t012lexerXMLLexer; + + class t039labelsLexer; + class t039labelsParser; +}; + +namespace Antlr3Test { + + //code for overriding + template + class UserTraits : public antlr3::CustomTraitsBase + { + public: + }; + + // Even Lexer only samples need some Parser class as a template parameter + class NoParser { + }; + + // Instantiate the Traits class(will be used for Lexer/Parser template instantiations) + typedef antlr3::Traits S1LexerTraits; + typedef antlr3::Traits S1ParserTraits; + + typedef antlr3::Traits t001lexerTraits; + typedef antlr3::Traits t002lexerTraits; + typedef antlr3::Traits t003lexerTraits; + typedef antlr3::Traits t004lexerTraits; + typedef antlr3::Traits t005lexerTraits; + typedef antlr3::Traits t006lexerTraits; + typedef antlr3::Traits t007lexerTraits; + typedef antlr3::Traits t008lexerTraits; + typedef antlr3::Traits t009lexerTraits; + typedef antlr3::Traits t010lexerTraits; + typedef antlr3::Traits t011lexerTraits; + typedef antlr3::Traits t012lexerXMLLexerTraits; + + typedef antlr3::Traits t039labelsLexerTraits; + typedef t039labelsLexerTraits t039labelsParserTraits; +}; + +#endif diff --git a/runtime/Cpp/tests/t001.cpp b/runtime/Cpp/tests/t001.cpp new file mode 100644 index 000000000..6feb1c8ed --- /dev/null +++ b/runtime/Cpp/tests/t001.cpp @@ -0,0 +1,95 @@ +#include "UserTestTraits.hpp" +#include "t001lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testIteratorInterface(string const& data); +int testMalformedInput(string const& data); + +static t001lexer* lxr; + +int main (int argc, char 
*argv[]) +{ + testValid("0"); + testIteratorInterface("0"); + testMalformedInput("1"); + return 0; +} + +int testValid(string const& data) +{ + t001lexerTraits::InputStreamType* input = new t001lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t001"); + if (lxr == NULL) + lxr = new t001lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + t001lexerTraits::CommonTokenType *token1 = lxr->nextToken(); + + std::cout << token0->getText() << std::endl; + std::cout << token1->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testIteratorInterface(string const& data) +{ + t001lexerTraits::InputStreamType* input = new t001lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t001"); + if (lxr == NULL) + lxr = new t001lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testIteratorInterface: \"" << data << '"' <get_tokSource()); + t001lexerTraits::CommonTokenType const *token0 = tstream->_LT(1); + t001lexerTraits::CommonTokenType const *token1 = tstream->_LT(2); + + std::cout << token0->getText() << std::endl; + std::cout << token1->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t001lexerTraits::InputStreamType* input = new t001lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t001"); + if (lxr == NULL) + lxr = new t001lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t001lexer.g b/runtime/Cpp/tests/t001lexer.g 
new file mode 100644 index 000000000..a9594d40d --- /dev/null +++ b/runtime/Cpp/tests/t001lexer.g @@ -0,0 +1,13 @@ +lexer grammar t001lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +ZERO: '0'; diff --git a/runtime/Cpp/tests/t002.cpp b/runtime/Cpp/tests/t002.cpp new file mode 100644 index 000000000..b04c46358 --- /dev/null +++ b/runtime/Cpp/tests/t002.cpp @@ -0,0 +1,106 @@ +#include "UserTestTraits.hpp" +#include "t002lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testIteratorInterface(string const& data); +int testMalformedInput(string const& data); + +static t002lexer *lxr; +static t002lexerTokens::Tokens ExpectedTokens[] = + { + t002lexerTokens::ZERO, + t002lexerTokens::ONE, + t002lexerTokens::EOF_TOKEN + }; + +int main (int argc, char *argv[]) +{ + testValid("01"); + testIteratorInterface("01"); + testMalformedInput("2"); + return 0; +} + +int testValid(string const& data) +{ + t002lexerTraits::InputStreamType* input = new t002lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t002"); + if (lxr == NULL) + lxr = new t002lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + std::cout << token->getText() << '\t' + << (token->getType() == ExpectedTokens[i] ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testIteratorInterface(string const& data) +{ + t002lexerTraits::InputStreamType* input = new t002lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t002"); + if (lxr == NULL) + lxr = new t002lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testIteratorInterface: \"" << data << '"' <get_tokSource()); + t002lexerTraits::CommonTokenType const *token0 = tstream->_LT(1); + t002lexerTraits::CommonTokenType const *token1 = tstream->_LT(2); + t002lexerTraits::CommonTokenType const *token2 = tstream->_LT(3); + + std::cout << token0->getText() << std::endl; + std::cout << token1->getText() << std::endl; + std::cout << token2->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t002lexerTraits::InputStreamType* input = new t002lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t002"); + if (lxr == NULL) + lxr = new t002lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t002lexer.g b/runtime/Cpp/tests/t002lexer.g new file mode 100644 index 000000000..c36754736 --- /dev/null +++ b/runtime/Cpp/tests/t002lexer.g @@ -0,0 +1,14 @@ +lexer grammar t002lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +ZERO: '0'; +ONE: '1'; diff --git a/runtime/Cpp/tests/t003.cpp b/runtime/Cpp/tests/t003.cpp new file mode 100644 index 000000000..e6dc05901 --- /dev/null +++ b/runtime/Cpp/tests/t003.cpp @@ -0,0 +1,118 @@ +#include 
"UserTestTraits.hpp" +#include "t003lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testIteratorInterface(string const& data); +int testMalformedInput(string const& data); + +static t003lexer *lxr; + +struct TokenData +{ + t003lexerTokens::Tokens type; + //unsigned start; + //unsigned stop; + //const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + { t003lexerTokens::ZERO }, + { t003lexerTokens::FOOZE }, + { t003lexerTokens::ONE }, + { t003lexerTokens::EOF_TOKEN } +}; + +int main (int argc, char *argv[]) +{ + testValid("0fooze1"); + testIteratorInterface("0fooze1"); + testMalformedInput("2"); + return 0; +} + +int testValid(string const& data) +{ + t003lexerTraits::InputStreamType* input = new t003lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t003"); + if (lxr == NULL) + lxr = new t003lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + std::cout << token->getText() << '\t' + << (token->getType() == ExpectedTokens[i].type ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testIteratorInterface(string const& data) +{ + t003lexerTraits::InputStreamType* input = new t003lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t003"); + if (lxr == NULL) + lxr = new t003lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testIteratorInterface: \"" << data << '"' <get_tokSource()); + t003lexerTraits::CommonTokenType const *token0 = tstream->_LT(1); + t003lexerTraits::CommonTokenType const *token1 = tstream->_LT(2); + t003lexerTraits::CommonTokenType const *token2 = tstream->_LT(3); + t003lexerTraits::CommonTokenType const *token3 = tstream->_LT(4); + + std::cout << token0->getText() << std::endl; + std::cout << token1->getText() << std::endl; + std::cout << token2->getText() << std::endl; + std::cout << token3->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t003lexerTraits::InputStreamType* input = new t003lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t003"); + if (lxr == NULL) + lxr = new t003lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t003lexer.g b/runtime/Cpp/tests/t003lexer.g new file mode 100644 index 000000000..72e671b42 --- /dev/null +++ b/runtime/Cpp/tests/t003lexer.g @@ -0,0 +1,15 @@ +lexer grammar t003lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +ZERO: '0'; +ONE: '1'; +FOOZE: 'fooze'; diff --git a/runtime/Cpp/tests/t004.cpp b/runtime/Cpp/tests/t004.cpp new 
file mode 100644 index 000000000..fc0ecd3fd --- /dev/null +++ b/runtime/Cpp/tests/t004.cpp @@ -0,0 +1,100 @@ +#include "UserTestTraits.hpp" +#include "t004lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t004lexer *lxr; + +struct TokenData +{ + t004lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + { t004lexerTokens::FOO, 0, 0, "f"}, + { t004lexerTokens::FOO, 1, 2, "fo"}, + { t004lexerTokens::FOO, 3, 5, "foo"}, + { t004lexerTokens::FOO, 6, 9, "fooo"} +}; + +int main (int argc, char *argv[]) +{ + testValid("ffofoofooo"); + testMalformedInput("2"); + return 0; +} + +int testValid(string const& data) +{ + t004lexerTraits::InputStreamType* input = new t004lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t004"); + if (lxr == NULL) + lxr = new t004lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t004lexerTraits::InputStreamType* input = new t004lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t004"); + if (lxr == NULL) + lxr = new t004lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t004lexer.g b/runtime/Cpp/tests/t004lexer.g new file mode 100644 index 000000000..2079c7c09 --- /dev/null +++ b/runtime/Cpp/tests/t004lexer.g @@ -0,0 +1,13 @@ +lexer grammar t004lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO: 'f' 'o'*; diff --git a/runtime/Cpp/tests/t005.cpp b/runtime/Cpp/tests/t005.cpp new file mode 100644 index 000000000..748463052 --- /dev/null +++ b/runtime/Cpp/tests/t005.cpp @@ -0,0 +1,131 @@ +#include "UserTestTraits.hpp" +#include "t005lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput1(string const& data); +int testMalformedInput2(string const& data); + +static t005lexer *lxr; + +struct TokenData +{ + t005lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "fofoofooo" + { t005lexerTokens::FOO, 0, 1, "fo"}, + { t005lexerTokens::FOO, 2, 4, "foo"}, + { t005lexerTokens::FOO, 5, 8, "fooo"}, + { t005lexerTokens::EOF_TOKEN, 9, 9, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("fofoofooo"); + testMalformedInput1("2"); + testMalformedInput2("f"); + return 0; +} + +int testValid(string const& data) +{ + 
t005lexerTraits::InputStreamType* input = new t005lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t005"); + if (lxr == NULL) + lxr = new t005lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? "OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput1(string const& data) +{ + t005lexerTraits::InputStreamType* input = new t005lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t005"); + if (lxr == NULL) + lxr = new t005lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput1: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + //except antlr3.MismatchedTokenException as exc: + //self.assertEqual(exc.expecting, 'f') + //self.assertEqual(exc.unexpectedType, '2') + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput2(string const& data) +{ + t005lexerTraits::InputStreamType* input = new t005lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t005"); + if (lxr == NULL) + lxr = new t005lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput2: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << 
std::endl; + + //except antlr3.EarlyExitException as exc: + //self.assertEqual(exc.unexpectedType, antlr3.EOF) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t005lexer.g b/runtime/Cpp/tests/t005lexer.g new file mode 100644 index 000000000..64e856e3a --- /dev/null +++ b/runtime/Cpp/tests/t005lexer.g @@ -0,0 +1,13 @@ +lexer grammar t005lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO: 'f' 'o'+; diff --git a/runtime/Cpp/tests/t006.cpp b/runtime/Cpp/tests/t006.cpp new file mode 100644 index 000000000..38c6d2f33 --- /dev/null +++ b/runtime/Cpp/tests/t006.cpp @@ -0,0 +1,111 @@ +#include "UserTestTraits.hpp" +#include "t006lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t006lexer *lxr; + +struct TokenData +{ + t006lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "fofaaooa" + { t006lexerTokens::FOO, 0, 1, "fo"}, + { t006lexerTokens::FOO, 2, 7, "faaooa"}, + { t006lexerTokens::EOF_TOKEN, 8, 8, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("fofaaooa"); + testMalformedInput("fofoaooaoa2"); + return 0; +} + +int testValid(string const& data) +{ + t006lexerTraits::InputStreamType* input = new t006lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t006"); + if (lxr == NULL) + lxr = new t006lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == 
ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? "OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t006lexerTraits::InputStreamType* input = new t006lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t006"); + if (lxr == NULL) + lxr = new t006lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + //except antlr3.MismatchedTokenException as exc: + //self.assertEqual(exc.expecting, 'f') + //self.assertEqual(exc.unexpectedType, '2') + //self.assertEqual(exc.charPositionInLine, 10) + //self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t006lexer.g b/runtime/Cpp/tests/t006lexer.g new file mode 100644 index 000000000..ce7a9a207 --- /dev/null +++ b/runtime/Cpp/tests/t006lexer.g @@ -0,0 +1,13 @@ +lexer grammar t006lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO: 'f' ('o' | 'a')*; diff --git a/runtime/Cpp/tests/t007.cpp b/runtime/Cpp/tests/t007.cpp new file mode 100644 index 000000000..dc9b5a3a7 --- /dev/null +++ b/runtime/Cpp/tests/t007.cpp @@ -0,0 +1,105 @@ +#include "UserTestTraits.hpp" +#include "t007lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + 
+static t007lexer *lxr; + +struct TokenData +{ + t007lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "fofababbooabb" + { t007lexerTokens::FOO, 0, 1, "fo"}, + { t007lexerTokens::FOO, 2, 12, "fababbooabb"}, + { t007lexerTokens::EOF_TOKEN, 13, 13, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("fofababbooabb"); + testMalformedInput("foaboao"); + return 0; +} + +int testValid(string const& data) +{ + t007lexerTraits::InputStreamType* input = new t007lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t007"); + if (lxr == NULL) + lxr = new t007lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t007lexerTraits::InputStreamType* input = new t007lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t007"); + if (lxr == NULL) + lxr = new t007lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token0->getText() << std::endl; + + //except antlr3.EarlyExitException as exc: + // self.assertEqual(exc.unexpectedType, 'o') + // self.assertEqual(exc.charPositionInLine, 6) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t007lexer.g b/runtime/Cpp/tests/t007lexer.g new file mode 100644 index 000000000..1eac53143 --- /dev/null +++ b/runtime/Cpp/tests/t007lexer.g @@ -0,0 +1,13 @@ +lexer grammar t007lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO: 'f' ('o' | 'a' 'b'+)*; diff --git a/runtime/Cpp/tests/t008.cpp b/runtime/Cpp/tests/t008.cpp new file mode 100644 index 000000000..fbb541b7e --- /dev/null +++ b/runtime/Cpp/tests/t008.cpp @@ -0,0 +1,111 @@ +#include "UserTestTraits.hpp" +#include "t008lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t008lexer *lxr; + +struct TokenData +{ + t008lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "ffaf" + { t008lexerTokens::FOO, 0, 0, "f"}, + { t008lexerTokens::FOO, 1, 2, "fa"}, + { t008lexerTokens::FOO, 3, 3, "f"}, + { t008lexerTokens::EOF_TOKEN, 4, 4, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("ffaf"); + 
testMalformedInput("fafb"); + return 0; +} + +int testValid(string const& data) +{ + t008lexerTraits::InputStreamType* input = new t008lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t008"); + if (lxr == NULL) + lxr = new t008lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? "OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t008lexerTraits::InputStreamType* input = new t008lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t008"); + if (lxr == NULL) + lxr = new t008lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + //except antlr3.MismatchedTokenException as exc: + // self.assertEqual(exc.unexpectedType, 'b') + // self.assertEqual(exc.charPositionInLine, 3) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t008lexer.g b/runtime/Cpp/tests/t008lexer.g new file mode 100644 index 000000000..029415562 --- /dev/null +++ b/runtime/Cpp/tests/t008lexer.g @@ 
-0,0 +1,13 @@ +lexer grammar t008lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO: 'f' 'a'?; diff --git a/runtime/Cpp/tests/t009.cpp b/runtime/Cpp/tests/t009.cpp new file mode 100644 index 000000000..75b2fca6b --- /dev/null +++ b/runtime/Cpp/tests/t009.cpp @@ -0,0 +1,111 @@ +#include "UserTestTraits.hpp" +#include "t009lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t009lexer *lxr; + +struct TokenData +{ + t009lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "085" + { t009lexerTokens::DIGIT, 0, 0, "0"}, + { t009lexerTokens::DIGIT, 1, 1, "8"}, + { t009lexerTokens::DIGIT, 2, 2, "5"}, + { t009lexerTokens::EOF_TOKEN, 3, 3, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("085"); + testMalformedInput("2a"); + return 0; +} + +int testValid(string const& data) +{ + t009lexerTraits::InputStreamType* input = new t009lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t009"); + if (lxr == NULL) + lxr = new t009lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t009lexerTraits::InputStreamType* input = new t009lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t009"); + if (lxr == NULL) + lxr = new t009lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + //except antlr3.MismatchedSetException as exc: + // # TODO: This should provide more useful information + // self.assertIsNone(exc.expecting) + // self.assertEqual(exc.unexpectedType, 'a') + // self.assertEqual(exc.charPositionInLine, 1) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t009lexer.g b/runtime/Cpp/tests/t009lexer.g new file mode 100644 index 000000000..928f97b88 --- /dev/null +++ b/runtime/Cpp/tests/t009lexer.g @@ -0,0 +1,13 @@ +lexer grammar t009lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +DIGIT: '0' .. 
'9'; diff --git a/runtime/Cpp/tests/t010.cpp b/runtime/Cpp/tests/t010.cpp new file mode 100644 index 000000000..0b9fca7ea --- /dev/null +++ b/runtime/Cpp/tests/t010.cpp @@ -0,0 +1,111 @@ +#include "UserTestTraits.hpp" +#include "t010lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t010lexer *lxr; + +struct TokenData +{ + t010lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "foobar _Ab98 \n A12sdf" + { t010lexerTokens::IDENTIFIER, 0, 5, "foobar"}, + { t010lexerTokens::WS, 6, 6, " "}, + { t010lexerTokens::IDENTIFIER, 7, 11, "_Ab98"}, + { t010lexerTokens::WS, 12, 14, " \n "}, + { t010lexerTokens::IDENTIFIER, 15, 20, "A12sdf"}, + { t010lexerTokens::EOF_TOKEN, 21, 21, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("foobar _Ab98 \n A12sdf"); + testMalformedInput("a-b"); + return 0; +} + +int testValid(string const& data) +{ + t010lexerTraits::InputStreamType* input = new t010lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t010"); + if (lxr == NULL) + lxr = new t010lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t010lexerTraits::InputStreamType* input = new t010lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t010"); + if (lxr == NULL) + lxr = new t010lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + //except antlr3.NoViableAltException as exc: + // self.assertEqual(exc.unexpectedType, '-') + // self.assertEqual(exc.charPositionInLine, 1) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t010lexer.g b/runtime/Cpp/tests/t010lexer.g new file mode 100644 index 000000000..bb5a53e88 --- /dev/null +++ b/runtime/Cpp/tests/t010lexer.g @@ -0,0 +1,14 @@ +lexer grammar t010lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*; +WS: (' ' | '\n')+; diff --git a/runtime/Cpp/tests/t011.cpp b/runtime/Cpp/tests/t011.cpp new file mode 100644 index 000000000..d8e40c748 --- /dev/null +++ b/runtime/Cpp/tests/t011.cpp @@ -0,0 +1,113 @@ +#include "UserTestTraits.hpp" +#include "t011lexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t011lexer *lxr; + +struct TokenData +{ + t011lexerTokens::Tokens type; + unsigned start; + unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + // "foobar _Ab98 \n A12sdf" + { t011lexerTokens::IDENTIFIER, 0, 5, "foobar"}, + { t011lexerTokens::WS, 6, 
6, " "}, + { t011lexerTokens::IDENTIFIER, 7, 11, "_Ab98"}, + { t011lexerTokens::WS, 12, 14, " \n "}, + { t011lexerTokens::IDENTIFIER, 15, 20, "A12sdf"}, + { t011lexerTokens::EOF_TOKEN, 21, 21, ""} +}; + +int main (int argc, char *argv[]) +{ + testValid("foobar _Ab98 \n A12sdf"); + testMalformedInput("a-b"); + return 0; +} + +int testValid(string const& data) +{ + t011lexerTraits::InputStreamType* input = new t011lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t011"); + if (lxr == NULL) + lxr = new t011lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <nextToken(); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getType() == ExpectedTokens[i].type ? "OK" : "Fail") + << '\t' << (startIndex == ExpectedTokens[i].start ? "OK" : "Fail") + << '\t' << (stopIndex == ExpectedTokens[i].stop ? "OK" : "Fail") + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput(string const& data) +{ + t011lexerTraits::InputStreamType* input = new t011lexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t011"); + if (lxr == NULL) + lxr = new t011lexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + //except antlr3.NoViableAltException as exc: + // self.assertEqual(exc.unexpectedType, '-') + // self.assertEqual(exc.charPositionInLine, 1) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} diff --git a/runtime/Cpp/tests/t011lexer.g b/runtime/Cpp/tests/t011lexer.g new file mode 100644 index 000000000..06a87040f --- /dev/null +++ b/runtime/Cpp/tests/t011lexer.g @@ -0,0 +1,27 @@ +lexer grammar t011lexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +#include +} +@lexer::namespace +{ Antlr3Test } + +IDENTIFIER: + ('a'..'z'|'A'..'Z'|'_') + ('a'..'z' + |'A'..'Z' + |'0'..'9' + |'_' + { + std::cout << "Underscore"; + std::cout << "foo"; + } + )* + ; + +WS: (' ' | '\n')+; diff --git a/runtime/Cpp/tests/t012.cpp b/runtime/Cpp/tests/t012.cpp new file mode 100644 index 000000000..fe9d0c9ef --- /dev/null +++ b/runtime/Cpp/tests/t012.cpp @@ -0,0 +1,187 @@ +#include "UserTestTraits.hpp" +#include "t012lexerXMLLexer.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& in, string const& out); +int testMalformedInput1(string const& data); +int testMalformedInput2(string const& data); +int testMalformedInput3(string const& data); +string 
slurp(string const& fileName); + +static t012lexerXMLLexer *lxr; + +int main (int argc, char *argv[]) +{ + testValid("t012lexerXML.input", "t012lexerXML.output"); + testMalformedInput1("\n\n\n"); + testMalformedInput2("\n\n\n"); + testMalformedInput3("\n\n\n"); + + return 0; +} + +int testValid(string const& inFilename, string const& outFilename) +{ + string data = slurp(inFilename); + t012lexerXMLLexerTraits::InputStreamType* input = new t012lexerXMLLexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)inFilename.c_str()); + if (lxr == NULL) + lxr = new t012lexerXMLLexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << inFilename << '"' <nextToken(); + if( token->getType() == t012lexerXMLLexerTokens::EOF_TOKEN) + break; + } + + string expOutput = slurp(outFilename); + string lxrOutput = lxr->outbuf.str(); + + ofstream out("t012.lxr.output"); + out << lxrOutput; + + std::cout << inFilename << '\t' << (expOutput == lxrOutput ? 
"OK" : "Fail") << std::endl; + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput1(string const& data) +{ + t012lexerXMLLexerTraits::InputStreamType* input = new t012lexerXMLLexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t012"); + if (lxr == NULL) + lxr = new t012lexerXMLLexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput1: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + // try: + // while True: + // token = lexer.nextToken() + // # Should raise NoViableAltException before hitting EOF + // if token.type == antlr3.EOF: + // self.fail() + // + // except antlr3.NoViableAltException as exc: + // self.assertEqual(exc.unexpectedType, '>') + // self.assertEqual(exc.charPositionInLine, 11) + // self.assertEqual(exc.line, 2) + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput2(string const& data) +{ + t012lexerXMLLexerTraits::InputStreamType* input = new t012lexerXMLLexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t012"); + if (lxr == NULL) + lxr = new t012lexerXMLLexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput2: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + // try: + // while True: + // token = lexer.nextToken() + // # Should raise NoViableAltException before hitting EOF + // if token.type == antlr3.EOF: + // self.fail() + // + // except antlr3.MismatchedSetException as exc: + // 
self.assertEqual(exc.unexpectedType, 't') + // self.assertEqual(exc.charPositionInLine, 2) + // self.assertEqual(exc.line, 1) + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +int testMalformedInput3(string const& data) +{ + t012lexerXMLLexerTraits::InputStreamType* input = new t012lexerXMLLexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t012"); + if (lxr == NULL) + lxr = new t012lexerXMLLexer(input); + else + lxr->setCharStream(input); + + std::cout << "testMalformedInput3: \"" << data << '"' <nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + token = lxr->nextToken(); + std::cout << token->getText() << std::endl; + + // try: + // while True: + // token = lexer.nextToken() + // # Should raise NoViableAltException before hitting EOF + // if token.type == antlr3.EOF: + // self.fail() + // + // except antlr3.NoViableAltException as exc: + // self.assertEqual(exc.unexpectedType, 'a') + // self.assertEqual(exc.charPositionInLine, 11) + // self.assertEqual(exc.line, 2) + + delete lxr; lxr = NULL; + delete input; + return 0; +} + +string slurp(string const& fileName) +{ + ifstream ifs(fileName.c_str(), ios::in | ios::binary | ios::ate); + ifstream::pos_type fileSize = ifs.tellg(); + ifs.seekg(0, ios::beg); + + stringstream sstr; + sstr << ifs.rdbuf(); + return sstr.str(); +} diff --git a/runtime/Cpp/tests/t012lexerXML.input b/runtime/Cpp/tests/t012lexerXML.input new file mode 100644 index 000000000..1815a9f26 --- /dev/null +++ b/runtime/Cpp/tests/t012lexerXML.input @@ -0,0 +1,21 @@ + + + + + +]> + + +Text + +öäüß +& +< + + + + \ No newline at end of file diff --git a/runtime/Cpp/tests/t012lexerXML.output b/runtime/Cpp/tests/t012lexerXML.output new file mode 100644 index 000000000..825c37fc6 --- /dev/null +++ b/runtime/Cpp/tests/t012lexerXML.output @@ -0,0 +1,39 @@ +XML declaration +Attr: 
version='1.0' +ROOTELEMENT: component +INTERNAL DTD: [ + + + + +] +Start Tag: component +Attr: attr="val'ue" +Attr: attr2='val"ue' +PCDATA: " +" +Comment: "" +PCDATA: " +Text +" +CDATA: "" +PCDATA: " +öäüß +& +< +" +PI: xtal +Attr: cursor='11' +PCDATA: " +" +Empty Element: sub +PCDATA: " +" +Start Tag: sub +End Tag: sub +PCDATA: " +" +End Tag: component diff --git a/runtime/Cpp/tests/t012lexerXMLLexer.g b/runtime/Cpp/tests/t012lexerXMLLexer.g new file mode 100644 index 000000000..04e3e81b0 --- /dev/null +++ b/runtime/Cpp/tests/t012lexerXMLLexer.g @@ -0,0 +1,160 @@ +lexer grammar t012lexerXMLLexer; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +#include +} +@lexer::namespace +{ Antlr3Test } + +@lexer::init { +self.outbuf = StringIO() +} + +@lexer::context { +ImplTraits::StringStreamType outbuf; + +void output(const char* line) +{ + outbuf << line << "\r\n"; +} + +void output(const char* line1, const char *line2) +{ + outbuf << line1 << line2 << "\r\n"; +} + +void output(const char* line1, ImplTraits::StringType const& line2) +{ + outbuf << line1 << line2 << "\r\n"; +} + +void appendArribute(const char* prefix, ImplTraits::StringType const& name, ImplTraits::StringType const& value) +{ + outbuf << prefix << name << '=' << value << "\r\n"; +} + +void appendString(const char* name, ImplTraits::StringType const& value) +{ + outbuf << name << '"' << value << '"' << "\r\n"; +} + +} +DOCUMENT + : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? + ; + +fragment DOCTYPE + : + '' + ; + +fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ; + +fragment PI : + '' + ; + +fragment XMLDECL : + '' + ; + + +fragment ELEMENT + : ( START_TAG + (ELEMENT + | t=PCDATA + {appendString("PCDATA: ", $t.text);} + | t=CDATA + {appendString("CDATA: ", $t.text);} + | t=COMMENT + {appendString("Comment: ", $t.text);} + | pi=PI + )* + END_TAG + | EMPTY_ELEMENT + ) + ; + +fragment START_TAG + : '<' WS? name=GENERIC_ID WS? 
+ {output("Start Tag: ", $name.text);} + ( ATTRIBUTE WS? )* '>' + ; + +fragment EMPTY_ELEMENT + : '<' WS? name=GENERIC_ID WS? + {output("Empty Element: ", $name.text);} + ( ATTRIBUTE WS? )* '/>' + ; + +fragment ATTRIBUTE + : name=GENERIC_ID WS? '=' WS? value=VALUE + {appendArribute("Attr: ", $name.text, $value.text);} + ; + +fragment END_TAG + : '' + {output("End Tag: ", $name.text);} + ; + +fragment COMMENT + : '' + ; + +fragment CDATA + : '' + ; + +fragment PCDATA : (~'<')+ ; + +fragment VALUE : + ( '\"' (~'\"')* '\"' + | '\'' (~'\'')* '\'' + ) + ; + +fragment GENERIC_ID + : ( LETTER | '_' | ':') + ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )* + ; + +fragment LETTER + : 'a'..'z' + | 'A'..'Z' + ; + +fragment WS : + ( ' ' + | '\t' + | ( '\n' + | '\r\n' + | '\r' + ) + )+ + ; + diff --git a/runtime/Cpp/tests/t013parser.g b/runtime/Cpp/tests/t013parser.g new file mode 100644 index 000000000..c9621324f --- /dev/null +++ b/runtime/Cpp/tests/t013parser.g @@ -0,0 +1,36 @@ +grammar t013parser; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@parser::init { +self.identifiers = [] +self.reportedErrors = [] +} + +@parser::members { +def foundIdentifier(self, name): + self.identifiers.append(name) + +def emitErrorMessage(self, msg): + self.reportedErrors.append(msg) +} + +document: + t=IDENTIFIER {self.foundIdentifier($t.text)} + ; + +IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*; diff --git a/runtime/Cpp/tests/t014parser.g b/runtime/Cpp/tests/t014parser.g new file mode 100644 index 000000000..4fa5d1d5e --- /dev/null +++ b/runtime/Cpp/tests/t014parser.g @@ -0,0 +1,48 @@ +grammar t014parser; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include 
"UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@parser::init { +self.events = [] +self.reportedErrors = [] +} + +@parser::members { +def emitErrorMessage(self, msg): + self.reportedErrors.append(msg) +} + + +document: + ( declaration + | call + )* + EOF + ; + +declaration: + 'var' t=IDENTIFIER ';' + {self.events.append(('decl', $t.text))} + ; + +call: + t=IDENTIFIER '(' ')' ';' + {self.events.append(('call', $t.text))} + ; + +IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*; +WS: (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}; diff --git a/runtime/Cpp/tests/t015calc.g b/runtime/Cpp/tests/t015calc.g new file mode 100644 index 000000000..5af908aa1 --- /dev/null +++ b/runtime/Cpp/tests/t015calc.g @@ -0,0 +1,67 @@ +grammar t015calc; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@header { +import math +} + +@parser::init { +self.reportedErrors = [] +} + +@parser::members { +def emitErrorMessage(self, msg): + self.reportedErrors.append(msg) +} + +evaluate returns [result]: r=expression {result = r}; + +expression returns [result]: r=mult ( + '+' r2=mult {r += r2} + | '-' r2=mult {r -= r2} + )* {result = r}; + +mult returns [result]: r=log ( + '*' r2=log {r *= r2} + | '/' r2=log {r /= r2} +// | '%' r2=log {r %= r2} + )* {result = r}; + +log returns [result]: 'ln' r=exp {result = math.log(r)} + | r=exp {result = r} + ; + +exp returns [result]: r=atom ('^' r2=atom {r = math.pow(r,r2)} )? {result = r} + ; + +atom returns [result]: + n=INTEGER {result = int($n.text)} + | n=DECIMAL {result = float($n.text)} + | '(' r=expression {result = r} ')' + | 'PI' {result = math.pi} + | 'E' {result = math.e} + ; + +INTEGER: DIGIT+; + +DECIMAL: DIGIT+ '.' 
DIGIT+; + +fragment +DIGIT: '0'..'9'; + +WS: (' ' | '\n' | '\t')+ {$channel = HIDDEN}; diff --git a/runtime/Cpp/tests/t016actions.g b/runtime/Cpp/tests/t016actions.g new file mode 100644 index 000000000..ca2189a2a --- /dev/null +++ b/runtime/Cpp/tests/t016actions.g @@ -0,0 +1,44 @@ +grammar t016actions; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +declaration returns [name] + : functionHeader ';' + {$name = $functionHeader.name} + ; + +functionHeader returns [name] + : type ID + {$name = $ID.text} + ; + +type + : 'int' + | 'char' + | 'void' + ; + +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + ; + +WS : ( ' ' + | '\t' + | '\r' + | '\n' + )+ + {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t017parser.g b/runtime/Cpp/tests/t017parser.g new file mode 100644 index 000000000..cd251b925 --- /dev/null +++ b/runtime/Cpp/tests/t017parser.g @@ -0,0 +1,104 @@ +grammar t017parser; + +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +program + : declaration+ + ; + +declaration + : variable + | functionHeader ';' + | functionHeader block + ; + +variable + : type declarator ';' + ; + +declarator + : ID + ; + +functionHeader + : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')' + ; + +formalParameter + : type declarator + ; + +type + : 'int' + | 'char' + | 'void' + | ID + ; + +block + : '{' + variable* + stat* + '}' + ; + +stat: forStat + | expr ';' + | block + | assignStat ';' + | ';' + ; + +forStat + : 'for' '(' assignStat ';' expr ';' assignStat ')' block + ; + +assignStat + : ID '=' expr + ; + +expr: condExpr + ; + +condExpr + : aexpr ( ('==' | '<') aexpr )? 
+ ; + +aexpr + : atom ( '+' atom )* + ; + +atom + : ID + | INT + | '(' expr ')' + ; + +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + ; + +INT : ('0'..'9')+ + ; + +WS : ( ' ' + | '\t' + | '\r' + | '\n' + )+ + {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t018llstar.g b/runtime/Cpp/tests/t018llstar.g new file mode 100644 index 000000000..e682d2c94 --- /dev/null +++ b/runtime/Cpp/tests/t018llstar.g @@ -0,0 +1,124 @@ +grammar t018llstar; + +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@header { +from io import StringIO +} + +@init { +self.output = StringIO() +} + +program + : declaration+ + ; + +/** In this rule, the functionHeader left prefix on the last two + * alternatives is not LL(k) for a fixed k. However, it is + * LL(*). The LL(*) algorithm simply scans ahead until it sees + * either the ';' or the '{' of the block and then it picks + * the appropriate alternative. Lookhead can be arbitrarily + * long in theory, but is <=10 in most cases. Works great. + * Use ANTLRWorks to see the lookahead use (step by Location) + * and look for blue tokens in the input window pane. :) + */ +declaration + : variable + | functionHeader ';' + {self.output.write($functionHeader.name+" is a declaration\n")} + | functionHeader block + {self.output.write($functionHeader.name+" is a definition\n")} + ; + +variable + : type declarator ';' + ; + +declarator + : ID + ; + +functionHeader returns [name] + : type ID '(' ( formalParameter ( ',' formalParameter )* )? 
')' + {$name = $ID.text} + ; + +formalParameter + : type declarator + ; + +type + : 'int' + | 'char' + | 'void' + | ID + ; + +block + : '{' + variable* + stat* + '}' + ; + +stat: forStat + | expr ';' + | block + | assignStat ';' + | ';' + ; + +forStat + : 'for' '(' assignStat ';' expr ';' assignStat ')' block + ; + +assignStat + : ID '=' expr + ; + +expr: condExpr + ; + +condExpr + : aexpr ( ('==' | '<') aexpr )? + ; + +aexpr + : atom ( '+' atom )* + ; + +atom + : ID + | INT + | '(' expr ')' + ; + +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + ; + +INT : ('0'..'9')+ + ; + +WS : ( ' ' + | '\t' + | '\r' + | '\n' + )+ + {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t019lexer.g b/runtime/Cpp/tests/t019lexer.g new file mode 100644 index 000000000..192f99341 --- /dev/null +++ b/runtime/Cpp/tests/t019lexer.g @@ -0,0 +1,71 @@ +lexer grammar t019lexer; +options { + language=Cpp; + filter=true; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +IMPORT + : 'import' WS name=QIDStar WS? ';' + ; + +/** Avoids having "return foo;" match as a field */ +RETURN + : 'return' (options {greedy=false;}:.)* ';' + ; + +CLASS + : 'class' WS name=ID WS? ('extends' WS QID WS?)? + ('implements' WS QID WS? (',' WS? QID WS?)*)? '{' + ; + +COMMENT + : '/*' (options {greedy=false;} : . )* '*/' + ; + +STRING + : '"' (options {greedy=false;}: ESC | .)* '"' + ; + +CHAR + : '\'' (options {greedy=false;}: ESC | .)* '\'' + ; + +WS : (' '|'\t'|'\n')+ + ; + +fragment +QID : ID ('.' ID)* + ; + +/** QID cannot see beyond end of token so using QID '.*'? somewhere won't + * ever match since k=1 lookahead in the QID loop of '.' will make it loop. + * I made this rule to compensate. + */ +fragment +QIDStar + : ID ('.' ID)* '.*'? + ; + +fragment +TYPE: QID '[]'? 
+ ; + +fragment +ARG : TYPE WS ID + ; + +fragment +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* + ; + +fragment +ESC : '\\' ('"'|'\''|'\\') + ; + diff --git a/runtime/Cpp/tests/t020fuzzyLexer.g b/runtime/Cpp/tests/t020fuzzyLexer.g new file mode 100644 index 000000000..0fc913f0e --- /dev/null +++ b/runtime/Cpp/tests/t020fuzzyLexer.g @@ -0,0 +1,103 @@ +lexer grammar t020fuzzyLexer; +options { + language=Cpp; + filter=true; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@header { +from io import StringIO +} + +@init { +self.output = StringIO() +} + +IMPORT + : 'import' WS name=QIDStar WS? ';' + ; + +/** Avoids having "return foo;" match as a field */ +RETURN + : 'return' (options {greedy=false;}:.)* ';' + ; + +CLASS + : 'class' WS name=ID WS? ('extends' WS QID WS?)? + ('implements' WS QID WS? (',' WS? QID WS?)*)? '{' + {self.output.write("found class "+$name.text+"\n")} + ; + +METHOD + : TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS? + ('throws' WS QID WS? (',' WS? QID WS?)*)? '{' + {self.output.write("found method "+$name.text+"\n");} + ; + +FIELD + : TYPE WS name=ID '[]'? WS? (';'|'=') + {self.output.write("found var "+$name.text+"\n");} + ; + +STAT: ('if'|'while'|'switch'|'for') WS? '(' ; + +CALL + : name=QID WS? '(' + {self.output.write("found call "+$name.text+"\n");} + ; + +COMMENT + : '/*' (options {greedy=false;} : . )* '*/' + {self.output.write("found comment "+self.getText()+"\n");} + ; + +SL_COMMENT + : '//' (options {greedy=false;} : . )* '\n' + {self.output.write("found // comment "+self.getText()+"\n");} + ; + +STRING + : '"' (options {greedy=false;}: ESC | .)* '"' + ; + +CHAR + : '\'' (options {greedy=false;}: ESC | .)* '\'' + ; + +WS : (' '|'\t'|'\n')+ + ; + +fragment +QID : ID ('.' ID)* + ; + +/** QID cannot see beyond end of token so using QID '.*'? somewhere won't + * ever match since k=1 lookahead in the QID loop of '.' will make it loop. 
+ * I made this rule to compensate. + */ +fragment +QIDStar + : ID ('.' ID)* '.*'? + ; + +fragment +TYPE: QID '[]'? + ; + +fragment +ARG : TYPE WS ID + ; + +fragment +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* + ; + +fragment +ESC : '\\' ('"'|'\''|'\\') + ; diff --git a/runtime/Cpp/tests/t021hoist.g b/runtime/Cpp/tests/t021hoist.g new file mode 100644 index 000000000..51201c1c4 --- /dev/null +++ b/runtime/Cpp/tests/t021hoist.g @@ -0,0 +1,50 @@ +grammar t021hoist; +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +/* With this true, enum is seen as a keyword. False, it's an identifier */ +@parser::init { +self.enableEnum = False +} + +stat returns [enumIs] + : identifier {enumIs = "ID"} + | enumAsKeyword {enumIs = "keyword"} + ; + +identifier + : ID + | enumAsID + ; + +enumAsKeyword : {self.enableEnum}? 'enum' ; + +enumAsID : {not self.enableEnum}? 
'enum' ; + +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + ; + +INT : ('0'..'9')+ + ; + +WS : ( ' ' + | '\t' + | '\r' + | '\n' + )+ + {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t022scopes.g b/runtime/Cpp/tests/t022scopes.g new file mode 100644 index 000000000..b7870c0f3 --- /dev/null +++ b/runtime/Cpp/tests/t022scopes.g @@ -0,0 +1,138 @@ +grammar t022scopes; + +options { + language=Cpp; +} + +/* global scopes */ +scope aScope { +names +} + +@lexer::includes{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a +scope aScope; + : {$aScope::names = [];} ID* + ; + + +/* rule scopes, from the book, final beta, p.147 */ + +b[v] +scope {x} + : {$b::x = v;} b2 + ; + +b2 + : b3 + ; + +b3 + : {$b::x}?=> ID // only visible, if b was called with True + | NUM + ; + + +/* rule scopes, from the book, final beta, p.148 */ + +c returns [res] +scope { + symbols +} +@init { + $c::symbols = set(); +} + : '{' c1* c2+ '}' + { $res = $c::symbols; } + ; + +c1 + : 'int' ID {$c::symbols.add($ID.text)} ';' + ; + +c2 + : ID '=' NUM ';' + { + if $ID.text not in $c::symbols: + raise RuntimeError($ID.text) + } + ; + +/* recursive rule scopes, from the book, final beta, p.150 */ + +d returns [res] +scope { + symbols +} +@init { + $d::symbols = set(); +} + : '{' d1* d2* '}' + { $res = $d::symbols; } + ; + +d1 + : 'int' ID {$d::symbols.add($ID.text)} ';' + ; + +d2 + : ID '=' NUM ';' + { + for s in reversed(range(len($d))): + if $ID.text in $d[s]::symbols: + break + else: + raise RuntimeError($ID.text) + } + | d + ; + +/* recursive rule scopes, access bottom-most scope */ + +e returns [res] +scope { + a +} +@after { + $res = $e::a; +} + : NUM { $e[0]::a = int($NUM.text); } + | '{' e '}' + ; + + +/* recursive rule scopes, access with negative index */ + +f returns [res] +scope { + a +} +@after { + $res = $f::a; +} + : NUM { $f[-2]::a = int($NUM.text); } + | 
'{' f '}' + ; + + +/* tokens */ + +ID : ('a'..'z')+ + ; + +NUM : ('0'..'9')+ + ; + +WS : (' '|'\n'|'\r')+ {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t023scopes.g b/runtime/Cpp/tests/t023scopes.g new file mode 100644 index 000000000..f5b89fb59 --- /dev/null +++ b/runtime/Cpp/tests/t023scopes.g @@ -0,0 +1,31 @@ +grammar t023scopes; + +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +prog +scope { +name +} + : ID {$prog::name=$ID.text;} + ; + +ID : ('a'..'z')+ + ; + +WS : (' '|'\n'|'\r')+ {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t024finally.g b/runtime/Cpp/tests/t024finally.g new file mode 100644 index 000000000..2cbda0a56 --- /dev/null +++ b/runtime/Cpp/tests/t024finally.g @@ -0,0 +1,32 @@ +grammar t024finally; + +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +prog returns [events] +@init {events = []} +@after {events.append('after')} + : ID {raise RuntimeError} + ; + catch [RuntimeError] {events.append('catch')} + finally {events.append('finally')} + +ID : ('a'..'z')+ + ; + +WS : (' '|'\n'|'\r')+ {$channel=HIDDEN} + ; diff --git a/runtime/Cpp/tests/t025lexerRulePropertyRef.g b/runtime/Cpp/tests/t025lexerRulePropertyRef.g new file mode 100644 index 000000000..3271dd4e1 --- /dev/null +++ b/runtime/Cpp/tests/t025lexerRulePropertyRef.g @@ -0,0 +1,25 @@ +lexer grammar t025lexerRulePropertyRef; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@lexer::init { +self.properties = [] +} + +IDENTIFIER: + ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + { +self.properties.append( + ($text, $type, $line, $pos, $index, $channel, $start, 
$stop) +) + } + ; +WS: (' ' | '\n')+; diff --git a/runtime/Cpp/tests/t026actions.g b/runtime/Cpp/tests/t026actions.g new file mode 100644 index 000000000..d699c0674 --- /dev/null +++ b/runtime/Cpp/tests/t026actions.g @@ -0,0 +1,52 @@ +grammar t026actions; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@lexer::init { + self.foobar = 'attribute;' +} + +prog +@init { + self.capture('init;') +} +@after { + self.capture('after;') +} + : IDENTIFIER EOF + ; + catch [ RecognitionException as exc ] { + self.capture('catch;') + raise + } + finally { + self.capture('finally;') + } + + +IDENTIFIER + : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + { + # a comment + self.capture('action;') + self.capture('{!r} {!r} {!r} {!r} {!r} {!r} {!r} {!r};'.format($text, $type, $line, $pos, $index, $channel, $start, $stop)) + if True: + self.capture(self.foobar) + } + ; + +WS: (' ' | '\n')+; diff --git a/runtime/Cpp/tests/t027eof.g b/runtime/Cpp/tests/t027eof.g new file mode 100644 index 000000000..6cf6d6abe --- /dev/null +++ b/runtime/Cpp/tests/t027eof.g @@ -0,0 +1,15 @@ +lexer grammar t027eof; + +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +END: EOF; +SPACE: ' '; diff --git a/runtime/Cpp/tests/t029synpredgate.g b/runtime/Cpp/tests/t029synpredgate.g new file mode 100644 index 000000000..5a1a4ab66 --- /dev/null +++ b/runtime/Cpp/tests/t029synpredgate.g @@ -0,0 +1,23 @@ +lexer grammar t029synpredgate; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +FOO + : ('ab')=> A + | ('ac')=> B + ; + +fragment +A: 'a'; + +fragment +B: 'a'; + diff --git a/runtime/Cpp/tests/t030specialStates.g b/runtime/Cpp/tests/t030specialStates.g new file mode 100644 index 
000000000..62d2dbf42 --- /dev/null +++ b/runtime/Cpp/tests/t030specialStates.g @@ -0,0 +1,39 @@ +grammar t030specialStates; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@init { +self.cond = True +} + +@members { +def recover(self, input, re): + # no error recovery yet, just crash! + raise re +} + +r + : ( {self.cond}? NAME + | {not self.cond}? NAME WS+ NAME + ) + ( WS+ NAME )? + EOF + ; + +NAME: ('a'..'z') ('a'..'z' | '0'..'9')+; +NUMBER: ('0'..'9')+; +WS: ' '+; diff --git a/runtime/Cpp/tests/t031emptyAlt.g b/runtime/Cpp/tests/t031emptyAlt.g new file mode 100644 index 000000000..23b32f3bc --- /dev/null +++ b/runtime/Cpp/tests/t031emptyAlt.g @@ -0,0 +1,29 @@ +grammar t031emptyAlt; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +r + : NAME + ( {self.cond}?=> WS+ NAME + | + ) + EOF + ; + +NAME: ('a'..'z') ('a'..'z' | '0'..'9')+; +NUMBER: ('0'..'9')+; +WS: ' '+; diff --git a/runtime/Cpp/tests/t032subrulePredict.g b/runtime/Cpp/tests/t032subrulePredict.g new file mode 100644 index 000000000..4b760e198 --- /dev/null +++ b/runtime/Cpp/tests/t032subrulePredict.g @@ -0,0 +1,21 @@ +grammar t032subrulePredict; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a: 'BEGIN' b WS+ 'END'; +b: ( WS+ 'A' )+; +WS: ' '; diff --git a/runtime/Cpp/tests/t033backtracking.g b/runtime/Cpp/tests/t033backtracking.g new file mode 100644 index 000000000..a5b233bdb --- /dev/null +++ b/runtime/Cpp/tests/t033backtracking.g @@ -0,0 +1,528 @@ +grammar t033backtracking; +options { + language=Cpp; 
+ backtrack=true; + memoize=true; + k=2; +} + +scope Symbols { + types; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@members { + def isTypeName(self, name): + for scope in reversed(self.Symbols_stack): + if name in scope.types: + return True + + return False + +} + +translation_unit +scope Symbols; // entire file is a scope +@init { + $Symbols::types = set() +} + : external_declaration+ + ; + +/** Either a function definition or any other kind of C decl/def. + * The LL(*) analysis algorithm fails to deal with this due to + * recursion in the declarator rules. I'm putting in a + * manual predicate here so that we don't backtrack over + * the entire function. Further, you get a better error + * as errors within the function itself don't make it fail + * to predict that it's a function. Weird errors previously. + * Remember: the goal is to avoid backtrack like the plague + * because it makes debugging, actions, and errors harder. + * + * Note that k=1 results in a much smaller predictor for the + * fixed lookahead; k=2 made a few extra thousand lines. ;) + * I'll have to optimize that in the future. + */ +external_declaration +options {k=1;} + : ( declaration_specifiers? declarator declaration* '{' )=> function_definition + | declaration + ; + +function_definition +scope Symbols; // put parameters and locals into same scope for now +@init { + $Symbols::types = set() +} + : declaration_specifiers? declarator +// ( declaration+ compound_statement // K&R style +// | compound_statement // ANSI style +// ) + ; + +declaration +scope { + isTypedef; +} +@init { + $declaration::isTypedef = False +} + : 'typedef' declaration_specifiers? {$declaration::isTypedef = True} + init_declarator_list ';' // special case, looking for typedef + | declaration_specifiers init_declarator_list? 
';' + ; + +declaration_specifiers + : ( storage_class_specifier + | type_specifier + | type_qualifier + )+ + ; + +init_declarator_list + : init_declarator (',' init_declarator)* + ; + +init_declarator + : declarator //('=' initializer)? + ; + +storage_class_specifier + : 'extern' + | 'static' + | 'auto' + | 'register' + ; + +type_specifier + : 'void' + | 'char' + | 'short' + | 'int' + | 'long' + | 'float' + | 'double' + | 'signed' + | 'unsigned' +// | struct_or_union_specifier +// | enum_specifier + | type_id + ; + +type_id + : {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER +// {System.out.println($IDENTIFIER.text+" is a type");} + ; + +// struct_or_union_specifier +// options {k=3;} +// scope Symbols; // structs are scopes +// @init { +// $Symbols::types = set() +// } +// : struct_or_union IDENTIFIER? '{' struct_declaration_list '}' +// | struct_or_union IDENTIFIER +// ; + +// struct_or_union +// : 'struct' +// | 'union' +// ; + +// struct_declaration_list +// : struct_declaration+ +// ; + +// struct_declaration +// : specifier_qualifier_list struct_declarator_list ';' +// ; + +// specifier_qualifier_list +// : ( type_qualifier | type_specifier )+ +// ; + +// struct_declarator_list +// : struct_declarator (',' struct_declarator)* +// ; + +// struct_declarator +// : declarator (':' constant_expression)? +// | ':' constant_expression +// ; + +// enum_specifier +// options {k=3;} +// : 'enum' '{' enumerator_list '}' +// | 'enum' IDENTIFIER '{' enumerator_list '}' +// | 'enum' IDENTIFIER +// ; + +// enumerator_list +// : enumerator (',' enumerator)* +// ; + +// enumerator +// : IDENTIFIER ('=' constant_expression)? +// ; + +type_qualifier + : 'const' + | 'volatile' + ; + +declarator + : pointer? 
direct_declarator + | pointer + ; + +direct_declarator + : ( IDENTIFIER + { + if $declaration and $declaration::isTypedef: + $Symbols::types.add($IDENTIFIER.text) + print("define type "+$IDENTIFIER.text) + } + | '(' declarator ')' + ) + declarator_suffix* + ; + +declarator_suffix + : /*'[' constant_expression ']' + |*/ '[' ']' +// | '(' parameter_type_list ')' +// | '(' identifier_list ')' + | '(' ')' + ; + +pointer + : '*' type_qualifier+ pointer? + | '*' pointer + | '*' + ; + +// parameter_type_list +// : parameter_list (',' '...')? +// ; + +// parameter_list +// : parameter_declaration (',' parameter_declaration)* +// ; + +// parameter_declaration +// : declaration_specifiers (declarator|abstract_declarator)* +// ; + +// identifier_list +// : IDENTIFIER (',' IDENTIFIER)* +// ; + +// type_name +// : specifier_qualifier_list abstract_declarator? +// ; + +// abstract_declarator +// : pointer direct_abstract_declarator? +// | direct_abstract_declarator +// ; + +// direct_abstract_declarator +// : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix* +// ; + +// abstract_declarator_suffix +// : '[' ']' +// | '[' constant_expression ']' +// | '(' ')' +// | '(' parameter_type_list ')' +// ; + +// initializer +// : assignment_expression +// | '{' initializer_list ','? 
'}' +// ; + +// initializer_list +// : initializer (',' initializer)* +// ; + +// // E x p r e s s i o n s + +// argument_expression_list +// : assignment_expression (',' assignment_expression)* +// ; + +// additive_expression +// : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)* +// ; + +// multiplicative_expression +// : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)* +// ; + +// cast_expression +// : '(' type_name ')' cast_expression +// | unary_expression +// ; + +// unary_expression +// : postfix_expression +// | '++' unary_expression +// | '--' unary_expression +// | unary_operator cast_expression +// | 'sizeof' unary_expression +// | 'sizeof' '(' type_name ')' +// ; + +// postfix_expression +// : primary_expression +// ( '[' expression ']' +// | '(' ')' +// | '(' argument_expression_list ')' +// | '.' IDENTIFIER +// | '*' IDENTIFIER +// | '->' IDENTIFIER +// | '++' +// | '--' +// )* +// ; + +// unary_operator +// : '&' +// | '*' +// | '+' +// | '-' +// | '~' +// | '!' +// ; + +// primary_expression +// : IDENTIFIER +// | constant +// | '(' expression ')' +// ; + +// constant +// : HEX_LITERAL +// | OCTAL_LITERAL +// | DECIMAL_LITERAL +// | CHARACTER_LITERAL +// | STRING_LITERAL +// | FLOATING_POINT_LITERAL +// ; + +// ///// + +// expression +// : assignment_expression (',' assignment_expression)* +// ; + +// constant_expression +// : conditional_expression +// ; + +// assignment_expression +// : lvalue assignment_operator assignment_expression +// | conditional_expression +// ; + +// lvalue +// : unary_expression +// ; + +// assignment_operator +// : '=' +// | '*=' +// | '/=' +// | '%=' +// | '+=' +// | '-=' +// | '<<=' +// | '>>=' +// | '&=' +// | '^=' +// | '|=' +// ; + +// conditional_expression +// : logical_or_expression ('?' expression ':' conditional_expression)? 
+// ; + +// logical_or_expression +// : logical_and_expression ('||' logical_and_expression)* +// ; + +// logical_and_expression +// : inclusive_or_expression ('&&' inclusive_or_expression)* +// ; + +// inclusive_or_expression +// : exclusive_or_expression ('|' exclusive_or_expression)* +// ; + +// exclusive_or_expression +// : and_expression ('^' and_expression)* +// ; + +// and_expression +// : equality_expression ('&' equality_expression)* +// ; +// equality_expression +// : relational_expression (('=='|'!=') relational_expression)* +// ; + +// relational_expression +// : shift_expression (('<'|'>'|'<='|'>=') shift_expression)* +// ; + +// shift_expression +// : additive_expression (('<<'|'>>') additive_expression)* +// ; + +// // S t a t e m e n t s + +// statement +// : labeled_statement +// | compound_statement +// | expression_statement +// | selection_statement +// | iteration_statement +// | jump_statement +// ; + +// labeled_statement +// : IDENTIFIER ':' statement +// | 'case' constant_expression ':' statement +// | 'default' ':' statement +// ; + +// compound_statement +// scope Symbols; // blocks have a scope of symbols +// @init { +// $Symbols::types = {} +// } +// : '{' declaration* statement_list? '}' +// ; + +// statement_list +// : statement+ +// ; + +// expression_statement +// : ';' +// | expression ';' +// ; + +// selection_statement +// : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)? +// | 'switch' '(' expression ')' statement +// ; + +// iteration_statement +// : 'while' '(' expression ')' statement +// | 'do' statement 'while' '(' expression ')' ';' +// | 'for' '(' expression_statement expression_statement expression? 
')' statement +// ; + +// jump_statement +// : 'goto' IDENTIFIER ';' +// | 'continue' ';' +// | 'break' ';' +// | 'return' ';' +// | 'return' expression ';' +// ; + +IDENTIFIER + : LETTER (LETTER|'0'..'9')* + ; + +fragment +LETTER + : '$' + | 'A'..'Z' + | 'a'..'z' + | '_' + ; + +CHARACTER_LITERAL + : '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' + ; + +STRING_LITERAL + : '"' ( EscapeSequence | ~('\\'|'"') )* '"' + ; + +HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; + +DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; + +OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; + +fragment +HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; + +fragment +IntegerTypeSuffix + : ('u'|'U')? ('l'|'L') + | ('u'|'U') ('l'|'L')? + ; + +FLOATING_POINT_LITERAL + : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? + | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? + | ('0'..'9')+ Exponent FloatTypeSuffix? + | ('0'..'9')+ Exponent? FloatTypeSuffix + ; + +fragment +Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; + +fragment +FloatTypeSuffix : ('f'|'F'|'d'|'D') ; + +fragment +EscapeSequence + : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') + | OctalEscape + ; + +fragment +OctalEscape + : '\\' ('0'..'3') ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') ('0'..'7') + | '\\' ('0'..'7') + ; + +fragment +UnicodeEscape + : '\\' 'u' HexDigit HexDigit HexDigit HexDigit + ; + +WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} + ; + +COMMENT + : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} + ; + +LINE_COMMENT + : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} + ; + +// ignore #line info for now +LINE_COMMAND + : '#' ~('\n'|'\r')* '\r'? 
'\n' {$channel=HIDDEN;} + ; + diff --git a/runtime/Cpp/tests/t034tokenLabelPropertyRef.g b/runtime/Cpp/tests/t034tokenLabelPropertyRef.g new file mode 100644 index 000000000..6a03bf22f --- /dev/null +++ b/runtime/Cpp/tests/t034tokenLabelPropertyRef.g @@ -0,0 +1,43 @@ +grammar t034tokenLabelPropertyRef; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a: t=A + { + print($t.text) + print($t.type) + print($t.line) + print($t.pos) + print($t.channel) + print($t.index) + #print($t.tree) + } + ; + +A: 'a'..'z'; + +WS : + ( ' ' + | '\t' + | ( '\n' + | '\r\n' + | '\r' + ) + )+ + { $channel = HIDDEN } + ; + diff --git a/runtime/Cpp/tests/t035ruleLabelPropertyRef.g b/runtime/Cpp/tests/t035ruleLabelPropertyRef.g new file mode 100644 index 000000000..c3b9eb945 --- /dev/null +++ b/runtime/Cpp/tests/t035ruleLabelPropertyRef.g @@ -0,0 +1,29 @@ +grammar t035ruleLabelPropertyRef; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a returns [bla]: t=b + { + $bla = $t.start, $t.stop, $t.text + } + ; + +b: A+; + +A: 'a'..'z'; + +WS: ' '+ { $channel = HIDDEN }; diff --git a/runtime/Cpp/tests/t036multipleReturnValues.g b/runtime/Cpp/tests/t036multipleReturnValues.g new file mode 100644 index 000000000..8c4748cbe --- /dev/null +++ b/runtime/Cpp/tests/t036multipleReturnValues.g @@ -0,0 +1,38 @@ +grammar t036multipleReturnValues; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a returns [foo, bar]: A + { + $foo = "foo"; + $bar = "bar"; + } + ; + +A: 'a'..'z'; + +WS : + ( ' ' + | '\t' + | ( 
'\n' + | '\r\n' + | '\r' + ) + )+ + { $channel = HIDDEN } + ; + diff --git a/runtime/Cpp/tests/t037rulePropertyRef.g b/runtime/Cpp/tests/t037rulePropertyRef.g new file mode 100644 index 000000000..b510092ae --- /dev/null +++ b/runtime/Cpp/tests/t037rulePropertyRef.g @@ -0,0 +1,28 @@ +grammar t037rulePropertyRef; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a returns [bla] +@after { + $bla = $start, $stop, $text +} + : A+ + ; + +A: 'a'..'z'; + +WS: ' '+ { $channel = HIDDEN }; diff --git a/runtime/Cpp/tests/t038lexerRuleLabel.g b/runtime/Cpp/tests/t038lexerRuleLabel.g new file mode 100644 index 000000000..28dbedc9a --- /dev/null +++ b/runtime/Cpp/tests/t038lexerRuleLabel.g @@ -0,0 +1,35 @@ +lexer grammar t038lexerRuleLabel; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +A: 'a'..'z' WS '0'..'9' + { + print($WS) + print($WS.type) + print($WS.line) + print($WS.pos) + print($WS.channel) + print($WS.index) + print($WS.text) + } + ; + +fragment WS : + ( ' ' + | '\t' + | ( '\n' + | '\r\n' + | '\r' + ) + )+ + { $channel = HIDDEN } + ; + diff --git a/runtime/Cpp/tests/t039.cpp b/runtime/Cpp/tests/t039.cpp new file mode 100644 index 000000000..f87ea87d6 --- /dev/null +++ b/runtime/Cpp/tests/t039.cpp @@ -0,0 +1,122 @@ +#include "UserTestTraits.hpp" +#include "t039labelsLexer.hpp" +#include "t039labelsParser.hpp" + +#include + +#include +#include +#include + +using namespace Antlr3Test; +using namespace std; + +int testValid(string const& data); +int testMalformedInput(string const& data); + +static t039labelsLexer *lxr; + + +struct TokenData +{ + //t039labelsLexerTokens::Tokens type; + //unsigned start; + //unsigned stop; + const char* text; +}; + +static TokenData ExpectedTokens[] = +{ + /* + lexer = self.getLexer(cStream) + 
tStream = antlr3.CommonTokenStream(lexer) + parser = self.getParser(tStream) + ids, w = parser.a() + + self.assertEqual(len(ids), 6, ids) + self.assertEqual(ids[0].text, 'a', ids[0]) + self.assertEqual(ids[1].text, 'b', ids[1]) + self.assertEqual(ids[2].text, 'c', ids[2]) + self.assertEqual(ids[3].text, '1', ids[3]) + self.assertEqual(ids[4].text, '2', ids[4]) + self.assertEqual(ids[5].text, 'A', ids[5]) + + self.assertEqual(w.text, 'GNU1', w) + */ + // "a, b, c, 1, 2 A FOOBAR GNU1 A BLARZ" + { "a"}, + { "b"}, + { "c"}, + { "1"}, + { "2"}, + { "A"}, +}; + + +int main (int argc, char *argv[]) +{ + testValid("a, b, c, 1, 2 A FOOBAR GNU1 A BLARZ"); + return 0; +} + +int testValid(string const& data) +{ + t039labelsLexerTraits::InputStreamType* input = new t039labelsLexerTraits::InputStreamType((const ANTLR_UINT8 *)data.c_str(), + ANTLR_ENC_8BIT, + data.length(), //strlen(data.c_str()), + (ANTLR_UINT8*)"t039"); + if (lxr == NULL) + lxr = new t039labelsLexer(input); + else + lxr->setCharStream(input); + + std::cout << "testValid: \"" << data << '"' <get_tokSource()); + t039labelsParser *psr = new t039labelsParser(tstream); + t039labelsParser::TokenList r = psr->a(); + + for(unsigned i = 0; i < r.tokens.size() ; i++) + { + t039labelsLexerTraits::CommonTokenType *token = r.tokens.at(i); + + size_t startIndex = ((const char*)token->get_startIndex()) - data.c_str(); + size_t stopIndex = ((const char*)token->get_stopIndex()) - data.c_str(); + + std::cout << token->getText() + << '\t' << (token->getText() == ExpectedTokens[i].text ? 
"OK" : "Fail") + << std::endl; + + } + delete lxr; lxr = NULL; + delete input; + return 0; +} + +/* + def testValid1(self): + cStream = antlr3.StringStream( + 'a, b, c, 1, 2 A FOOBAR GNU1 A BLARZ' + ) + + lexer = self.getLexer(cStream) + tStream = antlr3.CommonTokenStream(lexer) + parser = self.getParser(tStream) + ids, w = parser.a() + + self.assertEqual(len(ids), 6, ids) + self.assertEqual(ids[0].text, 'a', ids[0]) + self.assertEqual(ids[1].text, 'b', ids[1]) + self.assertEqual(ids[2].text, 'c', ids[2]) + self.assertEqual(ids[3].text, '1', ids[3]) + self.assertEqual(ids[4].text, '2', ids[4]) + self.assertEqual(ids[5].text, 'A', ids[5]) + + self.assertEqual(w.text, 'GNU1', w) + + +if __name__ == '__main__': + unittest.main() + + +*/ diff --git a/runtime/Cpp/tests/t039labels.g b/runtime/Cpp/tests/t039labels.g new file mode 100644 index 000000000..dc243668b --- /dev/null +++ b/runtime/Cpp/tests/t039labels.g @@ -0,0 +1,43 @@ +grammar t039labels; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +#include +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +#include "t039labelsLexer.hpp" +} +@parser::namespace +{ Antlr3Test } +@parser::members { + class TokenList { + public: + TokenList() : token() {} + TokenList(TokenList const& other) : tokens(other.tokens), token(other.token) {} + TokenList(ImplTraits::TokenPtrsListType const& lst, ImplTraits::CommonTokenType *t) : tokens(lst), token(t) {} + //private: + ImplTraits::TokenPtrsListType tokens; + ImplTraits::CommonTokenType* token; + }; +} +a returns [t039labelsParser::TokenList retval] + : ids+=A ( ',' ids+=(A|B) )* C D w=. ids+=. F EOF + { retval = t039labelsParser::TokenList($ids, $w); } + ; + +A: 'a'..'z'; +B: '0'..'9'; +C: a='A' { std::cout << $a << std::endl; }; +D: a='FOOBAR' { std::cout << $a << std::endl; }; +E: 'GNU' a=. 
{ std::cout << $a << std::endl; }; +F: 'BLARZ' a=EOF { std::cout << $a << std::endl; }; + +WS: ' '+ { $channel = HIDDEN; }; diff --git a/runtime/Cpp/tests/t040bug80.g b/runtime/Cpp/tests/t040bug80.g new file mode 100644 index 000000000..3aa55d056 --- /dev/null +++ b/runtime/Cpp/tests/t040bug80.g @@ -0,0 +1,20 @@ +lexer grammar t040bug80; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +ID_LIKE + : 'defined' + | {False}? Identifier + | Identifier + ; + +fragment +Identifier: 'a'..'z'+ ; // with just 'a', output compiles diff --git a/runtime/Cpp/tests/t041parameters.g b/runtime/Cpp/tests/t041parameters.g new file mode 100644 index 000000000..47b64b792 --- /dev/null +++ b/runtime/Cpp/tests/t041parameters.g @@ -0,0 +1,29 @@ +grammar t041parameters; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a[arg1, arg2] returns [l] + : A+ EOF + { + l = ($arg1, $arg2) + $arg1 = "gnarz" + } + ; + +A: 'a'..'z'; + +WS: ' '+ { $channel = HIDDEN }; diff --git a/runtime/Cpp/tests/t042ast.g b/runtime/Cpp/tests/t042ast.g new file mode 100644 index 000000000..e2d4fc178 --- /dev/null +++ b/runtime/Cpp/tests/t042ast.g @@ -0,0 +1,366 @@ +grammar t042ast; +options { + language =Cpp; + output = AST; +} + +tokens { + VARDEF; + FLOAT; + EXPR; + BLOCK; + VARIABLE; + FIELD; + CALL; + INDEX; + FIELDACCESS; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@init { +self.flag = False +} + +r1 + : INT ('+'^ INT)* + ; + +r2 + : 'assert'^ x=expression (':'! y=expression)? ';'! + ; + +r3 + : 'if'^ expression s1=statement ('else'! s2=statement)? + ; + +r4 + : 'while'^ expression statement + ; + +r5 + : 'return'^ expression? 
';'! + ; + +r6 + : (INT|ID)+ + ; + +r7 + : INT -> + ; + +r8 + : 'var' ID ':' type -> ^('var' type ID) + ; + +r9 + : type ID ';' -> ^(VARDEF type ID) + ; + +r10 + : INT -> {CommonTree(CommonToken(type=FLOAT, text=$INT.text + ".0"))} + ; + +r11 + : expression -> ^(EXPR expression) + | -> EXPR + ; + +r12 + : ID (',' ID)* -> ID+ + ; + +r13 + : type ID (',' ID)* ';' -> ^(type ID+) + ; + +r14 + : expression? statement* type+ + -> ^(EXPR expression? statement* type+) + ; + +r15 + : INT -> INT INT + ; + +r16 + : 'int' ID (',' ID)* -> ^('int' ID)+ + ; + +r17 + : 'for' '(' start=statement ';' expression ';' next=statement ')' statement + -> ^('for' $start expression $next statement) + ; + +r18 + : t='for' -> ^(BLOCK) + ; + +r19 + : t='for' -> ^(BLOCK[$t]) + ; + +r20 + : t='for' -> ^(BLOCK[$t,"FOR"]) + ; + +r21 + : t='for' -> BLOCK + ; + +r22 + : t='for' -> BLOCK[$t] + ; + +r23 + : t='for' -> BLOCK[$t,"FOR"] + ; + +r24 + : r=statement expression -> ^($r expression) + ; + +r25 + : r+=statement (',' r+=statement)+ expression -> ^($r expression) + ; + +r26 + : r+=statement (',' r+=statement)+ -> ^(BLOCK $r+) + ; + +r27 + : r=statement expression -> ^($r ^($r expression)) + ; + +r28 + : ('foo28a'|'foo28b') -> + ; + +r29 + : (r+=statement)* -> ^(BLOCK $r+) + ; + +r30 + : statement* -> ^(BLOCK statement?) + ; + +r31 + : modifier type ID ('=' expression)? ';' + -> {self.flag == 0}? ^(VARDEF ID modifier* type expression?) + -> {self.flag == 1}? ^(VARIABLE ID modifier* type expression?) + -> ^(FIELD ID modifier* type expression?) + ; + +r32[which] + : ID INT -> {which==1}? ID + -> {which==2}? INT + -> // yield nothing as else-clause + ; + +r33 + : modifiers! statement + ; + +r34 + : modifiers! r34a[$modifiers.tree] + //| modifiers! r33b[$modifiers.tree] + ; + +r34a[mod] + : 'class' ID ('extends' sup=type)? + ( 'implements' i+=type (',' i+=type)*)? + '{' statement* '}' + -> ^('class' ID {$mod} ^('extends' $sup)? ^('implements' $i+)? statement* ) + ; + +r35 + : '{' 'extends' (sup=type)? 
'}' + -> ^('extends' $sup)? + ; + +r36 + : 'if' '(' expression ')' s1=statement + ( 'else' s2=statement -> ^('if' ^(EXPR expression) $s1 $s2) + | -> ^('if' ^(EXPR expression) $s1) + ) + ; + +r37 + : (INT -> INT) ('+' i=INT -> ^('+' $r37 $i) )* + ; + +r38 + : INT ('+'^ INT)* + ; + +r39 + : (primary->primary) // set return tree to just primary + ( '(' arg=expression ')' + -> ^(CALL $r39 $arg) + | '[' ie=expression ']' + -> ^(INDEX $r39 $ie) + | '.' p=primary + -> ^(FIELDACCESS $r39 $p) + )* + ; + +r40 + : (INT -> INT) ( ('+' i+=INT)* -> ^('+' $r40 $i*) ) ';' + ; + +r41 + : (INT -> INT) ( ('+' i=INT) -> ^($i $r41) )* ';' + ; + +r42 + : ids+=ID (','! ids+=ID)* + ; + +r43 returns [res] + : ids+=ID! (','! ids+=ID!)* {$res = [id.text for id in $ids]} + ; + +r44 + : ids+=ID^ (','! ids+=ID^)* + ; + +r45 + : primary^ + ; + +r46 returns [res] + : ids+=primary! (','! ids+=primary!)* {$res = [id.text for id in $ids]} + ; + +r47 + : ids+=primary (','! ids+=primary)* + ; + +r48 + : ids+=. (','! ids+=.)* + ; + +r49 + : .^ ID + ; + +r50 + : ID + -> ^({CommonTree(CommonToken(type=FLOAT, text="1.0"))} ID) + ; + +/** templates tested: + tokenLabelPropertyRef_tree +*/ +r51 returns [res] + : ID t=ID ID + { $res = $t.tree } + ; + +/** templates tested: + rulePropertyRef_tree +*/ +r52 returns [res] +@after { + $res = $tree +} + : ID + ; + +/** templates tested: + ruleLabelPropertyRef_tree +*/ +r53 returns [res] + : t=primary + { $res = $t.tree } + ; + +/** templates tested: + ruleSetPropertyRef_tree +*/ +r54 returns [res] +@after { + $tree = $t.tree; +} + : ID t=expression ID + ; + +/** backtracking */ +r55 +options { backtrack=true; k=1; } + : (modifier+ INT)=> modifier+ expression + | modifier+ statement + ; + + +/** templates tested: + rewriteTokenRef with len(args)>0 +*/ +r56 + : t=ID* -> ID[$t,'foo'] + ; + +/** templates tested: + rewriteTokenRefRoot with len(args)>0 +*/ +r57 + : t=ID* -> ^(ID[$t,'foo']) + ; + +/** templates tested: + ??? 
+*/ +r58 + : ({CommonTree(CommonToken(type=FLOAT, text="2.0"))})^ + ; + +/** templates tested: + rewriteTokenListLabelRefRoot +*/ +r59 + : (t+=ID)+ statement -> ^($t statement)+ + ; + +primary + : ID + ; + +expression + : r1 + ; + +statement + : 'fooze' + | 'fooze2' + ; + +modifiers + : modifier+ + ; + +modifier + : 'public' + | 'private' + ; + +type + : 'int' + | 'bool' + ; + +ID : 'a'..'z' + ; +INT : '0'..'9' +; +WS: (' ' | '\n' | '\t')+ {$channel = HIDDEN;}; + diff --git a/runtime/Cpp/tests/t043synpred.g b/runtime/Cpp/tests/t043synpred.g new file mode 100644 index 000000000..b54cef850 --- /dev/null +++ b/runtime/Cpp/tests/t043synpred.g @@ -0,0 +1,27 @@ +grammar t043synpred; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +a: ((s+ P)=> s+ b)? E; +b: P 'foo'; + +s: S; + + +S: ' '; +P: '+'; +E: '>'; diff --git a/runtime/Cpp/tests/t044trace.g b/runtime/Cpp/tests/t044trace.g new file mode 100644 index 000000000..002aa2fe9 --- /dev/null +++ b/runtime/Cpp/tests/t044trace.g @@ -0,0 +1,33 @@ +grammar t044trace; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@init { + self._stack = None +} + +a: '<' ((INT '+')=>b|c) '>'; +b: c ('+' c)*; +c: INT + { + if self._stack is None: + self._stack = self.getRuleInvocationStack() + } + ; + +INT: ('0'..'9')+; +WS: (' ' | '\n' | '\t')+ {$channel = HIDDEN;}; diff --git a/runtime/Cpp/tests/t045dfabug.g b/runtime/Cpp/tests/t045dfabug.g new file mode 100644 index 000000000..101c58275 --- /dev/null +++ b/runtime/Cpp/tests/t045dfabug.g @@ -0,0 +1,44 @@ +grammar t045dfabug; +options { + language =Cpp; + output = AST; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ 
Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +// this rule used to generate an infinite loop in DFA.predict +r +options { backtrack=true; } + : (modifier+ INT)=> modifier+ expression + | modifier+ statement + ; + +expression + : INT '+' INT + ; + +statement + : 'fooze' + | 'fooze2' + ; + +modifier + : 'public' + | 'private' + ; + +ID : 'a'..'z' + ; +INT : '0'..'9' +; +WS: (' ' | '\n' | '\t')+ {$channel = HIDDEN;}; + diff --git a/runtime/Cpp/tests/t046rewrite.g b/runtime/Cpp/tests/t046rewrite.g new file mode 100644 index 000000000..60d8a411e --- /dev/null +++ b/runtime/Cpp/tests/t046rewrite.g @@ -0,0 +1,67 @@ +grammar t046rewrite; +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +program +@init { + start = self.input.LT(1) +} + : method+ + { + self.input.insertBefore(start,"public class Wrapper {\n") + self.input.insertAfter($method.stop, "\n}\n") + } + ; + +method + : m='method' ID '(' ')' body + {self.input.replace($m, "public void");} + ; + +body +scope { + decls +} +@init { + $body::decls = set() +} + : lcurly='{' stat* '}' + { + for it in $body::decls: + self.input.insertAfter($lcurly, "\nint "+it+";") + } + ; + +stat: ID '=' expr ';' {$body::decls.add($ID.text);} + ; + +expr: mul ('+' mul)* + ; + +mul : atom ('*' atom)* + ; + +atom: ID + | INT + ; + +ID : ('a'..'z'|'A'..'Z')+ ; + +INT : ('0'..'9')+ ; + +WS : (' '|'\t'|'\n')+ {$channel=HIDDEN;} + ; diff --git a/runtime/Cpp/tests/t047treeparser.g b/runtime/Cpp/tests/t047treeparser.g new file mode 100644 index 000000000..375d84f19 --- /dev/null +++ b/runtime/Cpp/tests/t047treeparser.g @@ -0,0 +1,126 @@ +grammar t047treeparser; +options { + language=Cpp; + output=AST; +} + +tokens { + VAR_DEF; + ARG_DEF; + FUNC_HDR; + FUNC_DECL; + FUNC_DEF; + BLOCK; +} + +@lexer::includes +{ +#include 
"UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +program + : declaration+ + ; + +declaration + : variable + | functionHeader ';' -> ^(FUNC_DECL functionHeader) + | functionHeader block -> ^(FUNC_DEF functionHeader block) + ; + +variable + : type declarator ';' -> ^(VAR_DEF type declarator) + ; + +declarator + : ID + ; + +functionHeader + : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')' + -> ^(FUNC_HDR type ID formalParameter+) + ; + +formalParameter + : type declarator -> ^(ARG_DEF type declarator) + ; + +type + : 'int' + | 'char' + | 'void' + | ID + ; + +block + : lc='{' + variable* + stat* + '}' + -> ^(BLOCK[$lc,"BLOCK"] variable* stat*) + ; + +stat: forStat + | expr ';'! + | block + | assignStat ';'! + | ';'! + ; + +forStat + : 'for' '(' start=assignStat ';' expr ';' next=assignStat ')' block + -> ^('for' $start expr $next block) + ; + +assignStat + : ID EQ expr -> ^(EQ ID expr) + ; + +expr: condExpr + ; + +condExpr + : aexpr ( ('=='^ | '<'^) aexpr )? 
+ ; + +aexpr + : atom ( '+'^ atom )* + ; + +atom + : ID + | INT + | '(' expr ')' -> expr + ; + +FOR : 'for' ; +INT_TYPE : 'int' ; +CHAR: 'char'; +VOID: 'void'; + +ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* + ; + +INT : ('0'..'9')+ + ; + +EQ : '=' ; +EQEQ : '==' ; +LT : '<' ; +PLUS : '+' ; + +WS : ( ' ' + | '\t' + | '\r' + | '\n' + )+ + { $channel=HIDDEN } + ; diff --git a/runtime/Cpp/tests/t047treeparserWalker.g b/runtime/Cpp/tests/t047treeparserWalker.g new file mode 100644 index 000000000..1d4d622cb --- /dev/null +++ b/runtime/Cpp/tests/t047treeparserWalker.g @@ -0,0 +1,73 @@ +tree grammar t047treeparserWalker; +options { + language=Cpp; + tokenVocab=t047treeparser; + ASTLabelType=CommonTree; +} + +@includes { +#include "UserTestTraits.hpp" +} +@namespace +{ Antlr3Test } + +program + : declaration+ + ; + +declaration + : variable + | ^(FUNC_DECL functionHeader) + | ^(FUNC_DEF functionHeader block) + ; + +variable returns [res] + : ^(VAR_DEF type declarator) + { + $res = $declarator.text; + } + ; + +declarator + : ID + ; + +functionHeader + : ^(FUNC_HDR type ID formalParameter+) + ; + +formalParameter + : ^(ARG_DEF type declarator) + ; + +type + : 'int' + | 'char' + | 'void' + | ID + ; + +block + : ^(BLOCK variable* stat*) + ; + +stat: forStat + | expr + | block + ; + +forStat + : ^('for' expr expr expr block) + ; + +expr: ^(EQEQ expr expr) + | ^(LT expr expr) + | ^(PLUS expr expr) + | ^(EQ ID expr) + | atom + ; + +atom + : ID + | INT + ; diff --git a/runtime/Cpp/tests/t048rewrite.g b/runtime/Cpp/tests/t048rewrite.g new file mode 100644 index 000000000..2cd40fe5b --- /dev/null +++ b/runtime/Cpp/tests/t048rewrite.g @@ -0,0 +1,16 @@ +lexer grammar t048rewrite; +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +A: 'a'; +B: 'b'; +C: 'c'; + diff --git a/runtime/Cpp/tests/t048rewrite2.g b/runtime/Cpp/tests/t048rewrite2.g new file mode 100644 index 000000000..c7bb2ddc4 --- 
/dev/null +++ b/runtime/Cpp/tests/t048rewrite2.g @@ -0,0 +1,19 @@ +lexer grammar t048rewrite2; +options { + language=Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +ID : 'a'..'z'+; +INT : '0'..'9'+; +SEMI : ';'; +PLUS : '+'; +MUL : '*'; +ASSIGN : '='; +WS : ' '+; diff --git a/runtime/Cpp/tests/t050decorate.g b/runtime/Cpp/tests/t050decorate.g new file mode 100644 index 000000000..2f6ea71cd --- /dev/null +++ b/runtime/Cpp/tests/t050decorate.g @@ -0,0 +1,42 @@ +grammar t050decorate; +options { + language =Cpp; +} + +@lexer::includes +{ +#include "UserTestTraits.hpp" +} +@lexer::namespace +{ Antlr3Test } + +@parser::includes { +#include "UserTestTraits.hpp" +} +@parser::namespace +{ Antlr3Test } + +@header { + def logme(func): + def decorated(self, *args, **kwargs): + self.events.append('before') + try: + return func(self, *args, **kwargs) + finally: + self.events.append('after') + + return decorated +} + +@parser::init { +self.events = [] +} + +document +@decorate { + @logme +} + : IDENTIFIER + ; + +IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*; diff --git a/tool/src/main/java/org/antlr/codegen/CppTarget.java b/tool/src/main/java/org/antlr/codegen/CppTarget.java index 3759e01f5..eef580f46 100755 --- a/tool/src/main/java/org/antlr/codegen/CppTarget.java +++ b/tool/src/main/java/org/antlr/codegen/CppTarget.java @@ -38,6 +38,10 @@ import java.util.Map; public class CppTarget extends Target { + @Override + public boolean useBaseTemplatesForSynPredFragments() { + return false; + } ArrayList strings = new ArrayList(); diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg b/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg index 2494f4c75..b3eb31f01 100755 --- a/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg @@ -185,17 +185,17 @@ outputFile( LEXER, >> headerFileExtension() 
::= ".hpp" -beginNamespace(actions) ::= << +beginNamespace(actions) ::= <% namespace { ->> +%> -endNamespace(actions) ::= << +endNamespace(actions) ::= <% } ->> +%> headerFile( LEXER, @@ -224,21 +224,20 @@ headerFile( LEXER, generatedTimestamp, trace, scopes, - superClass, + superClass, literals ) ::= << - * The parser + * The parser has the callable functions (rules) shown below, - * The lexer + * The lexer has the callable functions (rules) shown below, - * The tree parser + * The tree parser has the callable functions (rules) shown below, -has the callable functions (rules) shown below, * which will invoke the code for the associated rule in the source grammar * assuming that the input stream is pointing to a token/text stream that could begin * this rule. @@ -282,7 +281,7 @@ has the callable functions (rules) shown below, /* ============================================================================= * Standard antlr3 C++ runtime definitions */ -#include \ +#include \ /* End of standard antlr 3 runtime definitions * ============================================================================= @@ -340,13 +339,11 @@ has the callable functions (rules) shown below, typedef Traits ImplTraits; struct {\}; }; separator="\n"> -}> class Tokens { public: /** Symbolic definitions of all the tokens that the will work with. - * \{ * * Antlr will define EOF, but we can't use that as it it is too common in * in C header files and that would be confusing. 
There is no way to filter this out at the moment @@ -379,7 +376,7 @@ public: }> - }> + }> private: @@ -405,16 +402,20 @@ public: void init(InputType* instream * }> ); + + void memoize(ANTLR_MARKER ruleIndex, ANTLR_MARKER ruleParseStart); bool alreadyParsedRule(ANTLR_MARKER ruleIndex); + m( );}; separator="\n"> msynpred( antlr3::ClassForwarder\< > ); void m_fragment ();}; separator="\n"> + }> (); }; separator="\n"> msynpred( antlr3::ClassForwarder\< > ); void m_fragment ();}; separator="\n"> @@ -428,8 +429,7 @@ public: const char * getGrammarFileName(); void reset(); ~(); - - + }; // Function protoypes for the constructor functions that external translation units @@ -443,7 +443,6 @@ extern ANTLR_UINT8* T /* End of token definitions for * ============================================================================= */ -/** \} */ @@ -453,7 +452,7 @@ extern ANTLR_UINT8* T >> -grammarType() ::= << +grammarType() ::= <% parser @@ -463,7 +462,7 @@ lexer tree parser ->> +%> componentType() ::= << @@ -477,7 +476,7 @@ componentType() ::= << >> -componentBaseType() ::= << +componentBaseType() ::= <% ImplTraits::BaseParserType @@ -487,7 +486,7 @@ componentBaseType() ::= << ImplTraits::BaseTreeParserType ->> +%> streamType() ::= << @@ -502,7 +501,7 @@ streamType() ::= << >> -mainName() ::= << +mainName() ::= <% @@ -512,11 +511,11 @@ mainName() ::= << ->> +%> -headerReturnScope(ruleDescriptor) ::= "" +headerReturnScope(ruleDescriptor) ::= "" -headerReturnType(ruleDescriptor) ::= << +headerReturnType(ruleDescriptor) ::= <% void @@ -526,7 +525,7 @@ headerReturnType(ruleDescriptor) ::= << ->> +%> // Produce the lexer output // @@ -803,28 +802,17 @@ actionGate() ::= "this->get_backtracking()==0" filteringActionGate() ::= "this->get_backtracking()==1" /** How to generate a parser */ -genericParser( grammar, - name, - scopes, - tokens, - tokenNames, - rules, - numRules, - bitsets, - inputStreamType, - superClass, - labelType, - members, - rewriteElementType, filterMode, - 
ASTLabelType="ImplTraits::TreeType*" - ) ::= << +genericParser( grammar, name, scopes, tokens, tokenNames, rules, numRules, + bitsets, inputStreamType, superClass, + labelType, members, rewriteElementType, + filterMode, ASTLabelType="ImplTraits::TreeType*") ::= << using namespace antlr3; /** \brief Table of all token names in symbolic order, mainly used for * error reporting. */ -ANTLR_UINT8* TokenNames[+4] +ANTLR_UINT8* TokenNames[+4] = { (ANTLR_UINT8*) "\", /* String to print to indicate an invalid token */ (ANTLR_UINT8*) "\", @@ -877,7 +865,6 @@ const char* ::getGrammarFileName() void ::init(StreamType* instream* }>) { - /* Create a LIST for recording rule memos. @@ -1017,11 +1004,11 @@ parser( grammar, bitsets, ASTLabelType, superClass="Parser", - labelType="const CommonTokenType*", + labelType="ImplTraits::CommonTokenType*", members={} ) ::= << - + >> @@ -1039,12 +1026,12 @@ treeParser( grammar, bitsets, filterMode, labelType={}, - ASTLabelType="TreeType*", + ASTLabelType="ImplTraits::TreeType*", superClass="TreeParser", members={} ) ::= << - + >> @@ -1070,7 +1057,9 @@ void ::m_fragment( - + +goto ruleEx; /* Prevent compiler warnings */ +ruleEx: ; } // $ANTLR end >> @@ -1237,10 +1226,12 @@ ruleDeclarations() ::= << ImplTraits::RuleReturnValueType _antlr_rule_exit(this); + ; }> + ANTLR_MARKER _StartIndex; @@ -1249,14 +1240,12 @@ ANTLR_MARKER _StartIndex; ruleInitializations() ::= << /* Initialize rule variables */ + -retval. = ; -}> +retval. 
= ; }> - = ; -}> + = ; }> + _StartIndex = this->index();<\n> @@ -1266,27 +1255,20 @@ ruleInitializations() ::= << >> ruleLabelDefs() ::= << -<[ruleDescriptor.tokenLabels,ruleDescriptor.tokenListLabels] - :{it | ;}; separator="\n" +<[ruleDescriptor.tokenLabels,ruleDescriptor.tokenListLabels, + ruleDescriptor.wildcardTreeLabels,ruleDescriptor.wildcardTreeListLabels] + :{it | = NULL;}; separator="\n" > -<[ruleDescriptor.tokenListLabels,ruleDescriptor.ruleListLabels] - :{it | VectorType\ list_;}; separator="\n" -> -<[ruleDescriptor.ruleLabels,ruleDescriptor.ruleListLabels] - :ruleLabelDef(); separator="\n" +<[ruleDescriptor.tokenListLabels,ruleDescriptor.ruleListLabels,ruleDescriptor.wildcardTreeListLabels] + :{it |ImplTraits::TokenPtrsListType list_;}; separator="\n" > + >> ruleLabelInitializations() ::= << -<[ruleDescriptor.tokenLabels,ruleDescriptor.tokenListLabels] - :{it | = NULL;}; separator="\n" -> -<[ruleDescriptor.ruleLabels,ruleDescriptor.ruleListLabels] - :ruleLabelInitVal(); separator="\n" -> -retval.call_start_placeholder(); <\n> +retval.call_start_placeholder(); >> @@ -1295,7 +1277,7 @@ lexerRuleLabelDefs() ::= << <[ruleDescriptor.tokenLabels, ruleDescriptor.tokenListLabels, ruleDescriptor.ruleLabels] - :{it | ;}; separator="\n" + :{it | = NULL;}; separator="\n" > ;}; separator="\n"> <[ruleDescriptor.tokenListLabels, @@ -1306,11 +1288,6 @@ lexerRuleLabelDefs() ::= << >> lexerRuleLabelInit() ::= << -<[ruleDescriptor.tokenLabels, - ruleDescriptor.tokenListLabels, - ruleDescriptor.ruleLabels] - :{it | = NULL;}; separator="\n" -> <[ruleDescriptor.tokenListLabels, ruleDescriptor.ruleListLabels, ruleDescriptor.ruleListLabels] @@ -1696,8 +1673,8 @@ matchSetAndListLabel(s,label,elementIndex,postmatchCode) ::= << /** Match a string literal */ lexerStringRef(string,label,elementIndex) ::= << -