1#ifndef INC_Parser_hpp__
2#define INC_Parser_hpp__
3
4/* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/Parser.hpp#2 $
9 */
10
11#include <antlr/config.hpp>
12
13#include <stdio.h>
14#include <exception>
15
16#include <antlr/BitSet.hpp>
17#include <antlr/TokenBuffer.hpp>
18#include <antlr/RecognitionException.hpp>
19#include <antlr/MismatchedTokenException.hpp>
20#include <antlr/ASTFactory.hpp>
21#include <antlr/ParserSharedInputState.hpp>
22
23#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
24namespace antlr {
25#endif
26
27extern bool DEBUG_PARSER;
28
29/** A generic ANTLR parser (LL(k) for k>=1) containing a bunch of
30 * utility routines useful at any lookahead depth.  We distinguish between
31 * the LL(1) and LL(k) parsers because of efficiency.  This may not be
32 * necessary in the near future.
33 *
34 * Each parser object contains the state of the parse including a lookahead
35 * cache (the form of which is determined by the subclass), whether or
36 * not the parser is in guess mode, where tokens come from, etc...
37 *
38 * <p>
39 * During <b>guess</b> mode, the current lookahead token(s) and token type(s)
40 * cache must be saved because the token stream may not have been informed
41 * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block.
42 * Guessing is started by:
43 * <ol>
44 * <li>saving the lookahead cache.
45 * <li>marking the current position in the TokenBuffer.
46 * <li>increasing the guessing level.
47 * </ol>
48 *
49 * After guessing, the parser state is restored by:
50 * <ol>
51 * <li>restoring the lookahead cache.
52 * <li>rewinding the TokenBuffer.
53 * <li>decreasing the guessing level.
54 * </ol>
55 *
56 * @see antlr.Token
57 * @see antlr.TokenBuffer
58 * @see antlr.TokenStream
59 * @see antlr.LL1Parser
60 * @see antlr.LLkParser
61 *
62 * @todo add constructors with ASTFactory.
63 */
64class ANTLR_API Parser {
65protected:
66	Parser(TokenBuffer& input)
67	: inputState(new ParserInputState(input)), astFactory(0), traceDepth(0)
68	{
69	}
70	Parser(TokenBuffer* input)
71	: inputState(new ParserInputState(input)), astFactory(0), traceDepth(0)
72	{
73	}
74	Parser(const ParserSharedInputState& state)
75	: inputState(state), astFactory(0), traceDepth(0)
76	{
77	}
78public:
79	virtual ~Parser()
80	{
81	}
82
83	/** Return the token type of the ith token of lookahead where i=1
84	 * is the current token being examined by the parser (i.e., it
85	 * has not been matched yet).
86	 */
87	virtual int LA(unsigned int i)=0;
88
89	/// Return the i-th token of lookahead
90	virtual RefToken LT(unsigned int i)=0;
91
92	/** DEPRECATED! Specify the factory to be used during tree building. (Compulsory)
93	 * Setting the factory is nowadays compulsory.
94	 * @see setASTFactory
95	 */
96	virtual void setASTNodeFactory( ASTFactory *factory )
97	{
98		astFactory = factory;
99	}
100	/** Specify the factory to be used during tree building. (Compulsory)
101	 * Setting the factory is nowadays compulsory.
102	 */
103	virtual void setASTFactory( ASTFactory *factory )
104	{
105		astFactory = factory;
106	}
107	/** Return a pointer to the ASTFactory used.
108	 * So you might use it in subsequent treewalkers or to reload AST's
109	 * from disk.
110	 */
111	virtual ASTFactory* getASTFactory()
112	{
113		return astFactory;
114	}
115	/** Get the root AST node of the generated AST. When using a custom AST type
116	 * or heterogenous AST's, you'll have to convert it to the right type
117	 * yourself.
118	 */
119	virtual RefAST getAST() = 0;
120
121	/// Return the filename of the input file.
122	virtual inline ANTLR_USE_NAMESPACE(std)string getFilename() const
123	{
124		return inputState->filename;
125	}
126	/// Set the filename of the input file (used for error reporting).
127	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
128	{
129		inputState->filename = f;
130	}
131
132	virtual void setInputState(ParserSharedInputState state)
133	{
134		inputState = state;
135	}
136	virtual inline ParserSharedInputState getInputState() const
137	{
138		return inputState;
139	}
140
141	/// Get another token object from the token stream
142	virtual void consume()=0;
143	/// Consume tokens until one matches the given token
144	virtual void consumeUntil(int tokenType)
145	{
146		while (LA(1) != Token::EOF_TYPE && LA(1) != tokenType)
147			consume();
148	}
149
150	/// Consume tokens until one matches the given token set
151	virtual void consumeUntil(const BitSet& set)
152	{
153		while (LA(1) != Token::EOF_TYPE && !set.member(LA(1)))
154			consume();
155	}
156
157	/** Make sure current lookahead symbol matches token type <tt>t</tt>.
158	 * Throw an exception upon mismatch, which is catch by either the
159	 * error handler or by the syntactic predicate.
160	 */
161	virtual void match(int t)
162	{
163		if ( DEBUG_PARSER )
164		{
165			traceIndent();
166                   //     printf("< lexer %s; c==%d\n", rname, LA(1));
167			printf("enter match(%d) with LA(1)=%d\n", t, LA(1));
168		}
169		if ( LA(1) != t )
170		{
171			if ( DEBUG_PARSER )
172			{
173				traceIndent();
174				printf("token mismatch: %d!=%d\n", LA(1), t);
175			}
176			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), t, false, getFilename());
177		}
178		else
179		{
180			// mark token as consumed -- fetch next token deferred until LA/LT
181			consume();
182		}
183	}
184
185	virtual void matchNot(int t)
186	{
187		if ( LA(1)==t )
188		{
189			// Throws inverted-sense exception
190			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), t, true, getFilename());
191		}
192		else
193		{
194			// mark token as consumed -- fetch next token deferred until LA/LT
195			consume();
196		}
197	}
198
199	/** Make sure current lookahead symbol matches the given set
200	 * Throw an exception upon mismatch, which is catch by either the
201	 * error handler or by the syntactic predicate.
202	 */
203	virtual void match(const BitSet& b)
204	{
205		if ( DEBUG_PARSER )
206		{
207			traceIndent();
208			printf("enter match(bitset) with LA(1)=%d\n", LA(1));
209		}
210		if ( !b.member(LA(1)) )
211		{
212			if ( DEBUG_PARSER )
213			{
214				traceIndent();
215				printf("token mismatch: %d not member of bitset\n", LA(1));
216			}
217			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), b, false, getFilename());
218		}
219		else
220		{
221			// mark token as consumed -- fetch next token deferred until LA/LT
222			consume();
223		}
224	}
225
226	/** Mark a spot in the input and return the position.
227	 * Forwarded to TokenBuffer.
228	 */
229	virtual inline unsigned int mark()
230	{
231		return inputState->getInput().mark();
232	}
233	/// rewind to a previously marked position
234	virtual inline void rewind(unsigned int pos)
235	{
236		inputState->getInput().rewind(pos);
237	}
238	/** called by the generated parser to do error recovery, override to
239	 * customize the behaviour.
240	 */
241	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
242	{
243		consume();
244		consumeUntil(tokenSet);
245	}
246
247	/// Parser error-reporting function can be overridden in subclass
248	virtual void reportError(const RecognitionException& ex);
249	/// Parser error-reporting function can be overridden in subclass
250	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
251	/// Parser warning-reporting function can be overridden in subclass
252	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
253
254	/// get the token name for the token number 'num'
255	virtual const char* getTokenName(int num) const = 0;
256	/// get a vector with all token names
257	virtual const char* const* getTokenNames() const = 0;
258	/** Get the number of tokens defined.
259	 * This one should be overridden in subclasses.
260	 */
261	virtual int getNumTokens(void) const = 0;
262
263	/** Set or change the input token buffer */
264//	void setTokenBuffer(TokenBuffer<Token>* t);
265
266	virtual void traceIndent();
267	virtual void traceIn(const char* rname);
268	virtual void traceOut(const char* rname);
269protected:
270//	void setTokenNames(const char** tokenNames_);
271
272	ParserSharedInputState inputState;
273
274//	/// AST return value for a rule is squirreled away here
275//	RefAST returnAST;
276
277	/// AST support code; parser and treeparser delegate to this object
278	ASTFactory *astFactory;
279
280	// used to keep track of the indentation for the trace
281	int traceDepth;
282
283	/** Utility class which allows tracing to work even when exceptions are
284	 * thrown.
285	 */
286	class Tracer { /*{{{*/
287	private:
288		Parser* parser;
289		const char* text;
290	public:
291		Tracer(Parser* p,const char * t)
292		: parser(p), text(t)
293		{
294			parser->traceIn(text);
295		}
296		~Tracer()
297		{
298#ifdef ANTLR_CXX_SUPPORTS_UNCAUGHT_EXCEPTION
299			// Only give trace if there's no uncaught exception..
300			if(!ANTLR_USE_NAMESPACE(std)uncaught_exception())
301#endif
302				parser->traceOut(text);
303		}
304	private:
305		Tracer(const Tracer&);							// undefined
306		const Tracer& operator=(const Tracer&);	// undefined
307		/*}}}*/
308	};
309private:
310	Parser(const Parser&);								// undefined
311	const Parser& operator=(const Parser&);		// undefined
312};
313
314#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
315}
316#endif
317
318#endif //INC_Parser_hpp__
319