summaryrefslogtreecommitdiffstats
path: root/lib/antlr/antlr/Parser.h
blob: 4c53c43346cfbaae4b131f838af87822c7a660f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#ifndef INC_Parser_h__
#define INC_Parser_h__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.h>

#include <iostream>
#include <exception>

#include <antlr/BitSet.h>
#include <antlr/TokenBuffer.h>
#include <antlr/RecognitionException.h>
#include <antlr/MismatchedTokenException.h>
#include <antlr/ASTFactory.h>
#include <antlr/ParserSharedInputState.h>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

/// Global parser-debugging switch; only declared here, the definition lives
/// elsewhere. When it is true, Parser::match() (both overloads) writes
/// "enter match" / "token mismatch" trace lines to std::cout.
extern bool DEBUG_PARSER;

/** A generic ANTLR parser (LL(k) for k>=1) containing a bunch of
 * utility routines useful at any lookahead depth.  We distinguish between
 * the LL(1) and LL(k) parsers because of efficiency.  This may not be
 * necessary in the near future.
 *
 * Each parser object contains the state of the parse including a lookahead
 * cache (the form of which is determined by the subclass), whether or
 * not the parser is in guess mode, where tokens come from, etc...
 *
 * <p>
 * During <b>guess</b> mode, the current lookahead token(s) and token type(s)
 * cache must be saved because the token stream may not have been informed
 * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block.
 * Guessing is started by:
 * <ol>
 * <li>saving the lookahead cache.
 * <li>marking the current position in the TokenBuffer.
 * <li>increasing the guessing level.
 * </ol>
 *
 * After guessing, the parser state is restored by:
 * <ol>
 * <li>restoring the lookahead cache.
 * <li>rewinding the TokenBuffer.
 * <li>decreasing the guessing level.
 * </ol>
 *
 * @see antlr.Token
 * @see antlr.TokenBuffer
 * @see antlr.TokenStream
 * @see antlr.LL1Parser
 * @see antlr.LLkParser
 *
 * @todo add constructors with ASTFactory.
 */
class ANTLR_API Parser {
protected:
	Parser(TokenBuffer& input)
	: inputState(new ParserInputState(input)), astFactory(0), traceDepth(0)
	{
	}
	Parser(TokenBuffer* input)
	: inputState(new ParserInputState(input)), astFactory(0), traceDepth(0)
	{
	}
	Parser(const ParserSharedInputState& state)
	: inputState(state), astFactory(0), traceDepth(0)
	{
	}
public:
	virtual ~Parser()
	{
	}

	/** Return the token type of the ith token of lookahead where i=1
	 * is the current token being examined by the parser (i.e., it
	 * has not been matched yet).
	 */
	virtual int LA(unsigned int i)=0;

	/// Return the i-th token of lookahead
	virtual RefToken LT(unsigned int i)=0;

	/** DEPRECATED! Specify the factory to be used during tree building. (Compulsory)
	 * Setting the factory is nowadays compulsory.
	 * @see setASTFactory
	 */
	virtual void setASTNodeFactory( ASTFactory *factory )
	{
		astFactory = factory;
	}
	/** Specify the factory to be used during tree building. (Compulsory)
	 * Setting the factory is nowadays compulsory.
	 */
	virtual void setASTFactory( ASTFactory *factory )
	{
		astFactory = factory;
	}
	/** Return a pointer to the ASTFactory used.
	 * So you might use it in subsequent treewalkers or to reload AST's
	 * from disk.
	 */
	virtual ASTFactory* getASTFactory()
	{
		return astFactory;
	}
	/** Get the root AST node of the generated AST. When using a custom AST type
	 * or heterogenous AST's, you'll have to convert it to the right type
	 * yourself.
	 */
	virtual RefAST getAST() = 0;

	/// Return the filename of the input file.
	virtual inline ANTLR_USE_NAMESPACE(std)string getFilename() const
	{
		return inputState->filename;
	}
	/// Set the filename of the input file (used for error reporting).
	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
	{
		inputState->filename = f;
	}

	virtual void setInputState(ParserSharedInputState state)
	{
		inputState = state;
	}
	virtual inline ParserSharedInputState getInputState() const
	{
		return inputState;
	}

	/// Get another token object from the token stream
	virtual void consume()=0;
	/// Consume tokens until one matches the given token
	virtual void consumeUntil(int tokenType)
	{
		while (LA(1) != Token::EOF_TYPE && LA(1) != tokenType)
			consume();
	}

	/// Consume tokens until one matches the given token set
	virtual void consumeUntil(const BitSet& set)
	{
		while (LA(1) != Token::EOF_TYPE && !set.member(LA(1)))
			consume();
	}

	/** Make sure current lookahead symbol matches token type <tt>t</tt>.
	 * Throw an exception upon mismatch, which is catch by either the
	 * error handler or by the syntactic predicate.
	 */
	virtual void match(int t)
	{
		if ( DEBUG_PARSER )
		{
			traceIndent();
			ANTLR_USE_NAMESPACE(std)cout << "enter match(" << t << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
		}
		if ( LA(1) != t )
		{
			if ( DEBUG_PARSER )
			{
				traceIndent();
				ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << "!=" << t << ANTLR_USE_NAMESPACE(std)endl;
			}
			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), t, false, getFilename());
		}
		else
		{
			// mark token as consumed -- fetch next token deferred until LA/LT
			consume();
		}
	}

	virtual void matchNot(int t)
	{
		if ( LA(1)==t )
		{
			// Throws inverted-sense exception
			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), t, true, getFilename());
		}
		else
		{
			// mark token as consumed -- fetch next token deferred until LA/LT
			consume();
		}
	}

	/** Make sure current lookahead symbol matches the given set
	 * Throw an exception upon mismatch, which is catch by either the
	 * error handler or by the syntactic predicate.
	 */
	virtual void match(const BitSet& b)
	{
		if ( DEBUG_PARSER )
		{
			traceIndent();
			ANTLR_USE_NAMESPACE(std)cout << "enter match(" << "bitset" /*b.toString()*/
				<< ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
		}
		if ( !b.member(LA(1)) )
		{
			if ( DEBUG_PARSER )
			{
				traceIndent();
				ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << " not member of "
					<< "bitset" /*b.toString()*/ << ANTLR_USE_NAMESPACE(std)endl;
			}
			throw MismatchedTokenException(getTokenNames(), getNumTokens(), LT(1), b, false, getFilename());
		}
		else
		{
			// mark token as consumed -- fetch next token deferred until LA/LT
			consume();
		}
	}

	/** Mark a spot in the input and return the position.
	 * Forwarded to TokenBuffer.
	 */
	virtual inline unsigned int mark()
	{
		return inputState->getInput().mark();
	}
	/// rewind to a previously marked position
	virtual inline void rewind(unsigned int pos)
	{
		inputState->getInput().rewind(pos);
	}
	/** called by the generated parser to do error recovery, override to
	 * customize the behaviour.
	 */
	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
	{
		consume();
		consumeUntil(tokenSet);
	}

	/// Parser error-reporting function can be overridden in subclass
	virtual void reportError(const RecognitionException& ex);
	/// Parser error-reporting function can be overridden in subclass
	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
	/// Parser warning-reporting function can be overridden in subclass
	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

	/// get the token name for the token number 'num'
	virtual const char* getTokenName(int num) const = 0;
	/// get a vector with all token names
	virtual const char* const* getTokenNames() const = 0;
	/** Get the number of tokens defined.
	 * This one should be overridden in subclasses.
	 */
	virtual int getNumTokens(void) const = 0;

	/** Set or change the input token buffer */
//	void setTokenBuffer(TokenBuffer<Token>* t);

	virtual void traceIndent();
	virtual void traceIn(const char* rname);
	virtual void traceOut(const char* rname);
protected:
//	void setTokenNames(const char** tokenNames_);

	ParserSharedInputState inputState;

//	/// AST return value for a rule is squirreled away here
//	RefAST returnAST;

	/// AST support code; parser and treeparser delegate to this object
	ASTFactory *astFactory;

	// used to keep track of the indentation for the trace
	int traceDepth;

	/** Utility class which allows tracing to work even when exceptions are
	 * thrown.
	 */
	class Tracer { /*{{{*/
	private:
		Parser* parser;
		const char* text;
	public:
		Tracer(Parser* p,const char * t)
		: parser(p), text(t)
		{
			parser->traceIn(text);
		}
		~Tracer()
		{
#ifdef ANTLR_CXX_SUPPORTS_UNCAUGHT_EXCEPTION
			// Only give trace if there's no uncaught exception..
			if(!ANTLR_USE_NAMESPACE(std)uncaught_exception())
#endif
				parser->traceOut(text);
		}
	private:
		Tracer(const Tracer&);							// undefined
		const Tracer& operator=(const Tracer&);	// undefined
		/*}}}*/
	};
private:
	Parser(const Parser&);								// undefined
	const Parser& operator=(const Parser&);		// undefined
};

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_Parser_h__