1#ifndef INC_TokenStreamRewriteEngine_hpp__
2#define INC_TokenStreamRewriteEngine_hpp__
3
4/* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 */
8
9#include <string>
10#include <list>
11#include <vector>
12#include <map>
13#include <utility>
14#include <ostream>
15#include <iterator>
16#include <cassert>
17#include <algorithm>
18
19#include <antlr/config.hpp>
20
21#include <antlr/TokenStream.hpp>
22#include <antlr/TokenWithIndex.hpp>
23#include <antlr/BitSet.hpp>
24
25#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
26namespace antlr {
27#endif
28
29/** This token stream tracks the *entire* token stream coming from
30 *	 a lexer, but does not pass on the whitespace (or whatever else
31 *	 you want to discard) to the parser.
32 *
33 *	 This class can then be asked for the ith token in the input stream.
34 *	 Useful for dumping out the input stream exactly after doing some
 *	 augmentation or other manipulations.	Tokens are indexed from 0..n-1
36 *
37 *	 You can insert stuff, replace, and delete chunks.	 Note that the
38 *	 operations are done lazily--only if you convert the buffer to a
39 *	 String.	 This is very efficient because you are not moving data around
40 *	 all the time.	 As the buffer of tokens is converted to strings, the
41 *	 toString() method(s) check to see if there is an operation at the
42 *	 current index.  If so, the operation is done and then normal String
43 *	 rendering continues on the buffer.	 This is like having multiple Turing
44 *	 machine instruction streams (programs) operating on a single input tape. :)
45 *
46 *	 Since the operations are done lazily at toString-time, operations do not
47 *	 screw up the token index values.  That is, an insert operation at token
48 *	 index i does not change the index values for tokens i+1..n-1.
49 *
50 *	 Because operations never actually alter the buffer, you may always get
51 *	 the original token stream back without undoing anything.  Since
52 *	 the instructions are queued up, you can easily simulate transactions and
53 *	 roll back any changes if there is an error just by removing instructions.
54 *	 For example,
55 *
 *			TokenStreamRewriteEngine rewriteEngine(lexer);
 *			JavaRecognizer parser(rewriteEngine);
 *			...
 *			rewriteEngine.insertAfter("pass1", t, "foobar");
 *			rewriteEngine.insertAfter("pass2", u, "start");
 *			rewriteEngine.toStream(std::cout, "pass1");
 *			rewriteEngine.toStream(std::cout, "pass2");
64 *
65 *	 You can also have multiple "instruction streams" and get multiple
66 *	 rewrites from a single pass over the input.	 Just name the instruction
67 *	 streams and use that name again when printing the buffer.	This could be
68 *	 useful for generating a C file and also its header file--all from the
69 *	 same buffer.
70 *
71 *	 If you don't use named rewrite streams, a "default" stream is used.
72 *
73 *	 Terence Parr, parrt@cs.usfca.edu
74 *	 University of San Francisco
75 *	 February 2004
76 */
77class TokenStreamRewriteEngine : public TokenStream
78{
79public:
80	typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
81	static const char* DEFAULT_PROGRAM_NAME;
82#ifndef NO_STATIC_CONSTS
83	static const size_t MIN_TOKEN_INDEX;
84	static const int PROGRAM_INIT_SIZE;
85#else
86	enum {
87		MIN_TOKEN_INDEX = 0,
88		PROGRAM_INIT_SIZE = 100
89	};
90#endif
91
92	struct tokenToStream {
93		tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
94		template <typename T> void operator() ( const T& t ) {
95			out << t->getText();
96		}
97		ANTLR_USE_NAMESPACE(std)ostream& out;
98	};
99
100	class RewriteOperation {
101	protected:
102		RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
103		: index(idx), text(txt)
104		{
105		}
106	public:
107		virtual ~RewriteOperation()
108		{
109		}
110		/** Execute the rewrite operation by possibly adding to the buffer.
111		 *	 Return the index of the next token to operate on.
112		 */
113		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
114			return index;
115		}
116		virtual size_t getIndex() const {
117			return index;
118		}
119		virtual const char* type() const {
120			return "RewriteOperation";
121		}
122	protected:
123		size_t index;
124		ANTLR_USE_NAMESPACE(std)string text;
125	};
126
127	struct executeOperation {
128		ANTLR_USE_NAMESPACE(std)ostream& out;
129		executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
130		void operator () ( RewriteOperation* t ) {
131			t->execute(out);
132		}
133	};
134
135	/// list of rewrite operations
136	typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
137	/// map program name to <program counter,program> tuple
138	typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
139
140	class InsertBeforeOp : public RewriteOperation
141	{
142	public:
143		InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
144		: RewriteOperation(index, text)
145		{
146		}
147		virtual ~InsertBeforeOp() {}
148		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
149		{
150			out << text;
151			return index;
152		}
153		virtual const char* type() const {
154			return "InsertBeforeOp";
155		}
156	};
157
158	class ReplaceOp : public RewriteOperation
159	{
160	public:
161		ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
162		: RewriteOperation(from,text)
163		, lastIndex(to)
164		{
165		}
166		virtual ~ReplaceOp() {}
167		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
168			out << text;
169			return lastIndex+1;
170		}
171		virtual const char* type() const {
172			return "ReplaceOp";
173		}
174	protected:
175		size_t lastIndex;
176	};
177
178	class DeleteOp : public ReplaceOp {
179	public:
180		DeleteOp(size_t from, size_t to)
181		: ReplaceOp(from,to,"")
182		{
183		}
184		virtual const char* type() const {
185			return "DeleteOp";
186		}
187	};
188
189	TokenStreamRewriteEngine(TokenStream& upstream);
190
191	TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
192
193	RefToken nextToken( void );
194
195	void rollback(size_t instructionIndex) {
196		rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
197	}
198
199	/** Rollback the instruction stream for a program so that
200	 *	 the indicated instruction (via instructionIndex) is no
201	 *	 longer in the stream.	UNTESTED!
202	 */
203	void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
204					  size_t instructionIndex );
205
206	void deleteProgram() {
207		deleteProgram(DEFAULT_PROGRAM_NAME);
208	}
209
210	/** Reset the program so that no instructions exist */
211	void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
212		rollback(programName, MIN_TOKEN_INDEX);
213	}
214
215	void insertAfter( RefTokenWithIndex t,
216							const ANTLR_USE_NAMESPACE(std)string& text )
217	{
218		insertAfter(DEFAULT_PROGRAM_NAME, t, text);
219	}
220
221	void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
222		insertAfter(DEFAULT_PROGRAM_NAME, index, text);
223	}
224
225	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
226							RefTokenWithIndex t,
227							const ANTLR_USE_NAMESPACE(std)string& text )
228	{
229		insertAfter(programName, t->getIndex(), text);
230	}
231
232	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
233							size_t index,
234							const ANTLR_USE_NAMESPACE(std)string& text )
235	{
236		// to insert after, just insert before next index (even if past end)
237		insertBefore(programName,index+1, text);
238	}
239
240	void insertBefore( RefTokenWithIndex t,
241							 const ANTLR_USE_NAMESPACE(std)string& text )
242	{
243		// std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
244		insertBefore(DEFAULT_PROGRAM_NAME, t, text);
245	}
246
247	void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
248		insertBefore(DEFAULT_PROGRAM_NAME, index, text);
249	}
250
251	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
252							 RefTokenWithIndex t,
253							 const ANTLR_USE_NAMESPACE(std)string& text )
254	{
255		insertBefore(programName, t->getIndex(), text);
256	}
257
258	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
259							 size_t index,
260							 const ANTLR_USE_NAMESPACE(std)string& text )
261	{
262		addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
263	}
264
265	void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
266	{
267		replace(DEFAULT_PROGRAM_NAME, index, index, text);
268	}
269
270	void replace( size_t from, size_t to,
271					  const ANTLR_USE_NAMESPACE(std)string& text)
272	{
273		replace(DEFAULT_PROGRAM_NAME, from, to, text);
274	}
275
276	void replace( RefTokenWithIndex indexT,
277					  const ANTLR_USE_NAMESPACE(std)string& text )
278	{
279		replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
280	}
281
282	void replace( RefTokenWithIndex from,
283					  RefTokenWithIndex to,
284					  const ANTLR_USE_NAMESPACE(std)string& text )
285	{
286		replace(DEFAULT_PROGRAM_NAME, from, to, text);
287	}
288
289	void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
290					 size_t from, size_t to,
291					 const ANTLR_USE_NAMESPACE(std)string& text )
292	{
293		addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
294	}
295
296	void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
297					  RefTokenWithIndex from,
298					  RefTokenWithIndex to,
299					  const ANTLR_USE_NAMESPACE(std)string& text )
300	{
301		replace(programName,
302				  from->getIndex(),
303				  to->getIndex(),
304				  text);
305	}
306
307	void remove(size_t index) {
308		remove(DEFAULT_PROGRAM_NAME, index, index);
309	}
310
311	void remove(size_t from, size_t to) {
312		remove(DEFAULT_PROGRAM_NAME, from, to);
313	}
314
315	void remove(RefTokenWithIndex indexT) {
316		remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
317	}
318
319	void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
320		remove(DEFAULT_PROGRAM_NAME, from, to);
321	}
322
323	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324					 size_t from, size_t to)
325	{
326		replace(programName,from,to,"");
327	}
328
329	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
330					 RefTokenWithIndex from, RefTokenWithIndex to )
331	{
332		replace(programName,from,to,"");
333	}
334
335	void discard(int ttype) {
336		discardMask.add(ttype);
337	}
338
339	RefToken getToken( size_t i )
340	{
341		return RefToken(tokens.at(i));
342	}
343
344	size_t getTokenStreamSize() const {
345		return tokens.size();
346	}
347
348	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
349		ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
350	}
351
352	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
353								  size_t start, size_t end ) const;
354
355	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
356		toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
357	}
358
359	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360						const ANTLR_USE_NAMESPACE(std)string& programName ) const
361	{
362		toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
363	}
364
365	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366						size_t start, size_t end ) const
367	{
368		toStream(out, DEFAULT_PROGRAM_NAME, start, end);
369	}
370
371	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
372						const ANTLR_USE_NAMESPACE(std)string& programName,
373						size_t firstToken, size_t lastToken ) const;
374
375	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
376		toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
377	}
378
379	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
380							  size_t start, size_t end ) const;
381
382	size_t getLastRewriteTokenIndex() const {
383		return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
384	}
385
386	/** Return the last index for the program named programName
387	 * return 0 if the program does not exist or the program is empty.
388	 * (Note this is different from the java implementation that returns -1)
389	 */
390	size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
391		program_map::const_iterator rewrites = programs.find(programName);
392
393		if( rewrites == programs.end() )
394			return 0;
395
396		const operation_list& prog = rewrites->second;
397		if( !prog.empty() )
398		{
399			operation_list::const_iterator last = prog.end();
400			--last;
401			return (*last)->getIndex();
402		}
403		return 0;
404	}
405
406protected:
407	/** If op.index > lastRewriteTokenIndexes, just add to the end.
408	 *	 Otherwise, do linear */
409	void addToSortedRewriteList(RewriteOperation* op) {
410		addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
411	}
412
413	void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
414										  RewriteOperation* op );
415
416protected:
417	/** Who do we suck tokens from? */
418	TokenStream& stream;
419	/** track index of tokens */
420	size_t index;
421
422	/** Track the incoming list of tokens */
423	token_list tokens;
424
425	/** You may have multiple, named streams of rewrite operations.
426	 *  I'm calling these things "programs."
427	 *  Maps String (name) -> rewrite (List)
428	 */
429	program_map programs;
430
431	/** Which (whitespace) token(s) to throw out */
432	BitSet discardMask;
433};
434
435#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
436}
437#endif
438
439#endif
440