1#ifndef INC_TokenStreamRewriteEngine_hpp__ 2#define INC_TokenStreamRewriteEngine_hpp__ 3 4/* ANTLR Translator Generator 5 * Project led by Terence Parr at http://www.jGuru.com 6 * Software rights: http://www.antlr.org/license.html 7 */ 8 9#include <string> 10#include <list> 11#include <vector> 12#include <map> 13#include <utility> 14#include <ostream> 15#include <iterator> 16#include <cassert> 17#include <algorithm> 18 19#include <antlr/config.hpp> 20 21#include <antlr/TokenStream.hpp> 22#include <antlr/TokenWithIndex.hpp> 23#include <antlr/BitSet.hpp> 24 25#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE 26namespace antlr { 27#endif 28 29/** This token stream tracks the *entire* token stream coming from 30 * a lexer, but does not pass on the whitespace (or whatever else 31 * you want to discard) to the parser. 32 * 33 * This class can then be asked for the ith token in the input stream. 34 * Useful for dumping out the input stream exactly after doing some 35 * augmentation or other manipulations. Tokens are index from 0..n-1 36 * 37 * You can insert stuff, replace, and delete chunks. Note that the 38 * operations are done lazily--only if you convert the buffer to a 39 * String. This is very efficient because you are not moving data around 40 * all the time. As the buffer of tokens is converted to strings, the 41 * toString() method(s) check to see if there is an operation at the 42 * current index. If so, the operation is done and then normal String 43 * rendering continues on the buffer. This is like having multiple Turing 44 * machine instruction streams (programs) operating on a single input tape. :) 45 * 46 * Since the operations are done lazily at toString-time, operations do not 47 * screw up the token index values. That is, an insert operation at token 48 * index i does not change the index values for tokens i+1..n-1. 49 * 50 * Because operations never actually alter the buffer, you may always get 51 * the original token stream back without undoing anything. Since 52 * the instructions are queued up, you can easily simulate transactions and 53 * roll back any changes if there is an error just by removing instructions. 54 * For example, 55 * 56 * TokenStreamRewriteEngine rewriteEngine = 57 * new TokenStreamRewriteEngine(lexer); 58 * JavaRecognizer parser = new JavaRecognizer(rewriteEngine); 59 * ... 60 * rewriteEngine.insertAfter("pass1", t, "foobar");} 61 * rewriteEngine.insertAfter("pass2", u, "start");} 62 * System.out.println(rewriteEngine.toString("pass1")); 63 * System.out.println(rewriteEngine.toString("pass2")); 64 * 65 * You can also have multiple "instruction streams" and get multiple 66 * rewrites from a single pass over the input. Just name the instruction 67 * streams and use that name again when printing the buffer. This could be 68 * useful for generating a C file and also its header file--all from the 69 * same buffer. 70 * 71 * If you don't use named rewrite streams, a "default" stream is used. 72 * 73 * Terence Parr, parrt@cs.usfca.edu 74 * University of San Francisco 75 * February 2004 76 */ 77class TokenStreamRewriteEngine : public TokenStream 78{ 79public: 80 typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list; 81 static const char* DEFAULT_PROGRAM_NAME; 82#ifndef NO_STATIC_CONSTS 83 static const size_t MIN_TOKEN_INDEX; 84 static const int PROGRAM_INIT_SIZE; 85#else 86 enum { 87 MIN_TOKEN_INDEX = 0, 88 PROGRAM_INIT_SIZE = 100 89 }; 90#endif 91 92 struct tokenToStream { 93 tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {} 94 template <typename T> void operator() ( const T& t ) { 95 out << t->getText(); 96 } 97 ANTLR_USE_NAMESPACE(std)ostream& out; 98 }; 99 100 class RewriteOperation { 101 protected: 102 RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt ) 103 : index(idx), text(txt) 104 { 105 } 106 public: 107 virtual ~RewriteOperation() 108 { 109 } 110 /** Execute the rewrite operation by possibly adding to the buffer. 111 * Return the index of the next token to operate on. 112 */ 113 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) { 114 return index; 115 } 116 virtual size_t getIndex() const { 117 return index; 118 } 119 virtual const char* type() const { 120 return "RewriteOperation"; 121 } 122 protected: 123 size_t index; 124 ANTLR_USE_NAMESPACE(std)string text; 125 }; 126 127 struct executeOperation { 128 ANTLR_USE_NAMESPACE(std)ostream& out; 129 executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {} 130 void operator () ( RewriteOperation* t ) { 131 t->execute(out); 132 } 133 }; 134 135 /// list of rewrite operations 136 typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list; 137 /// map program name to <program counter,program> tuple 138 typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map; 139 140 class InsertBeforeOp : public RewriteOperation 141 { 142 public: 143 InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text ) 144 : RewriteOperation(index, text) 145 { 146 } 147 virtual ~InsertBeforeOp() {} 148 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) 149 { 150 out << text; 151 return index; 152 } 153 virtual const char* type() const { 154 return "InsertBeforeOp"; 155 } 156 }; 157 158 class ReplaceOp : public RewriteOperation 159 { 160 public: 161 ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text) 162 : RewriteOperation(from,text) 163 , lastIndex(to) 164 { 165 } 166 virtual ~ReplaceOp() {} 167 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) { 168 out << text; 169 return lastIndex+1; 170 } 171 virtual const char* type() const { 172 return "ReplaceOp"; 173 } 174 protected: 175 size_t lastIndex; 176 }; 177 178 class DeleteOp : public ReplaceOp { 179 public: 180 DeleteOp(size_t from, size_t to) 181 : ReplaceOp(from,to,"") 182 { 183 } 184 virtual const char* type() const { 185 return "DeleteOp"; 186 } 187 }; 188 189 TokenStreamRewriteEngine(TokenStream& upstream); 190 191 TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize); 192 193 RefToken nextToken( void ); 194 195 void rollback(size_t instructionIndex) { 196 rollback(DEFAULT_PROGRAM_NAME, instructionIndex); 197 } 198 199 /** Rollback the instruction stream for a program so that 200 * the indicated instruction (via instructionIndex) is no 201 * longer in the stream. UNTESTED! 202 */ 203 void rollback(const ANTLR_USE_NAMESPACE(std)string& programName, 204 size_t instructionIndex ); 205 206 void deleteProgram() { 207 deleteProgram(DEFAULT_PROGRAM_NAME); 208 } 209 210 /** Reset the program so that no instructions exist */ 211 void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) { 212 rollback(programName, MIN_TOKEN_INDEX); 213 } 214 215 void insertAfter( RefTokenWithIndex t, 216 const ANTLR_USE_NAMESPACE(std)string& text ) 217 { 218 insertAfter(DEFAULT_PROGRAM_NAME, t, text); 219 } 220 221 void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { 222 insertAfter(DEFAULT_PROGRAM_NAME, index, text); 223 } 224 225 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, 226 RefTokenWithIndex t, 227 const ANTLR_USE_NAMESPACE(std)string& text ) 228 { 229 insertAfter(programName, t->getIndex(), text); 230 } 231 232 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, 233 size_t index, 234 const ANTLR_USE_NAMESPACE(std)string& text ) 235 { 236 // to insert after, just insert before next index (even if past end) 237 insertBefore(programName,index+1, text); 238 } 239 240 void insertBefore( RefTokenWithIndex t, 241 const ANTLR_USE_NAMESPACE(std)string& text ) 242 { 243 // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl; 244 insertBefore(DEFAULT_PROGRAM_NAME, t, text); 245 } 246 247 void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { 248 insertBefore(DEFAULT_PROGRAM_NAME, index, text); 249 } 250 251 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, 252 RefTokenWithIndex t, 253 const ANTLR_USE_NAMESPACE(std)string& text ) 254 { 255 insertBefore(programName, t->getIndex(), text); 256 } 257 258 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, 259 size_t index, 260 const ANTLR_USE_NAMESPACE(std)string& text ) 261 { 262 addToSortedRewriteList(programName, new InsertBeforeOp(index,text)); 263 } 264 265 void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) 266 { 267 replace(DEFAULT_PROGRAM_NAME, index, index, text); 268 } 269 270 void replace( size_t from, size_t to, 271 const ANTLR_USE_NAMESPACE(std)string& text) 272 { 273 replace(DEFAULT_PROGRAM_NAME, from, to, text); 274 } 275 276 void replace( RefTokenWithIndex indexT, 277 const ANTLR_USE_NAMESPACE(std)string& text ) 278 { 279 replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text); 280 } 281 282 void replace( RefTokenWithIndex from, 283 RefTokenWithIndex to, 284 const ANTLR_USE_NAMESPACE(std)string& text ) 285 { 286 replace(DEFAULT_PROGRAM_NAME, from, to, text); 287 } 288 289 void replace(const ANTLR_USE_NAMESPACE(std)string& programName, 290 size_t from, size_t to, 291 const ANTLR_USE_NAMESPACE(std)string& text ) 292 { 293 addToSortedRewriteList(programName,new ReplaceOp(from, to, text)); 294 } 295 296 void replace( const ANTLR_USE_NAMESPACE(std)string& programName, 297 RefTokenWithIndex from, 298 RefTokenWithIndex to, 299 const ANTLR_USE_NAMESPACE(std)string& text ) 300 { 301 replace(programName, 302 from->getIndex(), 303 to->getIndex(), 304 text); 305 } 306 307 void remove(size_t index) { 308 remove(DEFAULT_PROGRAM_NAME, index, index); 309 } 310 311 void remove(size_t from, size_t to) { 312 remove(DEFAULT_PROGRAM_NAME, from, to); 313 } 314 315 void remove(RefTokenWithIndex indexT) { 316 remove(DEFAULT_PROGRAM_NAME, indexT, indexT); 317 } 318 319 void remove(RefTokenWithIndex from, RefTokenWithIndex to) { 320 remove(DEFAULT_PROGRAM_NAME, from, to); 321 } 322 323 void remove( const ANTLR_USE_NAMESPACE(std)string& programName, 324 size_t from, size_t to) 325 { 326 replace(programName,from,to,""); 327 } 328 329 void remove( const ANTLR_USE_NAMESPACE(std)string& programName, 330 RefTokenWithIndex from, RefTokenWithIndex to ) 331 { 332 replace(programName,from,to,""); 333 } 334 335 void discard(int ttype) { 336 discardMask.add(ttype); 337 } 338 339 RefToken getToken( size_t i ) 340 { 341 return RefToken(tokens.at(i)); 342 } 343 344 size_t getTokenStreamSize() const { 345 return tokens.size(); 346 } 347 348 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { 349 ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) ); 350 } 351 352 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out, 353 size_t start, size_t end ) const; 354 355 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { 356 toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); 357 } 358 359 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, 360 const ANTLR_USE_NAMESPACE(std)string& programName ) const 361 { 362 toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); 363 } 364 365 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, 366 size_t start, size_t end ) const 367 { 368 toStream(out, DEFAULT_PROGRAM_NAME, start, end); 369 } 370 371 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, 372 const ANTLR_USE_NAMESPACE(std)string& programName, 373 size_t firstToken, size_t lastToken ) const; 374 375 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { 376 toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); 377 } 378 379 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out, 380 size_t start, size_t end ) const; 381 382 size_t getLastRewriteTokenIndex() const { 383 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); 384 } 385 386 /** Return the last index for the program named programName 387 * return 0 if the program does not exist or the program is empty. 388 * (Note this is different from the java implementation that returns -1) 389 */ 390 size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const { 391 program_map::const_iterator rewrites = programs.find(programName); 392 393 if( rewrites == programs.end() ) 394 return 0; 395 396 const operation_list& prog = rewrites->second; 397 if( !prog.empty() ) 398 { 399 operation_list::const_iterator last = prog.end(); 400 --last; 401 return (*last)->getIndex(); 402 } 403 return 0; 404 } 405 406protected: 407 /** If op.index > lastRewriteTokenIndexes, just add to the end. 408 * Otherwise, do linear */ 409 void addToSortedRewriteList(RewriteOperation* op) { 410 addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); 411 } 412 413 void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName, 414 RewriteOperation* op ); 415 416protected: 417 /** Who do we suck tokens from? */ 418 TokenStream& stream; 419 /** track index of tokens */ 420 size_t index; 421 422 /** Track the incoming list of tokens */ 423 token_list tokens; 424 425 /** You may have multiple, named streams of rewrite operations. 426 * I'm calling these things "programs." 427 * Maps String (name) -> rewrite (List) 428 */ 429 program_map programs; 430 431 /** Which (whitespace) token(s) to throw out */ 432 BitSet discardMask; 433}; 434 435#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE 436} 437#endif 438 439#endif 440