/** \file
 *
 * Base implementation of an antlr 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are lexer specific
 * (replacing the usual default implementation of the base recognizer).
 */

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 40#include <antlr3lexer.h> 41 42static void mTokens (pANTLR3_LEXER lexer); 43static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 44static void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 45static void popCharStream (pANTLR3_LEXER lexer); 46 47static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token); 48static pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer); 49static ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string); 50static ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c); 51static ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high); 52static void matchAny (pANTLR3_LEXER lexer); 53static void recover (pANTLR3_LEXER lexer); 54static ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer); 55static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer); 56static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer); 57static pANTLR3_STRING getText (pANTLR3_LEXER lexer); 58static pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource); 59 60static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames); 61static void reportError (pANTLR3_BASE_RECOGNIZER rec); 62static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); 63static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 64 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); 65 66static void reset (pANTLR3_BASE_RECOGNIZER rec); 67 68static void freeLexer (pANTLR3_LEXER lexer); 69 70 71ANTLR3_API pANTLR3_LEXER 72antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) 73{ 74 pANTLR3_LEXER lexer; 75 pANTLR3_COMMON_TOKEN specialT; 76 77 /* Allocate memory 78 */ 79 lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER)); 80 81 if (lexer == NULL) 82 { 83 return NULL; 84 } 85 86 /* Now we need to create the base recognizer 87 */ 88 
lexer->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state); 89 90 if (lexer->rec == NULL) 91 { 92 lexer->free(lexer); 93 return NULL; 94 } 95 lexer->rec->super = lexer; 96 97 lexer->rec->displayRecognitionError = displayRecognitionError; 98 lexer->rec->reportError = reportError; 99 lexer->rec->reset = reset; 100 lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol; 101 lexer->rec->getMissingSymbol = getMissingSymbol; 102 103 /* Now install the token source interface 104 */ 105 if (lexer->rec->state->tokSource == NULL) 106 { 107 lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_SOURCE)); 108 109 if (lexer->rec->state->tokSource == NULL) 110 { 111 lexer->rec->free(lexer->rec); 112 lexer->free(lexer); 113 114 return NULL; 115 } 116 lexer->rec->state->tokSource->super = lexer; 117 118 /* Install the default nextToken() method, which may be overridden 119 * by generated code, or by anything else in fact. 120 */ 121 lexer->rec->state->tokSource->nextToken = nextToken; 122 lexer->rec->state->tokSource->strFactory = NULL; 123 124 lexer->rec->state->tokFactory = NULL; 125 } 126 127 /* Install the lexer API 128 */ 129 lexer->setCharStream = setCharStream; 130 lexer->mTokens = (void (*)(void *))(mTokens); 131 lexer->setCharStream = setCharStream; 132 lexer->pushCharStream = pushCharStream; 133 lexer->popCharStream = popCharStream; 134 lexer->emit = emit; 135 lexer->emitNew = emitNew; 136 lexer->matchs = matchs; 137 lexer->matchc = matchc; 138 lexer->matchRange = matchRange; 139 lexer->matchAny = matchAny; 140 lexer->recover = recover; 141 lexer->getLine = getLine; 142 lexer->getCharIndex = getCharIndex; 143 lexer->getCharPositionInLine = getCharPositionInLine; 144 lexer->getText = getText; 145 lexer->free = freeLexer; 146 147 /* Initialise the eof token 148 */ 149 specialT = &(lexer->rec->state->tokSource->eofToken); 150 antlr3SetTokenAPI (specialT); 151 specialT->setType (specialT, ANTLR3_TOKEN_EOF); 152 
specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 153 specialT->strFactory = NULL; 154 155 // Initialize the skip token. 156 // 157 specialT = &(lexer->rec->state->tokSource->skipToken); 158 antlr3SetTokenAPI (specialT); 159 specialT->setType (specialT, ANTLR3_TOKEN_INVALID); 160 specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 161 specialT->strFactory = NULL; 162 return lexer; 163} 164 165static void 166reset (pANTLR3_BASE_RECOGNIZER rec) 167{ 168 pANTLR3_LEXER lexer; 169 170 lexer = rec->super; 171 172 lexer->rec->state->token = NULL; 173 lexer->rec->state->type = ANTLR3_TOKEN_INVALID; 174 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 175 lexer->rec->state->tokenStartCharIndex = -1; 176 lexer->rec->state->tokenStartCharPositionInLine = -1; 177 lexer->rec->state->tokenStartLine = -1; 178 179 lexer->rec->state->text = NULL; 180 181 if (lexer->input != NULL) 182 { 183 lexer->input->istream->seek(lexer->input->istream, 0); 184 } 185} 186 187/// 188/// \brief 189/// Returns the next available token from the current input stream. 190/// 191/// \param toksource 192/// Points to the implementation of a token source. The lexer is 193/// addressed by the super structure pointer. 194/// 195/// \returns 196/// The next token in the current input stream or the EOF token 197/// if there are no more tokens. 198/// 199/// \remarks 200/// Write remarks for nextToken here. 201/// 202/// \see nextToken 203/// 204ANTLR3_INLINE static pANTLR3_COMMON_TOKEN 205nextTokenStr (pANTLR3_TOKEN_SOURCE toksource) 206{ 207 pANTLR3_LEXER lexer; 208 209 lexer = (pANTLR3_LEXER)(toksource->super); 210 211 /// Loop until we get a non skipped token or EOF 212 /// 213 for (;;) 214 { 215 // Get rid of any previous token (token factory takes care of 216 // any de-allocation when this token is finally used up. 
217 // 218 lexer->rec->state->token = NULL; 219 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception 220 lexer->rec->state->failed = ANTLR3_FALSE; 221 222 223 224 // Now call the matching rules and see if we can generate a new token 225 // 226 for (;;) 227 { 228 // Record the start of the token in our input stream. 229 // 230 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 231 lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream); 232 lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input); 233 lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input); 234 lexer->rec->state->text = NULL; 235 236 if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF) 237 { 238 // Reached the end of the current stream, nothing more to do if this is 239 // the last in the stack. 240 // 241 pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); 242 243 teof->setStartIndex (teof, lexer->getCharIndex(lexer)); 244 teof->setStopIndex (teof, lexer->getCharIndex(lexer)); 245 teof->setLine (teof, lexer->getLine(lexer)); 246 teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it 247 return teof; 248 } 249 250 lexer->rec->state->token = NULL; 251 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception 252 lexer->rec->state->failed = ANTLR3_FALSE; 253 254 // Call the generated lexer, see if it can get a new token together. 255 // 256 lexer->mTokens(lexer->ctx); 257 258 if (lexer->rec->state->error == ANTLR3_TRUE) 259 { 260 // Recognition exception, report it and try to recover. 
261 // 262 lexer->rec->state->failed = ANTLR3_TRUE; 263 lexer->rec->reportError(lexer->rec); 264 lexer->recover(lexer); 265 } 266 else 267 { 268 if (lexer->rec->state->token == NULL) 269 { 270 // Emit the real token, which adds it in to the token stream basically 271 // 272 emit(lexer); 273 } 274 else if (lexer->rec->state->token == &(toksource->skipToken)) 275 { 276 // A real token could have been generated, but "Computer say's naaaaah" and it 277 // it is just something we need to skip altogether. 278 // 279 continue; 280 } 281 282 // Good token, not skipped, not EOF token 283 // 284 return lexer->rec->state->token; 285 } 286 } 287 } 288} 289 290/** 291 * \brief 292 * Default implementation of the nextToken() call for a lexer. 293 * 294 * \param toksource 295 * Points to the implementation of a token source. The lexer is 296 * addressed by the super structure pointer. 297 * 298 * \returns 299 * The next token in the current input stream or the EOF token 300 * if there are no more tokens in any input stream in the stack. 301 * 302 * Write detailed description for nextToken here. 303 * 304 * \remarks 305 * Write remarks for nextToken here. 306 * 307 * \see nextTokenStr 308 */ 309static pANTLR3_COMMON_TOKEN 310nextToken (pANTLR3_TOKEN_SOURCE toksource) 311{ 312 pANTLR3_COMMON_TOKEN tok; 313 314 // Find the next token in the current stream 315 // 316 tok = nextTokenStr(toksource); 317 318 // If we got to the EOF token then switch to the previous 319 // input stream if there were any and just return the 320 // EOF if there are none. We must check the next token 321 // in any outstanding input stream we pop into the active 322 // role to see if it was sitting at EOF after PUSHing the 323 // stream we just consumed, otherwise we will return EOF 324 // on the reinstalled input stream, when in actual fact 325 // there might be more input streams to POP before the 326 // real EOF of the whole logical inptu stream. 
Hence we 327 // use a while loop here until we find somethign in the stream 328 // that isn't EOF or we reach the actual end of the last input 329 // stream on the stack. 330 // 331 while (tok->type == ANTLR3_TOKEN_EOF) 332 { 333 pANTLR3_LEXER lexer; 334 335 lexer = (pANTLR3_LEXER)(toksource->super); 336 337 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 338 { 339 // We have another input stream in the stack so we 340 // need to revert to it, then resume the loop to check 341 // it wasn't sitting at EOF itself. 342 // 343 lexer->popCharStream(lexer); 344 tok = nextTokenStr(toksource); 345 } 346 else 347 { 348 // There were no more streams on the input stack 349 // so this EOF is the 'real' logical EOF for 350 // the input stream. So we just exit the loop and 351 // return the EOF we have found. 352 // 353 break; 354 } 355 356 } 357 358 // return whatever token we have, which may be EOF 359 // 360 return tok; 361} 362 363ANTLR3_API pANTLR3_LEXER 364antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state) 365{ 366 pANTLR3_LEXER lexer; 367 368 // Create a basic lexer first 369 // 370 lexer = antlr3LexerNew(sizeHint, state); 371 372 if (lexer != NULL) 373 { 374 // Install the input stream and reset the lexer 375 // 376 setCharStream(lexer, input); 377 } 378 379 return lexer; 380} 381 382static void mTokens (pANTLR3_LEXER lexer) 383{ 384 if (lexer) // Fool compiler, avoid pragmas 385 { 386 ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n"); 387 } 388} 389 390static void 391reportError (pANTLR3_BASE_RECOGNIZER rec) 392{ 393 // Indicate this recognizer had an error while processing. 
394 // 395 rec->state->errorCount++; 396 397 rec->displayRecognitionError(rec, rec->state->tokenNames); 398} 399 400#ifdef ANTLR3_WINDOWS 401#pragma warning( disable : 4100 ) 402#endif 403 404/** Default lexer error handler (works for 8 bit streams only!!!) 405 */ 406static void 407displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) 408{ 409 pANTLR3_LEXER lexer; 410 pANTLR3_EXCEPTION ex; 411 pANTLR3_STRING ftext; 412 413 lexer = (pANTLR3_LEXER)(recognizer->super); 414 ex = lexer->rec->state->exception; 415 416 // See if there is a 'filename' we can use 417 // 418 if (ex->name == NULL) 419 { 420 ANTLR3_FPRINTF(stderr, "-unknown source-("); 421 } 422 else 423 { 424 ftext = ex->streamName->to8(ex->streamName); 425 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); 426 } 427 428 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); 429 ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ", 430 ex->type, 431 (pANTLR3_UINT8) (ex->message), 432 ex->charPositionInLine+1 433 ); 434 { 435 ANTLR3_INT32 width; 436 437 width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index)); 438 439 if (width >= 1) 440 { 441 if (isprint(ex->c)) 442 { 443 ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c); 444 } 445 else 446 { 447 ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c)); 448 } 449 ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 
20 : width ,((pANTLR3_UINT8)ex->index)); 450 } 451 else 452 { 453 ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"); 454 ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ", 455 (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine), 456 (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine) 457 ); 458 width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 459 460 if (width >= 1) 461 { 462 ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 463 } 464 else 465 { 466 ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n"); 467 } 468 } 469 } 470} 471 472static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 473{ 474 /* Install the input interface 475 */ 476 lexer->input = input; 477 478 /* We may need a token factory for the lexer; we don't destroy any existing factory 479 * until the lexer is destroyed, as people may still be using the tokens it produced. 480 * TODO: Later I will provide a dup() method for a token so that it can extract itself 481 * out of the factory. 482 */ 483 if (lexer->rec->state->tokFactory == NULL) 484 { 485 lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input); 486 } 487 else 488 { 489 /* When the input stream is being changed on the fly, rather than 490 * at the start of a new lexer, then we must tell the tokenFactory 491 * which input stream to adorn the tokens with so that when they 492 * are asked to provide their original input strings they can 493 * do so from the correct text stream. 
494 */ 495 lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input); 496 } 497 498 /* Propagate the string factory so that we preserve the encoding form from 499 * the input stream. 500 */ 501 if (lexer->rec->state->tokSource->strFactory == NULL) 502 { 503 lexer->rec->state->tokSource->strFactory = input->strFactory; 504 505 // Set the newly acquired string factory up for our pre-made tokens 506 // for EOF. 507 // 508 if (lexer->rec->state->tokSource->eofToken.strFactory == NULL) 509 { 510 lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory; 511 } 512 } 513 514 /* This is a lexer, install the appropriate exception creator 515 */ 516 lexer->rec->exConstruct = antlr3RecognitionExceptionNew; 517 518 /* Set the current token to nothing 519 */ 520 lexer->rec->state->token = NULL; 521 lexer->rec->state->text = NULL; 522 lexer->rec->state->tokenStartCharIndex = -1; 523 524 /* Copy the name of the char stream to the token source 525 */ 526 lexer->rec->state->tokSource->fileName = input->fileName; 527} 528 529/*! 530 * \brief 531 * Change to a new input stream, remembering the old one. 532 * 533 * \param lexer 534 * Pointer to the lexer instance to switch input streams for. 535 * 536 * \param input 537 * New input stream to install as the current one. 538 * 539 * Switches the current character input stream to 540 * a new one, saving the old one, which we will revert to at the end of this 541 * new one. 542 */ 543static void 544pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 545{ 546 // Do we need a new input stream stack? 547 // 548 if (lexer->rec->state->streams == NULL) 549 { 550 // This is the first call to stack a new 551 // stream and so we must create the stack first. 552 // 553 lexer->rec->state->streams = antlr3StackNew(0); 554 555 if (lexer->rec->state->streams == NULL) 556 { 557 // Could not do this, we just fail to push it. 
558 // TODO: Consider if this is what we want to do, but then 559 // any programmer can override this method to do something else. 560 return; 561 } 562 } 563 564 // We have a stack, so we can save the current input stream 565 // into it. 566 // 567 lexer->input->istream->mark(lexer->input->istream); 568 lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL); 569 570 // And now we can install this new one 571 // 572 lexer->setCharStream(lexer, input); 573} 574 575/*! 576 * \brief 577 * Stops using the current input stream and reverts to any prior 578 * input stream on the stack. 579 * 580 * \param lexer 581 * Description of parameter lexer. 582 * 583 * Pointer to a function that abandons the current input stream, whether it 584 * is empty or not and reverts to the previous stacked input stream. 585 * 586 * \remark 587 * The function fails silently if there are no prior input streams. 588 */ 589static void 590popCharStream (pANTLR3_LEXER lexer) 591{ 592 pANTLR3_INPUT_STREAM input; 593 594 // If we do not have a stream stack or we are already at the 595 // stack bottom, then do nothing. 596 // 597 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 598 { 599 // We just leave the current stream to its fate, we do not close 600 // it or anything as we do not know what the programmer intended 601 // for it. This method can always be overridden of course. 602 // So just find out what was currently saved on the stack and use 603 // that now, then pop it from the stack. 604 // 605 input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top); 606 lexer->rec->state->streams->pop(lexer->rec->state->streams); 607 608 // Now install the stream as the current one. 
609 // 610 lexer->setCharStream(lexer, input); 611 lexer->input->istream->rewindLast(lexer->input->istream); 612 } 613 return; 614} 615 616static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token) 617{ 618 lexer->rec->state->token = token; /* Voila! */ 619} 620 621static pANTLR3_COMMON_TOKEN 622emit (pANTLR3_LEXER lexer) 623{ 624 pANTLR3_COMMON_TOKEN token; 625 626 /* We could check pointers to token factories and so on, but 627 * we are in code that we want to run as fast as possible 628 * so we are not checking any errors. So make sure you have installed an input stream before 629 * trying to emit a new token. 630 */ 631 token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); 632 633 /* Install the supplied information, and some other bits we already know 634 * get added automatically, such as the input stream it is associated with 635 * (though it can all be overridden of course) 636 */ 637 token->type = lexer->rec->state->type; 638 token->channel = lexer->rec->state->channel; 639 token->start = lexer->rec->state->tokenStartCharIndex; 640 token->stop = lexer->getCharIndex(lexer) - 1; 641 token->line = lexer->rec->state->tokenStartLine; 642 token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; 643 644 if (lexer->rec->state->text != NULL) 645 { 646 token->textState = ANTLR3_TEXT_STRING; 647 token->tokText.text = lexer->rec->state->text; 648 } 649 else 650 { 651 token->textState = ANTLR3_TEXT_NONE; 652 } 653 token->lineStart = lexer->input->currentLine; 654 token->user1 = lexer->rec->state->user1; 655 token->user2 = lexer->rec->state->user2; 656 token->user3 = lexer->rec->state->user3; 657 token->custom = lexer->rec->state->custom; 658 659 lexer->rec->state->token = token; 660 661 return token; 662} 663 664/** 665 * Free the resources allocated by a lexer 666 */ 667static void 668freeLexer (pANTLR3_LEXER lexer) 669{ 670 // This may have ben a delegate or delegator lexer, in which case the 671 // state may already 
have been freed (and set to NULL therefore) 672 // so we ignore the state if we don't have it. 673 // 674 if (lexer->rec->state != NULL) 675 { 676 if (lexer->rec->state->streams != NULL) 677 { 678 lexer->rec->state->streams->free(lexer->rec->state->streams); 679 } 680 if (lexer->rec->state->tokFactory != NULL) 681 { 682 lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory); 683 lexer->rec->state->tokFactory = NULL; 684 } 685 if (lexer->rec->state->tokSource != NULL) 686 { 687 ANTLR3_FREE(lexer->rec->state->tokSource); 688 lexer->rec->state->tokSource = NULL; 689 } 690 } 691 if (lexer->rec != NULL) 692 { 693 lexer->rec->free(lexer->rec); 694 lexer->rec = NULL; 695 } 696 ANTLR3_FREE(lexer); 697} 698 699/** Implementation of matchs for the lexer, overrides any 700 * base implementation in the base recognizer. 701 * 702 * \remark 703 * Note that the generated code lays down arrays of ints for constant 704 * strings so that they are int UTF32 form! 705 */ 706static ANTLR3_BOOLEAN 707matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string) 708{ 709 while (*string != ANTLR3_STRING_TERMINATOR) 710 { 711 if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string)) 712 { 713 if (lexer->rec->state->backtracking > 0) 714 { 715 lexer->rec->state->failed = ANTLR3_TRUE; 716 return ANTLR3_FALSE; 717 } 718 719 lexer->rec->exConstruct(lexer->rec); 720 lexer->rec->state->failed = ANTLR3_TRUE; 721 722 /* TODO: Implement exception creation more fully perhaps 723 */ 724 lexer->recover(lexer); 725 return ANTLR3_FALSE; 726 } 727 728 /* Matched correctly, do consume it 729 */ 730 lexer->input->istream->consume(lexer->input->istream); 731 string++; 732 733 /* Reset any failed indicator 734 */ 735 lexer->rec->state->failed = ANTLR3_FALSE; 736 } 737 738 739 return ANTLR3_TRUE; 740} 741 742/** Implementation of matchc for the lexer, overrides any 743 * base implementation in the base recognizer. 
744 * 745 * \remark 746 * Note that the generated code lays down arrays of ints for constant 747 * strings so that they are int UTF32 form! 748 */ 749static ANTLR3_BOOLEAN 750matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c) 751{ 752 if (lexer->input->istream->_LA(lexer->input->istream, 1) == c) 753 { 754 /* Matched correctly, do consume it 755 */ 756 lexer->input->istream->consume(lexer->input->istream); 757 758 /* Reset any failed indicator 759 */ 760 lexer->rec->state->failed = ANTLR3_FALSE; 761 762 return ANTLR3_TRUE; 763 } 764 765 /* Failed to match, exception and recovery time. 766 */ 767 if (lexer->rec->state->backtracking > 0) 768 { 769 lexer->rec->state->failed = ANTLR3_TRUE; 770 return ANTLR3_FALSE; 771 } 772 773 lexer->rec->exConstruct(lexer->rec); 774 775 /* TODO: Implement exception creation more fully perhaps 776 */ 777 lexer->recover(lexer); 778 779 return ANTLR3_FALSE; 780} 781 782/** Implementation of match range for the lexer, overrides any 783 * base implementation in the base recognizer. 784 * 785 * \remark 786 * Note that the generated code lays down arrays of ints for constant 787 * strings so that they are int UTF32 form! 788 */ 789static ANTLR3_BOOLEAN 790matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high) 791{ 792 ANTLR3_UCHAR c; 793 794 /* What is in the stream at the moment? 795 */ 796 c = lexer->input->istream->_LA(lexer->input->istream, 1); 797 if ( c >= low && c <= high) 798 { 799 /* Matched correctly, consume it 800 */ 801 lexer->input->istream->consume(lexer->input->istream); 802 803 /* Reset any failed indicator 804 */ 805 lexer->rec->state->failed = ANTLR3_FALSE; 806 807 return ANTLR3_TRUE; 808 } 809 810 /* Failed to match, execption and recovery time. 
811 */ 812 813 if (lexer->rec->state->backtracking > 0) 814 { 815 lexer->rec->state->failed = ANTLR3_TRUE; 816 return ANTLR3_FALSE; 817 } 818 819 lexer->rec->exConstruct(lexer->rec); 820 821 /* TODO: Implement exception creation more fully 822 */ 823 lexer->recover(lexer); 824 825 return ANTLR3_FALSE; 826} 827 828static void 829matchAny (pANTLR3_LEXER lexer) 830{ 831 lexer->input->istream->consume(lexer->input->istream); 832} 833 834static void 835recover (pANTLR3_LEXER lexer) 836{ 837 lexer->input->istream->consume(lexer->input->istream); 838} 839 840static ANTLR3_UINT32 841getLine (pANTLR3_LEXER lexer) 842{ 843 return lexer->input->getLine(lexer->input); 844} 845 846static ANTLR3_UINT32 847getCharPositionInLine (pANTLR3_LEXER lexer) 848{ 849 return lexer->input->getCharPositionInLine(lexer->input); 850} 851 852static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer) 853{ 854 return lexer->input->istream->index(lexer->input->istream); 855} 856 857static pANTLR3_STRING 858getText (pANTLR3_LEXER lexer) 859{ 860 if (lexer->rec->state->text) 861 { 862 return lexer->rec->state->text; 863 864 } 865 return lexer->input->substr( 866 lexer->input, 867 lexer->rec->state->tokenStartCharIndex, 868 lexer->getCharIndex(lexer) - lexer->input->charByteSize 869 ); 870 871} 872 873static void * 874getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) 875{ 876 return NULL; 877} 878 879static void * 880getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 881 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) 882{ 883 return NULL; 884} 885