1/** \file
2 *
3 * Base implementation of an antlr 3 lexer.
4 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these whose usual default
 * implementation in the base recognizer is parser specific.
9 */
10
11// [The "BSD licence"]
12// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13// http://www.temporal-wave.com
14// http://www.linkedin.com/in/jimidle
15//
16// All rights reserved.
17//
18// Redistribution and use in source and binary forms, with or without
19// modification, are permitted provided that the following conditions
20// are met:
21// 1. Redistributions of source code must retain the above copyright
22//    notice, this list of conditions and the following disclaimer.
23// 2. Redistributions in binary form must reproduce the above copyright
24//    notice, this list of conditions and the following disclaimer in the
25//    documentation and/or other materials provided with the distribution.
26// 3. The name of the author may not be used to endorse or promote products
27//    derived from this software without specific prior written permission.
28//
29// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40#include    <antlr3lexer.h>
41
/* Forward declarations of the file-local implementations that antlr3LexerNew()
 * installs into the lexer, recognizer and token source function pointers.
 */

/* Input stream management and the default (placeholder) rule entry point.
 */
static void					mTokens						(pANTLR3_LEXER lexer);
static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
static void					popCharStream				(pANTLR3_LEXER lexer);

/* Token emission, character matching and position queries.
 */
static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
static void					matchAny					(pANTLR3_LEXER lexer);
static void					recover						(pANTLR3_LEXER lexer);
static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);

/* Overrides installed on the base recognizer.
 */
static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);

static void					reset						(pANTLR3_BASE_RECOGNIZER rec);

/* Installed as lexer->free.
 */
static void					freeLexer					(pANTLR3_LEXER lexer);
69
70
71ANTLR3_API pANTLR3_LEXER
72antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73{
74    pANTLR3_LEXER   lexer;
75    pANTLR3_COMMON_TOKEN	specialT;
76
77	/* Allocate memory
78	*/
79	lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80
81	if	(lexer == NULL)
82	{
83		return	NULL;
84	}
85
86	/* Now we need to create the base recognizer
87	*/
88	lexer->rec	    =  antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89
90	if	(lexer->rec == NULL)
91	{
92		lexer->free(lexer);
93		return	NULL;
94	}
95	lexer->rec->super  =  lexer;
96
97	lexer->rec->displayRecognitionError	    = displayRecognitionError;
98	lexer->rec->reportError					= reportError;
99	lexer->rec->reset						= reset;
100	lexer->rec->getCurrentInputSymbol		= getCurrentInputSymbol;
101	lexer->rec->getMissingSymbol			= getMissingSymbol;
102
103	/* Now install the token source interface
104	*/
105	if	(lexer->rec->state->tokSource == NULL)
106	{
107		lexer->rec->state->tokSource	= (pANTLR3_TOKEN_SOURCE)ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_SOURCE));
108
109		if	(lexer->rec->state->tokSource == NULL)
110		{
111			lexer->rec->free(lexer->rec);
112			lexer->free(lexer);
113
114			return	NULL;
115		}
116		lexer->rec->state->tokSource->super    =  lexer;
117
118		/* Install the default nextToken() method, which may be overridden
119		 * by generated code, or by anything else in fact.
120		 */
121		lexer->rec->state->tokSource->nextToken	    =  nextToken;
122		lexer->rec->state->tokSource->strFactory    = NULL;
123
124		lexer->rec->state->tokFactory				= NULL;
125	}
126
127    /* Install the lexer API
128     */
129    lexer->setCharStream			=  setCharStream;
130    lexer->mTokens					= (void (*)(void *))(mTokens);
131    lexer->setCharStream			=  setCharStream;
132    lexer->pushCharStream			=  pushCharStream;
133    lexer->popCharStream			=  popCharStream;
134    lexer->emit						=  emit;
135    lexer->emitNew					=  emitNew;
136    lexer->matchs					=  matchs;
137    lexer->matchc					=  matchc;
138    lexer->matchRange				=  matchRange;
139    lexer->matchAny					=  matchAny;
140    lexer->recover					=  recover;
141    lexer->getLine					=  getLine;
142    lexer->getCharIndex				=  getCharIndex;
143    lexer->getCharPositionInLine    =  getCharPositionInLine;
144    lexer->getText					=  getText;
145    lexer->free						=  freeLexer;
146
147    /* Initialise the eof token
148     */
149    specialT				= &(lexer->rec->state->tokSource->eofToken);
150    antlr3SetTokenAPI	  (specialT);
151    specialT->setType	  (specialT, ANTLR3_TOKEN_EOF);
152    specialT->factoryMade	= ANTLR3_TRUE;					// Prevent things trying to free() it
153    specialT->strFactory    = NULL;
154
155	// Initialize the skip token.
156	//
157    specialT				= &(lexer->rec->state->tokSource->skipToken);
158    antlr3SetTokenAPI	  (specialT);
159    specialT->setType	  (specialT, ANTLR3_TOKEN_INVALID);
160    specialT->factoryMade	= ANTLR3_TRUE;					// Prevent things trying to free() it
161    specialT->strFactory    = NULL;
162    return  lexer;
163}
164
165static void
166reset	(pANTLR3_BASE_RECOGNIZER rec)
167{
168    pANTLR3_LEXER   lexer;
169
170    lexer   = rec->super;
171
172    lexer->rec->state->token			= NULL;
173    lexer->rec->state->type				= ANTLR3_TOKEN_INVALID;
174    lexer->rec->state->channel			= ANTLR3_TOKEN_DEFAULT_CHANNEL;
175    lexer->rec->state->tokenStartCharIndex		= -1;
176    lexer->rec->state->tokenStartCharPositionInLine = -1;
177    lexer->rec->state->tokenStartLine		= -1;
178
179    lexer->rec->state->text	    = NULL;
180
181    if (lexer->input != NULL)
182    {
183	lexer->input->istream->seek(lexer->input->istream, 0);
184    }
185}
186
187///
188/// \brief
189/// Returns the next available token from the current input stream.
190///
191/// \param toksource
192/// Points to the implementation of a token source. The lexer is
193/// addressed by the super structure pointer.
194///
195/// \returns
196/// The next token in the current input stream or the EOF token
197/// if there are no more tokens.
198///
199/// \remarks
200/// Write remarks for nextToken here.
201///
202/// \see nextToken
203///
204ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
205nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
206{
207	pANTLR3_LEXER   lexer;
208
209	lexer   = (pANTLR3_LEXER)(toksource->super);
210
211	/// Loop until we get a non skipped token or EOF
212	///
213	for	(;;)
214	{
215		// Get rid of any previous token (token factory takes care of
216		// any de-allocation when this token is finally used up.
217		//
218		lexer->rec->state->token		    = NULL;
219		lexer->rec->state->error		    = ANTLR3_FALSE;	    // Start out without an exception
220		lexer->rec->state->failed		    = ANTLR3_FALSE;
221
222
223
224		// Now call the matching rules and see if we can generate a new token
225		//
226		for	(;;)
227		{
228            // Record the start of the token in our input stream.
229            //
230            lexer->rec->state->channel						= ANTLR3_TOKEN_DEFAULT_CHANNEL;
231            lexer->rec->state->tokenStartCharIndex			= lexer->input->istream->index(lexer->input->istream);
232            lexer->rec->state->tokenStartCharPositionInLine	= lexer->input->getCharPositionInLine(lexer->input);
233        	lexer->rec->state->tokenStartLine				= lexer->input->getLine(lexer->input);
234            lexer->rec->state->text							= NULL;
235
236			if  (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
237			{
238				// Reached the end of the current stream, nothing more to do if this is
239				// the last in the stack.
240				//
241				pANTLR3_COMMON_TOKEN    teof = &(toksource->eofToken);
242
243				teof->setStartIndex (teof, lexer->getCharIndex(lexer));
244				teof->setStopIndex  (teof, lexer->getCharIndex(lexer));
245				teof->setLine	(teof, lexer->getLine(lexer));
246				teof->factoryMade = ANTLR3_TRUE;	// This isn't really manufactured but it stops things from trying to free it
247				return  teof;
248			}
249
250			lexer->rec->state->token		= NULL;
251			lexer->rec->state->error		= ANTLR3_FALSE;	    // Start out without an exception
252			lexer->rec->state->failed		= ANTLR3_FALSE;
253
254			// Call the generated lexer, see if it can get a new token together.
255			//
256			lexer->mTokens(lexer->ctx);
257
258			if  (lexer->rec->state->error  == ANTLR3_TRUE)
259			{
260				// Recognition exception, report it and try to recover.
261				//
262				lexer->rec->state->failed	    = ANTLR3_TRUE;
263				lexer->rec->reportError(lexer->rec);
264				lexer->recover(lexer);
265			}
266			else
267			{
268				if (lexer->rec->state->token == NULL)
269				{
270					// Emit the real token, which adds it in to the token stream basically
271					//
272					emit(lexer);
273				}
274				else if	(lexer->rec->state->token ==  &(toksource->skipToken))
275				{
276					// A real token could have been generated, but "Computer say's naaaaah" and it
277					// it is just something we need to skip altogether.
278					//
279					continue;
280				}
281
282				// Good token, not skipped, not EOF token
283				//
284				return  lexer->rec->state->token;
285			}
286		}
287	}
288}
289
290/**
291 * \brief
292 * Default implementation of the nextToken() call for a lexer.
293 *
294 * \param toksource
295 * Points to the implementation of a token source. The lexer is
296 * addressed by the super structure pointer.
297 *
298 * \returns
299 * The next token in the current input stream or the EOF token
300 * if there are no more tokens in any input stream in the stack.
301 *
302 * Write detailed description for nextToken here.
303 *
304 * \remarks
305 * Write remarks for nextToken here.
306 *
307 * \see nextTokenStr
308 */
309static pANTLR3_COMMON_TOKEN
310nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
311{
312	pANTLR3_COMMON_TOKEN tok;
313
314	// Find the next token in the current stream
315	//
316	tok = nextTokenStr(toksource);
317
318	// If we got to the EOF token then switch to the previous
319	// input stream if there were any and just return the
320	// EOF if there are none. We must check the next token
321	// in any outstanding input stream we pop into the active
322	// role to see if it was sitting at EOF after PUSHing the
323	// stream we just consumed, otherwise we will return EOF
324	// on the reinstalled input stream, when in actual fact
325	// there might be more input streams to POP before the
326	// real EOF of the whole logical inptu stream. Hence we
327	// use a while loop here until we find somethign in the stream
328	// that isn't EOF or we reach the actual end of the last input
329	// stream on the stack.
330	//
331	while	(tok->type == ANTLR3_TOKEN_EOF)
332	{
333		pANTLR3_LEXER   lexer;
334
335		lexer   = (pANTLR3_LEXER)(toksource->super);
336
337		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
338		{
339			// We have another input stream in the stack so we
340			// need to revert to it, then resume the loop to check
341			// it wasn't sitting at EOF itself.
342			//
343			lexer->popCharStream(lexer);
344			tok = nextTokenStr(toksource);
345		}
346		else
347		{
348			// There were no more streams on the input stack
349			// so this EOF is the 'real' logical EOF for
350			// the input stream. So we just exit the loop and
351			// return the EOF we have found.
352			//
353			break;
354		}
355
356	}
357
358	// return whatever token we have, which may be EOF
359	//
360	return  tok;
361}
362
363ANTLR3_API pANTLR3_LEXER
364antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
365{
366    pANTLR3_LEXER   lexer;
367
368    // Create a basic lexer first
369    //
370    lexer   = antlr3LexerNew(sizeHint, state);
371
372    if	(lexer != NULL)
373    {
374		// Install the input stream and reset the lexer
375		//
376		setCharStream(lexer, input);
377    }
378
379    return  lexer;
380}
381
382static void mTokens	    (pANTLR3_LEXER lexer)
383{
384    if	(lexer)	    // Fool compiler, avoid pragmas
385    {
386		ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
387    }
388}
389
390static void
391reportError		    (pANTLR3_BASE_RECOGNIZER rec)
392{
393    // Indicate this recognizer had an error while processing.
394	//
395	rec->state->errorCount++;
396
397    rec->displayRecognitionError(rec, rec->state->tokenNames);
398}
399
400#ifdef	ANTLR3_WINDOWS
401#pragma warning( disable : 4100 )
402#endif
403
404/** Default lexer error handler (works for 8 bit streams only!!!)
405 */
406static void
407displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
408{
409    pANTLR3_LEXER			lexer;
410	pANTLR3_EXCEPTION	    ex;
411	pANTLR3_STRING			ftext;
412
413    lexer   = (pANTLR3_LEXER)(recognizer->super);
414	ex		= lexer->rec->state->exception;
415
416	// See if there is a 'filename' we can use
417    //
418    if	(ex->name == NULL)
419    {
420		ANTLR3_FPRINTF(stderr, "-unknown source-(");
421    }
422    else
423    {
424		ftext = ex->streamName->to8(ex->streamName);
425		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
426    }
427
428    ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
429    ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
430						ex->type,
431						(pANTLR3_UINT8)	   (ex->message),
432					    ex->charPositionInLine+1
433		    );
434	{
435		ANTLR3_INT32	width;
436
437		width	= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
438
439		if	(width >= 1)
440		{
441			if	(isprint(ex->c))
442			{
443				ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
444			}
445			else
446			{
447				ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
448			}
449			ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
450		}
451		else
452		{
453			ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
454			ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
455								(ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
456								(ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
457								);
458			width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
459
460			if	(width >= 1)
461			{
462				ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
463			}
464			else
465			{
466				ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
467			}
468		}
469	}
470}
471
472static void setCharStream   (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
473{
474    /* Install the input interface
475     */
476    lexer->input	= input;
477
478    /* We may need a token factory for the lexer; we don't destroy any existing factory
479     * until the lexer is destroyed, as people may still be using the tokens it produced.
480     * TODO: Later I will provide a dup() method for a token so that it can extract itself
481     * out of the factory.
482     */
483    if	(lexer->rec->state->tokFactory == NULL)
484    {
485	lexer->rec->state->tokFactory	= antlr3TokenFactoryNew(input);
486    }
487    else
488    {
489	/* When the input stream is being changed on the fly, rather than
490	 * at the start of a new lexer, then we must tell the tokenFactory
491	 * which input stream to adorn the tokens with so that when they
492	 * are asked to provide their original input strings they can
493	 * do so from the correct text stream.
494	 */
495	lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
496    }
497
498    /* Propagate the string factory so that we preserve the encoding form from
499     * the input stream.
500     */
501    if	(lexer->rec->state->tokSource->strFactory == NULL)
502    {
503        lexer->rec->state->tokSource->strFactory	= input->strFactory;
504
505        // Set the newly acquired string factory up for our pre-made tokens
506        // for EOF.
507        //
508        if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
509        {
510            lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
511        }
512    }
513
514    /* This is a lexer, install the appropriate exception creator
515     */
516    lexer->rec->exConstruct = antlr3RecognitionExceptionNew;
517
518    /* Set the current token to nothing
519     */
520    lexer->rec->state->token		= NULL;
521    lexer->rec->state->text			= NULL;
522    lexer->rec->state->tokenStartCharIndex	= -1;
523
524    /* Copy the name of the char stream to the token source
525     */
526    lexer->rec->state->tokSource->fileName = input->fileName;
527}
528
529/*!
530 * \brief
531 * Change to a new input stream, remembering the old one.
532 *
533 * \param lexer
534 * Pointer to the lexer instance to switch input streams for.
535 *
536 * \param input
537 * New input stream to install as the current one.
538 *
539 * Switches the current character input stream to
540 * a new one, saving the old one, which we will revert to at the end of this
541 * new one.
542 */
543static void
544pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
545{
546	// Do we need a new input stream stack?
547	//
548	if	(lexer->rec->state->streams == NULL)
549	{
550		// This is the first call to stack a new
551		// stream and so we must create the stack first.
552		//
553		lexer->rec->state->streams = antlr3StackNew(0);
554
555		if  (lexer->rec->state->streams == NULL)
556		{
557			// Could not do this, we just fail to push it.
558			// TODO: Consider if this is what we want to do, but then
559			//       any programmer can override this method to do something else.
560			return;
561		}
562	}
563
564	// We have a stack, so we can save the current input stream
565	// into it.
566	//
567	lexer->input->istream->mark(lexer->input->istream);
568	lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
569
570	// And now we can install this new one
571	//
572	lexer->setCharStream(lexer, input);
573}
574
575/*!
576 * \brief
577 * Stops using the current input stream and reverts to any prior
578 * input stream on the stack.
579 *
580 * \param lexer
581 * Description of parameter lexer.
582 *
583 * Pointer to a function that abandons the current input stream, whether it
584 * is empty or not and reverts to the previous stacked input stream.
585 *
586 * \remark
587 * The function fails silently if there are no prior input streams.
588 */
589static void
590popCharStream   (pANTLR3_LEXER lexer)
591{
592    pANTLR3_INPUT_STREAM input;
593
594    // If we do not have a stream stack or we are already at the
595    // stack bottom, then do nothing.
596    //
597    if	(lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
598    {
599	// We just leave the current stream to its fate, we do not close
600	// it or anything as we do not know what the programmer intended
601	// for it. This method can always be overridden of course.
602	// So just find out what was currently saved on the stack and use
603	// that now, then pop it from the stack.
604	//
605	input	= (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
606	lexer->rec->state->streams->pop(lexer->rec->state->streams);
607
608	// Now install the stream as the current one.
609	//
610	lexer->setCharStream(lexer, input);
611	lexer->input->istream->rewindLast(lexer->input->istream);
612    }
613    return;
614}
615
616static void emitNew	    (pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token)
617{
618    lexer->rec->state->token    = token;	/* Voila!   */
619}
620
621static pANTLR3_COMMON_TOKEN
622emit	    (pANTLR3_LEXER lexer)
623{
624    pANTLR3_COMMON_TOKEN	token;
625
626    /* We could check pointers to token factories and so on, but
627     * we are in code that we want to run as fast as possible
628     * so we are not checking any errors. So make sure you have installed an input stream before
629     * trying to emit a new token.
630     */
631    token   = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
632
633    /* Install the supplied information, and some other bits we already know
634     * get added automatically, such as the input stream it is associated with
635     * (though it can all be overridden of course)
636     */
637    token->type		    = lexer->rec->state->type;
638    token->channel	    = lexer->rec->state->channel;
639    token->start	    = lexer->rec->state->tokenStartCharIndex;
640    token->stop		    = lexer->getCharIndex(lexer) - 1;
641    token->line		    = lexer->rec->state->tokenStartLine;
642    token->charPosition	= lexer->rec->state->tokenStartCharPositionInLine;
643
644	if	(lexer->rec->state->text != NULL)
645	{
646		token->textState		= ANTLR3_TEXT_STRING;
647		token->tokText.text	    = lexer->rec->state->text;
648	}
649	else
650	{
651		token->textState	= ANTLR3_TEXT_NONE;
652	}
653    token->lineStart	= lexer->input->currentLine;
654	token->user1		= lexer->rec->state->user1;
655	token->user2		= lexer->rec->state->user2;
656	token->user3		= lexer->rec->state->user3;
657	token->custom		= lexer->rec->state->custom;
658
659    lexer->rec->state->token	    = token;
660
661    return  token;
662}
663
664/**
665 * Free the resources allocated by a lexer
666 */
667static void
668freeLexer    (pANTLR3_LEXER lexer)
669{
670	// This may have ben a delegate or delegator lexer, in which case the
671	// state may already have been freed (and set to NULL therefore)
672	// so we ignore the state if we don't have it.
673	//
674	if	(lexer->rec->state != NULL)
675	{
676		if	(lexer->rec->state->streams != NULL)
677		{
678			lexer->rec->state->streams->free(lexer->rec->state->streams);
679		}
680		if	(lexer->rec->state->tokFactory != NULL)
681		{
682			lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
683			lexer->rec->state->tokFactory = NULL;
684		}
685		if	(lexer->rec->state->tokSource != NULL)
686		{
687			ANTLR3_FREE(lexer->rec->state->tokSource);
688			lexer->rec->state->tokSource = NULL;
689		}
690	}
691	if	(lexer->rec != NULL)
692	{
693		lexer->rec->free(lexer->rec);
694		lexer->rec = NULL;
695	}
696	ANTLR3_FREE(lexer);
697}
698
699/** Implementation of matchs for the lexer, overrides any
700 *  base implementation in the base recognizer.
701 *
702 *  \remark
703 *  Note that the generated code lays down arrays of ints for constant
704 *  strings so that they are int UTF32 form!
705 */
706static ANTLR3_BOOLEAN
707matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
708{
709	while   (*string != ANTLR3_STRING_TERMINATOR)
710	{
711		if  (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
712		{
713			if	(lexer->rec->state->backtracking > 0)
714			{
715				lexer->rec->state->failed = ANTLR3_TRUE;
716				return ANTLR3_FALSE;
717			}
718
719			lexer->rec->exConstruct(lexer->rec);
720			lexer->rec->state->failed	 = ANTLR3_TRUE;
721
722			/* TODO: Implement exception creation more fully perhaps
723			 */
724			lexer->recover(lexer);
725			return  ANTLR3_FALSE;
726		}
727
728		/* Matched correctly, do consume it
729		 */
730		lexer->input->istream->consume(lexer->input->istream);
731		string++;
732
733		/* Reset any failed indicator
734		 */
735		lexer->rec->state->failed = ANTLR3_FALSE;
736	}
737
738
739	return  ANTLR3_TRUE;
740}
741
742/** Implementation of matchc for the lexer, overrides any
743 *  base implementation in the base recognizer.
744 *
745 *  \remark
746 *  Note that the generated code lays down arrays of ints for constant
747 *  strings so that they are int UTF32 form!
748 */
749static ANTLR3_BOOLEAN
750matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
751{
752	if	(lexer->input->istream->_LA(lexer->input->istream, 1) == c)
753	{
754		/* Matched correctly, do consume it
755		 */
756		lexer->input->istream->consume(lexer->input->istream);
757
758		/* Reset any failed indicator
759		 */
760		lexer->rec->state->failed = ANTLR3_FALSE;
761
762		return	ANTLR3_TRUE;
763	}
764
765	/* Failed to match, exception and recovery time.
766	 */
767	if	(lexer->rec->state->backtracking > 0)
768	{
769		lexer->rec->state->failed  = ANTLR3_TRUE;
770		return	ANTLR3_FALSE;
771	}
772
773	lexer->rec->exConstruct(lexer->rec);
774
775	/* TODO: Implement exception creation more fully perhaps
776	 */
777	lexer->recover(lexer);
778
779	return  ANTLR3_FALSE;
780}
781
782/** Implementation of match range for the lexer, overrides any
783 *  base implementation in the base recognizer.
784 *
785 *  \remark
786 *  Note that the generated code lays down arrays of ints for constant
787 *  strings so that they are int UTF32 form!
788 */
789static ANTLR3_BOOLEAN
790matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
791{
792    ANTLR3_UCHAR    c;
793
794    /* What is in the stream at the moment?
795     */
796    c	= lexer->input->istream->_LA(lexer->input->istream, 1);
797    if	( c >= low && c <= high)
798    {
799	/* Matched correctly, consume it
800	 */
801	lexer->input->istream->consume(lexer->input->istream);
802
803	/* Reset any failed indicator
804	 */
805	lexer->rec->state->failed = ANTLR3_FALSE;
806
807	return	ANTLR3_TRUE;
808    }
809
810    /* Failed to match, execption and recovery time.
811     */
812
813    if	(lexer->rec->state->backtracking > 0)
814    {
815	lexer->rec->state->failed  = ANTLR3_TRUE;
816	return	ANTLR3_FALSE;
817    }
818
819    lexer->rec->exConstruct(lexer->rec);
820
821    /* TODO: Implement exception creation more fully
822     */
823    lexer->recover(lexer);
824
825    return  ANTLR3_FALSE;
826}
827
828static void
829matchAny	    (pANTLR3_LEXER lexer)
830{
831    lexer->input->istream->consume(lexer->input->istream);
832}
833
834static void
835recover	    (pANTLR3_LEXER lexer)
836{
837    lexer->input->istream->consume(lexer->input->istream);
838}
839
840static ANTLR3_UINT32
841getLine	    (pANTLR3_LEXER lexer)
842{
843    return  lexer->input->getLine(lexer->input);
844}
845
846static ANTLR3_UINT32
847getCharPositionInLine	(pANTLR3_LEXER lexer)
848{
849    return  lexer->input->getCharPositionInLine(lexer->input);
850}
851
852static ANTLR3_MARKER	getCharIndex	    (pANTLR3_LEXER lexer)
853{
854    return lexer->input->istream->index(lexer->input->istream);
855}
856
857static pANTLR3_STRING
858getText	    (pANTLR3_LEXER lexer)
859{
860	if (lexer->rec->state->text)
861	{
862		return	lexer->rec->state->text;
863
864	}
865	return  lexer->input->substr(
866									lexer->input,
867									lexer->rec->state->tokenStartCharIndex,
868									lexer->getCharIndex(lexer) - lexer->input->charByteSize
869							);
870
871}
872
873static void *
874getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
875{
876	return NULL;
877}
878
879static void *
880getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
881									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
882{
883	return NULL;
884}
885