1/*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3 *
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6
/*
 * scan.c - the jam yacc scanner
 *
 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
 *			Also handle tokens abutting EOF by remembering
 *			to return EOF no matter how many times yylex()
 *			reinvokes yyline().
 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
 *			defined before Linux's yacc tries to redefine it.
 * 01/10/01 (seiwald) - \ can now escape any whitespace char
 * 11/04/02 (seiwald) - const-ing for string literals
 */
21
22# include "jam.h"
23# include "lists.h"
24# include "parse.h"
25# include "scan.h"
26# include "jamgram.h"
27# include "jambase.h"
28# include "jcache.h"
29# include "newstr.h"
30
31struct keyword {
32	const char *word;
33	int type;
34} keywords[] = {
35# include "jamgramtab.h"
36	{ 0, 0 }
37} ;
38
/*
 * One input source on the scanner's include stack.  Entries are pushed
 * by yyfparse() and popped (and freed) by yyline() once exhausted.
 */

struct include {
	struct include 	*next;		/* entry that becomes current after this one */
	const char 	*string;	/* unread remainder of the current line */
	char		**strings;	/* in-memory lines, ended by a 0 pointer */
	FILE 		*file;		/* stream being read; 0 until yyline() sets it */
	const char 	*fname;		/* name handed to yyfparse(), via copystr() */
	int 		line;		/* lines started so far, for error messages */
	char 		buf[ 512 ];	/* holds the line most recently read by fgets() */
} ;
48
49static struct include *incp = 0; /* current file; head of chain */
50
51static int scanmode = SCAN_NORMAL;
52static int anyerrors = 0;
53static char *symdump( YYSTYPE *s );
54
55# define BIGGEST_TOKEN 10240	/* no single token can be larger */
56
57/*
58 * Set parser mode: normal, string, or keyword
59 */
60
61void
62yymode( int n )
63{
64	scanmode = n;
65}
66
67void
68yyerror( const char *s )
69{
70	if( incp )
71	    printf( "%s: line %d: ", incp->fname, incp->line );
72
73	printf( "%s at %s\n", s, symdump( &yylval ) );
74
75	++anyerrors;
76}
77
78int
79yyanyerrors()
80{
81	return anyerrors != 0;
82}
83
84void
85yyfparse( const char *s )
86{
87	struct include *i = (struct include *)malloc( sizeof( *i ) );
88
89	/* Push this onto the incp chain. */
90
91	i->string = "";
92	i->strings = 0;
93	i->file = 0;
94	i->fname = copystr( s );
95	i->line = 0;
96	i->next = incp;
97	incp = i;
98
99	/* If the filename is "+", it means use the internal jambase. */
100
101	if( !strcmp( s, "+" ) )
102	    i->strings = jambase;
103}
104
105/*
106 * yyline() - read new line and return first character
107 *
108 * Fabricates a continuous stream of characters across include files,
109 * returning EOF at the bitter end.
110 */
111
112int
113yyline()
114{
115	struct include *i = incp;
116
117	if( !incp )
118	    return EOF;
119
120	/* Once we start reading from the input stream, we reset the */
121	/* include insertion point so that the next include file becomes */
122	/* the head of the list. */
123
124	/* If there is more data in this line, return it. */
125
126	if( *i->string )
127	    return *i->string++;
128
129	/* If we're reading from an internal string list, go to the */
130	/* next string. */
131
132	if( i->strings )
133	{
134	    if( !*i->strings )
135		goto next;
136
137	    i->line++;
138	    i->string = *(i->strings++);
139	    return *i->string++;
140	}
141
142	/* If necessary, open the file */
143
144#ifdef OPT_JAMFILE_CACHE_EXT
145	if( !i->file )
146	{
147		if ( strcmp( i->fname, "-" ) )
148		{
149			i->strings = jcache((char*)i->fname);
150			if (!i->strings || !*i->strings)
151				goto next;
152			i->line++;
153			i->string = *(i->strings++);
154			return *i->string++;
155		}
156		else
157		{
158			i->file = stdin;
159			if( fgets( i->buf, sizeof( i->buf ), i->file ) )
160			{
161			    i->line++;
162			    i->string = i->buf;
163			    return *i->string++;
164			}
165		}
166	}
167#else
168	if( !i->file )
169	{
170	    FILE *f = stdin;
171
172	    if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
173		perror( i->fname );
174
175	    i->file = f;
176	}
177
178	/* If there's another line in this file, start it. */
179
180	if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
181	{
182	    i->line++;
183	    i->string = i->buf;
184	    return *i->string++;
185	}
186#endif
187
188    next:
189	/* This include is done.  */
190	/* Free it up and return EOF so yyparse() returns to parse_file(). */
191
192	incp = i->next;
193
194	/* Close file, free name */
195
196	if( i->file && i->file != stdin )
197	    fclose( i->file );
198	freestr( i->fname );
199	free( (char *)i );
200
201	return EOF;
202}
203
/*
 * yylex() - set yylval to current token; return its type
 *
 * Macros to move things along:
 *
 *	yychar() - return and advance character; invalid after EOF
 *	yyprev() - back up one character; invalid before yychar()
 *
 * yychar() returns a continuous stream of characters, until it hits
 * the EOF of the current include file.  Characters are delivered as
 * unsigned char values so that a data byte can never compare equal
 * to EOF and is always a valid argument for the <ctype.h> functions.
 */

# define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )
# define yyprev() ( incp->string-- )
218
219int
220yylex()
221{
222	int c;
223	char buf[BIGGEST_TOKEN];
224	char *b = buf;
225
226	if( !incp )
227	    goto eof;
228
229	/* Get first character (whitespace or of token) */
230
231	c = yychar();
232
233	if( scanmode == SCAN_STRING )
234	{
235	    /* If scanning for a string (action's {}'s), look for the */
236	    /* closing brace.  We handle matching braces, if they match! */
237
238	    int nest = 1;
239
240	    while( c != EOF && b < buf + sizeof( buf ) )
241	    {
242		    if( c == '{' )
243			nest++;
244
245		    if( c == '}' && !--nest )
246			break;
247
248		    *b++ = c;
249
250		    c = yychar();
251	    }
252
253	    /* We ate the ending brace -- regurgitate it. */
254
255	    if( c != EOF )
256		yyprev();
257
258	    /* Check obvious errors. */
259
260	    if( b == buf + sizeof( buf ) )
261	    {
262		yyerror( "action block too big" );
263		goto eof;
264	    }
265
266	    if( nest )
267	    {
268		yyerror( "unmatched {} in action block" );
269		goto eof;
270	    }
271
272	    *b = 0;
273	    yylval.type = STRING;
274	    yylval.string = newstr( buf );
275
276	}
277	else
278	{
279	    char *b = buf;
280	    struct keyword *k;
281	    int inquote = 0;
282	    int notkeyword;
283
284	    /* Eat white space */
285
286	    for( ;; )
287	    {
288		/* Skip past white space */
289
290		while( c != EOF && isspace( c ) )
291			c = yychar();
292
293		/* Not a comment?  Swallow up comment line. */
294
295		if( c != '#' )
296			break;
297		while( ( c = yychar() ) != EOF && c != '\n' )
298			;
299	    }
300
301	    /* c now points to the first character of a token. */
302
303	    if( c == EOF )
304		goto eof;
305
306	    /* While scanning the word, disqualify it for (expensive) */
307	    /* keyword lookup when we can: $anything, "anything", \anything */
308
309	    notkeyword = c == '$';
310
311	    /* look for white space to delimit word */
312	    /* "'s get stripped but preserve white space */
313	    /* \ protects next character */
314
315	    while(
316		c != EOF &&
317		b < buf + sizeof( buf ) &&
318		( inquote || !isspace( c ) ) )
319	    {
320		if( c == '"' )
321		{
322		    /* begin or end " */
323		    inquote = !inquote;
324		    notkeyword = 1;
325		}
326		else if( c != '\\' )
327		{
328		    /* normal char */
329		    *b++ = c;
330		}
331		else if( ( c = yychar()) != EOF )
332		{
333		    /* \c */
334		    *b++ = c;
335		    notkeyword = 1;
336		}
337		else
338		{
339		    /* \EOF */
340		    break;
341		}
342
343		c = yychar();
344	    }
345
346	    /* Check obvious errors. */
347
348	    if( b == buf + sizeof( buf ) )
349	    {
350		yyerror( "string too big" );
351		goto eof;
352	    }
353
354	    if( inquote )
355	    {
356		yyerror( "unmatched \" in string" );
357		goto eof;
358	    }
359
360	    /* We looked ahead a character - back up. */
361
362	    if( c != EOF )
363		yyprev();
364
365	    /* scan token table */
366	    /* don't scan if it's obviously not a keyword or if its */
367	    /* an alphabetic when were looking for punctuation */
368
369	    *b = 0;
370	    yylval.type = ARG;
371
372	    if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
373	    {
374		for( k = keywords; k->word; k++ )
375		    if( *buf == *k->word && !strcmp( k->word, buf ) )
376		{
377		    yylval.type = k->type;
378		    yylval.string = k->word;	/* used by symdump */
379		    break;
380		}
381	    }
382
383	    if( yylval.type == ARG )
384		yylval.string = newstr( buf );
385	}
386
387	if( DEBUG_SCAN )
388		printf( "scan %s\n", symdump( &yylval ) );
389
390	return yylval.type;
391
392eof:
393	yylval.type = EOF;
394	return yylval.type;
395}
396
397static char *
398symdump( YYSTYPE *s )
399{
400	static char buf[ BIGGEST_TOKEN + 20 ];
401
402	switch( s->type )
403	{
404	case EOF:
405		sprintf( buf, "EOF" );
406		break;
407	case 0:
408		sprintf( buf, "unknown symbol %s", s->string );
409		break;
410	case ARG:
411		sprintf( buf, "argument %s", s->string );
412		break;
413	case STRING:
414		sprintf( buf, "string \"%s\"", s->string );
415		break;
416	default:
417		sprintf( buf, "keyword %s", s->string );
418		break;
419	}
420	return buf;
421}
422