1/* scan.l - scanner for flex input */
2
3%{
4/*-
5 * Copyright (c) 1990 The Regents of the University of California.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Vern Paxson.
10 *
11 * The United States Government has rights in this work pursuant
12 * to contract no. DE-AC03-76SF00098 between the United States
13 * Department of Energy and the University of California.
14 *
15 * Redistribution and use in source and binary forms are permitted provided
16 * that: (1) source distributions retain this entire copyright notice and
17 * comment, and (2) distributions including binaries display the following
18 * acknowledgement:  ``This product includes software developed by the
19 * University of California, Berkeley and its contributors'' in the
20 * documentation or other materials provided with the distribution and in
21 * all advertising materials mentioning features or use of this software.
22 * Neither the name of the University nor the names of its contributors may
23 * be used to endorse or promote products derived from this software without
24 * specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 */
29
30/* $Header: /home/daffy/u0/vern/flex/RCS/scan.l,v 2.56 95/04/24 12:17:19 vern Exp $ */
31/* $FreeBSD$ */
32
33#include "flexdef.h"
34#include "parse.h"
35
/* Helper macros used by the rule actions below.
 *
 * Every macro that expands to more than one statement is wrapped in
 * do { ... } while ( 0 ) so that it behaves like a single statement,
 * e.g. as the body of an unbraced if/else.  Invoke them with a trailing
 * semicolon.
 */

/* Append the text of the current token to the action being collected. */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is non-zero. */
#define ACTION_IFDEF(def, should_define) \
	do \
		{ \
		if ( (should_define) ) \
			action_define( (def), 1 ); \
		} \
	while ( 0 )

/* Kept with its trailing semicolon, exactly as before. */
#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanning routine is named flexscan(). */
#define YY_DECL \
	int flexscan()

/* Return the first character of the token as a CHAR; the cast keeps
 * characters above 127 from turning into negative values.
 */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the token into nmstr and return it as a NAME.
 * NOTE(review): the copy is unbounded; nmstr's size is not visible in
 * this file - confirm that a token can never exceed it.
 */
#define RETURNNAME \
	do \
		{ \
		strcpy( nmstr, yytext ); \
		return NAME; \
		} \
	while ( 0 )

/* Push str[start..] back onto the input, last character first, so that
 * it is rescanned in its original order.  Uses the enclosing scope's
 * integer variable i as the loop index.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
			unput((str)[i]); \
		} \
	while ( 0 )

/* Set reject when all_upper( str ) holds.  The scanner is built
 * caseless, so the "reject" pattern matches any capitalization.
 */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( (str) ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set yymore_used when all_lower( str ) holds. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( (str) ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
67%}
68
/* Options for generating this scanner itself: patterns are matched
 * without regard to case, no default rule is generated, the output is
 * written to scan.c and the start-condition stack is enabled.
 */
69%option caseless nodefault outfile="scan.c" stack noyy_top_state
70%option nostdinit
71
/* Exclusive start conditions used by the rules below. */
72%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
73%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
74%x OPTION LINEDIR
75
/* Runs of blanks: required (WS), optional (OPTWS), and a single
 * character that is neither a blank nor a newline (NOT_WS).
 */
76WS		[[:blank:]]+
77OPTWS		[[:blank:]]*
78NOT_WS		[^[:blank:]\n]
79
/* A line ending, with an optional carriage return before the newline. */
80NL		\r?\n
81
/* NAME is a letter or underscore followed by letters, digits,
 * underscores or dashes.  NOT_NAME is a run of characters none of which
 * is a letter, underscore, '*' or newline.
 */
82NAME		([[:alpha:]_][[:alnum:]_-]*)
83NOT_NAME	[^[:alpha:]_*\n]+
84
85SCNAME		{NAME}
86
/* A backslash followed by any character other than newline, by one to
 * three octal digits, or by 'x' and one or two hexadecimal digits.
 */
87ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
88
/* Pieces of a character class: its first character (which may be ']'),
 * any later character, and a "[:name:]" expression.
 */
89FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
90CCL_CHAR	([^\\\n\]]|{ESCSEQ})
91CCL_EXPR	("[:"[[:alpha:]]+":]")
92
/* Letters of the single-letter '%' directives that the INITIAL rules
 * accept and ignore.
 */
93LEXOPT		[aceknopr]
94
95%%
	/* Declarations local to the scanning routine (see YY_DECL).  The
	 * static ones keep their values from one call to the next.
	 */
96	static int bracelevel, didadef, indented_code;
97	static int doing_rule_action = false;
98	static int option_sense;
99
	/* Not static, so these start afresh on every call. */
100	int doing_codeblock = false;
101	int i;
102	Char nmdef[MAXLINE], myesc();
103
104
105<INITIAL>{
	/* Rules for section 1 (the definitions section) of the input.
	 * Every rule here that consumes a newline also bumps linenum.
	 */
106	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
107	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
108	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
109	^"%s"{NAME}?	return SCDECL;
110	^"%x"{NAME}?	return XSCDECL;
111	^"%{".*{NL}	{
112			++linenum;
113			line_directive_out( (FILE *) 0, 1 );
114			indented_code = false;
115			BEGIN(CODEBLOCK);
116			}
117
118	{WS}		/* discard */
119
	/* "%%" at the start of a line ends section 1. */
120	^"%%".*		{
121			sectnum = 2;
122			bracelevel = 0;
123			mark_defs1();
124			line_directive_out( (FILE *) 0, 1 );
125			BEGIN(SECT2PROLOG);
126			return SECTEND;
127			}
128
129	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
130	^"%array".*{NL}		yytext_is_array = true; ++linenum;
131
132	^"%option"	BEGIN(OPTION); return OPTION_OP;
133
134	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
135	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */
136
137	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );
138
	/* A name at the start of a line begins a name definition; the
	 * name is saved in nmstr and the rest of the line is handled in
	 * PICKUPDEF.
	 */
139	^{NAME}		{
140			strcpy( nmstr, yytext );
141			didadef = false;
142			BEGIN(PICKUPDEF);
143			}
144
145	{SCNAME}	RETURNNAME;
146	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
147	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
148}
149
150
151<COMMENT>{
	/* Inside a C comment: everything is echoed with ACTION_ECHO.  The
	 * closing delimiter pops back to whichever state pushed COMMENT.
	 */
152	"*/"		ACTION_ECHO; yy_pop_state();
153	"*"		ACTION_ECHO;
154	[^*\n]+		ACTION_ECHO;
155	[^*\n]*{NL}	++linenum; ACTION_ECHO;
156}
157
158<LINEDIR>{
	/* Remainder of a "#line" directive: a number replaces linenum, a
	 * quoted string replaces infilename, and the newline pops back to
	 * the state that pushed LINEDIR.
	 */
159	\n		yy_pop_state();
160	[[:digit:]]+	linenum = myctoi( yytext );
161
162	\"[^"\n]*\"	{
			/* Copy from just past the opening quote, then
			 * overwrite the closing quote with a NUL.
			 */
163			flex_free( (void *) infilename );
164			infilename = copy_string( yytext + 1 );
165			infilename[strlen( infilename ) - 1] = '\0';
166			}
167	.		/* ignore spurious characters */
168}
169
170<CODEBLOCK>{
	/* Code in section 1, echoed with ACTION_ECHO.  A "%}" at the start
	 * of a line ends the block; when the block was entered for an
	 * indented line (indented_code), the end of that line ends it.
	 */
171	^"%}".*{NL}	++linenum; BEGIN(INITIAL);
172
173	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
174
175	{NL}		{
176			++linenum;
177			ACTION_ECHO;
178			if ( indented_code )
179				BEGIN(INITIAL);
180			}
181}
182
183
<PICKUPDEF>{
	/* Rest of a name-definition line; the name itself is already in
	 * nmstr.  The definition text is trimmed and installed with
	 * ndinstal(); a line that ends without one is an error.
	 */
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			/* nmdef[] holds MAXLINE bytes including the
			 * terminating NUL, but a definition line can be
			 * arbitrarily long; refuse anything that would
			 * not fit instead of overrunning the buffer.
			 */
			if ( yyleng >= MAXLINE )
				synerr( _( "name definition too long" ) );

			else
				{
				strcpy( (char *) nmdef, yytext );

				/* Skip trailing whitespace, including the
				 * '\r' that ".*" picks up when the line
				 * ends in CR-LF (see NL).
				 */
				for ( i = (int) strlen( (char *) nmdef ) - 1;
				      i >= 0 && (nmdef[i] == ' ' ||
						 nmdef[i] == '\t' ||
						 nmdef[i] == '\r');
				      --i )
					;

				nmdef[i + 1] = '\0';

				ndinstal( nmstr, nmdef );
				}

			/* Set even after an error so that the {NL} rule
			 * does not report the same line a second time.
			 */
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}
209
210
211<OPTION>{
	/* Rules for the text following "%option".  option_sense is the
	 * polarity of the option being read: white space resets it to
	 * true and each "no" prefix inverts it.
	 */
212	{NL}		++linenum; BEGIN(INITIAL);
213	{WS}		option_sense = true;
214
215	"="		return '=';
216
217	no		option_sense = ! option_sense;
218
219	7bit		csize = option_sense ? 128 : 256;
220	8bit		csize = option_sense ? 256 : 128;
221
222	align		long_align = option_sense;
223	always-interactive	{
224			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
225			}
226	array		yytext_is_array = option_sense;
227	backup		backing_up_report = option_sense;
228	batch		interactive = ! option_sense;
229	"c++"		C_plus_plus = option_sense;
230	caseful|case-sensitive		caseins = ! option_sense;
231	caseless|case-insensitive	caseins = option_sense;
232	debug		ddebug = option_sense;
233	default		spprdflt = ! option_sense;
234	ecs		useecs = option_sense;
	/* "fast" and "full" ignore option_sense. */
235	fast		{
236			useecs = usemecs = false;
237			use_read = fullspd = true;
238			}
239	full		{
240			useecs = usemecs = false;
241			use_read = fulltbl = true;
242			}
243	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
244	interactive	interactive = option_sense;
245	lex-compat	lex_compat = option_sense;
246	main		{
247			action_define( "YY_MAIN", option_sense );
248			do_yywrap = ! option_sense;
249			}
250	meta-ecs	usemecs = option_sense;
251	never-interactive	{
252			action_define( "YY_NEVER_INTERACTIVE", option_sense );
253			}
254	perf-report	performance_report += option_sense ? 1 : -1;
255	pointer		yytext_is_array = ! option_sense;
256	read		use_read = option_sense;
257	reject		reject_really_used = option_sense;
258	stack		action_define( "YY_STACK_USED", option_sense );
259	stdinit		do_stdinit = option_sense;
260	stdout		use_stdout = option_sense;
261	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
262	verbose		printstats = option_sense;
263	warn		nowarn = ! option_sense;
264	yylineno	do_yylineno = option_sense;
265	yymore		yymore_really_used = option_sense;
266	yywrap		do_yywrap = option_sense;
267
268	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
269	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
270	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
271
272	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
273	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
274	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
275
	/* Options that take a value are returned to the parser. */
276	outfile		return OPT_OUTFILE;
277	prefix		return OPT_PREFIX;
278	yyclass		return OPT_YYCLASS;
279
	/* A quoted value: copied into nmstr without its quotes. */
280	\"[^"\n]*\"	{
281			strcpy( nmstr, yytext + 1 );
282			nmstr[strlen( nmstr ) - 1] = '\0';
283			return NAME;
284			}
285
	/* Anything else is reported, and the RECOVER state then discards
	 * the rest of the line.
	 */
286	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
287			format_synerr( _( "unrecognized %%option: %s" ),
288				yytext );
289			BEGIN(RECOVER);
290			}
291}
292
293<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);
	/* The RECOVER rule above discards the remainder of the current
	 * line and resumes scanning in INITIAL.
	 */
294
295
296<SECT2PROLOG>{
	/* Text between the "%%" that ends section 1 and the first rule.
	 * It is echoed with ACTION_ECHO; bracelevel counts the %{ ... %}
	 * nesting.  The first non-indented line seen while bracelevel is
	 * not positive is pushed back whole and rescanned in SECT2.
	 */
297	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
298	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
299
300	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */
301
302	^{NOT_WS}.*	{	/* non-indented code */
303			if ( bracelevel <= 0 )
304				{ /* not in %{ ... %} */
305				yyless( 0 );	/* put it all back */
306				yy_set_bol( 1 );
307				mark_prolog();
308				BEGIN(SECT2);
309				}
310			else
311				ACTION_ECHO;
312			}
313
314	.*		ACTION_ECHO;
315	{NL}	++linenum; ACTION_ECHO;
316
317	<<EOF>>		{
318			mark_prolog();
319			sectnum = 0;
320			yyterminate(); /* to stop the parser */
321			}
322}
323
324<SECT2>{
	/* Rules for the patterns of section 2.  Pattern tokens are
	 * returned to the parser one at a time; white space or the end of
	 * the line after a pattern switches to ACTION (or, for "%{", to
	 * PERCENT_BRACE_ACTION) so that the action text can be collected.
	 */
325	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */
326
327	^{OPTWS}"%{"	{
328			indented_code = false;
329			doing_codeblock = true;
330			bracelevel = 1;
331			BEGIN(PERCENT_BRACE_ACTION);
332			}
333
334	^{OPTWS}"<"	BEGIN(SC); return '<';
335	^{OPTWS}"^"	return '^';
336	\"		BEGIN(QUOTE); return '"';
337	"{"/[[:digit:]]	BEGIN(NUM); return '{';
338	"$"/([[:blank:]]|{NL})	return '$';
339
340	{WS}"%{"		{
341			bracelevel = 1;
342			BEGIN(PERCENT_BRACE_ACTION);
343
344			if ( in_rule )
345				{
346				doing_rule_action = true;
347				in_rule = false;
348				return '\n';
349				}
350			}
351	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';
352
353	^{WS}"/*"	{
354			yyless( yyleng - 2 );	/* put back '/', '*' */
355			bracelevel = 0;
356			continued_action = false;
357			BEGIN(ACTION);
358			}
359
360	^{WS}		/* allow indented rules */
361
362	{WS}		{
363			/* This rule is separate from the one below because
364			 * otherwise we get variable trailing context, so
365			 * we can't build the scanner using -{f,F}.
366			 */
367			bracelevel = 0;
368			continued_action = false;
369			BEGIN(ACTION);
370
371			if ( in_rule )
372				{
373				doing_rule_action = true;
374				in_rule = false;
375				return '\n';
376				}
377			}
378
379	{OPTWS}{NL}	{
380			bracelevel = 0;
381			continued_action = false;
382			BEGIN(ACTION);
383			unput( '\n' );	/* so <ACTION> sees it */
384
385			if ( in_rule )
386				{
387				doing_rule_action = true;
388				in_rule = false;
389				return '\n';
390				}
391			}
392
393	^{OPTWS}"<<EOF>>"	|
394	"<<EOF>>"	return EOF_OP;
395
	/* "%%" at the start of a line ends section 2. */
396	^"%%".*		{
397			sectnum = 3;
398			BEGIN(SECT3);
399			yyterminate(); /* to stop the parser */
400			}
401
	/* A character class, matched up to but not including its closing
	 * bracket.  The text is used as the key for the ccl lookup; a
	 * class seen before is returned as PREVCCL, a new one is
	 * registered and then rescanned character by character.
	 */
402	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
403			int cclval;
404
405			strcpy( nmstr, yytext );
406
407			/* Check to see if we've already encountered this
408			 * ccl.
409			 */
410			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
411				{
				/* The pattern stops short of the closing
				 * bracket, so read it here.
				 */
412				if ( input() != ']' )
413					synerr( _( "bad character class" ) );
414
415				yylval = cclval;
416				++cclreuse;
417				return PREVCCL;
418				}
419			else
420				{
421				/* We fudge a bit.  We know that this ccl will
422				 * soon be numbered as lastccl + 1 by cclinit.
423				 */
424				cclinstal( (Char *) nmstr, lastccl + 1 );
425
426				/* Push back everything but the leading bracket
427				 * so the ccl can be rescanned.
428				 */
429				yyless( 1 );
430
431				BEGIN(FIRSTCCL);
432				return '[';
433				}
434			}
435
	/* A reference to a name definition: the definition's text is
	 * pushed back onto the input in place of the reference.
	 */
436	"{"{NAME}"}"	{
437			Char *nmdefptr;
438			Char *ndlookup();
439
440			strcpy( nmstr, yytext + 1 );
441			nmstr[yyleng - 2] = '\0';  /* chop trailing brace */
442
443			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
444				format_synerr(
445					_( "undefined definition {%s}" ),
446						nmstr );
447
448			else
449				{ /* push back name surrounded by ()'s */
450				int len = strlen( (char *) nmdefptr );
451
452				if ( lex_compat || nmdefptr[0] == '^' ||
453				     (len > 0 && nmdefptr[len - 1] == '$') )
454					{ /* don't use ()'s after all */
455					PUT_BACK_STRING((char *) nmdefptr, 0);
456
457					if ( nmdefptr[0] == '^' )
458						BEGIN(CARETISBOL);
459					}
460
461				else
462					{
					/* Pushed back in reverse, so the
					 * closing parenthesis goes first.
					 */
463					unput(')');
464					PUT_BACK_STRING((char *) nmdefptr, 0);
465					unput('(');
466					}
467				}
468			}
469
470	[/|*+?.(){}]	return (unsigned char) yytext[0];
471	.		RETURNCHAR;
472}
473
474
475<SC>{
	/* Inside a <...> start-condition list.  The closing '>' returns
	 * to SECT2, or to CARETISBOL when a '^' follows immediately.
	 */
476	[,*]		return (unsigned char) yytext[0];
477	">"		BEGIN(SECT2); return '>';
478	">"/^		BEGIN(CARETISBOL); return '>';
479	{SCNAME}	RETURNNAME;
480	.		{
481			format_synerr( _( "bad <start condition>: %s" ),
482				yytext );
483			}
484}
485
486<CARETISBOL>"^"		BEGIN(SECT2); return '^';
	/* The CARETISBOL rule above returns the caret as the '^' token
	 * and resumes scanning in SECT2.
	 */
487
488
489<QUOTE>{
	/* Inside a double-quoted string in a pattern: each character is
	 * returned as a CHAR until the closing quote.  A newline before
	 * the closing quote is an error and is treated as if the quote
	 * had been present.
	 */
490	[^"\n]		RETURNCHAR;
491	\"		BEGIN(SECT2); return '"';
492
493	{NL}		{
494			synerr( _( "missing quote" ) );
495			BEGIN(SECT2);
496			++linenum;
497			return '"';
498			}
499}
500
501
502<FIRSTCCL>{
	/* Start of a character class.  A leading '^' is returned as '^'.
	 * The state stays FIRSTCCL when that '^' is followed by '-' or
	 * ']'; in every other case scanning moves on to CCL.
	 */
503	"^"/[^-\]\n]	BEGIN(CCL); return '^';
504	"^"/("-"|"]")	return '^';
505	.		BEGIN(CCL); RETURNCHAR;
506}
507
508<CCL>{
	/* Body of a character class.  A '-' is returned as the '-' token
	 * only when something other than ']' or a newline follows it;
	 * otherwise it is an ordinary CHAR.  ']' ends the class.
	 */
509	-/[^\]\n]	return '-';
510	[^\]\n]		RETURNCHAR;
511	"]"		BEGIN(SECT2); return ']';
512	.|{NL}		{
513			synerr( _( "bad character class" ) );
514			BEGIN(SECT2);
515			return ']';
516			}
517}
518
519<FIRSTCCL,CCL>{
	/* "[:name:]" expressions inside a character class.  Each known
	 * name returns its own CCE_ token and moves to CCL.
	 */
520	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
521	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
522	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
523	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
524	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
525	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
526	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
527	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
528	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
529	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
530	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
531	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	/* An unknown name is reported; CCE_ALNUM is returned in its
	 * place after the error.
	 */
532	{CCL_EXPR}	{
533			format_synerr(
534				_( "bad character class expression: %s" ),
535					yytext );
536			BEGIN(CCL); return CCE_ALNUM;
537			}
538}
539
540<NUM>{
	/* Inside a {n}, {n,} or {n,m} repeat count.  On a bad character
	 * or a missing brace the error is reported and '}' is returned
	 * anyway, with scanning resumed in SECT2.
	 */
541	[[:digit:]]+	{
542			yylval = myctoi( yytext );
543			return NUMBER;
544			}
545
546	","		return ',';
547	"}"		BEGIN(SECT2); return '}';
548
549	.		{
550			synerr( _( "bad character inside {}'s" ) );
551			BEGIN(SECT2);
552			return '}';
553			}
554
555	{NL}		{
556			synerr( _( "missing }" ) );
557			BEGIN(SECT2);
558			++linenum;
559			return '}';
560			}
561}
562
563
564<PERCENT_BRACE_ACTION>{
	/* Text of a %{ ... %} block in section 2, echoed with ACTION_ECHO.
	 * A "%}" sets bracelevel to 0, and the next newline then returns
	 * to SECT2.
	 * NOTE(review): the nested <ACTION> and <CODEBLOCK,ACTION> scopes
	 * presumably add those conditions to PERCENT_BRACE_ACTION rather
	 * than replace it - confirm against flex's scoping rules.
	 */
565	{OPTWS}"%}".*		bracelevel = 0;
566
567	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );
568
569	<CODEBLOCK,ACTION>{
570		"reject"	{
571			ACTION_ECHO;
572			CHECK_REJECT(yytext);
573			}
574		"yymore"	{
575			ACTION_ECHO;
576			CHECK_YYMORE(yytext);
577			}
578	}
579
580	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
581	{NL}		{
582			++linenum;
583			ACTION_ECHO;
			/* The block is over once "%}" has been seen, or
			 * at the end of the line for an indented code
			 * block.
			 */
584			if ( bracelevel == 0 ||
585			     (doing_codeblock && indented_code) )
586				{
587				if ( doing_rule_action )
588					add_action( "\tYY_BREAK\n" );
589
590				doing_rule_action = doing_codeblock = false;
591				BEGIN(SECT2);
592				}
593			}
594}
595
596
597	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
598<ACTION>{
	/* Text of a rule's action, echoed with ACTION_ECHO.  bracelevel
	 * follows the nesting of braces; a newline seen while it is zero
	 * ends the action and returns to SECT2.
	 */
599	"{"		ACTION_ECHO; ++bracelevel;
600	"}"		ACTION_ECHO; --bracelevel;
601	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
602	{NAME}		ACTION_ECHO;
603	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
604	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
605	{NL}		{
606			++linenum;
607			ACTION_ECHO;
608			if ( bracelevel == 0 )
609				{
610				if ( doing_rule_action )
611					add_action( "\tYY_BREAK\n" );
612
613				doing_rule_action = false;
614				BEGIN(SECT2);
615				}
616			}
617	.		ACTION_ECHO;
618}
619
620<ACTION_STRING>{
	/* Inside a double-quoted string within an action.  Everything is
	 * echoed, so braces in the string do not affect bracelevel; an
	 * unescaped quote returns to ACTION.
	 */
621	[^"\\\n]+	ACTION_ECHO;
622	\\.		ACTION_ECHO;
623	{NL}		++linenum; ACTION_ECHO;
624	\"		ACTION_ECHO; BEGIN(ACTION);
625	.		ACTION_ECHO;
626}
627
628<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			/* The input ended inside a comment, an action or a
			 * string within an action: report it and stop.
			 */
629			synerr( _( "EOF encountered inside an action" ) );
630			yyterminate();
631			}
632
633
634<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			/* An escape sequence in a pattern, a quoted string
			 * or a character class: myesc() supplies the value,
			 * which is returned as a CHAR.
			 */
635			yylval = myesc( (Char *) yytext );
636
			/* An escape counts as the first character of a
			 * character class.
			 */
637			if ( YY_START == FIRSTCCL )
638				BEGIN(CCL);
639
640			return CHAR;
641			}
642
643
644<SECT3>{
	/* Section 3 is copied through with ECHO, a line at a time. */
645	.*(\n?)		ECHO;
646	<<EOF>>		sectnum = 0; yyterminate();
647}
648
649<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );
	/* The catch-all rule above applies in every start condition and
	 * reports any character that no other rule matched.
	 */
650
651%%
652
653
654int yywrap()
655	{
656	if ( --num_input_files > 0 )
657		{
658		set_input_file( *++input_files );
659		return 0;
660		}
661
662	else
663		return 1;
664	}
665
666
667/* set_input_file - open the given file (if NULL, stdin) for scanning */
668
669void set_input_file( file )
670char *file;
671	{
672	if ( file && strcmp( file, "-" ) )
673		{
674		infilename = copy_string( file );
675		yyin = fopen( infilename, "r" );
676
677		if ( yyin == NULL )
678			lerrsf( _( "can't open %s" ), file );
679		}
680
681	else
682		{
683		yyin = stdin;
684		infilename = copy_string( "<stdin>" );
685		}
686
687	linenum = 1;
688	}
689
690
691/* Wrapper routines for accessing the scanner's malloc routines. */
692
/* flex_alloc - allocate size bytes with malloc()
 *
 * Returns the new block, or a null pointer if malloc() fails.
 */

void *flex_alloc( size_t size )
	{
	return malloc( size );
	}
698
/* flex_realloc - resize a block with realloc()
 *
 * ptr  - block to resize; realloc() treats a null pointer as a request
 *        for a new block.
 * size - new size in bytes.
 *
 * Returns the resized block, or a null pointer if realloc() fails, in
 * which case the original block is left untouched.
 */

void *flex_realloc( void *ptr, size_t size )
	{
	return realloc( ptr, size );
	}
705
/* flex_free - release a block with free()
 *
 * ptr - block to release; a null pointer is accepted and ignored,
 *       because free() itself does nothing for a null pointer.
 */

void flex_free( void *ptr )
	{
	free( ptr );
	}
712