1/* parse.y - parser for flex input */
2
3%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
5
6%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
8
9%{
10/*-
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Vern Paxson.
16 *
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
20 *
21 * Redistribution and use in source and binary forms with or without
22 * modification are permitted provided that: (1) source distributions retain
23 * this entire copyright notice and comment, and (2) distributions including
24 * binaries display the following acknowledgement:  ``This product includes
25 * software developed by the University of California, Berkeley and its
26 * contributors'' in the documentation or other materials provided with the
27 * distribution and in all advertising materials mentioning features or use
28 * of this software.  Neither the name of the University nor the names of
29 * its contributors may be used to endorse or promote products derived from
30 * this software without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
34 */
35
36/* $Header: /projects/cvsroot/src/router/flex/parse.y,v 1.1.1.1 2001/04/08 23:53:37 mhuang Exp $ */
37
38
39/* Some versions of bison are broken in that they use alloca() but don't
40 * declare it properly.  The following is the patented (just kidding!)
41 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
42 */
43#ifdef YYBISON
44/* AIX requires this to be the first thing in the file.  What a piece.  */
45# ifdef _AIX
46 #pragma alloca
47# endif
48#endif
49
50#include "flexdef.h"
51
52/* The remainder of the alloca() cruft has to come after including flexdef.h,
53 * so HAVE_ALLOCA_H is (possibly) defined.
54 */
55#ifdef YYBISON
56# ifdef __GNUC__
57#  ifndef alloca
58#   define alloca __builtin_alloca
59#  endif
60# else
61#  if HAVE_ALLOCA_H
62#   include <alloca.h>
63#  else
64#   ifdef __hpux
65void *alloca ();
66#   else
67#    ifdef __TURBOC__
68#     include <malloc.h>
69#    else
70char *alloca ();
71#    endif
72#   endif
73#  endif
74# endif
75#endif
76
77/* Bletch, ^^^^ that was ugly! */
78
79
80int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
81int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
82
83int *scon_stk;
84int scon_stk_ptr;
85
86static int madeany = false;  /* whether we've made the '.' character class */
87int previous_continued_action;	/* whether the previous rule's action was '|' */
88
89/* Expand a POSIX character class expression. */
90#define CCL_EXPR(func) \
91	{ \
92	int c; \
93	for ( c = 0; c < csize; ++c ) \
94		if ( isascii(c) && func(c) ) \
95			ccladd( currccl, c ); \
96	}
97
98/* While POSIX defines isblank(), it's not ANSI C. */
99#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
100
101/* On some over-ambitious machines, such as DEC Alpha's, the default
102 * token type is "long" instead of "int"; this leads to problems with
103 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
104 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
105 * following should ensure that the default token type is "int".
106 */
107#define YYSTYPE int
108
109%}
110
111%%
112goal		:  initlex sect1 sect1end sect2 initforrule
113			{ /* add default rule */
114			int def_rule;
115
116			pat = cclinit();
117			cclnegate( pat );
118
119			def_rule = mkstate( -pat );
120
121			/* Remember the number of the default rule so we
122			 * don't generate "can't match" warnings for it.
123			 */
124			default_rule = num_rules;
125
126			finish_rule( def_rule, false, 0, 0 );
127
128			for ( i = 1; i <= lastsc; ++i )
129				scset[i] = mkbranch( scset[i], def_rule );
130
131			if ( spprdflt )
132				add_action(
133				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
134			else
135				add_action( "ECHO" );
136
137			add_action( ";\n\tYY_BREAK\n" );
138			}
139		;
140
141initlex		:
142			{ /* initialize for processing rules */
143
144			/* Create default DFA start condition. */
145			scinstal( "INITIAL", false );
146			}
147		;
148
149sect1		:  sect1 startconddecl namelist1
150		|  sect1 options
151		|
152		|  error
153			{ synerr( "unknown error processing section 1" ); }
154		;
155
156sect1end	:  SECTEND
157			{
158			check_options();
159			scon_stk = allocate_integer_array( lastsc + 1 );
160			scon_stk_ptr = 0;
161			}
162		;
163
164startconddecl	:  SCDECL
165			{ xcluflg = false; }
166
167		|  XSCDECL
168			{ xcluflg = true; }
169		;
170
171namelist1	:  namelist1 NAME
172			{ scinstal( nmstr, xcluflg ); }
173
174		|  NAME
175			{ scinstal( nmstr, xcluflg ); }
176
177		|  error
178			{ synerr( "bad start condition list" ); }
179		;
180
181options		:  OPTION_OP optionlist
182		;
183
184optionlist	:  optionlist option
185		|
186		;
187
188option		:  OPT_OUTFILE '=' NAME
189			{
190			outfilename = copy_string( nmstr );
191			did_outfilename = 1;
192			}
193		|  OPT_PREFIX '=' NAME
194			{ prefix = copy_string( nmstr ); }
195		|  OPT_YYCLASS '=' NAME
196			{ yyclass = copy_string( nmstr ); }
197		;
198
199sect2		:  sect2 scon initforrule flexrule '\n'
200			{ scon_stk_ptr = $2; }
201		|  sect2 scon '{' sect2 '}'
202			{ scon_stk_ptr = $2; }
203		|
204		;
205
206initforrule	:
207			{
208			/* Initialize for a parse of one rule. */
209			trlcontxt = variable_trail_rule = varlength = false;
210			trailcnt = headcnt = rulelen = 0;
211			current_state_type = STATE_NORMAL;
212			previous_continued_action = continued_action;
213			in_rule = true;
214
215			new_rule();
216			}
217		;
218
219flexrule	:  '^' rule
220			{
221			pat = $2;
222			finish_rule( pat, variable_trail_rule,
223				headcnt, trailcnt );
224
225			if ( scon_stk_ptr > 0 )
226				{
227				for ( i = 1; i <= scon_stk_ptr; ++i )
228					scbol[scon_stk[i]] =
229						mkbranch( scbol[scon_stk[i]],
230								pat );
231				}
232
233			else
234				{
235				/* Add to all non-exclusive start conditions,
236				 * including the default (0) start condition.
237				 */
238
239				for ( i = 1; i <= lastsc; ++i )
240					if ( ! scxclu[i] )
241						scbol[i] = mkbranch( scbol[i],
242									pat );
243				}
244
245			if ( ! bol_needed )
246				{
247				bol_needed = true;
248
249				if ( performance_report > 1 )
250					pinpoint_message(
251			"'^' operator results in sub-optimal performance" );
252				}
253			}
254
255		|  rule
256			{
257			pat = $1;
258			finish_rule( pat, variable_trail_rule,
259				headcnt, trailcnt );
260
261			if ( scon_stk_ptr > 0 )
262				{
263				for ( i = 1; i <= scon_stk_ptr; ++i )
264					scset[scon_stk[i]] =
265						mkbranch( scset[scon_stk[i]],
266								pat );
267				}
268
269			else
270				{
271				for ( i = 1; i <= lastsc; ++i )
272					if ( ! scxclu[i] )
273						scset[i] =
274							mkbranch( scset[i],
275								pat );
276				}
277			}
278
279		|  EOF_OP
280			{
281			if ( scon_stk_ptr > 0 )
282				build_eof_action();
283
284			else
285				{
286				/* This EOF applies to all start conditions
287				 * which don't already have EOF actions.
288				 */
289				for ( i = 1; i <= lastsc; ++i )
290					if ( ! sceof[i] )
291						scon_stk[++scon_stk_ptr] = i;
292
293				if ( scon_stk_ptr == 0 )
294					warn(
295			"all start conditions already have <<EOF>> rules" );
296
297				else
298					build_eof_action();
299				}
300			}
301
302		|  error
303			{ synerr( "unrecognized rule" ); }
304		;
305
306scon_stk_ptr	:
307			{ $$ = scon_stk_ptr; }
308		;
309
310scon		:  '<' scon_stk_ptr namelist2 '>'
311			{ $$ = $2; }
312
313		|  '<' '*' '>'
314			{
315			$$ = scon_stk_ptr;
316
317			for ( i = 1; i <= lastsc; ++i )
318				{
319				int j;
320
321				for ( j = 1; j <= scon_stk_ptr; ++j )
322					if ( scon_stk[j] == i )
323						break;
324
325				if ( j > scon_stk_ptr )
326					scon_stk[++scon_stk_ptr] = i;
327				}
328			}
329
330		|
331			{ $$ = scon_stk_ptr; }
332		;
333
334namelist2	:  namelist2 ',' sconname
335
336		|  sconname
337
338		|  error
339			{ synerr( "bad start condition list" ); }
340		;
341
342sconname	:  NAME
343			{
344			if ( (scnum = sclookup( nmstr )) == 0 )
345				format_pinpoint_message(
346					"undeclared start condition %s",
347					nmstr );
348			else
349				{
350				for ( i = 1; i <= scon_stk_ptr; ++i )
351					if ( scon_stk[i] == scnum )
352						{
353						format_warn(
354							"<%s> specified twice",
355							scname[scnum] );
356						break;
357						}
358
359				if ( i > scon_stk_ptr )
360					scon_stk[++scon_stk_ptr] = scnum;
361				}
362			}
363		;
364
365rule		:  re2 re
366			{
367			if ( transchar[lastst[$2]] != SYM_EPSILON )
368				/* Provide final transition \now/ so it
369				 * will be marked as a trailing context
370				 * state.
371				 */
372				$2 = link_machines( $2,
373						mkstate( SYM_EPSILON ) );
374
375			mark_beginning_as_normal( $2 );
376			current_state_type = STATE_NORMAL;
377
378			if ( previous_continued_action )
379				{
380				/* We need to treat this as variable trailing
381				 * context so that the backup does not happen
382				 * in the action but before the action switch
383				 * statement.  If the backup happens in the
384				 * action, then the rules "falling into" this
385				 * one's action will *also* do the backup,
386				 * erroneously.
387				 */
388				if ( ! varlength || headcnt != 0 )
389					warn(
390		"trailing context made variable due to preceding '|' action" );
391
392				/* Mark as variable. */
393				varlength = true;
394				headcnt = 0;
395				}
396
397			if ( lex_compat || (varlength && headcnt == 0) )
398				{ /* variable trailing context rule */
399				/* Mark the first part of the rule as the
400				 * accepting "head" part of a trailing
401				 * context rule.
402				 *
403				 * By the way, we didn't do this at the
404				 * beginning of this production because back
405				 * then current_state_type was set up for a
406				 * trail rule, and add_accept() can create
407				 * a new state ...
408				 */
409				add_accept( $1,
410					num_rules | YY_TRAILING_HEAD_MASK );
411				variable_trail_rule = true;
412				}
413
414			else
415				trailcnt = rulelen;
416
417			$$ = link_machines( $1, $2 );
418			}
419
420		|  re2 re '$'
421			{ synerr( "trailing context used twice" ); }
422
423		|  re '$'
424			{
425			headcnt = 0;
426			trailcnt = 1;
427			rulelen = 1;
428			varlength = false;
429
430			current_state_type = STATE_TRAILING_CONTEXT;
431
432			if ( trlcontxt )
433				{
434				synerr( "trailing context used twice" );
435				$$ = mkstate( SYM_EPSILON );
436				}
437
438			else if ( previous_continued_action )
439				{
440				/* See the comment in the rule for "re2 re"
441				 * above.
442				 */
443				warn(
444		"trailing context made variable due to preceding '|' action" );
445
446				varlength = true;
447				}
448
449			if ( lex_compat || varlength )
450				{
451				/* Again, see the comment in the rule for
452				 * "re2 re" above.
453				 */
454				add_accept( $1,
455					num_rules | YY_TRAILING_HEAD_MASK );
456				variable_trail_rule = true;
457				}
458
459			trlcontxt = true;
460
461			eps = mkstate( SYM_EPSILON );
462			$$ = link_machines( $1,
463				link_machines( eps, mkstate( '\n' ) ) );
464			}
465
466		|  re
467			{
468			$$ = $1;
469
470			if ( trlcontxt )
471				{
472				if ( lex_compat || (varlength && headcnt == 0) )
473					/* Both head and trail are
474					 * variable-length.
475					 */
476					variable_trail_rule = true;
477				else
478					trailcnt = rulelen;
479				}
480			}
481		;
482
483
484re		:  re '|' series
485			{
486			varlength = true;
487			$$ = mkor( $1, $3 );
488			}
489
490		|  series
491			{ $$ = $1; }
492		;
493
494
495re2		:  re '/'
496			{
497			/* This rule is written separately so the
498			 * reduction will occur before the trailing
499			 * series is parsed.
500			 */
501
502			if ( trlcontxt )
503				synerr( "trailing context used twice" );
504			else
505				trlcontxt = true;
506
507			if ( varlength )
508				/* We hope the trailing context is
509				 * fixed-length.
510				 */
511				varlength = false;
512			else
513				headcnt = rulelen;
514
515			rulelen = 0;
516
517			current_state_type = STATE_TRAILING_CONTEXT;
518			$$ = $1;
519			}
520		;
521
522series		:  series singleton
523			{
524			/* This is where concatenation of adjacent patterns
525			 * gets done.
526			 */
527			$$ = link_machines( $1, $2 );
528			}
529
530		|  singleton
531			{ $$ = $1; }
532		;
533
534singleton	:  singleton '*'
535			{
536			varlength = true;
537
538			$$ = mkclos( $1 );
539			}
540
541		|  singleton '+'
542			{
543			varlength = true;
544			$$ = mkposcl( $1 );
545			}
546
547		|  singleton '?'
548			{
549			varlength = true;
550			$$ = mkopt( $1 );
551			}
552
553		|  singleton '{' NUMBER ',' NUMBER '}'
554			{
555			varlength = true;
556
557			if ( $3 > $5 || $3 < 0 )
558				{
559				synerr( "bad iteration values" );
560				$$ = $1;
561				}
562			else
563				{
564				if ( $3 == 0 )
565					{
566					if ( $5 <= 0 )
567						{
568						synerr(
569						"bad iteration values" );
570						$$ = $1;
571						}
572					else
573						$$ = mkopt(
574							mkrep( $1, 1, $5 ) );
575					}
576				else
577					$$ = mkrep( $1, $3, $5 );
578				}
579			}
580
581		|  singleton '{' NUMBER ',' '}'
582			{
583			varlength = true;
584
585			if ( $3 <= 0 )
586				{
587				synerr( "iteration value must be positive" );
588				$$ = $1;
589				}
590
591			else
592				$$ = mkrep( $1, $3, INFINITY );
593			}
594
595		|  singleton '{' NUMBER '}'
596			{
597			/* The singleton could be something like "(foo)",
598			 * in which case we have no idea what its length
599			 * is, so we punt here.
600			 */
601			varlength = true;
602
603			if ( $3 <= 0 )
604				{
605				synerr( "iteration value must be positive" );
606				$$ = $1;
607				}
608
609			else
610				$$ = link_machines( $1,
611						copysingl( $1, $3 - 1 ) );
612			}
613
614		|  '.'
615			{
616			if ( ! madeany )
617				{
618				/* Create the '.' character class. */
619				anyccl = cclinit();
620				ccladd( anyccl, '\n' );
621				cclnegate( anyccl );
622
623				if ( useecs )
624					mkeccl( ccltbl + cclmap[anyccl],
625						ccllen[anyccl], nextecm,
626						ecgroup, csize, csize );
627
628				madeany = true;
629				}
630
631			++rulelen;
632
633			$$ = mkstate( -anyccl );
634			}
635
636		|  fullccl
637			{
638			if ( ! cclsorted )
639				/* Sort characters for fast searching.  We
640				 * use a shell sort since this list could
641				 * be large.
642				 */
643				cshell( ccltbl + cclmap[$1], ccllen[$1], true );
644
645			if ( useecs )
646				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
647					nextecm, ecgroup, csize, csize );
648
649			++rulelen;
650
651			$$ = mkstate( -$1 );
652			}
653
654		|  PREVCCL
655			{
656			++rulelen;
657
658			$$ = mkstate( -$1 );
659			}
660
661		|  '"' string '"'
662			{ $$ = $2; }
663
664		|  '(' re ')'
665			{ $$ = $2; }
666
667		|  CHAR
668			{
669			++rulelen;
670
671			if ( caseins && $1 >= 'A' && $1 <= 'Z' )
672				$1 = clower( $1 );
673
674			$$ = mkstate( $1 );
675			}
676		;
677
678fullccl		:  '[' ccl ']'
679			{ $$ = $2; }
680
681		|  '[' '^' ccl ']'
682			{
683			cclnegate( $3 );
684			$$ = $3;
685			}
686		;
687
688ccl		:  ccl CHAR '-' CHAR
689			{
690			if ( caseins )
691				{
692				if ( $2 >= 'A' && $2 <= 'Z' )
693					$2 = clower( $2 );
694				if ( $4 >= 'A' && $4 <= 'Z' )
695					$4 = clower( $4 );
696				}
697
698			if ( $2 > $4 )
699				synerr( "negative range in character class" );
700
701			else
702				{
703				for ( i = $2; i <= $4; ++i )
704					ccladd( $1, i );
705
706				/* Keep track if this ccl is staying in
707				 * alphabetical order.
708				 */
709				cclsorted = cclsorted && ($2 > lastchar);
710				lastchar = $4;
711				}
712
713			$$ = $1;
714			}
715
716		|  ccl CHAR
717			{
718			if ( caseins && $2 >= 'A' && $2 <= 'Z' )
719				$2 = clower( $2 );
720
721			ccladd( $1, $2 );
722			cclsorted = cclsorted && ($2 > lastchar);
723			lastchar = $2;
724			$$ = $1;
725			}
726
727		|  ccl ccl_expr
728			{
729			/* Too hard to properly maintain cclsorted. */
730			cclsorted = false;
731			$$ = $1;
732			}
733
734		|
735			{
736			cclsorted = true;
737			lastchar = 0;
738			currccl = $$ = cclinit();
739			}
740		;
741
742ccl_expr:	   CCE_ALNUM	{ CCL_EXPR(isalnum) }
743		|  CCE_ALPHA	{ CCL_EXPR(isalpha) }
744		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK) }
745		|  CCE_CNTRL	{ CCL_EXPR(iscntrl) }
746		|  CCE_DIGIT	{ CCL_EXPR(isdigit) }
747		|  CCE_GRAPH	{ CCL_EXPR(isgraph) }
748		|  CCE_LOWER	{ CCL_EXPR(islower) }
749		|  CCE_PRINT	{ CCL_EXPR(isprint) }
750		|  CCE_PUNCT	{ CCL_EXPR(ispunct) }
751		|  CCE_SPACE	{ CCL_EXPR(isspace) }
752		|  CCE_UPPER	{
753				if ( caseins )
754					CCL_EXPR(islower)
755				else
756					CCL_EXPR(isupper)
757				}
758		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit) }
759		;
760
761string		:  string CHAR
762			{
763			if ( caseins && $2 >= 'A' && $2 <= 'Z' )
764				$2 = clower( $2 );
765
766			++rulelen;
767
768			$$ = link_machines( $1, mkstate( $2 ) );
769			}
770
771		|
772			{ $$ = mkstate( SYM_EPSILON ); }
773		;
774
775%%
776
777
778/* build_eof_action - build the "<<EOF>>" action for the active start
779 *                    conditions
780 */
781
782void build_eof_action()
783	{
784	register int i;
785	char action_text[MAXLINE];
786
787	for ( i = 1; i <= scon_stk_ptr; ++i )
788		{
789		if ( sceof[scon_stk[i]] )
790			format_pinpoint_message(
791				"multiple <<EOF>> rules for start condition %s",
792				scname[scon_stk[i]] );
793
794		else
795			{
796			sceof[scon_stk[i]] = true;
797			sprintf( action_text, "case YY_STATE_EOF(%s):\n",
798				scname[scon_stk[i]] );
799			add_action( action_text );
800			}
801		}
802
803	line_directive_out( (FILE *) 0, 1 );
804
805	/* This isn't a normal rule after all - don't count it as
806	 * such, so we don't have any holes in the rule numbering
807	 * (which make generating "rule can never match" warnings
808	 * more difficult.
809	 */
810	--num_rules;
811	++num_eof_rules;
812	}
813
814
815/* format_synerr - write out formatted syntax error */
816
817void format_synerr( msg, arg )
818char msg[], arg[];
819	{
820	char errmsg[MAXLINE];
821
822	(void) sprintf( errmsg, msg, arg );
823	synerr( errmsg );
824	}
825
826
827/* synerr - report a syntax error */
828
829void synerr( str )
830char str[];
831	{
832	syntaxerror = true;
833	pinpoint_message( str );
834	}
835
836
837/* format_warn - write out formatted warning */
838
839void format_warn( msg, arg )
840char msg[], arg[];
841	{
842	char warn_msg[MAXLINE];
843
844	(void) sprintf( warn_msg, msg, arg );
845	warn( warn_msg );
846	}
847
848
849/* warn - report a warning, unless -w was given */
850
851void warn( str )
852char str[];
853	{
854	line_warning( str, linenum );
855	}
856
857/* format_pinpoint_message - write out a message formatted with one string,
858 *			     pinpointing its location
859 */
860
861void format_pinpoint_message( msg, arg )
862char msg[], arg[];
863	{
864	char errmsg[MAXLINE];
865
866	(void) sprintf( errmsg, msg, arg );
867	pinpoint_message( errmsg );
868	}
869
870
871/* pinpoint_message - write out a message, pinpointing its location */
872
873void pinpoint_message( str )
874char str[];
875	{
876	line_pinpoint( str, linenum );
877	}
878
879
880/* line_warning - report a warning at a given line, unless -w was given */
881
882void line_warning( str, line )
883char str[];
884int line;
885	{
886	char warning[MAXLINE];
887
888	if ( ! nowarn )
889		{
890		sprintf( warning, "warning, %s", str );
891		line_pinpoint( warning, line );
892		}
893	}
894
895
896/* line_pinpoint - write out a message, pinpointing it at the given line */
897
898void line_pinpoint( str, line )
899char str[];
900int line;
901	{
902	fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
903	}
904
905
906/* yyerror - eat up an error message from the parser;
907 *	     currently, messages are ignore
908 */
909
910void yyerror( msg )
911char msg[];
912	{
913	}
914