1/* parse.y - parser for flex input */
2
/* Tokens used by the section 1 and section 2 grammar below. */
%token CHAR NUMBER SECTEND
%token SCDECL XSCDECL
%token NAME PREVCCL EOF_OP

/* The option keyword, followed by the options that are written as
 * KEYWORD '=' NAME in the "option" rule.
 */
%token TOK_OPTION
%token TOK_OUTFILE TOK_PREFIX TOK_YYCLASS
%token TOK_HEADER_FILE TOK_EXTRA_TYPE
%token TOK_TABLES_FILE

/* Character class expressions handled by the "ccl_expr" rule. */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK
%token CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT
%token CCE_SPACE CCE_UPPER CCE_XDIGIT

/* Negated character class expressions, also handled by "ccl_expr". */
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK
%token CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT
%token CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

/* Set difference and union between bracketed character classes (see the
 * "fullccl" rule); both are left-associative.
 */
%left CCL_OP_DIFF CCL_OP_UNION
14
15/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.
18 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
19 * Regular Expression (ERE) precedence that has the repeat operator
20 * higher than concatenation.  This causes ab{3} to yield abbb.
21 *
22 * In order to support the POSIX and AT&T precedence and the flex
23 * precedence we define two token sets for the begin and end tokens of
24 * the repeat operator, '{' and '}'.  The lexical scanner chooses
25 * which tokens to return based on whether posix_compat or lex_compat
26 * are specified. Specifying either posix_compat or lex_compat will
27 * cause flex to parse scanner files as per the AT&T and
28 * POSIX-mandated behavior.
29 */
30
/* Begin/end tokens of the repeat operator: one pair for the POSIX/AT&T
 * precedence and one pair for the flex precedence.
 */
%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX
%token BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32
33
34%{
35/*  Copyright (c) 1990 The Regents of the University of California. */
36/*  All rights reserved. */
37
38/*  This code is derived from software contributed to Berkeley by */
39/*  Vern Paxson. */
40
41/*  The United States Government has rights in this work pursuant */
42/*  to contract no. DE-AC03-76SF00098 between the United States */
43/*  Department of Energy and the University of California. */
44
45/*  This file is part of flex. */
46
47/*  Redistribution and use in source and binary forms, with or without */
48/*  modification, are permitted provided that the following conditions */
49/*  are met: */
50
51/*  1. Redistributions of source code must retain the above copyright */
52/*     notice, this list of conditions and the following disclaimer. */
53/*  2. Redistributions in binary form must reproduce the above copyright */
54/*     notice, this list of conditions and the following disclaimer in the */
55/*     documentation and/or other materials provided with the distribution. */
56
57/*  Neither the name of the University nor the names of its contributors */
58/*  may be used to endorse or promote products derived from this software */
59/*  without specific prior written permission. */
60
61/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64/*  PURPOSE. */
65
66#include "flexdef.h"
67#include "tables.h"
68
69int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
70int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
71
72int *scon_stk;
73int scon_stk_ptr;
74
75static int madeany = false;  /* whether we've made the '.' character class */
76static int ccldot, cclany;
77int previous_continued_action;	/* whether the previous rule's action was '|' */
78
/* Format a warning that takes two arguments into a MAXLINE-sized buffer
 * and hand the result to lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf( fw3_msg, sizeof(fw3_msg), (fmt), (a1), (a2) ); \
		lwarn( fw3_msg ); \
	} while (0)
85
/* Expand a POSIX character class expression: add to currccl every
 * character below csize that is ASCII and satisfies `func'.
 */
#define CCL_EXPR(func) \
	do { \
		int ch = 0; \
		while ( ch < csize ) { \
			if ( isascii(ch) && func(ch) ) \
				ccladd( currccl, ch ); \
			++ch; \
		} \
	} while (0)
94
/* Negated class expression: add to currccl every character below csize
 * that does not satisfy `func'.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int ch = 0; \
		while ( ch < csize ) { \
			if ( !func(ch) ) \
				ccladd( currccl, ch ); \
			++ch; \
		} \
	} while (0)
103
/* Local stand-in for isblank(): true for a space or a tab only. */
#define IS_BLANK(c) ((c) == '\t' || (c) == ' ')
106
/* On some over-ambitious machines, such as DEC Alphas, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
 * wrap their definitions of YYSTYPE in "#ifndef YYSTYPE", so the
 * following should ensure that the default token type is "int".
 */
113#define YYSTYPE int
114
115%}
116
117%%
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			/* All rules have been parsed: append the default rule,
			 * built from a freshly created, negated character
			 * class.
			 */
			int default_machine;

			pat = cclinit();
			cclnegate( pat );
			default_machine = mkstate( -pat );

			/* Record which rule is the default one so that no
			 * "can't match" warning is generated for it.
			 */
			default_rule = num_rules;

			finish_rule( default_machine, false, 0, 0, 0 );

			/* Branch to the default rule from every start
			 * condition.
			 */
			i = 1;
			while ( i <= lastsc )
				{
				scset[i] = mkbranch( scset[i], default_machine );
				++i;
				}

			/* The default action either reports a jammed scanner
			 * or echoes the input.
			 */
			add_action( spprdflt
				? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
				: "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;
146
initlex		:  /* empty */
			{
			/* Set up for processing rules by installing the
			 * default start condition, INITIAL.
			 */
			scinstal( "INITIAL", false );
			}
		;
154
/* Section 1: any mix of start condition declarations and option lines. */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|  /* empty */
		|  error
			{
			synerr( _("unknown error processing section 1") );
			}
		;
161
sect1end	:  SECTEND
			{
			/* Section 1 is complete, so the options can be
			 * checked.
			 */
			check_options();

			/* Create the start condition stack, with one entry
			 * per start condition plus one, and mark it empty.
			 */
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
169
/* Records in xcluflg which kind of declaration introduces the names that
 * follow; "namelist1" passes the flag on to scinstal().
 */
startconddecl	:  SCDECL
			{
			xcluflg = false;
			}
		|  XSCDECL
			{
			xcluflg = true;
			}
		;
176
/* One or more start condition names in a section 1 declaration; each
 * name is installed with the flag set by "startconddecl".
 */
namelist1	:  namelist1 NAME
			{
			scinstal( nmstr, xcluflg );
			}
		|  NAME
			{
			scinstal( nmstr, xcluflg );
			}
		|  error
			{
			synerr( _("bad start condition list") );
			}
		;
186
/* An option line: the option keyword followed by a list of options. */
options		:  TOK_OPTION optionlist
		;
189
/* A possibly empty sequence of options. */
optionlist	:  optionlist option
		|  /* empty */
		;
193
/* Options of the form KEYWORD '=' NAME.  Each action stores a copy of
 * the name text (nmstr) in the matching setting.
 */
option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup( nmstr );
			did_outfilename = 1;
			}
		|  TOK_EXTRA_TYPE '=' NAME
			{
			extra_type = xstrdup( nmstr );
			}
		|  TOK_PREFIX '=' NAME
			{
			prefix = xstrdup( nmstr );

			/* Square brackets are rejected in a prefix. */
			if ( strchr( prefix, '[' ) || strchr( prefix, ']' ) )
				flexerror( _("Prefix must not contain [ or ]") );
			}
		|  TOK_YYCLASS '=' NAME
			{
			yyclass = xstrdup( nmstr );
			}
		|  TOK_HEADER_FILE '=' NAME
			{
			headerfilename = xstrdup( nmstr );
			}
		|  TOK_TABLES_FILE '=' NAME
			{
			tablesext = true;
			tablesfilename = xstrdup( nmstr );
			}
		;
212
/* Section 2: a sequence of rules, or of brace-enclosed groups of rules
 * that share a start condition list.  The value of "scon" is the stack
 * pointer from before the list was pushed, so assigning it back drops
 * the start conditions again.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{
			scon_stk_ptr = $2;
			}
		|  sect2 scon '{' sect2 '}'
			{
			scon_stk_ptr = $2;
			}
		|  /* empty */
		;
219
initforrule	:  /* empty */
			{
			/* Reset the per-rule state before a rule is parsed. */
			varlength = false;
			variable_trail_rule = false;
			trlcontxt = false;

			rulelen = 0;
			headcnt = 0;
			trailcnt = 0;

			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
232
flexrule	:  '^' rule
			{
			/* Rule anchored with '^': finish it and add it to
			 * the scbol sets.
			 */
			pat = $2;
			finish_rule( pat, variable_trail_rule, headcnt,
				trailcnt, previous_continued_action );

			if ( scon_stk_ptr <= 0 )
				{
				/* No start conditions are active, so the
				 * rule goes into every start condition that
				 * is not flagged in scxclu.
				 */
				for ( i = 1; i <= lastsc; ++i )
					{
					if ( scxclu[i] )
						continue;

					scbol[i] = mkbranch( scbol[i], pat );
					}
				}

			else
				{
				/* Only the start conditions on the stack. */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					{
					int sc = scon_stk[i];

					scbol[sc] = mkbranch( scbol[sc], pat );
					}
				}

			if ( ! bol_needed )
				{
				/* First '^' rule seen. */
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			/* Unanchored rule: finish it and add it to the
			 * scset sets.
			 */
			pat = $1;
			finish_rule( pat, variable_trail_rule, headcnt,
				trailcnt, previous_continued_action );

			if ( scon_stk_ptr <= 0 )
				{
				for ( i = 1; i <= lastsc; ++i )
					{
					if ( scxclu[i] )
						continue;

					scset[i] = mkbranch( scset[i], pat );
					}
				}

			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					{
					int sc = scon_stk[i];

					scset[sc] = mkbranch( scset[sc], pat );
					}
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr <= 0 )
				{
				/* No start conditions are active, so this
				 * EOF rule applies to every start condition
				 * that has no EOF action yet.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;
				}

			if ( scon_stk_ptr == 0 )
				lwarn(
			"all start conditions already have <<EOF>> rules" );
			else
				build_eof_action();
			}

		|  error
			{
			synerr( _("unrecognized rule") );
			}
		;
319
/* Empty rule whose value is the current start condition stack pointer;
 * "scon" uses it to capture the pointer before any names are pushed.
 */
scon_stk_ptr	:  /* empty */
			{
			$$ = scon_stk_ptr;
			}
		;
323
/* Optional start condition prefix of a rule.  In every alternative the
 * value is the stack pointer as it was before this prefix pushed anything.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{
			$$ = $2;
			}

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not on the
			 * stack already.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j = 1;

				while ( j <= scon_stk_ptr && scon_stk[j] != i )
					++j;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|  /* empty */
			{
			$$ = scon_stk_ptr;
			}
		;
347
/* Comma-separated start condition names inside a rule prefix. */
namelist2	:  namelist2 ',' sconname
		|  sconname
		|  error
			{
			synerr( _("bad start condition list") );
			}
		;
355
sconname	:  NAME
			{
			/* Look the name up; a result of 0 means that it was
			 * never declared.
			 */
			scnum = sclookup( nmstr );

			if ( scnum == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				/* Search the stack for this start condition. */
				i = 1;
				while ( i <= scon_stk_ptr && scon_stk[i] != scnum )
					++i;

				if ( i <= scon_stk_ptr )
					format_warn(
						"<%s> specified twice",
						scname[scnum] );
				else
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
378
rule		:  re2 re
			{
			/* Head (re2) followed by its trailing context (re). */
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				{
				/* Add the final transition right away so
				 * that it is marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );
				}

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* The rule has to be treated as variable
				 * trailing context so that the backup is
				 * done before the action switch statement
				 * and not inside the action.  A backup
				 * inside the action would also be executed,
				 * wrongly, by the rules that fall into this
				 * action.
				 */
				if ( headcnt != 0 || ! varlength )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				headcnt = 0;
				}

			if ( ! lex_compat && ! (varlength && headcnt == 0) )
				trailcnt = rulelen;

			else
				{
				/* Variable trailing context rule: mark the
				 * first part as the accepting head.  This is
				 * done here and not at the start of the
				 * action because current_state_type was then
				 * still set up for a trail rule, and
				 * add_accept() can create a new state.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{
			synerr( _("trailing context used twice") );
			}

		|  re '$'
			{
			/* The '$' operator is handled as a trailing context
			 * of one newline.
			 */
			int newline_state;
			int trail;

			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* Same reasoning as in the first alternative
				 * of this rule.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Mark the head, as in the first alternative
				 * of this rule.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			newline_state = mkstate( '\n' );
			trail = link_machines( eps, newline_state );
			$$ = link_machines( $1, trail );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt
			     && (lex_compat || (varlength && headcnt == 0)) )
				{
				/* Head and trail are both variable-length. */
				variable_trail_rule = true;
				}

			else if ( trlcontxt )
				trailcnt = rulelen;
			}
		;
497
498
/* A regular expression: one or more series separated by '|'. */
re		:  re '|' series
			{
			/* An alternation is treated as variable-length. */
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{
			$$ = $1;
			}
		;
508
509
re2		:  re '/'
			{
			/* This is a separate rule so that the reduction
			 * takes place before the trailing part is parsed.
			 */

			if ( ! trlcontxt )
				trlcontxt = true;
			else
				synerr( _("trailing context used twice") );

			if ( ! varlength )
				/* Fixed-length head: remember its length. */
				headcnt = rulelen;
			else
				/* Variable-length head: reset the flag in
				 * the hope that the trailing context turns
				 * out to be fixed-length.
				 */
				varlength = false;

			/* Start counting the length of the trailing part. */
			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
536
series		:  series singleton
			{
			/* Adjacent patterns are concatenated here. */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{
			$$ = $1;
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			/* Bounded repeat applied to the whole series. */
			varlength = true;

			if ( $3 < 0 || $3 > $5 || ($3 == 0 && $5 <= 0) )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}

			else if ( $3 == 0 )
				/* A lower bound of zero is built as an
				 * optional repeat that starts at one.
				 */
				$$ = mkopt( mkrep( $1, 1, $5 ) );

			else
				$$ = mkrep( $1, $3, $5 );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			/* Repeat with no upper bound. */
			varlength = true;

			if ( $3 > 0 )
				$$ = mkrep( $1, $3, INFINITE_REPEAT );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* Exact repeat count.  The series may be something
			 * like "(foo)", whose length is unknown here, so the
			 * rule is simply flagged as variable-length.
			 */
			varlength = true;

			if ( $3 > 0 )
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		;
611
singleton	:  singleton '*'
			{
			varlength = true;
			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			/* Bounded repeat applied to the singleton only. */
			varlength = true;

			if ( $3 < 0 || $3 > $5 || ($3 == 0 && $5 <= 0) )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}

			else if ( $3 == 0 )
				/* A lower bound of zero is built as an
				 * optional repeat that starts at one.
				 */
				$$ = mkopt( mkrep( $1, 1, $5 ) );

			else
				$$ = mkrep( $1, $3, $5 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			/* Repeat with no upper bound. */
			varlength = true;

			if ( $3 > 0 )
				$$ = mkrep( $1, $3, INFINITE_REPEAT );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* Exact repeat count.  The singleton may be
			 * something like "(foo)", whose length is unknown
			 * here, so the rule is simply flagged as
			 * variable-length.
			 */
			varlength = true;

			if ( $3 > 0 )
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		|  '.'
			{
			if ( ! madeany )
				{
				/* First '.' in the input: build both classes
				 * once.  ccldot is the negation of a class
				 * holding only newline.
				 */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* cclany is the negation of an empty class;
				 * it is the one used when sf_dot_all() is
				 * set.
				 */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			$$ = mkstate( sf_dot_all() ? -cclany : -ccldot );
			}

		|  fullccl
			{
			/* Sort the class members for fast searching. */
			qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1],
				sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			/* A character class that already exists. */
			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{
			$$ = $2;
			}

		|  '(' re ')'
			{
			$$ = $2;
			}

		|  CHAR
			{
			++rulelen;

			if ( $1 == nlch )
				rule_has_nl[num_rules] = true;

			if ( sf_case_ins() && has_case( $1 ) )
				/* Case-insensitive: build an alternation,
				 * as in (a|A).
				 */
				$$ = mkor( mkstate( $1 ),
					mkstate( reverse_case( $1 ) ) );
			else
				$$ = mkstate( $1 );
			}
		;
/* Bracketed character classes combined with the set difference and set
 * union operators.  The last alternative has no action, so the value of
 * the single braceccl is passed through.
 */
fullccl		:  fullccl CCL_OP_DIFF braceccl
			{
			$$ = ccl_set_diff( $1, $3 );
			}
		|  fullccl CCL_OP_UNION braceccl
			{
			$$ = ccl_set_union( $1, $3 );
			}
		|  braceccl
		;
779
/* A character class in square brackets, optionally negated with '^'. */
braceccl	:  '[' ccl ']'
			{
			$$ = $2;
			}

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
790
ccl		:  ccl CHAR '-' CHAR
			{
			/* A range of characters. */
			int lo = $2;
			int hi = $4;

			if ( sf_case_ins() )
				{
				/* If one end of the range has case and the
				 * other does not, or the two ends differ in
				 * case, it is unclear which range the user
				 * means.  Examples: [@-z] or [S-t]
				 */
				if ( has_case( lo ) != has_case( hi )
				     || (has_case( lo )
					 && (b_islower( lo ) != b_islower( hi )))
				     || (has_case( lo )
					 && (b_isupper( lo ) != b_isupper( hi ))) )
					format_warn3(
			_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
						lo, hi );

				/* If neither end has case and
				 * range_covers_case() is false for the
				 * range, it is just as unclear whether the
				 * other case is wanted too.
				 * Example: [@-_] spans [a-z] but not [A-Z]
				 */
				else if ( ! has_case( lo ) && ! has_case( hi )
					  && ! range_covers_case( lo, hi ) )
					format_warn3(
			_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
						lo, hi );
				}

			if ( lo <= hi )
				{
				for ( i = lo; i <= hi; ++i )
					ccladd( $1, i );

				/* Track whether the class is still being
				 * added in ascending order.
				 */
				cclsorted = cclsorted && (lo > lastchar);
				lastchar = hi;

				/* Add the same range in the other case. */
				if ( sf_case_ins() && has_case( lo )
				     && has_case( hi ) )
					{
					lo = reverse_case( lo );
					hi = reverse_case( hi );

					for ( i = lo; i <= hi; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && (lo > lastchar);
					lastchar = hi;
					}
				}

			else
				synerr( _("negative range in character class") );

			$$ = $1;
			}

		|  ccl CHAR
			{
			/* A single character. */
			int ch = $2;

			ccladd( $1, ch );
			cclsorted = cclsorted && (ch > lastchar);
			lastchar = ch;

			/* Add the character in the other case as well. */
			if ( sf_case_ins() && has_case( ch ) )
				{
				ch = reverse_case( ch );
				ccladd( $1, ch );

				cclsorted = cclsorted && (ch > lastchar);
				lastchar = ch;
				}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Keeping cclsorted accurate here is too hard, so
			 * the class is simply flagged as unsorted.
			 */
			cclsorted = false;
			$$ = $1;
			}

		|  /* empty */
			{
			/* Start a new character class. */
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
883
/* Character class expressions.  Each action adds the matching characters
 * to the class that is currently being built (currccl).
 */
ccl_expr	:  CCE_ALNUM
			{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA
			{ CCL_EXPR(isalpha); }
		|  CCE_BLANK
			{ CCL_EXPR(IS_BLANK); }
		|  CCE_CNTRL
			{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT
			{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH
			{ CCL_EXPR(isgraph); }
		|  CCE_LOWER
			{
			CCL_EXPR(islower);

			/* A case-insensitive scanner gets the upper case
			 * characters as well.
			 */
			if ( sf_case_ins() )
				CCL_EXPR(isupper);
			}
		|  CCE_PRINT
			{ CCL_EXPR(isprint); }
		|  CCE_PUNCT
			{ CCL_EXPR(ispunct); }
		|  CCE_SPACE
			{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT
			{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER
			{
			CCL_EXPR(isupper);

			/* A case-insensitive scanner gets the lower case
			 * characters as well.
			 */
			if ( sf_case_ins() )
				CCL_EXPR(islower);
			}

		|  CCE_NEG_ALNUM
			{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA
			{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK
			{ CCL_NEG_EXPR(IS_BLANK); }
		|  CCE_NEG_CNTRL
			{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT
			{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH
			{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT
			{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT
			{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE
			{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT
			{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued and nothing is added.
			 */
			if ( ! sf_case_ins() )
				CCL_NEG_EXPR(islower);
			else
				lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
			}
		|  CCE_NEG_UPPER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued and nothing is added.
			 */
			if ( ! sf_case_ins() )
				CCL_NEG_EXPR(isupper);
			else
				lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
			}
		;
929
930string		:  string CHAR
931			{
932			if ( $2 == nlch )
933				rule_has_nl[num_rules] = true;
934
935			++rulelen;
936
937            if (sf_case_ins() && has_case($2))
938                $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
939            else
940                $$ = mkstate ($2);
941
942			$$ = link_machines( $1, $$);
943			}
944
945		|
946			{ $$ = mkstate( SYM_EPSILON ); }
947		;
948
949%%
950
951
952/* build_eof_action - build the "<<EOF>>" action for the active start
953 *                    conditions
954 */
955
956void build_eof_action(void)
957	{
958	int i;
959	char action_text[MAXLINE];
960
961	for ( i = 1; i <= scon_stk_ptr; ++i )
962		{
963		if ( sceof[scon_stk[i]] )
964			format_pinpoint_message(
965				"multiple <<EOF>> rules for start condition %s",
966				scname[scon_stk[i]] );
967
968		else
969			{
970			sceof[scon_stk[i]] = true;
971
972			if (previous_continued_action /* && previous action was regular */)
973				add_action("YY_RULE_SETUP\n");
974
975			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
976				scname[scon_stk[i]] );
977			add_action( action_text );
978			}
979		}
980
981	line_directive_out(NULL, 1);
982        add_action("[[");
983
984	/* This isn't a normal rule after all - don't count it as
985	 * such, so we don't have any holes in the rule numbering
986	 * (which make generating "rule can never match" warnings
987	 * more difficult.
988	 */
989	--num_rules;
990	++num_eof_rules;
991	}
992
993
994/* format_synerr - write out formatted syntax error */
995
996void format_synerr( const char *msg, const char arg[] )
997	{
998	char errmsg[MAXLINE];
999
1000	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001	synerr( errmsg );
1002	}
1003
1004
1005/* synerr - report a syntax error */
1006
1007void synerr( const char *str )
1008	{
1009	syntaxerror = true;
1010	pinpoint_message( str );
1011	}
1012
1013
1014/* format_warn - write out formatted warning */
1015
1016void format_warn( const char *msg, const char arg[] )
1017	{
1018	char warn_msg[MAXLINE];
1019
1020	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021	lwarn( warn_msg );
1022	}
1023
1024
1025/* lwarn - report a warning, unless -w was given */
1026
1027void lwarn( const char *str )
1028	{
1029	line_warning( str, linenum );
1030	}
1031
1032/* format_pinpoint_message - write out a message formatted with one string,
1033 *			     pinpointing its location
1034 */
1035
1036void format_pinpoint_message( const char *msg, const char arg[] )
1037	{
1038	char errmsg[MAXLINE];
1039
1040	snprintf( errmsg, sizeof(errmsg), msg, arg );
1041	pinpoint_message( errmsg );
1042	}
1043
1044
1045/* pinpoint_message - write out a message, pinpointing its location */
1046
1047void pinpoint_message( const char *str )
1048	{
1049	line_pinpoint( str, linenum );
1050	}
1051
1052
1053/* line_warning - report a warning at a given line, unless -w was given */
1054
1055void line_warning( const char *str, int line )
1056	{
1057	char warning[MAXLINE];
1058
1059	if ( ! nowarn )
1060		{
1061		snprintf( warning, sizeof(warning), "warning, %s", str );
1062		line_pinpoint( warning, line );
1063		}
1064	}
1065
1066
1067/* line_pinpoint - write out a message, pinpointing it at the given line */
1068
1069void line_pinpoint( const char *str, int line )
1070	{
1071	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1072	}
1073
1074
/* yyerror - swallow an error message coming from the parser;
 *	     currently, these messages are ignored
 */

void yyerror( const char *msg )
{
	(void) msg;
}
1083