1/*	$OpenBSD: parse.y,v 1.10 2017/04/12 14:53:27 millert Exp $	*/
2
3/* parse.y - parser for flex input */
4
/* Basic tokens used by the section 1 and section 2 rules below. */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
/* Tokens used by the options/option rules ("OPT_xxx '=' NAME"). */
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

/* Character class expression tokens; the ccl_expr rule expands each one
 * with the <ctype.h> predicate of the same name.
 */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

/* Negated counterparts of the character class expression tokens. */
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

/* Character class set operators used by fullccl; both are
 * left-associative and share one precedence level.
 */
%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other
 * POSIX utilities use an Extended Regular Expression (ERE) precedence
 * that has the repeat operator higher than concatenation.  This causes
 * ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified.  Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34
35
36%{
37/*  Copyright (c) 1990 The Regents of the University of California. */
38/*  All rights reserved. */
39
40/*  This code is derived from software contributed to Berkeley by */
41/*  Vern Paxson. */
42
43/*  The United States Government has rights in this work pursuant */
44/*  to contract no. DE-AC03-76SF00098 between the United States */
45/*  Department of Energy and the University of California. */
46
47/*  This file is part of flex. */
48
49/*  Redistribution and use in source and binary forms, with or without */
50/*  modification, are permitted provided that the following conditions */
51/*  are met: */
52
53/*  1. Redistributions of source code must retain the above copyright */
54/*     notice, this list of conditions and the following disclaimer. */
55/*  2. Redistributions in binary form must reproduce the above copyright */
56/*     notice, this list of conditions and the following disclaimer in the */
57/*     documentation and/or other materials provided with the distribution. */
58
59/*  Neither the name of the University nor the names of its contributors */
60/*  may be used to endorse or promote products derived from this software */
61/*  without specific prior written permission. */
62
63/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66/*  PURPOSE. */
67
68#include "flexdef.h"
69#include "tables.h"
70
/* Scratch state shared by the grammar actions.  "i" is the loop index
 * used throughout the actions; headcnt, trailcnt and rulelen are reset
 * to 0 by initforrule at the start of every rule.
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
/* trlcontxt, varlength and variable_trail_rule are reset to false by
 * initforrule; xcluflg is set by startconddecl; currccl, cclsorted (and
 * lastchar above) are maintained by the ccl rule.
 */
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start condition numbers, allocated by sect1end with
 * lastsc + 1 entries.  Entries 1..scon_stk_ptr are in use.
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;  /* whether we've made the '.' character class */
/* Character classes created by the '.' rule: ccldot is the negation of
 * { '\n' }, cclany the negation of the empty class.
 */
static int ccldot, cclany;
int previous_continued_action;	/* whether the previous rule's action was '|' */
80
/* format_warn3 - issue a warning built from a printf-style format that
 * takes two arguments.  The text is formatted into a local buffer of
 * MAXLINE bytes (snprintf truncates anything longer) and passed to warn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_buf[MAXLINE]; \
		snprintf( fw3_buf, sizeof(fw3_buf), (fmt), (a1), (a2) ); \
		warn( fw3_buf ); \
	} while (0)
87
/* CCL_EXPR - expand a POSIX character class expression: every character
 * code below csize that is ASCII and satisfies the <ctype.h>-style
 * predicate func is added to the class currently being built (currccl).
 */
#define CCL_EXPR(func) \
	do { \
		int cce_ch; \
		for ( cce_ch = 0; cce_ch < csize; cce_ch++ ) \
			{ \
			if ( ! isascii(cce_ch) ) \
				continue; \
			if ( func(cce_ch) ) \
				ccladd( currccl, cce_ch ); \
			} \
	} while (0)
96
/* CCL_NEG_EXPR - expand a negated character class expression: every
 * character code below csize that does NOT satisfy the <ctype.h>-style
 * predicate func is added to the class currently being built (currccl).
 * Unlike CCL_EXPR, no isascii() filter is applied.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int cce_ch = 0; \
		while ( cce_ch < csize ) \
			{ \
			if ( ! func(cce_ch) ) \
				ccladd( currccl, cce_ch ); \
			++cce_ch; \
			} \
	} while (0)
105
/* On some over-ambitious machines, such as DEC Alphas, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE", so the
 * following should ensure that the default token type is "int".
 */
112#define YYSTYPE int
113
114%}
115
116%%
/* goal - the start symbol: a complete flex input file.
 *
 * By the time the action runs, section 2 has been parsed and the
 * trailing initforrule has called new_rule() once more; the action
 * turns that rule into the default rule and adds it to every start
 * condition.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			/* An empty class, negated: the same construction the
			 * '.' rule uses for its (?s:'.') class.
			 */
			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			/* Unlike the flexrule actions, this loop does not
			 * consult scxclu: every start condition gets the rule.
			 */
			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			/* The default rule's action is a YY_FATAL_ERROR call
			 * when spprdflt is set, and ECHO otherwise.
			 */
			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;
145
/* initlex - empty rule; as the first symbol of goal, its action runs
 * before anything in section 1 is reduced.
 */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;
153
/* sect1 - section 1: any number of start condition declarations
 * (startconddecl followed by a name list) and option lists.  A parse
 * error is reported through synerr() with a generic message.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;
160
/* sect1end - the SECTEND token that closes section 1.  The action
 * calls check_options() and then allocates the start condition stack
 * with lastsc + 1 entries and marks it empty.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
168
/* startconddecl - start of a start condition declaration.  Records in
 * xcluflg which keyword was seen (false for SCDECL, true for XSCDECL);
 * namelist1 passes that flag to scinstal() for each declared name.
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
175
/* namelist1 - one or more NAMEs following a start condition
 * declaration.  Each name (its text is in nmstr) is installed with
 * scinstal() using the xcluflg value set by startconddecl.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;
185
/* options - OPTION_OP followed by a possibly empty list of options. */
options		:  OPTION_OP optionlist
		;

/* optionlist - zero or more option entries. */
optionlist	:  optionlist option
		|
		;

/* option - one "OPT_xxx = NAME" setting.  Each action stores a copy
 * of the NAME text (nmstr) in the corresponding global; OPT_OUTFILE
 * also sets did_outfilename and OPT_TABLES also sets tablesext.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_EXTRA_TYPE '=' NAME
			{ extra_type = copy_string( nmstr ); }
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }
	    |  OPT_TABLES '=' NAME
            { tablesext = true; tablesfilename = copy_string( nmstr ); }
		;
209
/* sect2 - section 2: zero or more rules, each with an optional start
 * condition prefix (scon), or brace-enclosed groups of rules sharing
 * one prefix.  The value of scon ($2) is the stack depth recorded
 * before scon pushed anything, so assigning it back to scon_stk_ptr
 * pops the start conditions that applied to the rule or group.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;
216
/* initforrule - empty rule reduced before each flexrule (and once more
 * at the end of goal) to reset the per-rule parser state and start a
 * new rule.
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			/* Snapshot continued_action for use by the actions
			 * of the rule about to be parsed.
			 */
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
229
/* flexrule - one rule of section 2: a pattern anchored with '^', a
 * plain pattern, an EOF_OP rule, or an unrecognized rule.
 *
 * For patterns, the rule is finished with finish_rule() and then
 * branched into the per-start-condition machines: scbol[] for '^'
 * patterns, scset[] otherwise.  If the start condition stack is
 * non-empty only the stacked conditions are used; otherwise every
 * start condition whose scxclu[] entry is false is used.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 *
				 * NOTE(review): the loop below starts at 1,
				 * not 0 - confirm which index the default
				 * start condition actually has.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			/* Record the first use of '^', reporting it once when
			 * performance_report is greater than 1.
			 */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					warn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;
316
/* scon_stk_ptr - empty marker rule whose value is the current depth of
 * the start condition stack.  In scon it is reduced right after '<',
 * i.e. before namelist2 pushes any start conditions.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
320
/* scon - optional start condition prefix of a rule: "<name,...>",
 * "<*>", or nothing.  In every case the value is the stack depth from
 * before this prefix pushed anything; sect2 uses it to pop the stack.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition 1..lastsc that is not
			 * already on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				/* j ran off the end: i was not found. */
				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
344
/* namelist2 - comma-separated list of start condition names inside
 * '<' ... '>'.  The pushing onto the stack is done by sconname.
 */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;
352
/* sconname - one start condition name in a rule prefix.  The name
 * (nmstr) is looked up with sclookup(); a result of 0 is reported as
 * an undeclared start condition.  Otherwise the number is pushed onto
 * the start condition stack unless it is already there, in which case
 * a warning is issued instead.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* The loop completed without a match. */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
375
/* rule - a complete pattern.  Four forms are accepted:
 *   re2 re      head followed by '/' and a trailing context
 *   re2 re '$'  always a "trailing context used twice" error
 *   re '$'      pattern followed by '$'; the action appends an epsilon
 *               state and a '\n' state as one-character trailing context
 *   re          a plain pattern
 * The value is the machine for the whole pattern.
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					warn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				/* NOTE(review): execution continues after
				 * this branch, so this value is replaced by
				 * the assignment at the end of the action.
				 */
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;
494
495
/* re - a regular expression: one or more series separated by '|'.
 * Any alternation marks the pattern as variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
505
506
/* re2 - the head of a trailing context pattern, i.e. "re '/'".  The
 * action records the head length (when it is fixed), resets the length
 * counters for the trailing part and switches current_state_type to
 * STATE_TRAILING_CONTEXT.  The value is the head machine.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
533
/* series - a concatenation of singletons.  The three
 * BEGIN_REPEAT_POSIX alternatives apply a repeat count to the whole
 * series parsed so far: "{n,m}", "{n,}" and "{n}".  Every repeat form
 * marks the pattern as variable-length; on invalid counts a syntax
 * error is reported and the series is passed through unchanged.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				/* A lower bound of 0 is built as an optional
				 * 1..$5 repetition.
				 */
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				  synerr( _("iteration value must be positive")
					  );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;
608
/* singleton - a single pattern element, optionally followed by a
 * closure ('*', '+', '?') or a BEGIN_REPEAT_FLEX repeat count applied
 * to that element only.  The base elements are '.', a character class
 * (fullccl or PREVCCL), a quoted string, a parenthesized re and a
 * single CHAR.  The value is the machine for the element.
 *
 * Closures and repeats set varlength; '.', character classes and CHAR
 * each add one to rulelen.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				/* A lower bound of 0 is built as an optional
				 * 1..$5 repetition.
				 */
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both classes are created once, on the first '.'
			 * encountered, and reused afterwards.
			 */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
                    ccldot = cclinit();
                    ccladd( ccldot, '\n' );
                    cclnegate( ccldot );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[ccldot],
                            ccllen[ccldot], nextecm,
                            ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
                    cclany = cclinit();
                    cclnegate( cclany );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[cclany],
                            ccllen[cclany], nextecm,
                            ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

            /* Pick the class according to the dot-all flag. */
            if (sf_dot_all())
                $$ = mkstate( -cclany );
            else
                $$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

            if (sf_case_ins() && has_case($1))
                /* create an alternation, as in (a|A) */
                $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
            else
                $$ = mkstate( $1 );
			}
		;
/* fullccl - one bracketed character class, or several combined from
 * left to right with the set difference (CCL_OP_DIFF) and set union
 * (CCL_OP_UNION) operators.  The value is a character class number.
 */
fullccl:
        fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
    |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
    |   braceccl
    ;
776
/* braceccl - a bracketed character class, "[...]" or "[^...]".  The
 * second form negates the class built by ccl before returning it.
 */
braceccl:

            '[' ccl ']' { $$ = $2; }

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
787
/* ccl - the contents of a bracketed character class: any sequence of
 * ranges (CHAR '-' CHAR), single characters and class expressions.
 * The empty alternative creates the class with cclinit() and makes it
 * the current class (currccl); the other alternatives add to it.  The
 * value is the class number.
 *
 * cclsorted/lastchar track whether characters have been added in
 * strictly increasing order.  In a case-insensitive scanner
 * (sf_case_ins()) characters that have case are added in both cases.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [A-Z] but not [a-z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

                /* Do it again for upper/lowercase */
                if (sf_case_ins() && has_case($2) && has_case($4)){
                    $2 = reverse_case ($2);
                    $4 = reverse_case ($4);

                    for ( i = $2; i <= $4; ++i )
                        ccladd( $1, i );

                    cclsorted = cclsorted && ($2 > lastchar);
                    lastchar = $4;
                }

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

            /* Do it again for upper/lowercase */
            if (sf_case_ins() && has_case($2)){
                $2 = reverse_case ($2);
                ccladd ($1, $2);

                cclsorted = cclsorted && ($2 > lastchar);
                lastchar = $2;
            }

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
880
/* ccl_expr - a character class expression token inside a bracketed
 * class.  Each token is expanded into the current class (currccl)
 * with the matching <ctype.h> predicate: CCL_EXPR adds the characters
 * that satisfy it, CCL_NEG_EXPR those that do not.
 *
 * In a case-insensitive scanner (sf_case_ins()) CCE_LOWER and
 * CCE_UPPER each add both lowercase and uppercase characters, while
 * CCE_NEG_LOWER and CCE_NEG_UPPER only issue a warning and add
 * nothing.
 */
ccl_expr:
           CCE_ALNUM	{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
		|  CCE_BLANK	{ CCL_EXPR(isblank); }
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
		|  CCE_LOWER	{
                          CCL_EXPR(islower);
                          if (sf_case_ins())
                              CCL_EXPR(isupper);
                        }
		|  CCE_PRINT	{ CCL_EXPR(isprint); }
		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
		|  CCE_SPACE	{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER	{
                    CCL_EXPR(isupper);
                    if (sf_case_ins())
                        CCL_EXPR(islower);
				}

        |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(isblank); }
		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
		|  CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
		;
926
/* string - the characters of a quoted string.  The empty alternative
 * starts the machine with a single epsilon state; each further CHAR
 * adds one to rulelen and is linked onto the machine built so far.  In
 * a case-insensitive scanner a character that has case becomes an
 * alternation of both cases.
 */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

            if (sf_case_ins() && has_case($2))
                $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
            else
                $$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
945
946%%
947
948
949/* build_eof_action - build the "<<EOF>>" action for the active start
950 *                    conditions
951 */
952
953void build_eof_action()
954	{
955	int i;
956	char action_text[MAXLINE];
957
958	for ( i = 1; i <= scon_stk_ptr; ++i )
959		{
960		if ( sceof[scon_stk[i]] )
961			format_pinpoint_message(
962				"multiple <<EOF>> rules for start condition %s",
963				scname[scon_stk[i]] );
964
965		else
966			{
967			sceof[scon_stk[i]] = true;
968
969			if (previous_continued_action /* && previous action was regular */)
970				add_action("YY_RULE_SETUP\n");
971
972			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
973				scname[scon_stk[i]] );
974			add_action( action_text );
975			}
976		}
977
978	line_directive_out( (FILE *) 0, 1 );
979
980	/* This isn't a normal rule after all - don't count it as
981	 * such, so we don't have any holes in the rule numbering
982	 * (which make generating "rule can never match" warnings
983	 * more difficult.
984	 */
985	--num_rules;
986	++num_eof_rules;
987	}
988
989
990/* format_synerr - write out formatted syntax error */
991
992void format_synerr( msg, arg )
993const char *msg, arg[];
994	{
995	char errmsg[MAXLINE];
996
997	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
998	synerr( errmsg );
999	}
1000
1001
1002/* synerr - report a syntax error */
1003
1004void synerr( str )
1005const char *str;
1006	{
1007	syntaxerror = true;
1008	pinpoint_message( str );
1009	}
1010
1011
1012/* format_warn - write out formatted warning */
1013
1014void format_warn( msg, arg )
1015const char *msg, arg[];
1016	{
1017	char warn_msg[MAXLINE];
1018
1019	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1020	warn( warn_msg );
1021	}
1022
1023
1024/* warn - report a warning, unless -w was given */
1025
1026void warn( str )
1027const char *str;
1028	{
1029	line_warning( str, linenum );
1030	}
1031
1032/* format_pinpoint_message - write out a message formatted with one string,
1033 *			     pinpointing its location
1034 */
1035
1036void format_pinpoint_message( msg, arg )
1037const char *msg, arg[];
1038	{
1039	char errmsg[MAXLINE];
1040
1041	snprintf( errmsg, sizeof(errmsg), msg, arg );
1042	pinpoint_message( errmsg );
1043	}
1044
1045
1046/* pinpoint_message - write out a message, pinpointing its location */
1047
1048void pinpoint_message( str )
1049const char *str;
1050	{
1051	line_pinpoint( str, linenum );
1052	}
1053
1054
1055/* line_warning - report a warning at a given line, unless -w was given */
1056
1057void line_warning( str, line )
1058const char *str;
1059int line;
1060	{
1061	char warning[MAXLINE];
1062
1063	if ( ! nowarn )
1064		{
1065		snprintf( warning, sizeof(warning), "warning, %s", str );
1066		line_pinpoint( warning, line );
1067		}
1068	}
1069
1070
1071/* line_pinpoint - write out a message, pinpointing it at the given line */
1072
1073void line_pinpoint( str, line )
1074const char *str;
1075int line;
1076	{
1077	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1078	}
1079
1080
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * msg is accepted only to satisfy the interface expected by the
 * generated parser; nothing is printed or recorded here.
 */

void yyerror( const char *msg )
	{
	(void) msg;	/* deliberately unused */
	}
1089