/* parse.y - parser for flex input */

/* Tokens delivered by the scanner for the three sections of a flex input
 * file: plain characters and numbers, section/start-condition markers,
 * %option keywords, and POSIX character-class expressions (plain and
 * negated).
 */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the
 * precedence of the repeat operator, {}, below that of concatenation.
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
 * Regular Expression (ERE) precedence that has the repeat operator
 * higher than concatenation.  This causes ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified. Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX


%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"
#include "tables.h"

/* Parser working state shared by the rule actions below. */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start-condition numbers currently in effect; entries live in
 * scon_stk[1..scon_stk_ptr].  Allocated in the sect1end action.
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;  /* whether we've made the '.' character class */
static int ccldot, cclany;   /* ccl numbers for '.' and for dot-all '.' */
int previous_continued_action;	/* whether the previous rule's action was '|' */

/* Issue a warning built from a format string and two arguments. */
#define format_warn3(fmt, a1, a2) \
	do{ \
	char fw3_msg[MAXLINE];\
	snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
	warn( fw3_msg );\
	}while(0)

/* Expand a POSIX character class expression. */
#define CCL_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( isascii(c) && func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* negated class */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')

/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int

%}

%%
/* goal - a whole scanner description.  The trailing initforrule runs
 * new_rule() so that the default rule built in the action has its own
 * rule slot.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;

initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;

/* sect1 - start-condition declarations and option lines. */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;

/* sect1end - end of section 1: validate options and size the
 * start-condition stack for the number of conditions declared so far.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;

/* startconddecl - remember whether the names that follow are exclusive. */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;

/* namelist1 - install each declared start-condition name. */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;

options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;

/* option - options of the form KEYWORD '=' NAME; each stores a copy of
 * the name text.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_EXTRA_TYPE '=' NAME
			{ extra_type = copy_string( nmstr ); }
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }
		|  OPT_TABLES '=' NAME
			{ tablesext = true; tablesfilename = copy_string( nmstr ); }
		;

/* sect2 - the rules.  After each rule (or braced group of rules) the
 * start-condition stack is restored to the depth saved by scon.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;

initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;

/* flexrule - one complete rule: '^'-anchored, unanchored, or <<EOF>>.
 * The finished pattern is branched into the beginning-of-line (scbol) or
 * ordinary (scset) machine of every start condition it applies to.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					warn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;

/* scon_stk_ptr - empty marker rule whose value is the current stack
 * depth, captured before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;

/* scon - optional start-condition prefix.  Its value is the stack depth
 * from before the prefix, which sect2 uses to pop the stack again.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition not already stacked. */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;

namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;

/* sconname - push one named start condition, complaining if it is
 * undeclared or already on the stack.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;

/* rule - a pattern with optional trailing context ("head/trail" or
 * "head$").  Maintains headcnt/trailcnt/varlength and decides whether
 * the rule needs variable trailing context.
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					warn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;


/* re - alternation of series. */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;


/* re2 - the head of a trailing-context rule, up to and including '/'. */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;

/* series - concatenation of singletons, plus the POSIX-precedence repeat
 * operator, which applies to the whole series parsed so far.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;

/* singleton - a single pattern element with optional postfix operators,
 * including the flex-precedence repeat operator.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				{
				/* cclany is the complement of the empty
				 * class, so unlike ccldot it matches '\n'.
				 * Record that the rule can match a newline,
				 * as the fullccl, PREVCCL and CHAR cases do.
				 */
				rule_has_nl[num_rules] = true;
				$$ = mkstate( -cclany );
				}
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
			/* Sort characters for fast searching.
			 */
			qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;

/* fullccl - bracketed classes combined with the set difference and
 * union operators.
 */
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;

/* braceccl - one bracketed class, optionally negated with '^'. */
braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;

/* ccl - the contents of a bracketed class: ranges, single characters and
 * [:class:] expressions.  cclsorted/lastchar track whether characters
 * were added in increasing order.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;

/* ccl_expr - add the members of one POSIX class (or its complement) to
 * the class currently being built (currccl).
 */
ccl_expr:
		   CCE_ALNUM	{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
		|  CCE_LOWER	{
				CCL_EXPR(islower);
				if (sf_case_ins())
					CCL_EXPR(isupper);
				}
		|  CCE_PRINT	{ CCL_EXPR(isprint); }
		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
		|  CCE_SPACE	{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER	{
				CCL_EXPR(isupper);
				if (sf_case_ins())
					CCL_EXPR(islower);
				}

		|  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
		|  CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
		;

/* string - the characters of a quoted string, linked left to right. */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;

%%


/* build_eof_action - build the "<<EOF>>" action for the active start
 * conditions
 */

void
build_eof_action() 955 { 956 int i; 957 char action_text[MAXLINE]; 958 959 for ( i = 1; i <= scon_stk_ptr; ++i ) 960 { 961 if ( sceof[scon_stk[i]] ) 962 format_pinpoint_message( 963 "multiple <<EOF>> rules for start condition %s", 964 scname[scon_stk[i]] ); 965 966 else 967 { 968 sceof[scon_stk[i]] = true; 969 970 if (previous_continued_action /* && previous action was regular */) 971 add_action("YY_RULE_SETUP\n"); 972 973 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n", 974 scname[scon_stk[i]] ); 975 add_action( action_text ); 976 } 977 } 978 979 line_directive_out( (FILE *) 0, 1 ); 980 981 /* This isn't a normal rule after all - don't count it as 982 * such, so we don't have any holes in the rule numbering 983 * (which make generating "rule can never match" warnings 984 * more difficult. 985 */ 986 --num_rules; 987 ++num_eof_rules; 988 } 989 990 991/* format_synerr - write out formatted syntax error */ 992 993void format_synerr( msg, arg ) 994const char *msg, arg[]; 995 { 996 char errmsg[MAXLINE]; 997 998 (void) snprintf( errmsg, sizeof(errmsg), msg, arg ); 999 synerr( errmsg ); 1000 } 1001 1002 1003/* synerr - report a syntax error */ 1004 1005void synerr( str ) 1006const char *str; 1007 { 1008 syntaxerror = true; 1009 pinpoint_message( str ); 1010 } 1011 1012 1013/* format_warn - write out formatted warning */ 1014 1015void format_warn( msg, arg ) 1016const char *msg, arg[]; 1017 { 1018 char warn_msg[MAXLINE]; 1019 1020 snprintf( warn_msg, sizeof(warn_msg), msg, arg ); 1021 warn( warn_msg ); 1022 } 1023 1024 1025/* warn - report a warning, unless -w was given */ 1026 1027void warn( str ) 1028const char *str; 1029 { 1030 line_warning( str, linenum ); 1031 } 1032 1033/* format_pinpoint_message - write out a message formatted with one string, 1034 * pinpointing its location 1035 */ 1036 1037void format_pinpoint_message( msg, arg ) 1038const char *msg, arg[]; 1039 { 1040 char errmsg[MAXLINE]; 1041 1042 snprintf( errmsg, sizeof(errmsg), msg, 
arg ); 1043 pinpoint_message( errmsg ); 1044 } 1045 1046 1047/* pinpoint_message - write out a message, pinpointing its location */ 1048 1049void pinpoint_message( str ) 1050const char *str; 1051 { 1052 line_pinpoint( str, linenum ); 1053 } 1054 1055 1056/* line_warning - report a warning at a given line, unless -w was given */ 1057 1058void line_warning( str, line ) 1059const char *str; 1060int line; 1061 { 1062 char warning[MAXLINE]; 1063 1064 if ( ! nowarn ) 1065 { 1066 snprintf( warning, sizeof(warning), "warning, %s", str ); 1067 line_pinpoint( warning, line ); 1068 } 1069 } 1070 1071 1072/* line_pinpoint - write out a message, pinpointing it at the given line */ 1073 1074void line_pinpoint( str, line ) 1075const char *str; 1076int line; 1077 { 1078 fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); 1079 } 1080 1081 1082/* yyerror - eat up an error message from the parser; 1083 * currently, messages are ignore 1084 */ 1085 1086void yyerror( msg ) 1087const char *msg; 1088 { 1089 } 1090