/* scan.l revision 250873 */
/* scan.l - scanner for flex input -*-C-*- */

%{
/* Copyright (c) 1990 The Regents of the University of California. */
/* All rights reserved. */

/* This code is derived from software contributed to Berkeley by */
/* Vern Paxson. */

/* The United States Government has rights in this work pursuant */
/* to contract no. DE-AC03-76SF00098 between the United States */
/* Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"
#include "parse.h"
extern bool tablesverify, tablesext;
extern int trlcontxt; /* Set in parse.y for each rule. */
extern const char *escaped_qstart, *escaped_qend;

/* Append the current token text to the action buffer. */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is non-zero.
 * Wrapped in do/while so the expansion is a single statement and can be
 * followed by a semicolon in any statement context.
 */
#define ACTION_IFDEF(def, should_define) \
	do { \
		if ( should_define ) \
			action_define( def, 1 ); \
	} while (0)

/* Append the escaped forms of the m4 quote delimiters to the action buffer. */
#define ACTION_ECHO_QSTART add_action (escaped_qstart)
#define ACTION_ECHO_QEND   add_action (escaped_qend)

/* Define def in m4defs_buf when should_define is non-zero, otherwise
 * undefine it there.
 */
#define ACTION_M4_IFDEF(def, should_define) \
	do{ \
		if ( should_define ) \
			buf_m4_define( &m4defs_buf, def, NULL);\
		else \
			buf_m4_undefine( &m4defs_buf, def);\
	} while(0)

#define MARK_END_OF_PROLOG mark_prolog();

/* Name and signature of the generated scanning routine. */
#define YY_DECL \
	int flexscan()

/* Return the first character of the token as a CHAR token.  Expands to
 * two statements ending in a return, so use it only as the last
 * statement of an action.
 */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Copy the token into nmstr and return NAME; a token that does not fit
 * in MAXLINE bytes is a fatal error.
 */
#define RETURNNAME \
	if(yyleng < MAXLINE) \
		{ \
		strcpy( nmstr, yytext ); \
		} \
	else \
		{ \
		synerr(_("Input line too long\n")); \
		FLEX_EXIT(EXIT_FAILURE);  \
		}  \
	return NAME;

/* Push str[start..] back onto the input, last character first, so it is
 * rescanned in its original order.  Uses the scanner-local int i.  The
 * length is converted to int before subtracting so that an empty string
 * yields -1 directly instead of relying on unsigned wraparound.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = (int) strlen( str ) - 1; i >= (start); --i ) \
		unput((str)[i])

#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;

/* Turn on POSIX compatibility when POSIXLY_CORRECT is set in the
 * environment.
 */
#define YY_USER_INIT \
	if ( getenv("POSIXLY_CORRECT") ) \
		posix_compat = true;

%}

%option caseless nodefault stack noyy_top_state
%option nostdinit

%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
%x GROUP_WITH_PARAMS
%x GROUP_MINUS_PARAMS
%x EXTENDED_COMMENT
%x COMMENT_DISCARD

WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\r\n]

NL		\r?\n

NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"^?[[:alpha:]]+":]")

LEXOPT		[aceknopr]

M4QSTART	"[["
M4QEND		"]]"

%%
	/* State that has to survive from one flexscan() call to the next. */
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	int doing_codeblock = false;
	int i, brace_depth=0, brace_start_line=0;
	Char nmdef[MAXLINE];


<INITIAL>{
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}
	^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}	{
			brace_start_line = linenum;
			++linenum;
			buf_linedir( &top_buf, infilename?infilename:"<stdin>", linenum);
			brace_depth = 1;
			yy_push_state(CODEBLOCK_MATCH_BRACE);
			}

	^"%top".*	synerr( _("malformed '%top' directive") );

	{WS}		/* discard */

	^"%%".*		{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	/* xgettext: no-c-format */
	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			if(yyleng < MAXLINE)
				{
				strcpy( nmstr, yytext );
				}
			else
				{
				synerr( _("Definition name too long\n"));
				FLEX_EXIT(EXIT_FAILURE);
				}

			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


<COMMENT>{
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	{M4QSTART}	ACTION_ECHO_QSTART;
	{M4QEND}	ACTION_ECHO_QEND;
	[^*\n]		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
}

<COMMENT_DISCARD>{
	/* This is the same as COMMENT, but is discarded rather than output. */
	"*/"		yy_pop_state();
	"*"		;
	[^*\n]		;
	{NL}		++linenum;
}

<EXTENDED_COMMENT>{
	")"		yy_pop_state();
	[^\n\)]+	;
	{NL}		++linenum;
}

<LINEDIR>{
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			flex_free( (void *) infilename );
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}

<CODEBLOCK>{
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{M4QSTART}	ACTION_ECHO_QSTART;
	{M4QEND}	ACTION_ECHO_QEND;
	.		ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}
}

<CODEBLOCK_MATCH_BRACE>{
	"}"		{
			if( --brace_depth == 0){
				/* TODO: Matched. */
				yy_pop_state();
			}else
				buf_strnappend(&top_buf, yytext, yyleng);
			}

	"{"		{
			brace_depth++;
			buf_strnappend(&top_buf, yytext, yyleng);
			}

	{NL}		{
			++linenum;
			buf_strnappend(&top_buf, yytext, yyleng);
			}

	{M4QSTART}	buf_strnappend(&top_buf, escaped_qstart, strlen(escaped_qstart));
	{M4QEND}	buf_strnappend(&top_buf, escaped_qend, strlen(escaped_qend));

	[^{}\r\n]	{
			buf_strnappend(&top_buf, yytext, yyleng);
			}

	<<EOF>>		{
			linenum = brace_start_line;
			synerr(_("Unmatched '{'"));
			yyterminate();
			}
}


<PICKUPDEF>{
	{WS}		/* separates name and definition */

	{NOT_WS}[^\r\n]*	{
			if(yyleng < MAXLINE)
				{
				strcpy( (char *) nmdef, yytext );
				}
			else
				{
				format_synerr( _("Definition value for {%s} too long\n"), nmstr);
				FLEX_EXIT(EXIT_FAILURE);
				}
			/* Skip trailing whitespace. */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
				;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


<OPTION>{
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			ACTION_M4_IFDEF( "M4""_YY_ALWAYS_INTERACTIVE", option_sense );
			interactive = option_sense;
			}
	array		yytext_is_array = option_sense;
	ansi-definitions	ansi_func_defs = option_sense;
	ansi-prototypes		ansi_func_protos = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	bison-bridge	bison_bridge_lval = option_sense;
	bison-locations	{ if((bison_bridge_lloc = option_sense))
				bison_bridge_lval = true;
			}
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		sf_set_case_ins(!option_sense);
	caseless|case-insensitive	sf_set_case_ins(option_sense);
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	posix-compat	posix_compat = option_sense;
	main		{
			ACTION_M4_IFDEF( "M4""_YY_MAIN", option_sense);
			/* Override yywrap */
			if( option_sense == true )
				do_yywrap = false;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			ACTION_M4_IFDEF( "M4""_YY_NEVER_INTERACTIVE", option_sense );
			interactive = !option_sense;
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reentrant	reentrant = option_sense;
	reject		reject_really_used = option_sense;
	stack		ACTION_M4_IFDEF( "M4""_YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unistd		ACTION_IFDEF("YY_NO_UNISTD_H", ! option_sense);
	unput		ACTION_M4_IFDEF("M4""_YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense; ACTION_M4_IFDEF("M4""_YY_USE_LINENO", option_sense);
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_M4_IFDEF("M4""_YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_M4_IFDEF("M4""_YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_M4_IFDEF("M4""_YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_STRING", ! option_sense);

	yyalloc		ACTION_M4_IFDEF("M4""_YY_NO_FLEX_ALLOC", ! option_sense);
	yyrealloc	ACTION_M4_IFDEF("M4""_YY_NO_FLEX_REALLOC", ! option_sense);
	yyfree		ACTION_M4_IFDEF("M4""_YY_NO_FLEX_FREE", ! option_sense);

	yyget_debug	ACTION_M4_IFDEF("M4""_YY_NO_GET_DEBUG", ! option_sense);
	yyset_debug	ACTION_M4_IFDEF("M4""_YY_NO_SET_DEBUG", ! option_sense);
	yyget_extra	ACTION_M4_IFDEF("M4""_YY_NO_GET_EXTRA", ! option_sense);
	yyset_extra	ACTION_M4_IFDEF("M4""_YY_NO_SET_EXTRA", ! option_sense);
	yyget_leng	ACTION_M4_IFDEF("M4""_YY_NO_GET_LENG", ! option_sense);
	yyget_text	ACTION_M4_IFDEF("M4""_YY_NO_GET_TEXT", ! option_sense);
	yyget_lineno	ACTION_M4_IFDEF("M4""_YY_NO_GET_LINENO", ! option_sense);
	yyset_lineno	ACTION_M4_IFDEF("M4""_YY_NO_SET_LINENO", ! option_sense);
	yyget_in	ACTION_M4_IFDEF("M4""_YY_NO_GET_IN", ! option_sense);
	yyset_in	ACTION_M4_IFDEF("M4""_YY_NO_SET_IN", ! option_sense);
	yyget_out	ACTION_M4_IFDEF("M4""_YY_NO_GET_OUT", ! option_sense);
	yyset_out	ACTION_M4_IFDEF("M4""_YY_NO_SET_OUT", ! option_sense);
	yyget_lval	ACTION_M4_IFDEF("M4""_YY_NO_GET_LVAL", ! option_sense);
	yyset_lval	ACTION_M4_IFDEF("M4""_YY_NO_SET_LVAL", ! option_sense);
	yyget_lloc	ACTION_M4_IFDEF("M4""_YY_NO_GET_LLOC", ! option_sense);
	yyset_lloc	ACTION_M4_IFDEF("M4""_YY_NO_SET_LLOC", ! option_sense);

	extra-type	return OPT_EXTRA_TYPE;
	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;
	header(-file)?	return OPT_HEADER;
	tables-file	return OPT_TABLES;
	tables-verify	{
			tablesverify = option_sense;
			if(!tablesext && option_sense)
				tablesext = true;
			}


	\"[^"\n]*\"	{
			if(yyleng-1 < MAXLINE)
				{
				strcpy( nmstr, yytext + 1 );
				}
			else
				{
				synerr( _("Option line too long\n"));
				FLEX_EXIT(EXIT_FAILURE);
				}
			nmstr[strlen( nmstr ) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


<SECT2PROLOG>{
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

<SECT2>{
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	{
			/* Allow "<" to appear in (?x) patterns. */
			if (!sf_skip_ws())
				BEGIN(SC);
			return '<';
			}
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	{
			BEGIN(NUM);
			if ( lex_compat || posix_compat )
				return BEGIN_REPEAT_POSIX;
			else
				return BEGIN_REPEAT_FLEX;
			}
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"	{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	{
			if (sf_skip_ws()){
				/* We're in the middle of a (?x: ) pattern. */
				/* Push back everything starting at the "|" */
				size_t amt;
				amt = strchr (yytext, '|') - yytext;
				yyless(amt);
			}
			else {
				continued_action = true;
				++linenum;
				return '\n';
			}
			}

	^{WS}"/*"	{

			if (sf_skip_ws()){
				/* We're in the middle of a (?x: ) pattern. */
				yy_push_state(COMMENT_DISCARD);
			}
			else{
				yyless( yyleng - 2 );	/* put back '/', '*' */
				bracelevel = 0;
				continued_action = false;
				BEGIN(ACTION);
			}
			}

	^{WS}		/* allow indented rules */ ;

	{WS}		{
			if (sf_skip_ws()){
				/* We're in the middle of a (?x: ) pattern. */
			}
			else{
				/* This rule is separate from the one below because
				 * otherwise we get variable trailing context, so
				 * we can't build the scanner using -{f,F}.
				 */
				bracelevel = 0;
				continued_action = false;
				BEGIN(ACTION);

				if ( in_rule )
					{
					doing_rule_action = true;
					in_rule = false;
					return '\n';
					}
			}
			}

	{OPTWS}{NL}	{
			if (sf_skip_ws()){
				/* We're in the middle of a (?x: ) pattern. */
				++linenum;
			}
			else{
				bracelevel = 0;
				continued_action = false;
				BEGIN(ACTION);
				unput( '\n' );	/* so <ACTION> sees it */

				if ( in_rule )
					{
					doing_rule_action = true;
					in_rule = false;
					return '\n';
					}
			}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			outn("/* Begin user sect3 */");
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			if(yyleng < MAXLINE)
				{
				strcpy( nmstr, yytext );
				}
			else
				{
				synerr( _("Input line too long\n"));
				FLEX_EXIT(EXIT_FAILURE);
				}

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if (0 /* <--- This "0" effectively disables the reuse of a
			       * character class (purely based on its source text).
			       * The reason it was disabled is so yacc/bison can parse
			       * ccl operations, such as ccl difference and union.
			       */
			    &&  (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}
	"{-}"		return CCL_OP_DIFF;
	"{+}"		return CCL_OP_UNION;


	/* Check for :space: at the end of the rule so we don't
	 * wrap the expanded regex in '(' ')' -- breaking trailing
	 * context.
	 */
	"{"{NAME}"}"[[:space:]]?	{
			register Char *nmdefptr;
			int end_is_ws, end_ch;

			end_ch = yytext[yyleng-1];
			end_is_ws = end_ch != '}' ? 1 : 0;

			if(yyleng-1 < MAXLINE)
				{
				strcpy( nmstr, yytext + 1 );
				}
			else
				{
				synerr( _("Input line too long\n"));
				FLEX_EXIT(EXIT_FAILURE);
				}
			nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );
				if (end_is_ws)
					unput(end_ch);

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$')
				     || (end_is_ws && trlcontxt && !sf_skip_ws()))
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	"/*"		{
			if (sf_skip_ws())
				yy_push_state(COMMENT_DISCARD);
			else{
				/* Push back the "*" and return "/" as usual. */
				yyless(1);
				return '/';
			}
			}

	"(?#"		{
			if (lex_compat || posix_compat){
				/* Push back the "?#" and treat it like a normal parens. */
				yyless(1);
				sf_push();
				return '(';
			}
			else
				yy_push_state(EXTENDED_COMMENT);
			}
	"(?"		{
			sf_push();
			if (lex_compat || posix_compat)
				/* Push back the "?" and treat it like a normal parens. */
				yyless(1);
			else
				BEGIN(GROUP_WITH_PARAMS);
			return '(';
			}
	"("		sf_push(); return '(';
	")"		sf_pop(); return ')';

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


<SC>{
	{OPTWS}{NL}{OPTWS}	++linenum;	/* Allow blank lines & continuations */
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


<QUOTE>{
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}

<GROUP_WITH_PARAMS>{
	":"		BEGIN(SECT2);
	"-"		BEGIN(GROUP_MINUS_PARAMS);
	i		sf_set_case_ins(1);
	s		sf_set_dot_all(1);
	x		sf_set_skip_ws(1);
}
<GROUP_MINUS_PARAMS>{
	":"		BEGIN(SECT2);
	i		sf_set_case_ins(0);
	s		sf_set_dot_all(0);
	x		sf_set_skip_ws(0);
}

<FIRSTCCL>{
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;

	"[:^alnum:]"	BEGIN(CCL); return CCE_NEG_ALNUM;
	"[:^alpha:]"	BEGIN(CCL); return CCE_NEG_ALPHA;
	"[:^blank:]"	BEGIN(CCL); return CCE_NEG_BLANK;
	"[:^cntrl:]"	BEGIN(CCL); return CCE_NEG_CNTRL;
	"[:^digit:]"	BEGIN(CCL); return CCE_NEG_DIGIT;
	"[:^graph:]"	BEGIN(CCL); return CCE_NEG_GRAPH;
	"[:^lower:]"	BEGIN(CCL); return CCE_NEG_LOWER;
	"[:^print:]"	BEGIN(CCL); return CCE_NEG_PRINT;
	"[:^punct:]"	BEGIN(CCL); return CCE_NEG_PUNCT;
	"[:^space:]"	BEGIN(CCL); return CCE_NEG_SPACE;
	"[:^upper:]"	BEGIN(CCL); return CCE_NEG_UPPER;
	"[:^xdigit:]"	BEGIN(CCL); return CCE_NEG_XDIGIT;
	{CCL_EXPR}	{
			format_synerr(
			_( "bad character class expression: %s" ),
				yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

<NUM>{
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		{
			BEGIN(SECT2);
			if ( lex_compat || posix_compat )
				return END_REPEAT_POSIX;
			else
				return END_REPEAT_FLEX;
			}

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{M4QSTART}	ACTION_ECHO_QSTART;
	{M4QEND}	ACTION_ECHO_QEND;
	.		ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	{M4QSTART}	ACTION_ECHO_QSTART;
	{M4QEND}	ACTION_ECHO_QEND;
	[^[:alpha:]_{}"'/\n\[\]]+	ACTION_ECHO;
	[\[\]]		ACTION_ECHO;
	{NAME}		ACTION_ECHO;
	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = false;
				BEGIN(SECT2);
				}
			}
	.		ACTION_ECHO;
}

<ACTION_STRING>{
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO; BEGIN(ACTION);
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}

<COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING><<EOF>>	{
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}

<EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>>	{
			synerr( _( "EOF encountered inside pattern" ) );
			yyterminate();
			}

<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );

			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}


<SECT3>{
	{M4QSTART}	fwrite (escaped_qstart, 1, strlen(escaped_qstart), yyout);
	{M4QEND}	fwrite (escaped_qend, 1, strlen(escaped_qend), yyout);
	[^\[\]\n]*(\n?)	ECHO;
	(.|\n)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}

<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


/* yywrap - move on to the next input file, if there is one
 *
 * Decrements num_input_files; while files remain, opens the next entry
 * of input_files through set_input_file() and returns 0 so scanning
 * continues.  Returns 1 once no input files are left.
 */

int yywrap(void)
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * A NULL file or the name "-" selects stdin, recorded in infilename as
 * "<stdin>".  Any other name is copied into infilename and opened for
 * reading; a failed open is reported through lerrsf().  linenum is reset
 * to 1 in every case.
 */

void set_input_file(char *file)
	{
	if ( file && strcmp( file, "-" ) )
		{
		infilename = copy_string( file );
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( _( "can't open %s" ), file );
		}

	else
		{
		yyin = stdin;
		infilename = copy_string( "<stdin>" );
		}

	linenum = 1;
	}


/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - return malloc( size ); the result may be NULL */
void *flex_alloc(size_t size)
	{
	return malloc( size );
	}

/* flex_realloc - return realloc( ptr, size ); the result may be NULL */
void *flex_realloc(void *ptr, size_t size)
	{
	return realloc( ptr, size );
	}

/* flex_free - release ptr; free() already accepts a null pointer, so no
 * separate null test is needed
 */
void flex_free(void *ptr)
	{
	free( ptr );
	}