1/* 2 * awkgram.y --- yacc/bison parser 3 */ 4 5/* 6 * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc. 7 * 8 * This file is part of GAWK, the GNU implementation of the 9 * AWK Programming Language. 10 * 11 * GAWK is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License as published by 13 * the Free Software Foundation; either version 2 of the License, or 14 * (at your option) any later version. 15 * 16 * GAWK is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 24 */ 25 26%{ 27#ifdef GAWKDEBUG 28#define YYDEBUG 12 29#endif 30 31#include "awk.h" 32 33#define CAN_FREE TRUE 34#define DONT_FREE FALSE 35 36#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ 37static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1; 38#else 39static void yyerror(); /* va_alist */ 40#endif 41static char *get_src_buf P((void)); 42static int yylex P((void)); 43static NODE *node_common P((NODETYPE op)); 44static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); 45static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); 46static NODE *append_right P((NODE *list, NODE *new)); 47static inline NODE *append_pattern P((NODE **list, NODE *patt)); 48static void func_install P((NODE *params, NODE *def)); 49static void pop_var P((NODE *np, int freeit)); 50static void pop_params P((NODE *params)); 51static NODE *make_param P((char *name)); 52static NODE *mk_rexp P((NODE *exp)); 53static int dup_parms P((NODE *func)); 54static void param_sanity P((NODE *arglist)); 55static int parms_shadow P((const char *fname, NODE *func)); 56static int isnoeffect P((NODETYPE t)); 57static int isassignable P((NODE *n)); 58static void dumpintlstr P((const char *str, size_t len)); 59static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2)); 60static void count_args P((NODE *n)); 61static int isarray P((NODE *n)); 62 63enum defref { FUNC_DEFINE, FUNC_USE }; 64static void func_use P((const char *name, enum defref how)); 65static void check_funcs P((void)); 66 67static int want_regexp; /* lexical scanning kludge */ 68static int can_return; /* parsing kludge */ 69static int begin_or_end_rule = FALSE; /* parsing kludge */ 70static int parsing_end_rule = FALSE; /* for warnings */ 71static int in_print = FALSE; /* lexical scanning kludge for print */ 72static int in_parens = 0; /* lexical scanning kludge for print */ 73static char *lexptr; /* pointer to next char during parsing */ 74static char *lexend; 75static char *lexptr_begin; /* keep track of where we were for error msgs */ 76static char *lexeme; /* beginning of lexeme for debugging */ 77static char *thisline = NULL; 78#define YYDEBUG_LEXER_TEXT (lexeme) 79static int param_counter; 80static char *tokstart = NULL; 81static char *tok = NULL; 82static char *tokend; 83 84static long func_count; /* total number of functions */ 85 86#define HASHSIZE 1021 /* this constant only used here */ 87NODE *variables[HASHSIZE]; 88static int var_count; /* total number of global variables */ 89 90extern char *source; 91extern int sourceline; 92extern struct src *srcfiles; 93extern int numfiles; 94extern int errcount; 95extern NODE *begin_block; 96extern NODE *end_block; 97 98/* 99 * This string cannot occur as a real awk identifier. 100 * Use it as a special token to make function parsing 101 * uniform, but if it's seen, don't install the function. 102 * e.g. 103 * function split(x) { return x } 104 * function x(a) { return a } 105 * should only produce one error message, and not core dump. 106 */ 107static char builtin_func[] = "@builtin"; 108%} 109 110%union { 111 long lval; 112 AWKNUM fval; 113 NODE *nodeval; 114 NODETYPE nodetypeval; 115 char *sval; 116 NODE *(*ptrval) P((void)); 117} 118 119%type <nodeval> function_prologue pattern action variable param_list 120%type <nodeval> exp common_exp 121%type <nodeval> simp_exp non_post_simp_exp 122%type <nodeval> expression_list opt_expression_list print_expression_list 123%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list 124%type <nodeval> simple_stmt opt_simple_stmt 125%type <nodeval> opt_exp opt_variable regexp 126%type <nodeval> input_redir output_redir 127%type <nodetypeval> print 128%type <nodetypeval> assign_operator a_relop relop_or_less 129%type <sval> func_name 130%type <lval> lex_builtin 131 132%token <sval> FUNC_CALL NAME REGEXP 133%token <lval> ERROR 134%token <nodeval> YNUMBER YSTRING 135%token <nodetypeval> RELOP IO_OUT IO_IN 136%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP 137%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE 138%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE 139%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION 140%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE 141%token <nodetypeval> LEX_IN 142%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT 143%token <lval> LEX_BUILTIN LEX_LENGTH 144%token NEWLINE 145 146/* these are just yylval numbers */ 147 148/* Lowest to highest */ 149%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL 150%right '?' ':' 151%left LEX_OR 152%left LEX_AND 153%left LEX_GETLINE 154%nonassoc LEX_IN 155%left FUNC_CALL LEX_BUILTIN LEX_LENGTH 156%nonassoc ',' 157%nonassoc MATCHOP 158%nonassoc RELOP '<' '>' IO_IN IO_OUT 159%left CONCAT_OP 160%left YSTRING YNUMBER 161%left '+' '-' 162%left '*' '/' '%' 163%right '!' UNARY 164%right '^' 165%left INCREMENT DECREMENT 166%left '$' 167%left '(' ')' 168%% 169 170start 171 : opt_nls program opt_nls 172 { 173 check_funcs(); 174 } 175 ; 176 177program 178 : /* empty */ 179 | program rule 180 { 181 begin_or_end_rule = parsing_end_rule = FALSE; 182 yyerrok; 183 } 184 | program error 185 { 186 begin_or_end_rule = parsing_end_rule = FALSE; 187 /* 188 * If errors, give up, don't produce an infinite 189 * stream of syntax error messages. 190 */ 191 /* yyerrok; */ 192 } 193 ; 194 195rule 196 : pattern action 197 { 198 $1->rnode = $2; 199 } 200 | pattern statement_term 201 { 202 if ($1->lnode != NULL) { 203 /* pattern rule with non-empty pattern */ 204 $1->rnode = node(NULL, Node_K_print_rec, NULL); 205 } else { 206 /* an error */ 207 if (begin_or_end_rule) 208 warning(_("%s blocks must have an action part"), 209 (parsing_end_rule ? "END" : "BEGIN")); 210 else 211 warning(_("each rule must have a pattern or an action part")); 212 errcount++; 213 } 214 } 215 | function_prologue action 216 { 217 can_return = FALSE; 218 if ($1) 219 func_install($1, $2); 220 yyerrok; 221 } 222 ; 223 224pattern 225 : /* empty */ 226 { 227 $$ = append_pattern(&expression_value, (NODE *) NULL); 228 } 229 | exp 230 { 231 $$ = append_pattern(&expression_value, $1); 232 } 233 | exp ',' exp 234 { 235 NODE *r; 236 237 getnode(r); 238 r->type = Node_line_range; 239 r->condpair = node($1, Node_cond_pair, $3); 240 r->triggered = FALSE; 241 $$ = append_pattern(&expression_value, r); 242 } 243 | LEX_BEGIN 244 { 245 begin_or_end_rule = TRUE; 246 $$ = append_pattern(&begin_block, (NODE *) NULL); 247 } 248 | LEX_END 249 { 250 begin_or_end_rule = parsing_end_rule = TRUE; 251 $$ = append_pattern(&end_block, (NODE *) NULL); 252 } 253 ; 254 255action 256 : l_brace statements r_brace opt_semi opt_nls 257 { $$ = $2; } 258 ; 259 260func_name 261 : NAME 262 { $$ = $1; } 263 | FUNC_CALL 264 { $$ = $1; } 265 | lex_builtin 266 { 267 yyerror(_("`%s' is a built-in function, it cannot be redefined"), 268 tokstart); 269 errcount++; 270 $$ = builtin_func; 271 /* yyerrok; */ 272 } 273 ; 274 275lex_builtin 276 : LEX_BUILTIN 277 | LEX_LENGTH 278 ; 279 280function_prologue 281 : LEX_FUNCTION 282 { 283 param_counter = 0; 284 } 285 func_name '(' opt_param_list r_paren opt_nls 286 { 287 NODE *t; 288 289 t = make_param($3); 290 t->flags |= FUNC; 291 $$ = append_right(t, $5); 292 can_return = TRUE; 293 /* check for duplicate parameter names */ 294 if (dup_parms($$)) 295 errcount++; 296 } 297 ; 298 299regexp 300 /* 301 * In this rule, want_regexp tells yylex that the next thing 302 * is a regexp so it should read up to the closing slash. 303 */ 304 : a_slash 305 { ++want_regexp; } 306 REGEXP /* The terminating '/' is consumed by yylex(). */ 307 { 308 NODE *n; 309 size_t len = strlen($3); 310 311 if (do_lint && ($3)[0] == '*') { 312 /* possible C comment */ 313 if (($3)[len-1] == '*') 314 lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart); 315 } 316 getnode(n); 317 n->type = Node_regex; 318 n->re_exp = make_string($3, len); 319 n->re_reg = make_regexp($3, len, FALSE); 320 n->re_text = NULL; 321 n->re_flags = CONST; 322 $$ = n; 323 } 324 ; 325 326a_slash 327 : '/' 328 | SLASH_BEFORE_EQUAL 329 ; 330 331statements 332 : /* empty */ 333 { $$ = NULL; } 334 | statements statement 335 { 336 if ($2 == NULL) 337 $$ = $1; 338 else { 339 if (do_lint && isnoeffect($2->type)) 340 lintwarn(_("statement may have no effect")); 341 if ($1 == NULL) 342 $$ = $2; 343 else 344 $$ = append_right( 345 ($1->type == Node_statement_list ? $1 346 : node($1, Node_statement_list, (NODE *) NULL)), 347 ($2->type == Node_statement_list ? $2 348 : node($2, Node_statement_list, (NODE *) NULL))); 349 } 350 yyerrok; 351 } 352 | statements error 353 { $$ = NULL; } 354 ; 355 356statement_term 357 : nls 358 | semi opt_nls 359 ; 360 361statement 362 : semi opt_nls 363 { $$ = NULL; } 364 | l_brace statements r_brace 365 { $$ = $2; } 366 | if_statement 367 { $$ = $1; } 368 | LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace 369 { $$ = node($3, Node_K_switch, $7); } 370 | LEX_WHILE '(' exp r_paren opt_nls statement 371 { $$ = node($3, Node_K_while, $6); } 372 | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls 373 { $$ = node($6, Node_K_do, $3); } 374 | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement 375 { 376 /* 377 * Efficiency hack. Recognize the special case of 378 * 379 * for (iggy in foo) 380 * delete foo[iggy] 381 * 382 * and treat it as if it were 383 * 384 * delete foo 385 * 386 * Check that the body is a `delete a[i]' statement, 387 * and that both the loop var and array names match. 388 */ 389 if ($8 != NULL && $8->type == Node_K_delete) { 390 NODE *arr, *sub; 391 392 assert($8->rnode->type == Node_expression_list); 393 arr = $8->lnode; /* array var */ 394 sub = $8->rnode->lnode; /* index var */ 395 396 if ( (arr->type == Node_var_new 397 || arr->type == Node_var_array 398 || arr->type == Node_param_list) 399 && (sub->type == Node_var_new 400 || sub->type == Node_var 401 || sub->type == Node_param_list) 402 && strcmp($3, sub->vname) == 0 403 && strcmp($5, arr->vname) == 0) { 404 $8->type = Node_K_delete_loop; 405 $$ = $8; 406 } 407 else 408 goto regular_loop; 409 } else { 410 regular_loop: 411 $$ = node($8, Node_K_arrayfor, 412 make_for_loop(variable($3, CAN_FREE, Node_var), 413 (NODE *) NULL, variable($5, CAN_FREE, Node_var_array))); 414 } 415 } 416 | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement 417 { 418 $$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9)); 419 } 420 | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement 421 { 422 $$ = node($11, Node_K_for, 423 (NODE *) make_for_loop($3, (NODE *) NULL, $8)); 424 } 425 | LEX_BREAK statement_term 426 /* for break, maybe we'll have to remember where to break to */ 427 { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); } 428 | LEX_CONTINUE statement_term 429 /* similarly */ 430 { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); } 431 | LEX_NEXT statement_term 432 { NODETYPE type; 433 434 if (begin_or_end_rule) 435 yyerror(_("`%s' used in %s action"), "next", 436 (parsing_end_rule ? "END" : "BEGIN")); 437 type = Node_K_next; 438 $$ = node((NODE *) NULL, type, (NODE *) NULL); 439 } 440 | LEX_NEXTFILE statement_term 441 { 442 if (do_traditional) { 443 /* 444 * can't use yyerror, since may have overshot 445 * the source line 446 */ 447 errcount++; 448 error(_("`nextfile' is a gawk extension")); 449 } 450 if (do_lint) 451 lintwarn(_("`nextfile' is a gawk extension")); 452 if (begin_or_end_rule) { 453 /* same thing */ 454 errcount++; 455 error(_("`%s' used in %s action"), "nextfile", 456 (parsing_end_rule ? "END" : "BEGIN")); 457 } 458 $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); 459 } 460 | LEX_EXIT opt_exp statement_term 461 { $$ = node($2, Node_K_exit, (NODE *) NULL); } 462 | LEX_RETURN 463 { 464 if (! can_return) 465 yyerror(_("`return' used outside function context")); 466 } 467 opt_exp statement_term 468 { $$ = node($3, Node_K_return, (NODE *) NULL); } 469 | simple_stmt statement_term 470 ; 471 472 /* 473 * A simple_stmt exists to satisfy a constraint in the POSIX 474 * grammar allowing them to occur as the 1st and 3rd parts 475 * in a `for (...;...;...)' loop. This is a historical oddity 476 * inherited from Unix awk, not at all documented in the AK&W 477 * awk book. We support it, as this was reported as a bug. 478 * We don't bother to document it though. So there. 479 */ 480simple_stmt 481 : print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir 482 { 483 /* 484 * Optimization: plain `print' has no expression list, so $3 is null. 485 * If $3 is an expression list with one element (rnode == null) 486 * and lnode is a field spec for field 0, we have `print $0'. 487 * For both, use Node_K_print_rec, which is faster for these two cases. 488 */ 489 if ($1 == Node_K_print && 490 ($3 == NULL 491 || ($3->type == Node_expression_list 492 && $3->rnode == NULL 493 && $3->lnode->type == Node_field_spec 494 && $3->lnode->lnode->type == Node_val 495 && $3->lnode->lnode->numbr == 0.0)) 496 ) { 497 static int warned = FALSE; 498 499 $$ = node(NULL, Node_K_print_rec, $4); 500 501 if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) { 502 warned = TRUE; 503 lintwarn( 504 _("plain `print' in BEGIN or END rule should probably be `print \"\"'")); 505 } 506 } else { 507 $$ = node($3, $1, $4); 508 if ($$->type == Node_K_printf) 509 count_args($$); 510 } 511 } 512 | LEX_DELETE NAME '[' expression_list ']' 513 { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); } 514 | LEX_DELETE NAME 515 { 516 if (do_lint) 517 lintwarn(_("`delete array' is a gawk extension")); 518 if (do_traditional) { 519 /* 520 * can't use yyerror, since may have overshot 521 * the source line 522 */ 523 errcount++; 524 error(_("`delete array' is a gawk extension")); 525 } 526 $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); 527 } 528 | LEX_DELETE '(' NAME ')' 529 { 530 /* this is for tawk compatibility. maybe the warnings should always be done. */ 531 if (do_lint) 532 lintwarn(_("`delete(array)' is a non-portable tawk extension")); 533 if (do_traditional) { 534 /* 535 * can't use yyerror, since may have overshot 536 * the source line 537 */ 538 errcount++; 539 error(_("`delete(array)' is a non-portable tawk extension")); 540 } 541 $$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); 542 } 543 | exp 544 { $$ = $1; } 545 ; 546 547opt_simple_stmt 548 : /* empty */ 549 { $$ = NULL; } 550 | simple_stmt 551 { $$ = $1; } 552 ; 553 554switch_body 555 : case_statements 556 { 557 if ($1 == NULL) { 558 $$ = NULL; 559 } else { 560 NODE *dflt = NULL; 561 NODE *head = $1; 562 NODE *curr; 563 564 const char **case_values = NULL; 565 566 int maxcount = 128; 567 int case_count = 0; 568 int i; 569 570 emalloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body"); 571 for (curr = $1; curr != NULL; curr = curr->rnode) { 572 /* Assure that case statement values are unique. */ 573 if (curr->lnode->type == Node_K_case) { 574 char *caseval; 575 576 if (curr->lnode->lnode->type == Node_regex) 577 caseval = curr->lnode->lnode->re_exp->stptr; 578 else 579 caseval = force_string(tree_eval(curr->lnode->lnode))->stptr; 580 581 for (i = 0; i < case_count; i++) 582 if (strcmp(caseval, case_values[i]) == 0) 583 yyerror(_("duplicate case values in switch body: %s"), caseval); 584 585 if (case_count >= maxcount) { 586 maxcount += 128; 587 erealloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body"); 588 } 589 case_values[case_count++] = caseval; 590 } else { 591 /* Otherwise save a pointer to the default node. */ 592 if (dflt != NULL) 593 yyerror(_("Duplicate `default' detected in switch body")); 594 dflt = curr; 595 } 596 } 597 598 free(case_values); 599 600 /* Create the switch body. */ 601 $$ = node(head, Node_switch_body, dflt); 602 } 603 } 604 ; 605 606case_statements 607 : /* empty */ 608 { $$ = NULL; } 609 | case_statements case_statement 610 { 611 if ($2 == NULL) 612 $$ = $1; 613 else { 614 if (do_lint && isnoeffect($2->type)) 615 lintwarn(_("statement may have no effect")); 616 if ($1 == NULL) 617 $$ = node($2, Node_case_list, (NODE *) NULL); 618 else 619 $$ = append_right( 620 ($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL)), 621 ($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL)) 622 ); 623 } 624 yyerrok; 625 } 626 | case_statements error 627 { $$ = NULL; } 628 ; 629 630case_statement 631 : LEX_CASE case_value colon opt_nls statements 632 { $$ = node($2, Node_K_case, $5); } 633 | LEX_DEFAULT colon opt_nls statements 634 { $$ = node((NODE *) NULL, Node_K_default, $4); } 635 ; 636 637case_value 638 : YNUMBER 639 { $$ = $1; } 640 | '-' YNUMBER %prec UNARY 641 { 642 $2->numbr = -(force_number($2)); 643 $$ = $2; 644 } 645 | '+' YNUMBER %prec UNARY 646 { $$ = $2; } 647 | YSTRING 648 { $$ = $1; } 649 | regexp 650 { $$ = $1; } 651 ; 652 653print 654 : LEX_PRINT 655 | LEX_PRINTF 656 ; 657 658 /* 659 * Note: ``print(x)'' is already parsed by the first rule, 660 * so there is no good in covering it by the second one too. 661 */ 662print_expression_list 663 : opt_expression_list 664 | '(' exp comma expression_list r_paren 665 { $$ = node($2, Node_expression_list, $4); } 666 ; 667 668output_redir 669 : /* empty */ 670 { 671 in_print = FALSE; 672 in_parens = 0; 673 $$ = NULL; 674 } 675 | IO_OUT { in_print = FALSE; in_parens = 0; } common_exp 676 { 677 $$ = node($3, $1, (NODE *) NULL); 678 if ($1 == Node_redirect_twoway 679 && $3->type == Node_K_getline 680 && $3->rnode->type == Node_redirect_twoway) 681 yyerror(_("multistage two-way pipelines don't work")); 682 } 683 ; 684 685if_statement 686 : LEX_IF '(' exp r_paren opt_nls statement 687 { 688 $$ = node($3, Node_K_if, 689 node($6, Node_if_branches, (NODE *) NULL)); 690 } 691 | LEX_IF '(' exp r_paren opt_nls statement 692 LEX_ELSE opt_nls statement 693 { $$ = node($3, Node_K_if, 694 node($6, Node_if_branches, $9)); } 695 ; 696 697nls 698 : NEWLINE 699 | nls NEWLINE 700 ; 701 702opt_nls 703 : /* empty */ 704 | nls 705 ; 706 707input_redir 708 : /* empty */ 709 { $$ = NULL; } 710 | '<' simp_exp 711 { $$ = node($2, Node_redirect_input, (NODE *) NULL); } 712 ; 713 714opt_param_list 715 : /* empty */ 716 { $$ = NULL; } 717 | param_list 718 { $$ = $1; } 719 ; 720 721param_list 722 : NAME 723 { $$ = make_param($1); } 724 | param_list comma NAME 725 { $$ = append_right($1, make_param($3)); yyerrok; } 726 | error 727 { $$ = NULL; } 728 | param_list error 729 { $$ = NULL; } 730 | param_list comma error 731 { $$ = NULL; } 732 ; 733 734/* optional expression, as in for loop */ 735opt_exp 736 : /* empty */ 737 { $$ = NULL; } 738 | exp 739 { $$ = $1; } 740 ; 741 742opt_expression_list 743 : /* empty */ 744 { $$ = NULL; } 745 | expression_list 746 { $$ = $1; } 747 ; 748 749expression_list 750 : exp 751 { $$ = node($1, Node_expression_list, (NODE *) NULL); } 752 | expression_list comma exp 753 { 754 $$ = append_right($1, 755 node($3, Node_expression_list, (NODE *) NULL)); 756 yyerrok; 757 } 758 | error 759 { $$ = NULL; } 760 | expression_list error 761 { $$ = NULL; } 762 | expression_list error exp 763 { $$ = NULL; } 764 | expression_list comma error 765 { $$ = NULL; } 766 ; 767 768/* Expressions, not including the comma operator. */ 769exp : variable assign_operator exp %prec ASSIGNOP 770 { 771 if (do_lint && $3->type == Node_regex) 772 lintwarn(_("regular expression on right of assignment")); 773 $$ = node($1, $2, $3); 774 } 775 | exp LEX_AND exp 776 { $$ = node($1, Node_and, $3); } 777 | exp LEX_OR exp 778 { $$ = node($1, Node_or, $3); } 779 | exp MATCHOP exp 780 { 781 if ($1->type == Node_regex) 782 warning(_("regular expression on left of `~' or `!~' operator")); 783 $$ = node($1, $2, mk_rexp($3)); 784 } 785 | exp LEX_IN NAME 786 { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } 787 | exp a_relop exp %prec RELOP 788 { 789 if (do_lint && $3->type == Node_regex) 790 lintwarn(_("regular expression on right of comparison")); 791 $$ = node($1, $2, $3); 792 } 793 | exp '?' exp ':' exp 794 { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} 795 | common_exp 796 { $$ = $1; } 797 ; 798 799assign_operator 800 : ASSIGN 801 { $$ = $1; } 802 | ASSIGNOP 803 { $$ = $1; } 804 | SLASH_BEFORE_EQUAL ASSIGN /* `/=' */ 805 { $$ = Node_assign_quotient; } 806 ; 807 808relop_or_less 809 : RELOP 810 { $$ = $1; } 811 | '<' 812 { $$ = Node_less; } 813 ; 814a_relop 815 : relop_or_less 816 | '>' 817 { $$ = Node_greater; } 818 ; 819 820common_exp 821 : regexp 822 { $$ = $1; } 823 | '!' regexp %prec UNARY 824 { 825 $$ = node(node(make_number(0.0), 826 Node_field_spec, 827 (NODE *) NULL), 828 Node_nomatch, 829 $2); 830 } 831 | '(' expression_list r_paren LEX_IN NAME 832 { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } 833 | simp_exp 834 { $$ = $1; } 835 | common_exp simp_exp %prec CONCAT_OP 836 { $$ = node($1, Node_concat, $2); } 837 ; 838 839simp_exp 840 : non_post_simp_exp 841 /* Binary operators in order of decreasing precedence. */ 842 | simp_exp '^' simp_exp 843 { $$ = node($1, Node_exp, $3); } 844 | simp_exp '*' simp_exp 845 { $$ = node($1, Node_times, $3); } 846 | simp_exp '/' simp_exp 847 { $$ = node($1, Node_quotient, $3); } 848 | simp_exp '%' simp_exp 849 { $$ = node($1, Node_mod, $3); } 850 | simp_exp '+' simp_exp 851 { $$ = node($1, Node_plus, $3); } 852 | simp_exp '-' simp_exp 853 { $$ = node($1, Node_minus, $3); } 854 | LEX_GETLINE opt_variable input_redir 855 { 856 if (do_lint && parsing_end_rule && $3 == NULL) 857 lintwarn(_("non-redirected `getline' undefined inside END action")); 858 $$ = node($2, Node_K_getline, $3); 859 } 860 | simp_exp IO_IN LEX_GETLINE opt_variable 861 { 862 $$ = node($4, Node_K_getline, 863 node($1, $2, (NODE *) NULL)); 864 } 865 | variable INCREMENT 866 { $$ = node($1, Node_postincrement, (NODE *) NULL); } 867 | variable DECREMENT 868 { $$ = node($1, Node_postdecrement, (NODE *) NULL); } 869 ; 870 871non_post_simp_exp 872 : '!' simp_exp %prec UNARY 873 { $$ = node($2, Node_not, (NODE *) NULL); } 874 | '(' exp r_paren 875 { $$ = $2; } 876 | LEX_BUILTIN 877 '(' opt_expression_list r_paren 878 { $$ = snode($3, Node_builtin, (int) $1); } 879 | LEX_LENGTH '(' opt_expression_list r_paren 880 { $$ = snode($3, Node_builtin, (int) $1); } 881 | LEX_LENGTH 882 { 883 if (do_lint) 884 lintwarn(_("call of `length' without parentheses is not portable")); 885 $$ = snode((NODE *) NULL, Node_builtin, (int) $1); 886 if (do_posix) 887 warning(_("call of `length' without parentheses is deprecated by POSIX")); 888 } 889 | FUNC_CALL '(' opt_expression_list r_paren 890 { 891 $$ = node($3, Node_func_call, make_string($1, strlen($1))); 892 $$->funcbody = NULL; 893 func_use($1, FUNC_USE); 894 param_sanity($3); 895 free($1); 896 } 897 | variable 898 | INCREMENT variable 899 { $$ = node($2, Node_preincrement, (NODE *) NULL); } 900 | DECREMENT variable 901 { $$ = node($2, Node_predecrement, (NODE *) NULL); } 902 | YNUMBER 903 { $$ = $1; } 904 | YSTRING 905 { $$ = $1; } 906 907 | '-' simp_exp %prec UNARY 908 { 909 if ($2->type == Node_val && ($2->flags & (STRCUR|STRING)) == 0) { 910 $2->numbr = -(force_number($2)); 911 $$ = $2; 912 } else 913 $$ = node($2, Node_unary_minus, (NODE *) NULL); 914 } 915 | '+' simp_exp %prec UNARY 916 { 917 /* 918 * was: $$ = $2 919 * POSIX semantics: force a conversion to numeric type 920 */ 921 $$ = node (make_number(0.0), Node_plus, $2); 922 } 923 ; 924 925opt_variable 926 : /* empty */ 927 { $$ = NULL; } 928 | variable 929 { $$ = $1; } 930 ; 931 932variable 933 : NAME 934 { $$ = variable($1, CAN_FREE, Node_var_new); } 935 | NAME '[' expression_list ']' 936 { 937 NODE *n; 938 939 if ((n = lookup($1)) != NULL && ! isarray(n)) 940 yyerror(_("use of non-array as array")); 941 else if ($3 == NULL) { 942 fatal(_("invalid subscript expression")); 943 } else if ($3->rnode == NULL) { 944 $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); 945 freenode($3); 946 } else 947 $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); 948 } 949 | '$' non_post_simp_exp 950 { $$ = node($2, Node_field_spec, (NODE *) NULL); } 951 ; 952 953l_brace 954 : '{' opt_nls 955 ; 956 957r_brace 958 : '}' opt_nls { yyerrok; } 959 ; 960 961r_paren 962 : ')' { yyerrok; } 963 ; 964 965opt_semi 966 : /* empty */ 967 | semi 968 ; 969 970semi 971 : ';' { yyerrok; } 972 ; 973 974colon 975 : ':' { yyerrok; } 976 ; 977 978comma : ',' opt_nls { yyerrok; } 979 ; 980 981%% 982 983struct token { 984 const char *operator; /* text to match */ 985 NODETYPE value; /* node type */ 986 int class; /* lexical class */ 987 unsigned flags; /* # of args. allowed and compatability */ 988# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ 989# define A(n) (1<<(n)) 990# define VERSION_MASK 0xFF00 /* old awk is zero */ 991# define NOT_OLD 0x0100 /* feature not in old awk */ 992# define NOT_POSIX 0x0200 /* feature not in POSIX */ 993# define GAWKX 0x0400 /* gawk extension */ 994# define RESX 0x0800 /* Bell Labs Research extension */ 995 NODE *(*ptr) P((NODE *)); /* function that implements this keyword */ 996}; 997 998/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ 999/* Function pointers come from declarations in awk.h. */ 1000 1001static const struct token tokentab[] = { 1002{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, 1003{"END", Node_illegal, LEX_END, 0, 0}, 1004#ifdef ARRAYDEBUG 1005{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, 1006#endif 1007{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, 1008{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort}, 1009{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti}, 1010{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, 1011{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain}, 1012{"break", Node_K_break, LEX_BREAK, 0, 0}, 1013#ifdef ALLOW_SWITCH 1014{"case", Node_K_case, LEX_CASE, GAWKX, 0}, 1015#endif 1016{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close}, 1017{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, 1018{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, 1019{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, 1020{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext}, 1021{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext}, 1022#ifdef ALLOW_SWITCH 1023{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0}, 1024#endif 1025{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, 1026{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, 1027{"else", Node_illegal, LEX_ELSE, 0, 0}, 1028{"exit", Node_K_exit, LEX_EXIT, 0, 0}, 1029{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, 1030{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext}, 1031{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, 1032{"for", Node_K_for, LEX_FOR, 0, 0}, 1033{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, 1034{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, 1035{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, 1036{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, 1037{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, 1038{"if", Node_K_if, LEX_IF, 0, 0}, 1039{"in", Node_illegal, LEX_IN, 0, 0}, 1040{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, 1041{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, 1042{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, 1043{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, 1044{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, 1045{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match}, 1046{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime}, 1047{"next", Node_K_next, LEX_NEXT, 0, 0}, 1048{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, 1049{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, 1050{"print", Node_K_print, LEX_PRINT, 0, 0}, 1051{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, 1052{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, 1053{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, 1054{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, 1055{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, 1056{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, 1057{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, 1058{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, 1059{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, 1060#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */ 1061{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, 1062#endif 1063{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime}, 1064{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, 1065{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, 1066{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, 1067#ifdef ALLOW_SWITCH 1068{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0}, 1069#endif 1070{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, 1071{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, 1072{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, 1073{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, 1074{"while", Node_K_while, LEX_WHILE, 0, 0}, 1075{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, 1076}; 1077 1078#ifdef MBS_SUPPORT 1079/* Variable containing the current shift state. */ 1080static mbstate_t cur_mbstate; 1081/* Ring buffer containing current characters. */ 1082#define MAX_CHAR_IN_RING_BUFFER 8 1083#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX) 1084static char cur_char_ring[RING_BUFFER_SIZE]; 1085/* Index for ring buffers. */ 1086static int cur_ring_idx; 1087/* This macro means that last nextc() return a singlebyte character 1088 or 1st byte of a multibyte character. */ 1089#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) 1090#endif /* MBS_SUPPORT */ 1091 1092/* getfname --- return name of a builtin function (for pretty printing) */ 1093 1094const char * 1095getfname(register NODE *(*fptr)(NODE *)) 1096{ 1097 register int i, j; 1098 1099 j = sizeof(tokentab) / sizeof(tokentab[0]); 1100 /* linear search, no other way to do it */ 1101 for (i = 0; i < j; i++) 1102 if (tokentab[i].ptr == fptr) 1103 return tokentab[i].operator; 1104 1105 return NULL; 1106} 1107 1108/* yyerror --- print a syntax error message, show where */ 1109 1110/* 1111 * Function identifier purposely indented to avoid mangling 1112 * by ansi2knr. Sigh. 1113 */ 1114 1115static void 1116#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ 1117 yyerror(const char *m, ...) 1118#else 1119/* VARARGS0 */ 1120 yyerror(va_alist) 1121 va_dcl 1122#endif 1123{ 1124 va_list args; 1125 const char *mesg = NULL; 1126 register char *bp, *cp; 1127 char *scan; 1128 char *buf; 1129 int count; 1130 static char end_of_file_line[] = "(END OF FILE)"; 1131 char save; 1132 1133 errcount++; 1134 /* Find the current line in the input file */ 1135 if (lexptr && lexeme) { 1136 if (thisline == NULL) { 1137 cp = lexeme; 1138 if (*cp == '\n') { 1139 cp--; 1140 mesg = _("unexpected newline or end of string"); 1141 } 1142 for (; cp != lexptr_begin && *cp != '\n'; --cp) 1143 continue; 1144 if (*cp == '\n') 1145 cp++; 1146 thisline = cp; 1147 } 1148 /* NL isn't guaranteed */ 1149 bp = lexeme; 1150 while (bp < lexend && *bp && *bp != '\n') 1151 bp++; 1152 } else { 1153 thisline = end_of_file_line; 1154 bp = thisline + strlen(thisline); 1155 } 1156 1157 /* 1158 * Saving and restoring *bp keeps valgrind happy, 1159 * since the guts of glibc uses strlen, even though 1160 * we're passing an explict precision. Sigh. 1161 */ 1162 save = *bp; 1163 *bp = '\0'; 1164 1165 msg("%.*s", (int) (bp - thisline), thisline); 1166 1167 *bp = save; 1168 1169#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ 1170 va_start(args, m); 1171 if (mesg == NULL) 1172 mesg = m; 1173#else 1174 va_start(args); 1175 if (mesg == NULL) 1176 mesg = va_arg(args, char *); 1177#endif 1178 count = (bp - thisline) + strlen(mesg) + 2 + 1; 1179 emalloc(buf, char *, count, "yyerror"); 1180 1181 bp = buf; 1182 1183 if (lexptr != NULL) { 1184 scan = thisline; 1185 while (scan < lexeme) 1186 if (*scan++ == '\t') 1187 *bp++ = '\t'; 1188 else 1189 *bp++ = ' '; 1190 *bp++ = '^'; 1191 *bp++ = ' '; 1192 } 1193 strcpy(bp, mesg); 1194 err("", buf, args); 1195 va_end(args); 1196 free(buf); 1197} 1198 1199/* get_src_buf --- read the next buffer of source program */ 1200 1201static char * 1202get_src_buf() 1203{ 1204 static int samefile = FALSE; 1205 static int nextfile = 0; 1206 static char *buf = NULL; 1207 static int fd; 1208 int n; 1209 register char *scan; 1210 static size_t len = 0; 1211 static int did_newline = FALSE; 1212 int newfile; 1213 struct stat sbuf; 1214 1215# define SLOP 128 /* enough space to hold most source lines */ 1216 1217again: 1218 newfile = FALSE; 1219 if (nextfile > numfiles) 1220 return NULL; 1221 1222 if (srcfiles[nextfile].stype == CMDLINE) { 1223 if (len == 0) { 1224 len = strlen(srcfiles[nextfile].val); 1225 if (len == 0) { 1226 /* 1227 * Yet Another Special case: 1228 * gawk '' /path/name 1229 * Sigh. 1230 */ 1231 static int warned = FALSE; 1232 1233 if (do_lint && ! warned) { 1234 warned = TRUE; 1235 lintwarn(_("empty program text on command line")); 1236 } 1237 ++nextfile; 1238 goto again; 1239 } 1240 sourceline = 1; 1241 lexptr = lexptr_begin = srcfiles[nextfile].val; 1242 lexend = lexptr + len; 1243 } else if (! did_newline && *(lexptr-1) != '\n') { 1244 /* 1245 * The following goop is to ensure that the source 1246 * ends with a newline and that the entire current 1247 * line is available for error messages. 1248 */ 1249 int offset; 1250 1251 did_newline = TRUE; 1252 offset = lexptr - lexeme; 1253 for (scan = lexeme; scan > lexptr_begin; scan--) 1254 if (*scan == '\n') { 1255 scan++; 1256 break; 1257 } 1258 len = lexptr - scan; 1259 emalloc(buf, char *, len+1, "get_src_buf"); 1260 memcpy(buf, scan, len); 1261 thisline = buf; 1262 lexptr = buf + len; 1263 *lexptr = '\n'; 1264 lexeme = lexptr - offset; 1265 lexptr_begin = buf; 1266 lexend = lexptr + 1; 1267 } else { 1268 len = 0; 1269 lexeme = lexptr = lexptr_begin = NULL; 1270 } 1271 if (lexptr == NULL && ++nextfile <= numfiles) 1272 goto again; 1273 return lexptr; 1274 } 1275 if (! samefile) { 1276 source = srcfiles[nextfile].val; 1277 if (source == NULL) { 1278 if (buf != NULL) { 1279 free(buf); 1280 buf = NULL; 1281 } 1282 len = 0; 1283 return lexeme = lexptr = lexptr_begin = NULL; 1284 } 1285 fd = pathopen(source); 1286 if (fd <= INVALID_HANDLE) { 1287 char *in; 1288 1289 /* suppress file name and line no. in error mesg */ 1290 in = source; 1291 source = NULL; 1292 fatal(_("can't open source file `%s' for reading (%s)"), 1293 in, strerror(errno)); 1294 } 1295 len = optimal_bufsize(fd, & sbuf); 1296 newfile = TRUE; 1297 if (buf != NULL) 1298 free(buf); 1299 emalloc(buf, char *, len + SLOP, "get_src_buf"); 1300 lexptr_begin = buf + SLOP; 1301 samefile = TRUE; 1302 sourceline = 1; 1303 } else { 1304 /* 1305 * Here, we retain the current source line (up to length SLOP) 1306 * in the beginning of the buffer that was overallocated above 1307 */ 1308 int offset; 1309 int linelen; 1310 1311 offset = lexptr - lexeme; 1312 for (scan = lexeme; scan > lexptr_begin; scan--) 1313 if (*scan == '\n') { 1314 scan++; 1315 break; 1316 } 1317 linelen = lexptr - scan; 1318 if (linelen > SLOP) 1319 linelen = SLOP; 1320 thisline = buf + SLOP - linelen; 1321 memcpy(thisline, scan, linelen); 1322 lexeme = buf + SLOP - offset; 1323 lexptr_begin = thisline; 1324 } 1325 n = read(fd, buf + SLOP, len); 1326 if (n == -1) 1327 fatal(_("can't read sourcefile `%s' (%s)"), 1328 source, strerror(errno)); 1329 if (n == 0) { 1330 if (newfile) { 1331 static int warned = FALSE; 1332 1333 if (do_lint && ! warned) { 1334 warned = TRUE; 1335 lintwarn(_("source file `%s' is empty"), source); 1336 } 1337 } 1338 if (fd != fileno(stdin)) /* safety */ 1339 close(fd); 1340 samefile = FALSE; 1341 nextfile++; 1342 if (lexeme) 1343 *lexeme = '\0'; 1344 len = 0; 1345 goto again; 1346 } 1347 lexptr = buf + SLOP; 1348 lexend = lexptr + n; 1349 return buf; 1350} 1351 1352/* tokadd --- add a character to the token buffer */ 1353 1354#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok) 1355 1356/* tokexpand --- grow the token buffer */ 1357 1358char * 1359tokexpand() 1360{ 1361 static int toksize = 60; 1362 int tokoffset; 1363 1364 tokoffset = tok - tokstart; 1365 toksize *= 2; 1366 if (tokstart != NULL) 1367 erealloc(tokstart, char *, toksize, "tokexpand"); 1368 else 1369 emalloc(tokstart, char *, toksize, "tokexpand"); 1370 tokend = tokstart + toksize; 1371 tok = tokstart + tokoffset; 1372 return tok; 1373} 1374 1375/* nextc --- get the next input character */ 1376 1377#ifdef MBS_SUPPORT 1378 1379static int 1380nextc(void) 1381{ 1382 if (gawk_mb_cur_max > 1) { 1383 /* Update the buffer index. */ 1384 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 : 1385 cur_ring_idx + 1; 1386 1387 /* Did we already check the current character? */ 1388 if (cur_char_ring[cur_ring_idx] == 0) { 1389 /* No, we need to check the next character on the buffer. */ 1390 int idx, work_ring_idx = cur_ring_idx; 1391 mbstate_t tmp_state; 1392 size_t mbclen; 1393 1394 if (!lexptr || lexptr >= lexend) 1395 if (!get_src_buf()) { 1396 return EOF; 1397 } 1398 1399 for (idx = 0 ; lexptr + idx < lexend ; idx++) { 1400 tmp_state = cur_mbstate; 1401 mbclen = mbrlen(lexptr, idx + 1, &tmp_state); 1402 1403 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) { 1404 /* It is a singlebyte character, non-complete multibyte 1405 character or EOF. We treat it as a singlebyte 1406 character. */ 1407 cur_char_ring[work_ring_idx] = 1; 1408 break; 1409 } else if (mbclen == (size_t)-2) { 1410 /* It is not a complete multibyte character. */ 1411 cur_char_ring[work_ring_idx] = idx + 1; 1412 } else { 1413 /* mbclen > 1 */ 1414 cur_char_ring[work_ring_idx] = mbclen; 1415 break; 1416 } 1417 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? 1418 0 : work_ring_idx + 1; 1419 } 1420 cur_mbstate = tmp_state; 1421 1422 /* Put a mark on the position on which we write next character. */ 1423 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? 1424 0 : work_ring_idx + 1; 1425 cur_char_ring[work_ring_idx] = 0; 1426 } 1427 1428 return (int) (unsigned char) *lexptr++; 1429 } 1430 else { 1431 int c; 1432 1433 if (lexptr && lexptr < lexend) 1434 c = (int) (unsigned char) *lexptr++; 1435 else if (get_src_buf()) 1436 c = (int) (unsigned char) *lexptr++; 1437 else 1438 c = EOF; 1439 1440 return c; 1441 } 1442} 1443 1444#else /* MBS_SUPPORT */ 1445 1446#if GAWKDEBUG 1447int 1448nextc(void) 1449{ 1450 int c; 1451 1452 if (lexptr && lexptr < lexend) 1453 c = (int) (unsigned char) *lexptr++; 1454 else if (get_src_buf()) 1455 c = (int) (unsigned char) *lexptr++; 1456 else 1457 c = EOF; 1458 1459 return c; 1460} 1461#else 1462#define nextc() ((lexptr && lexptr < lexend) ? \ 1463 ((int) (unsigned char) *lexptr++) : \ 1464 (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \ 1465 ) 1466#endif 1467 1468#endif /* MBS_SUPPORT */ 1469 1470/* pushback --- push a character back on the input */ 1471 1472#ifdef MBS_SUPPORT 1473 1474static void 1475pushback(void) 1476{ 1477 if (gawk_mb_cur_max > 1) { 1478 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : 1479 cur_ring_idx - 1; 1480 (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); 1481 } else 1482 (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); 1483} 1484 1485#else 1486 1487#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) 1488 1489#endif /* MBS_SUPPORT */ 1490 1491/* allow_newline --- allow newline after &&, ||, ? and : */ 1492 1493static void 1494allow_newline(void) 1495{ 1496 int c; 1497 1498 for (;;) { 1499 c = nextc(); 1500 if (c == EOF) 1501 break; 1502 if (c == '#') { 1503 while ((c = nextc()) != '\n' && c != EOF) 1504 continue; 1505 if (c == EOF) 1506 break; 1507 } 1508 if (c == '\n') 1509 sourceline++; 1510 if (! ISSPACE(c)) { 1511 pushback(); 1512 break; 1513 } 1514 } 1515} 1516 1517/* yylex --- Read the input and turn it into tokens. */ 1518 1519static int 1520yylex(void) 1521{ 1522 register int c; 1523 int seen_e = FALSE; /* These are for numbers */ 1524 int seen_point = FALSE; 1525 int esc_seen; /* for literal strings */ 1526 int low, mid, high; 1527 static int did_newline = FALSE; 1528 char *tokkey; 1529 static int lasttok = 0, eof_warned = FALSE; 1530 int inhex = FALSE; 1531 int intlstr = FALSE; 1532 1533 if (nextc() == EOF) { 1534 if (lasttok != NEWLINE) { 1535 lasttok = NEWLINE; 1536 if (do_lint && ! eof_warned) { 1537 lintwarn(_("source file does not end in newline")); 1538 eof_warned = TRUE; 1539 } 1540 return NEWLINE; /* fake it */ 1541 } 1542 return 0; 1543 } 1544 pushback(); 1545#if defined OS2 || defined __EMX__ 1546 /* 1547 * added for OS/2's extproc feature of cmd.exe 1548 * (like #! in BSD sh) 1549 */ 1550 if (strncasecmp(lexptr, "extproc ", 8) == 0) { 1551 while (*lexptr && *lexptr != '\n') 1552 lexptr++; 1553 } 1554#endif 1555 lexeme = lexptr; 1556 thisline = NULL; 1557 if (want_regexp) { 1558 int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ 1559 /* 1560 * Counting brackets is non-trivial. [[] is ok, 1561 * and so is [\]], with a point being that /[/]/ as a regexp 1562 * constant has to work. 1563 * 1564 * Do not count [ or ] if either one is preceded by a \. 1565 * A `[' should be counted if 1566 * a) it is the first one so far (in_brack == 0) 1567 * b) it is the `[' in `[:' 1568 * A ']' should be counted if not preceded by a \, since 1569 * it is either closing `:]' or just a plain list. 1570 * According to POSIX, []] is how you put a ] into a set. 1571 * Try to handle that too. 1572 * 1573 * The code for \ handles \[ and \]. 1574 */ 1575 1576 want_regexp = FALSE; 1577 tok = tokstart; 1578 for (;;) { 1579 c = nextc(); 1580#ifdef MBS_SUPPORT 1581 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) 1582#endif 1583 switch (c) { 1584 case '[': 1585 /* one day check for `.' and `=' too */ 1586 if (nextc() == ':' || in_brack == 0) 1587 in_brack++; 1588 pushback(); 1589 break; 1590 case ']': 1591 if (tokstart[0] == '[' 1592 && (tok == tokstart + 1 1593 || (tok == tokstart + 2 1594 && tokstart[1] == '^'))) 1595 /* do nothing */; 1596 else 1597 in_brack--; 1598 break; 1599 case '\\': 1600 if ((c = nextc()) == EOF) { 1601 yyerror(_("unterminated regexp ends with `\\' at end of file")); 1602 goto end_regexp; /* kludge */ 1603 } else if (c == '\n') { 1604 sourceline++; 1605 continue; 1606 } else { 1607 tokadd('\\'); 1608 tokadd(c); 1609 continue; 1610 } 1611 break; 1612 case '/': /* end of the regexp */ 1613 if (in_brack > 0) 1614 break; 1615end_regexp: 1616 tokadd('\0'); 1617 yylval.sval = tokstart; 1618 return lasttok = REGEXP; 1619 case '\n': 1620 pushback(); 1621 yyerror(_("unterminated regexp")); 1622 goto end_regexp; /* kludge */ 1623 case EOF: 1624 yyerror(_("unterminated regexp at end of file")); 1625 goto end_regexp; /* kludge */ 1626 } 1627 tokadd(c); 1628 } 1629 } 1630retry: 1631 while ((c = nextc()) == ' ' || c == '\t') 1632 continue; 1633 1634 lexeme = lexptr ? lexptr - 1 : lexptr; 1635 thisline = NULL; 1636 tok = tokstart; 1637 yylval.nodetypeval = Node_illegal; 1638 1639#ifdef MBS_SUPPORT 1640 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) 1641#endif 1642 switch (c) { 1643 case EOF: 1644 if (lasttok != NEWLINE) { 1645 lasttok = NEWLINE; 1646 if (do_lint && ! eof_warned) { 1647 lintwarn(_("source file does not end in newline")); 1648 eof_warned = TRUE; 1649 } 1650 return NEWLINE; /* fake it */ 1651 } 1652 return 0; 1653 1654 case '\n': 1655 sourceline++; 1656 return lasttok = NEWLINE; 1657 1658 case '#': /* it's a comment */ 1659 while ((c = nextc()) != '\n') { 1660 if (c == EOF) { 1661 if (lasttok != NEWLINE) { 1662 lasttok = NEWLINE; 1663 if (do_lint && ! eof_warned) { 1664 lintwarn( 1665 _("source file does not end in newline")); 1666 eof_warned = TRUE; 1667 } 1668 return NEWLINE; /* fake it */ 1669 } 1670 return 0; 1671 } 1672 } 1673 sourceline++; 1674 return lasttok = NEWLINE; 1675 1676 case '\\': 1677#ifdef RELAXED_CONTINUATION 1678 /* 1679 * This code puports to allow comments and/or whitespace 1680 * after the `\' at the end of a line used for continuation. 1681 * Use it at your own risk. We think it's a bad idea, which 1682 * is why it's not on by default. 1683 */ 1684 if (! do_traditional) { 1685 /* strip trailing white-space and/or comment */ 1686 while ((c = nextc()) == ' ' || c == '\t') 1687 continue; 1688 if (c == '#') { 1689 if (do_lint) 1690 lintwarn( 1691 _("use of `\\ #...' line continuation is not portable")); 1692 while ((c = nextc()) != '\n') 1693 if (c == EOF) 1694 break; 1695 } 1696 pushback(); 1697 } 1698#endif /* RELAXED_CONTINUATION */ 1699 if (nextc() == '\n') { 1700 sourceline++; 1701 goto retry; 1702 } else { 1703 yyerror(_("backslash not last character on line")); 1704 exit(1); 1705 } 1706 break; 1707 1708 case ':': 1709 case '?': 1710 if (! do_posix) 1711 allow_newline(); 1712 return lasttok = c; 1713 1714 /* 1715 * in_parens is undefined unless we are parsing a print 1716 * statement (in_print), but why bother with a check? 1717 */ 1718 case ')': 1719 in_parens--; 1720 return lasttok = c; 1721 1722 case '(': 1723 in_parens++; 1724 /* FALL THROUGH */ 1725 case '$': 1726 case ';': 1727 case '{': 1728 case ',': 1729 case '[': 1730 case ']': 1731 return lasttok = c; 1732 1733 case '*': 1734 if ((c = nextc()) == '=') { 1735 yylval.nodetypeval = Node_assign_times; 1736 return lasttok = ASSIGNOP; 1737 } else if (do_posix) { 1738 pushback(); 1739 return lasttok = '*'; 1740 } else if (c == '*') { 1741 /* make ** and **= aliases for ^ and ^= */ 1742 static int did_warn_op = FALSE, did_warn_assgn = FALSE; 1743 1744 if (nextc() == '=') { 1745 if (! did_warn_assgn) { 1746 did_warn_assgn = TRUE; 1747 if (do_lint) 1748 lintwarn(_("POSIX does not allow operator `**='")); 1749 if (do_lint_old) 1750 warning(_("old awk does not support operator `**='")); 1751 } 1752 yylval.nodetypeval = Node_assign_exp; 1753 return ASSIGNOP; 1754 } else { 1755 pushback(); 1756 if (! did_warn_op) { 1757 did_warn_op = TRUE; 1758 if (do_lint) 1759 lintwarn(_("POSIX does not allow operator `**'")); 1760 if (do_lint_old) 1761 warning(_("old awk does not support operator `**'")); 1762 } 1763 return lasttok = '^'; 1764 } 1765 } 1766 pushback(); 1767 return lasttok = '*'; 1768 1769 case '/': 1770 if (nextc() == '=') { 1771 pushback(); 1772 return lasttok = SLASH_BEFORE_EQUAL; 1773 } 1774 pushback(); 1775 return lasttok = '/'; 1776 1777 case '%': 1778 if (nextc() == '=') { 1779 yylval.nodetypeval = Node_assign_mod; 1780 return lasttok = ASSIGNOP; 1781 } 1782 pushback(); 1783 return lasttok = '%'; 1784 1785 case '^': 1786 { 1787 static int did_warn_op = FALSE, did_warn_assgn = FALSE; 1788 1789 if (nextc() == '=') { 1790 if (do_lint_old && ! did_warn_assgn) { 1791 did_warn_assgn = TRUE; 1792 warning(_("operator `^=' is not supported in old awk")); 1793 } 1794 yylval.nodetypeval = Node_assign_exp; 1795 return lasttok = ASSIGNOP; 1796 } 1797 pushback(); 1798 if (do_lint_old && ! did_warn_op) { 1799 did_warn_op = TRUE; 1800 warning(_("operator `^' is not supported in old awk")); 1801 } 1802 return lasttok = '^'; 1803 } 1804 1805 case '+': 1806 if ((c = nextc()) == '=') { 1807 yylval.nodetypeval = Node_assign_plus; 1808 return lasttok = ASSIGNOP; 1809 } 1810 if (c == '+') 1811 return lasttok = INCREMENT; 1812 pushback(); 1813 return lasttok = '+'; 1814 1815 case '!': 1816 if ((c = nextc()) == '=') { 1817 yylval.nodetypeval = Node_notequal; 1818 return lasttok = RELOP; 1819 } 1820 if (c == '~') { 1821 yylval.nodetypeval = Node_nomatch; 1822 return lasttok = MATCHOP; 1823 } 1824 pushback(); 1825 return lasttok = '!'; 1826 1827 case '<': 1828 if (nextc() == '=') { 1829 yylval.nodetypeval = Node_leq; 1830 return lasttok = RELOP; 1831 } 1832 yylval.nodetypeval = Node_less; 1833 pushback(); 1834 return lasttok = '<'; 1835 1836 case '=': 1837 if (nextc() == '=') { 1838 yylval.nodetypeval = Node_equal; 1839 return lasttok = RELOP; 1840 } 1841 yylval.nodetypeval = Node_assign; 1842 pushback(); 1843 return lasttok = ASSIGN; 1844 1845 case '>': 1846 if ((c = nextc()) == '=') { 1847 yylval.nodetypeval = Node_geq; 1848 return lasttok = RELOP; 1849 } else if (c == '>') { 1850 yylval.nodetypeval = Node_redirect_append; 1851 return lasttok = IO_OUT; 1852 } 1853 pushback(); 1854 if (in_print && in_parens == 0) { 1855 yylval.nodetypeval = Node_redirect_output; 1856 return lasttok = IO_OUT; 1857 } 1858 yylval.nodetypeval = Node_greater; 1859 return lasttok = '>'; 1860 1861 case '~': 1862 yylval.nodetypeval = Node_match; 1863 return lasttok = MATCHOP; 1864 1865 case '}': 1866 /* 1867 * Added did newline stuff. Easier than 1868 * hacking the grammar. 1869 */ 1870 if (did_newline) { 1871 did_newline = FALSE; 1872 return lasttok = c; 1873 } 1874 did_newline++; 1875 --lexptr; /* pick up } next time */ 1876 return lasttok = NEWLINE; 1877 1878 case '"': 1879 string: 1880 esc_seen = FALSE; 1881 while ((c = nextc()) != '"') { 1882 if (c == '\n') { 1883 pushback(); 1884 yyerror(_("unterminated string")); 1885 exit(1); 1886 } 1887#ifdef MBS_SUPPORT 1888 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) 1889#endif 1890 if (c == '\\') { 1891 c = nextc(); 1892 if (c == '\n') { 1893 sourceline++; 1894 continue; 1895 } 1896 esc_seen = TRUE; 1897 tokadd('\\'); 1898 } 1899 if (c == EOF) { 1900 pushback(); 1901 yyerror(_("unterminated string")); 1902 exit(1); 1903 } 1904 tokadd(c); 1905 } 1906 yylval.nodeval = make_str_node(tokstart, 1907 tok - tokstart, esc_seen ? SCAN : 0); 1908 yylval.nodeval->flags |= PERM; 1909 if (intlstr) { 1910 yylval.nodeval->flags |= INTLSTR; 1911 intlstr = FALSE; 1912 if (do_intl) 1913 dumpintlstr(yylval.nodeval->stptr, 1914 yylval.nodeval->stlen); 1915 } 1916 return lasttok = YSTRING; 1917 1918 case '-': 1919 if ((c = nextc()) == '=') { 1920 yylval.nodetypeval = Node_assign_minus; 1921 return lasttok = ASSIGNOP; 1922 } 1923 if (c == '-') 1924 return lasttok = DECREMENT; 1925 pushback(); 1926 return lasttok = '-'; 1927 1928 case '.': 1929 c = nextc(); 1930 pushback(); 1931 if (! ISDIGIT(c)) 1932 return lasttok = '.'; 1933 else 1934 c = '.'; 1935 /* FALL THROUGH */ 1936 case '0': 1937 case '1': 1938 case '2': 1939 case '3': 1940 case '4': 1941 case '5': 1942 case '6': 1943 case '7': 1944 case '8': 1945 case '9': 1946 /* It's a number */ 1947 for (;;) { 1948 int gotnumber = FALSE; 1949 1950 tokadd(c); 1951 switch (c) { 1952 case 'x': 1953 case 'X': 1954 if (do_traditional) 1955 goto done; 1956 if (tok == tokstart + 2) 1957 inhex = TRUE; 1958 break; 1959 case '.': 1960 if (seen_point) { 1961 gotnumber = TRUE; 1962 break; 1963 } 1964 seen_point = TRUE; 1965 break; 1966 case 'e': 1967 case 'E': 1968 if (inhex) 1969 break; 1970 if (seen_e) { 1971 gotnumber = TRUE; 1972 break; 1973 } 1974 seen_e = TRUE; 1975 if ((c = nextc()) == '-' || c == '+') 1976 tokadd(c); 1977 else 1978 pushback(); 1979 break; 1980 case 'a': 1981 case 'A': 1982 case 'b': 1983 case 'B': 1984 case 'c': 1985 case 'C': 1986 case 'D': 1987 case 'd': 1988 case 'f': 1989 case 'F': 1990 if (do_traditional || ! inhex) 1991 goto done; 1992 /* fall through */ 1993 case '0': 1994 case '1': 1995 case '2': 1996 case '3': 1997 case '4': 1998 case '5': 1999 case '6': 2000 case '7': 2001 case '8': 2002 case '9': 2003 break; 2004 default: 2005 done: 2006 gotnumber = TRUE; 2007 } 2008 if (gotnumber) 2009 break; 2010 c = nextc(); 2011 } 2012 if (c != EOF) 2013 pushback(); 2014 else if (do_lint && ! eof_warned) { 2015 lintwarn(_("source file does not end in newline")); 2016 eof_warned = TRUE; 2017 } 2018 tokadd('\0'); 2019 if (! do_traditional && isnondecimal(tokstart)) { 2020 static short warned = FALSE; 2021 if (do_lint && ! warned) { 2022 warned = TRUE; 2023 lintwarn("numeric constant `%.*s' treated as octal or hexadecimal", 2024 strlen(tokstart)-1, tokstart); 2025 } 2026 yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); 2027 } else 2028 yylval.nodeval = make_number(atof(tokstart)); 2029 yylval.nodeval->flags |= PERM; 2030 return lasttok = YNUMBER; 2031 2032 case '&': 2033 if ((c = nextc()) == '&') { 2034 yylval.nodetypeval = Node_and; 2035 allow_newline(); 2036 return lasttok = LEX_AND; 2037 } 2038 pushback(); 2039 return lasttok = '&'; 2040 2041 case '|': 2042 if ((c = nextc()) == '|') { 2043 yylval.nodetypeval = Node_or; 2044 allow_newline(); 2045 return lasttok = LEX_OR; 2046 } else if (! do_traditional && c == '&') { 2047 yylval.nodetypeval = Node_redirect_twoway; 2048 return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN); 2049 } 2050 pushback(); 2051 if (in_print && in_parens == 0) { 2052 yylval.nodetypeval = Node_redirect_pipe; 2053 return lasttok = IO_OUT; 2054 } else { 2055 yylval.nodetypeval = Node_redirect_pipein; 2056 return lasttok = IO_IN; 2057 } 2058 } 2059 2060 if (c != '_' && ! ISALPHA(c)) { 2061 yyerror(_("invalid char '%c' in expression"), c); 2062 exit(1); 2063 } 2064 2065 /* 2066 * Lots of fog here. Consider: 2067 * 2068 * print "xyzzy"$_"foo" 2069 * 2070 * Without the check for ` lasttok != '$'' ', this is parsed as 2071 * 2072 * print "xxyzz" $(_"foo") 2073 * 2074 * With the check, it is "correctly" parsed as three 2075 * string concatenations. Sigh. This seems to be 2076 * "more correct", but this is definitely one of those 2077 * occasions where the interactions are funny. 2078 */ 2079 if (! do_traditional && c == '_' && lasttok != '$') { 2080 if ((c = nextc()) == '"') { 2081 intlstr = TRUE; 2082 goto string; 2083 } 2084 pushback(); 2085 c = '_'; 2086 } 2087 2088 /* it's some type of name-type-thing. Find its length. */ 2089 tok = tokstart; 2090 while (is_identchar(c)) { 2091 tokadd(c); 2092 c = nextc(); 2093 } 2094 tokadd('\0'); 2095 emalloc(tokkey, char *, tok - tokstart, "yylex"); 2096 memcpy(tokkey, tokstart, tok - tokstart); 2097 if (c != EOF) 2098 pushback(); 2099 else if (do_lint && ! eof_warned) { 2100 lintwarn(_("source file does not end in newline")); 2101 eof_warned = TRUE; 2102 } 2103 2104 /* See if it is a special token. */ 2105 low = 0; 2106 high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; 2107 while (low <= high) { 2108 int i; 2109 2110 mid = (low + high) / 2; 2111 c = *tokstart - tokentab[mid].operator[0]; 2112 i = c ? c : strcmp(tokstart, tokentab[mid].operator); 2113 2114 if (i < 0) /* token < mid */ 2115 high = mid - 1; 2116 else if (i > 0) /* token > mid */ 2117 low = mid + 1; 2118 else { 2119 if (do_lint) { 2120 if (tokentab[mid].flags & GAWKX) 2121 lintwarn(_("`%s' is a gawk extension"), 2122 tokentab[mid].operator); 2123 if (tokentab[mid].flags & RESX) 2124 lintwarn(_("`%s' is a Bell Labs extension"), 2125 tokentab[mid].operator); 2126 if (tokentab[mid].flags & NOT_POSIX) 2127 lintwarn(_("POSIX does not allow `%s'"), 2128 tokentab[mid].operator); 2129 } 2130 if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) 2131 warning(_("`%s' is not supported in old awk"), 2132 tokentab[mid].operator); 2133 if ((do_traditional && (tokentab[mid].flags & GAWKX)) 2134 || (do_posix && (tokentab[mid].flags & NOT_POSIX))) 2135 break; 2136 if (tokentab[mid].class == LEX_BUILTIN 2137 || tokentab[mid].class == LEX_LENGTH 2138 ) 2139 yylval.lval = mid; 2140 else 2141 yylval.nodetypeval = tokentab[mid].value; 2142 2143 free(tokkey); 2144 return lasttok = tokentab[mid].class; 2145 } 2146 } 2147 2148 yylval.sval = tokkey; 2149 if (*lexptr == '(') 2150 return lasttok = FUNC_CALL; 2151 else { 2152 static short goto_warned = FALSE; 2153 2154#define SMART_ALECK 1 2155 if (SMART_ALECK && do_lint 2156 && ! goto_warned && strcasecmp(tokkey, "goto") == 0) { 2157 goto_warned = TRUE; 2158 lintwarn(_("`goto' considered harmful!\n")); 2159 } 2160 return lasttok = NAME; 2161 } 2162} 2163 2164/* node_common --- common code for allocating a new node */ 2165 2166static NODE * 2167node_common(NODETYPE op) 2168{ 2169 register NODE *r; 2170 2171 getnode(r); 2172 r->type = op; 2173 r->flags = MALLOC; 2174 /* if lookahead is NL, lineno is 1 too high */ 2175 if (lexeme && *lexeme == '\n') 2176 r->source_line = sourceline - 1; 2177 else 2178 r->source_line = sourceline; 2179 r->source_file = source; 2180 return r; 2181} 2182 2183/* node --- allocates a node with defined lnode and rnode. */ 2184 2185NODE * 2186node(NODE *left, NODETYPE op, NODE *right) 2187{ 2188 register NODE *r; 2189 2190 r = node_common(op); 2191 r->lnode = left; 2192 r->rnode = right; 2193 return r; 2194} 2195 2196/* snode --- allocate a node with defined subnode and builtin for builtin 2197 functions. Checks for arg. count and supplies defaults where 2198 possible. */ 2199 2200static NODE * 2201snode(NODE *subn, NODETYPE op, int idx) 2202{ 2203 register NODE *r; 2204 register NODE *n; 2205 int nexp = 0; 2206 int args_allowed; 2207 2208 r = node_common(op); 2209 2210 /* traverse expression list to see how many args. given */ 2211 for (n = subn; n != NULL; n = n->rnode) { 2212 nexp++; 2213 if (nexp > 5) 2214 break; 2215 } 2216 2217 /* check against how many args. are allowed for this builtin */ 2218 args_allowed = tokentab[idx].flags & ARGS; 2219 if (args_allowed && (args_allowed & A(nexp)) == 0) 2220 fatal(_("%d is invalid as number of arguments for %s"), 2221 nexp, tokentab[idx].operator); 2222 2223 r->builtin = tokentab[idx].ptr; 2224 2225 /* special case processing for a few builtins */ 2226 if (nexp == 0 && r->builtin == do_length) { 2227 subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), 2228 Node_expression_list, 2229 (NODE *) NULL); 2230 } else if (r->builtin == do_match) { 2231 static short warned = FALSE; 2232 2233 if (subn->rnode->lnode->type != Node_regex) 2234 subn->rnode->lnode = mk_rexp(subn->rnode->lnode); 2235 2236 if (subn->rnode->rnode != NULL) { /* 3rd argument there */ 2237 if (do_lint && ! warned) { 2238 warned = TRUE; 2239 lintwarn(_("match: third argument is a gawk extension")); 2240 } 2241 if (do_traditional) 2242 fatal(_("match: third argument is a gawk extension")); 2243 } 2244 } else if (r->builtin == do_sub || r->builtin == do_gsub) { 2245 if (subn->lnode->type != Node_regex) 2246 subn->lnode = mk_rexp(subn->lnode); 2247 if (nexp == 2) 2248 append_right(subn, node(node(make_number(0.0), 2249 Node_field_spec, 2250 (NODE *) NULL), 2251 Node_expression_list, 2252 (NODE *) NULL)); 2253 else if (subn->rnode->rnode->lnode->type == Node_val) { 2254 if (do_lint) 2255 lintwarn(_("%s: string literal as last arg of substitute has no effect"), 2256 (r->builtin == do_sub) ? "sub" : "gsub"); 2257 } else if (! isassignable(subn->rnode->rnode->lnode)) { 2258 yyerror(_("%s third parameter is not a changeable object"), 2259 (r->builtin == do_sub) ? "sub" : "gsub"); 2260 } 2261 } else if (r->builtin == do_gensub) { 2262 if (subn->lnode->type != Node_regex) 2263 subn->lnode = mk_rexp(subn->lnode); 2264 if (nexp == 3) 2265 append_right(subn, node(node(make_number(0.0), 2266 Node_field_spec, 2267 (NODE *) NULL), 2268 Node_expression_list, 2269 (NODE *) NULL)); 2270 } else if (r->builtin == do_split) { 2271 if (nexp == 2) 2272 append_right(subn, 2273 node(FS_node, Node_expression_list, (NODE *) NULL)); 2274 n = subn->rnode->rnode->lnode; 2275 if (n->type != Node_regex) 2276 subn->rnode->rnode->lnode = mk_rexp(n); 2277 if (nexp == 2) 2278 subn->rnode->rnode->lnode->re_flags |= FS_DFLT; 2279 } else if (r->builtin == do_close) { 2280 static short warned = FALSE; 2281 2282 if ( nexp == 2) { 2283 if (do_lint && nexp == 2 && ! warned) { 2284 warned = TRUE; 2285 lintwarn(_("close: second argument is a gawk extension")); 2286 } 2287 if (do_traditional) 2288 fatal(_("close: second argument is a gawk extension")); 2289 } 2290 } else if (do_intl /* --gen-po */ 2291 && r->builtin == do_dcgettext /* dcgettext(...) */ 2292 && subn->lnode->type == Node_val /* 1st arg is constant */ 2293 && (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */ 2294 /* ala xgettext, dcgettext("some string" ...) dumps the string */ 2295 NODE *str = subn->lnode; 2296 2297 if ((str->flags & INTLSTR) != 0) 2298 warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore")); 2299 /* don't dump it, the lexer already did */ 2300 else 2301 dumpintlstr(str->stptr, str->stlen); 2302 } else if (do_intl /* --gen-po */ 2303 && r->builtin == do_dcngettext /* dcngettext(...) */ 2304 && subn->lnode->type == Node_val /* 1st arg is constant */ 2305 && (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */ 2306 && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */ 2307 && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */ 2308 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */ 2309 NODE *str1 = subn->lnode; 2310 NODE *str2 = subn->rnode->lnode; 2311 2312 if (((str1->flags | str2->flags) & INTLSTR) != 0) 2313 warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore")); 2314 else 2315 dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen); 2316 } 2317 2318 r->subnode = subn; 2319 if (r->builtin == do_sprintf) { 2320 count_args(r); 2321 r->lnode->printf_count = r->printf_count; /* hack */ 2322 } 2323 return r; 2324} 2325 2326/* make_for_loop --- build a for loop */ 2327 2328static NODE * 2329make_for_loop(NODE *init, NODE *cond, NODE *incr) 2330{ 2331 register FOR_LOOP_HEADER *r; 2332 NODE *n; 2333 2334 emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); 2335 getnode(n); 2336 n->type = Node_illegal; 2337 r->init = init; 2338 r->cond = cond; 2339 r->incr = incr; 2340 n->sub.nodep.r.hd = r; 2341 return n; 2342} 2343 2344/* dup_parms --- return TRUE if there are duplicate parameters */ 2345 2346static int 2347dup_parms(NODE *func) 2348{ 2349 register NODE *np; 2350 const char *fname, **names; 2351 int count, i, j, dups; 2352 NODE *params; 2353 2354 if (func == NULL) /* error earlier */ 2355 return TRUE; 2356 2357 fname = func->param; 2358 count = func->param_cnt; 2359 params = func->rnode; 2360 2361 if (count == 0) /* no args, no problem */ 2362 return FALSE; 2363 2364 if (params == NULL) /* error earlier */ 2365 return TRUE; 2366 2367 emalloc(names, const char **, count * sizeof(char *), "dup_parms"); 2368 2369 i = 0; 2370 for (np = params; np != NULL; np = np->rnode) { 2371 if (np->param == NULL) { /* error earlier, give up, go home */ 2372 free(names); 2373 return TRUE; 2374 } 2375 names[i++] = np->param; 2376 } 2377 2378 dups = 0; 2379 for (i = 1; i < count; i++) { 2380 for (j = 0; j < i; j++) { 2381 if (strcmp(names[i], names[j]) == 0) { 2382 dups++; 2383 error( 2384 _("function `%s': parameter #%d, `%s', duplicates parameter #%d"), 2385 fname, i+1, names[j], j+1); 2386 } 2387 } 2388 } 2389 2390 free(names); 2391 return (dups > 0 ? TRUE : FALSE); 2392} 2393 2394/* parms_shadow --- check if parameters shadow globals */ 2395 2396static int 2397parms_shadow(const char *fname, NODE *func) 2398{ 2399 int count, i; 2400 int ret = FALSE; 2401 2402 if (fname == NULL || func == NULL) /* error earlier */ 2403 return FALSE; 2404 2405 count = func->lnode->param_cnt; 2406 2407 if (count == 0) /* no args, no problem */ 2408 return FALSE; 2409 2410 /* 2411 * Use warning() and not lintwarn() so that can warn 2412 * about all shadowed parameters. 2413 */ 2414 for (i = 0; i < count; i++) { 2415 if (lookup(func->parmlist[i]) != NULL) { 2416 warning( 2417 _("function `%s': parameter `%s' shadows global variable"), 2418 fname, func->parmlist[i]); 2419 ret = TRUE; 2420 } 2421 } 2422 2423 return ret; 2424} 2425 2426/* 2427 * install: 2428 * Install a name in the symbol table, even if it is already there. 2429 * Caller must check against redefinition if that is desired. 2430 */ 2431 2432NODE * 2433install(char *name, NODE *value) 2434{ 2435 register NODE *hp; 2436 register size_t len; 2437 register int bucket; 2438 2439 var_count++; 2440 len = strlen(name); 2441 bucket = hash(name, len, (unsigned long) HASHSIZE); 2442 getnode(hp); 2443 hp->type = Node_hashnode; 2444 hp->hnext = variables[bucket]; 2445 variables[bucket] = hp; 2446 hp->hlength = len; 2447 hp->hvalue = value; 2448 hp->hname = name; 2449 hp->hvalue->vname = name; 2450 return hp->hvalue; 2451} 2452 2453/* lookup --- find the most recent hash node for name installed by install */ 2454 2455NODE * 2456lookup(const char *name) 2457{ 2458 register NODE *bucket; 2459 register size_t len; 2460 2461 len = strlen(name); 2462 for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; 2463 bucket != NULL; bucket = bucket->hnext) 2464 if (bucket->hlength == len && STREQN(bucket->hname, name, len)) 2465 return bucket->hvalue; 2466 2467 return NULL; 2468} 2469 2470/* var_comp --- compare two variable names */ 2471 2472static int 2473var_comp(const void *v1, const void *v2) 2474{ 2475 const NODE *const *npp1, *const *npp2; 2476 const NODE *n1, *n2; 2477 int minlen; 2478 2479 npp1 = (const NODE *const *) v1; 2480 npp2 = (const NODE *const *) v2; 2481 n1 = *npp1; 2482 n2 = *npp2; 2483 2484 if (n1->hlength > n2->hlength) 2485 minlen = n1->hlength; 2486 else 2487 minlen = n2->hlength; 2488 2489 return strncmp(n1->hname, n2->hname, minlen); 2490} 2491 2492/* valinfo --- dump var info */ 2493 2494static void 2495valinfo(NODE *n, FILE *fp) 2496{ 2497 if (n->flags & STRING) { 2498 fprintf(fp, "string ("); 2499 pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); 2500 fprintf(fp, ")\n"); 2501 } else if (n->flags & NUMBER) 2502 fprintf(fp, "number (%.17g)\n", n->numbr); 2503 else if (n->flags & STRCUR) { 2504 fprintf(fp, "string value ("); 2505 pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); 2506 fprintf(fp, ")\n"); 2507 } else if (n->flags & NUMCUR) 2508 fprintf(fp, "number value (%.17g)\n", n->numbr); 2509 else 2510 fprintf(fp, "?? flags %s\n", flags2str(n->flags)); 2511} 2512 2513 2514/* dump_vars --- dump the symbol table */ 2515 2516void 2517dump_vars(const char *fname) 2518{ 2519 int i, j; 2520 NODE **table; 2521 NODE *p; 2522 FILE *fp; 2523 2524 emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars"); 2525 2526 if (fname == NULL) 2527 fp = stderr; 2528 else if ((fp = fopen(fname, "w")) == NULL) { 2529 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); 2530 warning(_("sending profile to standard error")); 2531 fp = stderr; 2532 } 2533 2534 for (i = j = 0; i < HASHSIZE; i++) 2535 for (p = variables[i]; p != NULL; p = p->hnext) 2536 table[j++] = p; 2537 2538 assert(j == var_count); 2539 2540 /* Shazzam! */ 2541 qsort(table, j, sizeof(NODE *), var_comp); 2542 2543 for (i = 0; i < j; i++) { 2544 p = table[i]; 2545 if (p->hvalue->type == Node_func) 2546 continue; 2547 fprintf(fp, "%.*s: ", (int) p->hlength, p->hname); 2548 if (p->hvalue->type == Node_var_array) 2549 fprintf(fp, "array, %ld elements\n", p->hvalue->table_size); 2550 else if (p->hvalue->type == Node_var_new) 2551 fprintf(fp, "unused variable\n"); 2552 else if (p->hvalue->type == Node_var) 2553 valinfo(p->hvalue->var_value, fp); 2554 else { 2555 NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); 2556 2557 valinfo(*lhs, fp); 2558 } 2559 } 2560 2561 if (fp != stderr && fclose(fp) != 0) 2562 warning(_("%s: close failed (%s)"), fname, strerror(errno)); 2563 2564 free(table); 2565} 2566 2567/* release_all_vars --- free all variable memory */ 2568 2569void 2570release_all_vars() 2571{ 2572 int i; 2573 NODE *p, *next; 2574 2575 for (i = 0; i < HASHSIZE; i++) 2576 for (p = variables[i]; p != NULL; p = next) { 2577 next = p->hnext; 2578 2579 if (p->hvalue->type == Node_func) 2580 continue; 2581 else if (p->hvalue->type == Node_var_array) 2582 assoc_clear(p->hvalue); 2583 else if (p->hvalue->type != Node_var_new) { 2584 NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); 2585 2586 unref(*lhs); 2587 } 2588 unref(p); 2589 } 2590} 2591 2592/* finfo --- for use in comparison and sorting of function names */ 2593 2594struct finfo { 2595 const char *name; 2596 size_t nlen; 2597 NODE *func; 2598}; 2599 2600/* fcompare --- comparison function for qsort */ 2601 2602static int 2603fcompare(const void *p1, const void *p2) 2604{ 2605 const struct finfo *f1, *f2; 2606 int minlen; 2607 2608 f1 = (const struct finfo *) p1; 2609 f2 = (const struct finfo *) p2; 2610 2611 if (f1->nlen > f2->nlen) 2612 minlen = f2->nlen; 2613 else 2614 minlen = f1->nlen; 2615 2616 return strncmp(f1->name, f2->name, minlen); 2617} 2618 2619/* dump_funcs --- print all functions */ 2620 2621void 2622dump_funcs() 2623{ 2624 int i, j; 2625 NODE *p; 2626 static struct finfo *tab = NULL; 2627 2628 if (func_count == 0) 2629 return; 2630 2631 /* 2632 * Walk through symbol table countng functions. 2633 * Could be more than func_count if there are 2634 * extension functions. 2635 */ 2636 for (i = j = 0; i < HASHSIZE; i++) { 2637 for (p = variables[i]; p != NULL; p = p->hnext) { 2638 if (p->hvalue->type == Node_func) { 2639 j++; 2640 } 2641 } 2642 } 2643 2644 if (tab == NULL) 2645 emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs"); 2646 2647 /* now walk again, copying info */ 2648 for (i = j = 0; i < HASHSIZE; i++) { 2649 for (p = variables[i]; p != NULL; p = p->hnext) { 2650 if (p->hvalue->type == Node_func) { 2651 tab[j].name = p->hname; 2652 tab[j].nlen = p->hlength; 2653 tab[j].func = p->hvalue; 2654 j++; 2655 } 2656 } 2657 } 2658 2659 2660 /* Shazzam! */ 2661 qsort(tab, j, sizeof(struct finfo), fcompare); 2662 2663 for (i = 0; i < j; i++) 2664 pp_func(tab[i].name, tab[i].nlen, tab[i].func); 2665 2666 free(tab); 2667} 2668 2669/* shadow_funcs --- check all functions for parameters that shadow globals */ 2670 2671void 2672shadow_funcs() 2673{ 2674 int i, j; 2675 NODE *p; 2676 struct finfo *tab; 2677 static int calls = 0; 2678 int shadow = FALSE; 2679 2680 if (func_count == 0) 2681 return; 2682 2683 if (calls++ != 0) 2684 fatal(_("shadow_funcs() called twice!")); 2685 2686 emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs"); 2687 2688 for (i = j = 0; i < HASHSIZE; i++) { 2689 for (p = variables[i]; p != NULL; p = p->hnext) { 2690 if (p->hvalue->type == Node_func) { 2691 tab[j].name = p->hname; 2692 tab[j].nlen = p->hlength; 2693 tab[j].func = p->hvalue; 2694 j++; 2695 } 2696 } 2697 } 2698 2699 assert(j == func_count); 2700 2701 /* Shazzam! */ 2702 qsort(tab, func_count, sizeof(struct finfo), fcompare); 2703 2704 for (i = 0; i < j; i++) 2705 shadow |= parms_shadow(tab[i].name, tab[i].func); 2706 2707 free(tab); 2708 2709 /* End with fatal if the user requested it. */ 2710 if (shadow && lintfunc != warning) 2711 lintwarn(_("there were shadowed variables.")); 2712} 2713 2714/* 2715 * append_right: 2716 * Add new to the rightmost branch of LIST. This uses n^2 time, so we make 2717 * a simple attempt at optimizing it. 2718 */ 2719 2720static NODE * 2721append_right(NODE *list, NODE *new) 2722{ 2723 register NODE *oldlist; 2724 static NODE *savefront = NULL, *savetail = NULL; 2725 2726 if (list == NULL || new == NULL) 2727 return list; 2728 2729 oldlist = list; 2730 if (savefront == oldlist) 2731 list = savetail; /* Be careful: maybe list->rnode != NULL */ 2732 else 2733 savefront = oldlist; 2734 2735 while (list->rnode != NULL) 2736 list = list->rnode; 2737 savetail = list->rnode = new; 2738 return oldlist; 2739} 2740 2741/* 2742 * append_pattern: 2743 * A wrapper around append_right, used for rule lists. 2744 */ 2745static inline NODE * 2746append_pattern(NODE **list, NODE *patt) 2747{ 2748 NODE *n = node(patt, Node_rule_node, (NODE *) NULL); 2749 2750 if (*list == NULL) 2751 *list = n; 2752 else { 2753 NODE *n1 = node(n, Node_rule_list, (NODE *) NULL); 2754 if ((*list)->type != Node_rule_list) 2755 *list = node(*list, Node_rule_list, n1); 2756 else 2757 (void) append_right(*list, n1); 2758 } 2759 return n; 2760} 2761 2762/* 2763 * func_install: 2764 * check if name is already installed; if so, it had better have Null value, 2765 * in which case def is added as the value. Otherwise, install name with def 2766 * as value. 2767 * 2768 * Extra work, build up and save a list of the parameter names in a table 2769 * and hang it off params->parmlist. This is used to set the `vname' field 2770 * of each function parameter during a function call. See eval.c. 2771 */ 2772 2773static void 2774func_install(NODE *params, NODE *def) 2775{ 2776 NODE *r, *n, *thisfunc; 2777 char **pnames, *names, *sp; 2778 size_t pcount = 0, space = 0; 2779 int i; 2780 2781 /* check for function foo(foo) { ... }. bleah. */ 2782 for (n = params->rnode; n != NULL; n = n->rnode) { 2783 if (strcmp(n->param, params->param) == 0) 2784 fatal(_("function `%s': can't use function name as parameter name"), 2785 params->param); 2786 } 2787 2788 thisfunc = NULL; /* turn off warnings */ 2789 2790 /* symbol table managment */ 2791 pop_var(params, FALSE); 2792 r = lookup(params->param); 2793 if (r != NULL) { 2794 fatal(_("function name `%s' previously defined"), params->param); 2795 } else if (params->param == builtin_func) /* not a valid function name */ 2796 goto remove_params; 2797 2798 /* install the function */ 2799 thisfunc = node(params, Node_func, def); 2800 (void) install(params->param, thisfunc); 2801 2802 /* figure out amount of space to allocate for variable names */ 2803 for (n = params->rnode; n != NULL; n = n->rnode) { 2804 pcount++; 2805 space += strlen(n->param) + 1; 2806 } 2807 2808 /* allocate it and fill it in */ 2809 if (pcount != 0) { 2810 emalloc(names, char *, space, "func_install"); 2811 emalloc(pnames, char **, pcount * sizeof(char *), "func_install"); 2812 sp = names; 2813 for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) { 2814 pnames[i] = sp; 2815 strcpy(sp, n->param); 2816 sp += strlen(n->param) + 1; 2817 } 2818 thisfunc->parmlist = pnames; 2819 } else { 2820 thisfunc->parmlist = NULL; 2821 } 2822 2823 /* update lint table info */ 2824 func_use(params->param, FUNC_DEFINE); 2825 2826 func_count++; /* used by profiling / pretty printer */ 2827 2828remove_params: 2829 /* remove params from symbol table */ 2830 pop_params(params->rnode); 2831} 2832 2833/* pop_var --- remove a variable from the symbol table */ 2834 2835static void 2836pop_var(NODE *np, int freeit) 2837{ 2838 register NODE *bucket, **save; 2839 register size_t len; 2840 char *name; 2841 2842 name = np->param; 2843 len = strlen(name); 2844 save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); 2845 for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { 2846 if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { 2847 var_count--; 2848 *save = bucket->hnext; 2849 freenode(bucket); 2850 if (freeit) 2851 free(np->param); 2852 return; 2853 } 2854 save = &(bucket->hnext); 2855 } 2856} 2857 2858/* pop_params --- remove list of function parameters from symbol table */ 2859 2860/* 2861 * pop parameters out of the symbol table. do this in reverse order to 2862 * avoid reading freed memory if there were duplicated parameters. 2863 */ 2864static void 2865pop_params(NODE *params) 2866{ 2867 if (params == NULL) 2868 return; 2869 pop_params(params->rnode); 2870 pop_var(params, TRUE); 2871} 2872 2873/* make_param --- make NAME into a function parameter */ 2874 2875static NODE * 2876make_param(char *name) 2877{ 2878 NODE *r; 2879 2880 getnode(r); 2881 r->type = Node_param_list; 2882 r->rnode = NULL; 2883 r->param = name; 2884 r->param_cnt = param_counter++; 2885 return (install(name, r)); 2886} 2887 2888static struct fdesc { 2889 char *name; 2890 short used; 2891 short defined; 2892 struct fdesc *next; 2893} *ftable[HASHSIZE]; 2894 2895/* func_use --- track uses and definitions of functions */ 2896 2897static void 2898func_use(const char *name, enum defref how) 2899{ 2900 struct fdesc *fp; 2901 int len; 2902 int ind; 2903 2904 len = strlen(name); 2905 ind = hash(name, len, HASHSIZE); 2906 2907 for (fp = ftable[ind]; fp != NULL; fp = fp->next) { 2908 if (strcmp(fp->name, name) == 0) { 2909 if (how == FUNC_DEFINE) 2910 fp->defined++; 2911 else 2912 fp->used++; 2913 return; 2914 } 2915 } 2916 2917 /* not in the table, fall through to allocate a new one */ 2918 2919 emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); 2920 memset(fp, '\0', sizeof(struct fdesc)); 2921 emalloc(fp->name, char *, len + 1, "func_use"); 2922 strcpy(fp->name, name); 2923 if (how == FUNC_DEFINE) 2924 fp->defined++; 2925 else 2926 fp->used++; 2927 fp->next = ftable[ind]; 2928 ftable[ind] = fp; 2929} 2930 2931/* check_funcs --- verify functions that are called but not defined */ 2932 2933static void 2934check_funcs() 2935{ 2936 struct fdesc *fp, *next; 2937 int i; 2938 2939 for (i = 0; i < HASHSIZE; i++) { 2940 for (fp = ftable[i]; fp != NULL; fp = fp->next) { 2941#ifdef REALLYMEAN 2942 /* making this the default breaks old code. sigh. */ 2943 if (fp->defined == 0) { 2944 error( 2945 _("function `%s' called but never defined"), fp->name); 2946 errcount++; 2947 } 2948#else 2949 if (do_lint && fp->defined == 0) 2950 lintwarn( 2951 _("function `%s' called but never defined"), fp->name); 2952#endif 2953 if (do_lint && fp->used == 0) { 2954 lintwarn(_("function `%s' defined but never called"), 2955 fp->name); 2956 } 2957 } 2958 } 2959 2960 /* now let's free all the memory */ 2961 for (i = 0; i < HASHSIZE; i++) { 2962 for (fp = ftable[i]; fp != NULL; fp = next) { 2963 next = fp->next; 2964 free(fp->name); 2965 free(fp); 2966 } 2967 } 2968} 2969 2970/* param_sanity --- look for parameters that are regexp constants */ 2971 2972static void 2973param_sanity(NODE *arglist) 2974{ 2975 NODE *argp, *arg; 2976 int i; 2977 2978 for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { 2979 arg = argp->lnode; 2980 if (arg->type == Node_regex) 2981 warning(_("regexp constant for parameter #%d yields boolean value"), i); 2982 } 2983} 2984 2985/* variable --- make sure NAME is in the symbol table */ 2986 2987NODE * 2988variable(char *name, int can_free, NODETYPE type) 2989{ 2990 register NODE *r; 2991 2992 if ((r = lookup(name)) != NULL) { 2993 if (r->type == Node_func) 2994 fatal(_("function `%s' called with space between name and `(',\n%s"), 2995 r->vname, 2996 _("or used as a variable or an array")); 2997 } else { 2998 /* not found */ 2999 if (! do_traditional && STREQ(name, "PROCINFO")) 3000 r = load_procinfo(); 3001 else if (STREQ(name, "ENVIRON")) 3002 r = load_environ(); 3003 else { 3004 /* 3005 * This is the only case in which we may not free the string. 3006 */ 3007 NODE *n; 3008 3009 if (type == Node_var) 3010 n = node(Nnull_string, type, (NODE *) NULL); 3011 else 3012 n = node((NODE *) NULL, type, (NODE *) NULL); 3013 3014 return install(name, n); 3015 } 3016 } 3017 if (can_free) 3018 free(name); 3019 return r; 3020} 3021 3022/* mk_rexp --- make a regular expression constant */ 3023 3024static NODE * 3025mk_rexp(NODE *exp) 3026{ 3027 NODE *n; 3028 3029 if (exp->type == Node_regex) 3030 return exp; 3031 3032 getnode(n); 3033 n->type = Node_dynregex; 3034 n->re_exp = exp; 3035 n->re_text = NULL; 3036 n->re_reg = NULL; 3037 n->re_flags = 0; 3038 return n; 3039} 3040 3041/* isnoeffect --- when used as a statement, has no side effects */ 3042 3043/* 3044 * To be completely general, we should recursively walk the parse 3045 * tree, to make sure that all the subexpressions also have no effect. 3046 * Instead, we just weaken the actual warning that's printed, up above 3047 * in the grammar. 3048 */ 3049 3050static int 3051isnoeffect(NODETYPE type) 3052{ 3053 switch (type) { 3054 case Node_times: 3055 case Node_quotient: 3056 case Node_mod: 3057 case Node_plus: 3058 case Node_minus: 3059 case Node_subscript: 3060 case Node_concat: 3061 case Node_exp: 3062 case Node_unary_minus: 3063 case Node_field_spec: 3064 case Node_and: 3065 case Node_or: 3066 case Node_equal: 3067 case Node_notequal: 3068 case Node_less: 3069 case Node_greater: 3070 case Node_leq: 3071 case Node_geq: 3072 case Node_match: 3073 case Node_nomatch: 3074 case Node_not: 3075 case Node_val: 3076 case Node_in_array: 3077 case Node_NF: 3078 case Node_NR: 3079 case Node_FNR: 3080 case Node_FS: 3081 case Node_RS: 3082 case Node_FIELDWIDTHS: 3083 case Node_IGNORECASE: 3084 case Node_OFS: 3085 case Node_ORS: 3086 case Node_OFMT: 3087 case Node_CONVFMT: 3088 case Node_BINMODE: 3089 case Node_LINT: 3090 case Node_TEXTDOMAIN: 3091 return TRUE; 3092 default: 3093 break; /* keeps gcc -Wall happy */ 3094 } 3095 3096 return FALSE; 3097} 3098 3099/* isassignable --- can this node be assigned to? */ 3100 3101static int 3102isassignable(register NODE *n) 3103{ 3104 switch (n->type) { 3105 case Node_var_new: 3106 case Node_var: 3107 case Node_FIELDWIDTHS: 3108 case Node_RS: 3109 case Node_FS: 3110 case Node_FNR: 3111 case Node_NR: 3112 case Node_NF: 3113 case Node_IGNORECASE: 3114 case Node_OFMT: 3115 case Node_CONVFMT: 3116 case Node_ORS: 3117 case Node_OFS: 3118 case Node_LINT: 3119 case Node_BINMODE: 3120 case Node_TEXTDOMAIN: 3121 case Node_field_spec: 3122 case Node_subscript: 3123 return TRUE; 3124 case Node_param_list: 3125 return ((n->flags & FUNC) == 0); /* ok if not func name */ 3126 default: 3127 break; /* keeps gcc -Wall happy */ 3128 } 3129 return FALSE; 3130} 3131 3132/* stopme --- for debugging */ 3133 3134NODE * 3135stopme(NODE *tree ATTRIBUTE_UNUSED) 3136{ 3137 return 0; 3138} 3139 3140/* dumpintlstr --- write out an initial .po file entry for the string */ 3141 3142static void 3143dumpintlstr(const char *str, size_t len) 3144{ 3145 char *cp; 3146 3147 /* See the GNU gettext distribution for details on the file format */ 3148 3149 if (source != NULL) { 3150 /* ala the gettext sources, remove leading `./'s */ 3151 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) 3152 continue; 3153 printf("#: %s:%d\n", cp, sourceline); 3154 } 3155 3156 printf("msgid "); 3157 pp_string_fp(stdout, str, len, '"', TRUE); 3158 putchar('\n'); 3159 printf("msgstr \"\"\n\n"); 3160 fflush(stdout); 3161} 3162 3163/* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */ 3164 3165static void 3166dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2) 3167{ 3168 char *cp; 3169 3170 /* See the GNU gettext distribution for details on the file format */ 3171 3172 if (source != NULL) { 3173 /* ala the gettext sources, remove leading `./'s */ 3174 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) 3175 continue; 3176 printf("#: %s:%d\n", cp, sourceline); 3177 } 3178 3179 printf("msgid "); 3180 pp_string_fp(stdout, str1, len1, '"', TRUE); 3181 putchar('\n'); 3182 printf("msgid_plural "); 3183 pp_string_fp(stdout, str2, len2, '"', TRUE); 3184 putchar('\n'); 3185 printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n"); 3186 fflush(stdout); 3187} 3188 3189/* count_args --- count the number of printf arguments */ 3190 3191static void 3192count_args(NODE *tree) 3193{ 3194 size_t count = 0; 3195 NODE *save_tree; 3196 3197 assert(tree->type == Node_K_printf 3198 || (tree->type == Node_builtin && tree->builtin == do_sprintf)); 3199 save_tree = tree; 3200 3201 tree = tree->lnode; /* printf format string */ 3202 3203 for (count = 0; tree != NULL; tree = tree->rnode) 3204 count++; 3205 3206 save_tree->printf_count = count; 3207} 3208 3209/* isarray --- can this type be subscripted? */ 3210 3211static int 3212isarray(NODE *n) 3213{ 3214 switch (n->type) { 3215 case Node_var_new: 3216 case Node_var_array: 3217 return TRUE; 3218 case Node_param_list: 3219 return ((n->flags & FUNC) == 0); 3220 case Node_array_ref: 3221 cant_happen(); 3222 break; 3223 default: 3224 break; /* keeps gcc -Wall happy */ 3225 } 3226 3227 return FALSE; 3228} 3229