1/* 2 * main.c -- Expression tree constructors and main program for gawk. 3 */ 4 5/* 6 * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc. 7 * 8 * This file is part of GAWK, the GNU implementation of the 9 * AWK Programming Language. 10 * 11 * GAWK is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License as published by 13 * the Free Software Foundation; either version 2 of the License, or 14 * (at your option) any later version. 15 * 16 * GAWK is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 24 */ 25 26#include "awk.h" 27#include "getopt.h" 28#ifdef TANDEM 29#include "ptchlvl.h" /* blech */ 30#else 31#include "patchlev.h" 32#endif 33 34#ifndef O_BINARY 35#include <fcntl.h> 36#endif 37 38#ifdef HAVE_MCHECK_H 39#include <mcheck.h> 40#endif 41 42#define DEFAULT_PROFILE "awkprof.out" /* where to put profile */ 43#define DEFAULT_VARFILE "awkvars.out" /* where to put vars */ 44 45static const char *varfile = DEFAULT_VARFILE; 46 47static void usage P((int exitval, FILE *fp)) ATTRIBUTE_NORETURN; 48static void copyleft P((void)) ATTRIBUTE_NORETURN; 49static void cmdline_fs P((char *str)); 50static void init_args P((int argc0, int argc, char *argv0, char **argv)); 51static void init_vars P((void)); 52static void add_src P((struct src **data, long *num, long *alloc, enum srctype stype, char *val)); 53static RETSIGTYPE catchsig P((int sig)) ATTRIBUTE_NORETURN; 54static void nostalgia P((void)) ATTRIBUTE_NORETURN; 55static void version P((void)) ATTRIBUTE_NORETURN; 56static void init_fds P((void)); 57static void init_groupset P((void)); 58 59/* These nodes store all the special variables AWK uses */ 60NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node; 61NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node; 62NODE *FS_node, *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node; 63NODE *ORS_node, *PROCINFO_node, *RLENGTH_node, *RSTART_node, *RS_node; 64NODE *RT_node, *SUBSEP_node, *LINT_node, *TEXTDOMAIN_node; 65 66long NF; 67long NR; 68long FNR; 69int BINMODE; 70int IGNORECASE; 71char *OFS; 72char *ORS; 73char *OFMT; 74char *TEXTDOMAIN; 75int MRL; /* See -mr option for use of this variable */ 76 77/* 78 * CONVFMT is a convenience pointer for the current number to string format. 79 * We must supply an initial value to avoid recursion problems of 80 * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT 81 * Fun, fun, fun, fun. 82 */ 83char *CONVFMT = "%.6g"; 84 85 86int errcount = 0; /* error counter, used by yyerror() */ 87 88NODE *Nnull_string; /* The global null string */ 89 90/* The name the program was invoked under, for error messages */ 91const char *myname; 92 93/* A block of AWK code to be run before running the program */ 94NODE *begin_block = NULL; 95 96/* A block of AWK code to be run after the last input file */ 97NODE *end_block = NULL; 98 99int exiting = FALSE; /* Was an "exit" statement executed? */ 100int exit_val = 0; /* optional exit value */ 101 102#if defined(YYDEBUG) || defined(GAWKDEBUG) 103extern int yydebug; 104#endif 105 106struct src *srcfiles = NULL; /* source file name(s) */ 107long numfiles = -1; /* how many source files */ 108static long allocfiles; /* for how many is *srcfiles allocated */ 109 110#define srcfiles_add(stype, val) \ 111 add_src(&srcfiles, &numfiles, &allocfiles, stype, val) 112 113static struct src *preassigns = NULL; /* requested via -v or -F */ 114static long numassigns = -1; /* how many of them */ 115static long allocassigns; /* for how many is allocated */ 116 117#define preassigns_add(stype, val) \ 118 add_src(&preassigns, &numassigns, &allocassigns, stype, val) 119 120#undef do_lint 121#undef do_lint_old 122 123int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */ 124int do_posix = FALSE; /* turn off gnu and unix extensions */ 125int do_lint = FALSE; /* provide warnings about questionable stuff */ 126int do_lint_old = FALSE; /* warn about stuff not in V7 awk */ 127int do_intl = FALSE; /* dump locale-izable strings to stdout */ 128int do_non_decimal_data = FALSE; /* allow octal/hex C style DATA. Use with caution! */ 129int do_nostalgia = FALSE; /* provide a blast from the past */ 130int do_intervals = FALSE; /* allow {...,...} in regexps */ 131int do_profiling = FALSE; /* profile and pretty print the program */ 132int do_dump_vars = FALSE; /* dump all global variables at end */ 133int do_tidy_mem = FALSE; /* release vars when done */ 134 135int in_begin_rule = FALSE; /* we're in a BEGIN rule */ 136int in_end_rule = FALSE; /* we're in a END rule */ 137int whiny_users = FALSE; /* do things that whiny users want */ 138#ifdef MBS_SUPPORT 139int gawk_mb_cur_max = 1; /* MB_CUR_MAX value, see comment in main() */ 140#endif 141 142int output_is_tty = FALSE; /* control flushing of output */ 143 144extern const char *version_string; /* current version, for printing */ 145 146#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 147GETGROUPS_T *groupset; /* current group set */ 148int ngroups; /* size of said set */ 149#endif 150 151/* The parse tree is stored here. */ 152NODE *expression_value; 153 154#if _MSC_VER == 510 155void (*lintfunc) P((va_list va_alist, ...)) = warning; 156#else 157#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ 158void (*lintfunc) P((const char *mesg, ...)) = warning; 159#else 160void (*lintfunc) () = warning; 161#endif 162#endif 163 164static const struct option optab[] = { 165 { "compat", no_argument, & do_traditional, 1 }, 166 { "traditional", no_argument, & do_traditional, 1 }, 167 { "lint", optional_argument, NULL, 'l' }, 168 { "lint-old", no_argument, & do_lint_old, 1 }, 169 { "posix", no_argument, & do_posix, 1 }, 170 { "nostalgia", no_argument, & do_nostalgia, 1 }, 171 { "gen-po", no_argument, & do_intl, 1 }, 172 { "non-decimal-data", no_argument, & do_non_decimal_data, 1 }, 173 { "profile", optional_argument, NULL, 'p' }, 174 { "copyleft", no_argument, NULL, 'C' }, 175 { "copyright", no_argument, NULL, 'C' }, 176 { "field-separator", required_argument, NULL, 'F' }, 177 { "file", required_argument, NULL, 'f' }, 178 { "re-interval", no_argument, & do_intervals, 1 }, 179 { "source", required_argument, NULL, 's' }, 180 { "dump-variables", optional_argument, NULL, 'd' }, 181 { "assign", required_argument, NULL, 'v' }, 182 { "version", no_argument, NULL, 'V' }, 183 { "usage", no_argument, NULL, 'u' }, 184 { "help", no_argument, NULL, 'u' }, 185#ifdef GAWKDEBUG 186 { "parsedebug", no_argument, NULL, 'D' }, 187#endif 188 { NULL, 0, NULL, '\0' } 189}; 190 191#ifdef NO_LINT 192#define do_lint 0 193#define do_lint_old 0 194#endif 195 196/* main --- process args, parse program, run it, clean up */ 197 198int 199main(int argc, char **argv) 200{ 201 int c; 202 char *scan; 203 /* the + on the front tells GNU getopt not to rearrange argv */ 204 const char *optlist = "+F:f:v:W;m:D"; 205 int stopped_early = FALSE; 206 int old_optind; 207 extern int optind; 208 extern int opterr; 209 extern char *optarg; 210 int i; 211 212 /* do these checks early */ 213 if (getenv("TIDYMEM") != NULL) 214 do_tidy_mem = TRUE; 215 216 if (getenv("WHINY_USERS") != NULL) 217 whiny_users = TRUE; 218 219#ifdef HAVE_MCHECK_H 220 if (do_tidy_mem) 221 mtrace(); 222#endif /* HAVE_MCHECK_H */ 223 224#if defined(LC_CTYPE) 225 setlocale(LC_CTYPE, ""); 226#endif 227#if defined(LC_COLLATE) 228 setlocale(LC_COLLATE, ""); 229#endif 230#if HAVE_LC_MESSAGES && defined(LC_MESSAGES) 231 setlocale(LC_MESSAGES, ""); 232#endif 233#if defined(LC_NUMERIC) 234 /* 235 * Force the issue here. According to POSIX 2001, decimal 236 * point is used for parsing source code and for command-line 237 * assignments and the locale value for processing input, 238 * number to string conversion, and printing output. 239 */ 240 setlocale(LC_NUMERIC, "C"); 241#endif 242#if defined(LC_TIME) 243 setlocale(LC_TIME, ""); 244#endif 245 246#ifdef MBS_SUPPORT 247 /* 248 * In glibc, MB_CUR_MAX is actually a function. This value is 249 * tested *a lot* in many speed-critical places in gawk. Caching 250 * this value once makes a speed difference. 251 */ 252 gawk_mb_cur_max = MB_CUR_MAX; 253#endif 254 255 bindtextdomain(PACKAGE, LOCALEDIR); 256 textdomain(PACKAGE); 257 258 (void) signal(SIGFPE, catchsig); 259 (void) signal(SIGSEGV, catchsig); 260#ifdef SIGBUS 261 (void) signal(SIGBUS, catchsig); 262#endif 263 264 myname = gawk_name(argv[0]); 265 argv[0] = (char *) myname; 266 os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ 267 268 /* remove sccs gunk */ 269 if (strncmp(version_string, "@(#)", 4) == 0) 270 version_string += 4; 271 272 if (argc < 2) 273 usage(1, stderr); 274 275 /* Robustness: check that file descriptors 0, 1, 2 are open */ 276 init_fds(); 277 278 /* init array handling. */ 279 array_init(); 280 281 /* we do error messages ourselves on invalid options */ 282 opterr = FALSE; 283 284 /* option processing. ready, set, go! */ 285 for (optopt = 0, old_optind = 1; 286 (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; 287 optopt = 0, old_optind = optind) { 288 if (do_posix) 289 opterr = TRUE; 290 291 switch (c) { 292 case 'F': 293 preassigns_add(PRE_ASSIGN_FS, optarg); 294 break; 295 296 case 'f': 297 /* 298 * a la MKS awk, allow multiple -f options. 299 * this makes function libraries real easy. 300 * most of the magic is in the scanner. 301 * 302 * The following is to allow for whitespace at the end 303 * of a #! /bin/gawk line in an executable file 304 */ 305 scan = optarg; 306 if (argv[optind-1] != optarg) 307 while (ISSPACE(*scan)) 308 scan++; 309 srcfiles_add(SOURCEFILE, 310 (*scan == '\0' ? argv[optind++] : optarg)); 311 break; 312 313 case 'v': 314 preassigns_add(PRE_ASSIGN, optarg); 315 break; 316 317 case 'm': 318 /* 319 * Research awk extension. 320 * -mf nnn set # fields, gawk ignores 321 * -mr nnn set record length, ditto 322 */ 323 if (do_lint) 324 lintwarn(_("`-m[fr]' option irrelevant in gawk")); 325 if (optarg[0] != 'r' && optarg[0] != 'f') 326 warning(_("-m option usage: `-m[fr] nnn'")); 327 /* 328 * Set fixed length records for Tandem, 329 * ignored on other platforms (see io.c:get_a_record). 330 */ 331 if (optarg[0] == 'r') { 332 if (ISDIGIT(optarg[1])) 333 MRL = atoi(optarg+1); 334 else { 335 MRL = atoi(argv[optind]); 336 optind++; 337 } 338 } else if (optarg[1] == '\0') 339 optind++; 340 break; 341 342 case 'W': /* gawk specific options - now in getopt_long */ 343 fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"), 344 argv[0], optarg); 345 break; 346 347 /* These can only come from long form options */ 348 case 'C': 349 copyleft(); 350 break; 351 352 case 'd': 353 do_dump_vars = TRUE; 354 if (optarg != NULL && optarg[0] != '\0') 355 varfile = optarg; 356 break; 357 358 case 'l': 359#ifndef NO_LINT 360 do_lint = LINT_ALL; 361 if (optarg != NULL) { 362 if (strcmp(optarg, "fatal") == 0) 363 lintfunc = r_fatal; 364 else if (strcmp(optarg, "invalid") == 0) 365 do_lint = LINT_INVALID; 366 } 367#endif 368 break; 369 370 case 'p': 371 do_profiling = TRUE; 372 if (optarg != NULL) 373 set_prof_file(optarg); 374 else 375 set_prof_file(DEFAULT_PROFILE); 376 break; 377 378 case 's': 379 if (optarg[0] == '\0') 380 warning(_("empty argument to `--source' ignored")); 381 else 382 srcfiles_add(CMDLINE, optarg); 383 break; 384 385 case 'u': 386 usage(0, stdout); /* per coding stds */ 387 break; 388 389 case 'V': 390 version(); 391 break; 392 393 case 0: 394 /* 395 * getopt_long found an option that sets a variable 396 * instead of returning a letter. Do nothing, just 397 * cycle around for the next one. 398 */ 399 break; 400 401 case 'D': 402#ifdef GAWKDEBUG 403 yydebug = 2; 404 break; 405#endif 406 /* if not debugging, fall through */ 407 408 case '?': 409 default: 410 /* 411 * New behavior. If not posix, an unrecognized 412 * option stops argument processing so that it can 413 * go into ARGV for the awk program to see. This 414 * makes use of ``#! /bin/gawk -f'' easier. 415 * 416 * However, it's never simple. If optopt is set, 417 * an option that requires an argument didn't get the 418 * argument. We care because if opterr is 0, then 419 * getopt_long won't print the error message for us. 420 */ 421 if (! do_posix 422 && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { 423 /* 424 * can't just do optind--. In case of an 425 * option with >= 2 letters, getopt_long 426 * won't have incremented optind. 427 */ 428 optind = old_optind; 429 stopped_early = TRUE; 430 goto out; 431 } else if (optopt != '\0') 432 /* Use 1003.2 required message format */ 433 fprintf(stderr, 434 _("%s: option requires an argument -- %c\n"), 435 myname, optopt); 436 /* else 437 let getopt print error message for us */ 438 break; 439 } 440 } 441out: 442 443 if (do_nostalgia) 444 nostalgia(); 445 446 /* check for POSIXLY_CORRECT environment variable */ 447 if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) { 448 do_posix = TRUE; 449 if (do_lint) 450 lintwarn( 451 _("environment variable `POSIXLY_CORRECT' set: turning on `--posix'")); 452 } 453 454 if (do_posix) { 455 if (do_traditional) /* both on command line */ 456 warning(_("`--posix' overrides `--traditional'")); 457 else 458 do_traditional = TRUE; 459 /* 460 * POSIX compliance also implies 461 * no GNU extensions either. 462 */ 463 } 464 465 if (do_traditional && do_non_decimal_data) { 466 do_non_decimal_data = FALSE; 467 warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'")); 468 } 469 470 if (do_lint && os_is_setuid()) 471 warning(_("running %s setuid root may be a security problem"), myname); 472 473 /* 474 * Force profiling if this is pgawk. 475 * Don't bother if the command line already set profiling up. 476 */ 477 if (! do_profiling) 478 init_profiling(& do_profiling, DEFAULT_PROFILE); 479 480 /* load group set */ 481 init_groupset(); 482 483 /* initialize the null string */ 484 Nnull_string = make_string("", 0); 485 Nnull_string->numbr = 0.0; 486 Nnull_string->type = Node_val; 487 Nnull_string->flags = (PERM|STRCUR|STRING|NUMCUR|NUMBER); 488 489 /* 490 * Tell the regex routines how they should work. 491 * Do this before initializing variables, since 492 * they could want to do a regexp compile. 493 */ 494 resetup(); 495 496 /* Set up the special variables */ 497 init_vars(); 498 499 /* Set up the field variables */ 500 init_fields(); 501 502 /* Now process the pre-assignments */ 503 for (i = 0; i <= numassigns; i++) 504 if (preassigns[i].stype == PRE_ASSIGN) 505 (void) arg_assign(preassigns[i].val, TRUE); 506 else /* PRE_ASSIGN_FS */ 507 cmdline_fs(preassigns[i].val); 508 free(preassigns); 509#ifdef O_BINARY 510 if ((BINMODE & 1) != 0) 511 if (os_setbinmode(fileno(stdin), O_BINARY) == -1) 512 fatal(_("can't set binary mode on stdin (%s)"), strerror(errno)); 513 if ((BINMODE & 2) != 0) { 514 if (os_setbinmode(fileno(stdout), O_BINARY) == -1) 515 fatal(_("can't set binary mode on stdout (%s)"), strerror(errno)); 516 if (os_setbinmode(fileno(stderr), O_BINARY) == -1) 517 fatal(_("can't set binary mode on stderr (%s)"), strerror(errno)); 518 } 519#endif 520#ifdef GAWKDEBUG 521 setbuf(stdout, (char *) NULL); /* make debugging easier */ 522#endif 523 if (isatty(fileno(stdout))) 524 output_is_tty = TRUE; 525 /* No -f or --source options, use next arg */ 526 if (numfiles == -1) { 527 if (optind > argc - 1 || stopped_early) /* no args left or no program */ 528 usage(1, stderr); 529 srcfiles_add(CMDLINE, argv[optind]); 530 optind++; 531 } 532 533 init_args(optind, argc, (char *) myname, argv); 534 (void) tokexpand(); 535 536 /* Read in the program */ 537 if (yyparse() != 0 || errcount != 0) 538 exit(1); 539 540 free(srcfiles); 541 542 if (do_intl) 543 exit(0); 544 545 if (do_lint && begin_block == NULL && expression_value == NULL 546 && end_block == NULL) 547 lintwarn(_("no program text at all!")); 548 549 if (do_lint) 550 shadow_funcs(); 551 552 init_profiling_signals(); 553 554#if defined(LC_NUMERIC) 555 /* See comment above. */ 556 setlocale(LC_NUMERIC, ""); 557#endif 558 559 /* Whew. Finally, run the program. */ 560 if (begin_block != NULL) { 561 in_begin_rule = TRUE; 562 (void) interpret(begin_block); 563 } 564 in_begin_rule = FALSE; 565 if (! exiting && (expression_value != NULL || end_block != NULL)) 566 do_input(); 567 if (end_block != NULL) { 568 in_end_rule = TRUE; 569 (void) interpret(end_block); 570 } 571 in_end_rule = FALSE; 572 if (close_io() != 0 && exit_val == 0) 573 exit_val = 1; 574 575 if (do_profiling) { 576 dump_prog(begin_block, expression_value, end_block); 577 dump_funcs(); 578 } 579 580 if (do_dump_vars) 581 dump_vars(varfile); 582 583 if (do_tidy_mem) 584 release_all_vars(); 585 586 exit(exit_val); /* more portable */ 587 return exit_val; /* to suppress warnings */ 588} 589 590/* add_src --- add one element to *srcfiles or *preassigns */ 591 592static void 593add_src(struct src **data, long *num, long *alloc, enum srctype stype, char *val) 594{ 595#define INIT_SRC 4 596 597 ++*num; 598 599 if (*data == NULL) { 600 emalloc(*data, struct src *, INIT_SRC * sizeof(struct src), "add_src"); 601 *alloc = INIT_SRC; 602 } else if (*num >= *alloc) { 603 (*alloc) *= 2; 604 erealloc(*data, struct src *, (*alloc) * sizeof(struct src), "add_src"); 605 } 606 607 (*data)[*num].stype = stype; 608 (*data)[*num].val = val; 609 610#undef INIT_SRC 611} 612 613/* usage --- print usage information and exit */ 614 615static void 616usage(int exitval, FILE *fp) 617{ 618 619 /* Not factoring out common stuff makes it easier to translate. */ 620 fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"), 621 myname); 622 fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"), 623 myname, quote, quote); 624 625 /* GNU long options info. This is too many options. */ 626 627 fputs(_("POSIX options:\t\tGNU long options:\n"), fp); 628 fputs(_("\t-f progfile\t\t--file=progfile\n"), fp); 629 fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp); 630 fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp); 631 fputs(_("\t-m[fr] val\n"), fp); 632 fputs(_("\t-W compat\t\t--compat\n"), fp); 633 fputs(_("\t-W copyleft\t\t--copyleft\n"), fp); 634 fputs(_("\t-W copyright\t\t--copyright\n"), fp); 635 fputs(_("\t-W dump-variables[=file]\t--dump-variables[=file]\n"), fp); 636 fputs(_("\t-W gen-po\t\t--gen-po\n"), fp); 637 fputs(_("\t-W help\t\t\t--help\n"), fp); 638 fputs(_("\t-W lint[=fatal]\t\t--lint[=fatal]\n"), fp); 639 fputs(_("\t-W lint-old\t\t--lint-old\n"), fp); 640 fputs(_("\t-W non-decimal-data\t--non-decimal-data\n"), fp); 641#ifdef NOSTALGIA 642 fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp); 643#endif 644#ifdef GAWKDEBUG 645 fputs(_("\t-W parsedebug\t\t--parsedebug\n"), fp); 646#endif 647 fputs(_("\t-W profile[=file]\t--profile[=file]\n"), fp); 648 fputs(_("\t-W posix\t\t--posix\n"), fp); 649 fputs(_("\t-W re-interval\t\t--re-interval\n"), fp); 650 fputs(_("\t-W source=program-text\t--source=program-text\n"), fp); 651 fputs(_("\t-W traditional\t\t--traditional\n"), fp); 652 fputs(_("\t-W usage\t\t--usage\n"), fp); 653 fputs(_("\t-W version\t\t--version\n"), fp); 654 655 656 /* This is one string to make things easier on translators. */ 657 fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info', which is\n\ 658section `Reporting Problems and Bugs' in the printed version.\n\n"), fp); 659 660 /* ditto */ 661 fputs(_("gawk is a pattern scanning and processing language.\n\ 662By default it reads standard input and writes standard output.\n\n"), fp); 663 664 /* ditto */ 665 fputs(_("Examples:\n\tgawk '{ sum += $1 }; END { print sum }' file\n\ 666\tgawk -F: '{ print $1 }' /etc/passwd\n"), fp); 667 668 fflush(fp); 669 670 if (ferror(fp)) 671 exit(1); 672 673 exit(exitval); 674} 675 676/* copyleft --- print out the short GNU copyright information */ 677 678static void 679copyleft() 680{ 681 static const char blurb_part1[] = 682 N_("Copyright (C) 1989, 1991-%d Free Software Foundation.\n\ 683\n\ 684This program is free software; you can redistribute it and/or modify\n\ 685it under the terms of the GNU General Public License as published by\n\ 686the Free Software Foundation; either version 2 of the License, or\n\ 687(at your option) any later version.\n\ 688\n"); 689 static const char blurb_part2[] = 690 N_("This program is distributed in the hope that it will be useful,\n\ 691but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ 692MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ 693GNU General Public License for more details.\n\ 694\n"); 695 static const char blurb_part3[] = 696 N_("You should have received a copy of the GNU General Public License\n\ 697along with this program; if not, write to the Free Software\n\ 698Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"); 699 700 /* multiple blurbs are needed for some brain dead compilers. */ 701 printf(_(blurb_part1), 2003); /* Last update year */ 702 fputs(_(blurb_part2), stdout); 703 fputs(_(blurb_part3), stdout); 704 fflush(stdout); 705 706 if (ferror(stdout)) 707 exit(1); 708 709 exit(0); 710} 711 712/* cmdline_fs --- set FS from the command line */ 713 714static void 715cmdline_fs(char *str) 716{ 717 register NODE **tmp; 718 719 tmp = get_lhs(FS_node, (Func_ptr *) 0, FALSE); 720 unref(*tmp); 721 /* 722 * Only if in full compatibility mode check for the stupid special 723 * case so -F\t works as documented in awk book even though the shell 724 * hands us -Ft. Bleah! 725 * 726 * Thankfully, Posix didn't propagate this "feature". 727 */ 728 if (str[0] == 't' && str[1] == '\0') { 729 if (do_lint) 730 lintwarn(_("-Ft does not set FS to tab in POSIX awk")); 731 if (do_traditional && ! do_posix) 732 str[0] = '\t'; 733 } 734 *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */ 735 set_FS(); 736} 737 738/* init_args --- set up ARGV from stuff on the command line */ 739 740static void 741init_args(int argc0, int argc, char *argv0, char **argv) 742{ 743 int i, j; 744 NODE **aptr; 745 746 ARGV_node = install("ARGV", node((NODE *) NULL, Node_var_array, (NODE *) NULL)); 747 aptr = assoc_lookup(ARGV_node, tmp_number(0.0), FALSE); 748 *aptr = make_string(argv0, strlen(argv0)); 749 (*aptr)->flags |= MAYBE_NUM; 750 for (i = argc0, j = 1; i < argc; i++) { 751 aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j), FALSE); 752 *aptr = make_string(argv[i], strlen(argv[i])); 753 (*aptr)->flags |= MAYBE_NUM; 754 j++; 755 } 756 ARGC_node = install("ARGC", 757 node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); 758} 759 760/* 761 * Set all the special variables to their initial values. 762 * Note that some of the variables that have set_FOO routines should 763 * *N*O*T* have those routines called upon initialization, and thus 764 * they have NULL entries in that field. This is notably true of FS 765 * and IGNORECASE. 766 */ 767struct varinit { 768 NODE **spec; 769 const char *name; 770 NODETYPE type; 771 const char *strval; 772 AWKNUM numval; 773 Func_ptr assign; 774}; 775static const struct varinit varinit[] = { 776{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, 777{&NF_node, "NF", Node_NF, NULL, -1, NULL }, 778{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL }, 779{&NR_node, "NR", Node_NR, NULL, 0, set_NR }, 780{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR }, 781{&FS_node, "FS", Node_FS, " ", 0, NULL }, 782{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, 783{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL }, 784{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL }, 785{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, 786{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, 787{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, 788{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL }, 789{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL }, 790{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL }, 791{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL }, 792{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL }, 793{&RT_node, "RT", Node_var, "", 0, NULL }, 794{&BINMODE_node, "BINMODE", Node_BINMODE, NULL, 0, NULL }, 795{&LINT_node, "LINT", Node_LINT, NULL, 0, NULL }, 796{&TEXTDOMAIN_node, "TEXTDOMAIN", Node_TEXTDOMAIN, "messages", 0, set_TEXTDOMAIN }, 797{0, NULL, Node_illegal, NULL, 0, NULL }, 798}; 799 800/* init_vars --- actually initialize everything in the symbol table */ 801 802static void 803init_vars() 804{ 805 register const struct varinit *vp; 806 807 for (vp = varinit; vp->name; vp++) { 808 *(vp->spec) = install((char *) vp->name, 809 node(vp->strval == NULL ? make_number(vp->numval) 810 : make_string((char *) vp->strval, 811 strlen(vp->strval)), 812 vp->type, (NODE *) NULL)); 813 if (vp->assign) 814 (*(vp->assign))(); 815 } 816} 817 818/* load_environ --- populate the ENVIRON array */ 819 820NODE * 821load_environ() 822{ 823#if ! defined(TANDEM) 824#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC)) 825 extern char **environ; 826#endif 827 register char *var, *val; 828 NODE **aptr; 829 register int i; 830#endif /* TANDEM */ 831 832 ENVIRON_node = install("ENVIRON", 833 node((NODE *) NULL, Node_var_array, (NODE *) NULL)); 834#if ! defined(TANDEM) 835 for (i = 0; environ[i] != NULL; i++) { 836 static char nullstr[] = ""; 837 838 var = environ[i]; 839 val = strchr(var, '='); 840 if (val != NULL) 841 *val++ = '\0'; 842 else 843 val = nullstr; 844 aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)), 845 FALSE); 846 *aptr = make_string(val, strlen(val)); 847 (*aptr)->flags |= MAYBE_NUM; 848 849 /* restore '=' so that system() gets a valid environment */ 850 if (val != nullstr) 851 *--val = '='; 852 } 853 /* 854 * Put AWKPATH into ENVIRON if it's not there. 855 * This allows querying it from within awk programs. 856 */ 857 if (getenv("AWKPATH") == NULL) { 858 aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7), FALSE); 859 *aptr = make_string(defpath, strlen(defpath)); 860 } 861#endif /* TANDEM */ 862 return ENVIRON_node; 863} 864 865/* load_procinfo --- populate the PROCINFO array */ 866 867NODE * 868load_procinfo() 869{ 870 int i; 871 NODE **aptr; 872 char name[100]; 873 AWKNUM value; 874 875 PROCINFO_node = install("PROCINFO", 876 node((NODE *) NULL, Node_var_array, (NODE *) NULL)); 877 878#ifdef GETPGRP_VOID 879#define getpgrp_arg() /* nothing */ 880#else 881#define getpgrp_arg() getpid() 882#endif 883 884 value = getpgrp(getpgrp_arg()); 885 aptr = assoc_lookup(PROCINFO_node, tmp_string("pgrpid", 6), FALSE); 886 *aptr = make_number(value); 887 888 /* 889 * could put a lot of this into a table, but then there's 890 * portability problems declaring all the functions. so just 891 * do it the slow and stupid way. sigh. 892 */ 893 894 value = getpid(); 895 aptr = assoc_lookup(PROCINFO_node, tmp_string("pid", 3), FALSE); 896 *aptr = make_number(value); 897 898 value = getppid(); 899 aptr = assoc_lookup(PROCINFO_node, tmp_string("ppid", 4), FALSE); 900 *aptr = make_number(value); 901 902 value = getuid(); 903 aptr = assoc_lookup(PROCINFO_node, tmp_string("uid", 3), FALSE); 904 *aptr = make_number(value); 905 906 value = geteuid(); 907 aptr = assoc_lookup(PROCINFO_node, tmp_string("euid", 4), FALSE); 908 *aptr = make_number(value); 909 910 value = getgid(); 911 aptr = assoc_lookup(PROCINFO_node, tmp_string("gid", 3), FALSE); 912 *aptr = make_number(value); 913 914 value = getegid(); 915 aptr = assoc_lookup(PROCINFO_node, tmp_string("egid", 4), FALSE); 916 *aptr = make_number(value); 917 918 aptr = assoc_lookup(PROCINFO_node, tmp_string("FS", 2), FALSE); 919 *aptr = (using_fieldwidths() ? make_string("FIELDWIDTHS", 11) : 920 make_string("FS", 2) ); 921 922#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 923 for (i = 0; i < ngroups; i++) { 924 sprintf(name, "group%d", i + 1); 925 value = groupset[i]; 926 aptr = assoc_lookup(PROCINFO_node, tmp_string(name, strlen(name)), FALSE); 927 *aptr = make_number(value); 928 } 929 if (groupset) { 930 free(groupset); 931 groupset = NULL; 932 } 933#endif 934 return PROCINFO_node; 935} 936 937/* arg_assign --- process a command-line assignment */ 938 939int 940arg_assign(char *arg, int initing) 941{ 942 char *cp, *cp2; 943 int badvar; 944 Func_ptr after_assign = NULL; 945 NODE *var; 946 NODE *it; 947 NODE **lhs; 948 949 cp = strchr(arg, '='); 950 951 if (cp == NULL) { 952 if (! initing) 953 return FALSE; /* This is file name, not assignment. */ 954 955 fprintf(stderr, 956 _("%s: `%s' argument to `-v' not in `var=value' form\n\n"), 957 myname, arg); 958 usage(1, stderr); 959 } 960 961 *cp++ = '\0'; 962 963 /* first check that the variable name has valid syntax */ 964 badvar = FALSE; 965 if (! ISALPHA(arg[0]) && arg[0] != '_') 966 badvar = TRUE; 967 else 968 for (cp2 = arg+1; *cp2; cp2++) 969 if (! ISALNUM(*cp2) && *cp2 != '_') { 970 badvar = TRUE; 971 break; 972 } 973 974 if (badvar) { 975 if (initing) 976 fatal(_("`%s' is not a legal variable name"), arg); 977 978 if (do_lint) 979 lintwarn(_("`%s' is not a variable name, looking for file `%s=%s'"), 980 arg, arg, cp); 981 } else { 982 /* 983 * Recent versions of nawk expand escapes inside assignments. 984 * This makes sense, so we do it too. 985 */ 986 it = make_str_node(cp, strlen(cp), SCAN); 987 it->flags |= MAYBE_NUM; 988#ifdef LC_NUMERIC 989 setlocale(LC_NUMERIC, "C"); 990 (void) force_number(it); 991 setlocale(LC_NUMERIC, ""); 992#endif /* LC_NUMERIC */ 993 var = variable(arg, FALSE, Node_var); 994 lhs = get_lhs(var, &after_assign, FALSE); 995 unref(*lhs); 996 *lhs = it; 997 if (after_assign != NULL) 998 (*after_assign)(); 999 } 1000 1001 *--cp = '='; /* restore original text of ARGV */ 1002 1003 return ! badvar; 1004} 1005 1006/* catchsig --- catch signals */ 1007 1008static RETSIGTYPE 1009catchsig(int sig) 1010{ 1011 if (sig == SIGFPE) { 1012 fatal(_("floating point exception")); 1013 } else if (sig == SIGSEGV 1014#ifdef SIGBUS 1015 || sig == SIGBUS 1016#endif 1017 ) { 1018 set_loc(__FILE__, __LINE__); 1019 msg(_("fatal error: internal error")); 1020 /* fatal won't abort() if not compiled for debugging */ 1021 abort(); 1022 } else 1023 cant_happen(); 1024 /* NOTREACHED */ 1025} 1026 1027/* nostalgia --- print the famous error message and die */ 1028 1029static void 1030nostalgia() 1031{ 1032 /* 1033 * N.B.: This string is not gettextized, on purpose. 1034 * So there. 1035 */ 1036 fprintf(stderr, "awk: bailing out near line 1\n"); 1037 fflush(stderr); 1038 abort(); 1039} 1040 1041/* version --- print version message */ 1042 1043static void 1044version() 1045{ 1046 printf("%s.%s\n", version_string, PATCHLEVEL); 1047 /* 1048 * Per GNU coding standards, print copyright info, 1049 * then exit successfully, do nothing else. 1050 */ 1051 copyleft(); 1052 exit(0); 1053} 1054 1055/* init_fds --- check for 0, 1, 2, open on /dev/null if possible */ 1056 1057static void 1058init_fds() 1059{ 1060 struct stat sbuf; 1061 int fd; 1062 int newfd; 1063 1064 /* maybe no stderr, don't bother with error mesg */ 1065 for (fd = 0; fd <= 2; fd++) { 1066 if (fstat(fd, &sbuf) < 0) { 1067#if MAKE_A_HEROIC_EFFORT 1068 if (do_lint) 1069 lintwarn(_("no pre-opened fd %d"), fd); 1070#endif 1071 newfd = devopen("/dev/null", "r+"); 1072#ifdef MAKE_A_HEROIC_EFFORT 1073 if (do_lint && newfd < 0) 1074 lintwarn(_("could not pre-open /dev/null for fd %d"), fd); 1075#endif 1076 } 1077 } 1078} 1079 1080/* init_groupset --- initialize groupset */ 1081 1082static void 1083init_groupset() 1084{ 1085#if defined(HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 1086#ifdef GETGROUPS_NOT_STANDARD 1087 /* For systems that aren't standards conformant, use old way. */ 1088 ngroups = NGROUPS_MAX; 1089#else 1090 /* 1091 * If called with 0 for both args, return value is 1092 * total number of groups. 1093 */ 1094 ngroups = getgroups(0, NULL); 1095#endif 1096 if (ngroups == -1) 1097 fatal(_("could not find groups: %s"), strerror(errno)); 1098 else if (ngroups == 0) 1099 return; 1100 1101 /* fill in groups */ 1102 emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset"); 1103 1104 ngroups = getgroups(ngroups, groupset); 1105 if (ngroups == -1) 1106 fatal(_("could not find groups: %s"), strerror(errno)); 1107#endif 1108} 1109