1/*
2 * main.c -- Expression tree constructors and main program for gawk.
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
24 */
25
26#include "awk.h"
27#include "getopt.h"
28#ifdef TANDEM
29#include "ptchlvl.h"	/* blech */
30#else
31#include "patchlev.h"
32#endif
33
34#ifndef O_BINARY
35#include <fcntl.h>
36#endif
37
38#ifdef HAVE_MCHECK_H
39#include <mcheck.h>
40#endif
41
/* Default output file names, used when the corresponding option gives none. */
#define DEFAULT_PROFILE		"awkprof.out"	/* where to put profile */
#define DEFAULT_VARFILE		"awkvars.out"	/* where to put vars */

/*
 * File that receives the variable dump at exit; main() replaces it with
 * the argument of `--dump-variables' when a non-empty one is supplied.
 */
static const char *varfile = DEFAULT_VARFILE;

/*
 * Forward declarations for the file-local helpers defined below.
 * P(()) is presumably the prototype-wrapper macro from awk.h -- confirm there.
 * The ATTRIBUTE_NORETURN functions all end in exit() or abort().
 */
static void usage P((int exitval, FILE *fp)) ATTRIBUTE_NORETURN;
static void copyleft P((void)) ATTRIBUTE_NORETURN;
static void cmdline_fs P((char *str));
static void init_args P((int argc0, int argc, char *argv0, char **argv));
static void init_vars P((void));
static void add_src P((struct src **data, long *num, long *alloc, enum srctype stype, char *val));
static RETSIGTYPE catchsig P((int sig)) ATTRIBUTE_NORETURN;
static void nostalgia P((void)) ATTRIBUTE_NORETURN;
static void version P((void)) ATTRIBUTE_NORETURN;
static void init_fds P((void));
static void init_groupset P((void));
/*
 * These nodes store all the special variables AWK uses.
 * Most are filled in by init_vars() from the varinit[] table; ARGC/ARGV,
 * ENVIRON and PROCINFO are installed by init_args(), load_environ() and
 * load_procinfo() respectively.
 */
NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node;
NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node;
NODE *FS_node, *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
NODE *ORS_node, *PROCINFO_node, *RLENGTH_node, *RSTART_node, *RS_node;
NODE *RT_node, *SUBSEP_node, *LINT_node, *TEXTDOMAIN_node;

/*
 * C-level globals named after awk special variables.
 * NOTE(review): presumably cached values kept in sync by the set_FOO()
 * routines defined elsewhere -- this file only reads BINMODE and sets MRL.
 */
long NF;
long NR;
long FNR;
int BINMODE;
int IGNORECASE;
char *OFS;
char *ORS;
char *OFMT;
char *TEXTDOMAIN;
int MRL;	/* See -mr option for use of this variable */

/*
 * CONVFMT is a convenience pointer for the current number to string format.
 * We must supply an initial value to avoid recursion problems of
 *	set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
 * Fun, fun, fun, fun.
 */
char *CONVFMT = "%.6g";


int errcount = 0;		/* error counter, used by yyerror() */

NODE *Nnull_string;		/* The global null string */

/* The name the program was invoked under, for error messages */
const char *myname;

/* A block of AWK code to be run before running the program */
NODE *begin_block = NULL;

/* A block of AWK code to be run after the last input file */
NODE *end_block = NULL;

int exiting = FALSE;		/* Was an "exit" statement executed? */
int exit_val = 0;		/* optional exit value */

#if defined(YYDEBUG) || defined(GAWKDEBUG)
extern int yydebug;
#endif

/*
 * Program sources collected from -f / --source (or the first non-option
 * argument).  numfiles is the index of the last used entry, so -1 means
 * "no source given yet"; add_src() maintains all three variables.
 */
struct src *srcfiles = NULL;	/* source file name(s) */
long numfiles = -1;		/* how many source files */
static long allocfiles;		/* for how many is *srcfiles allocated */

#define	srcfiles_add(stype, val) \
	add_src(&srcfiles, &numfiles, &allocfiles, stype, val)

/*
 * Assignments requested on the command line; they are applied in main()
 * only after the special variables exist.  Same -1 convention as numfiles.
 */
static struct src *preassigns = NULL;	/* requested via -v or -F */
static long numassigns = -1;		/* how many of them */
static long allocassigns;		/* for how many is allocated */

#define	preassigns_add(stype, val) \
	add_src(&preassigns, &numassigns, &allocassigns, stype, val)

/*
 * do_lint and do_lint_old may arrive here as macros; remove them so that
 * the real variables can be defined (and their addresses taken in optab[]).
 */
#undef do_lint
#undef do_lint_old

int do_traditional = FALSE;	/* no gnu extensions, add traditional weirdnesses */
int do_posix = FALSE;		/* turn off gnu and unix extensions */
int do_lint = FALSE;		/* provide warnings about questionable stuff */
int do_lint_old = FALSE;	/* warn about stuff not in V7 awk */
int do_intl = FALSE;		/* dump locale-izable strings to stdout */
int do_non_decimal_data = FALSE;	/* allow octal/hex C style DATA. Use with caution! */
int do_nostalgia = FALSE;	/* provide a blast from the past */
int do_intervals = FALSE;	/* allow {...,...} in regexps */
int do_profiling = FALSE;	/* profile and pretty print the program */
int do_dump_vars = FALSE;	/* dump all global variables at end */
int do_tidy_mem = FALSE;	/* release vars when done */

int in_begin_rule = FALSE;	/* we're in a BEGIN rule */
int in_end_rule = FALSE;	/* we're in an END rule */
int whiny_users = FALSE;	/* do things that whiny users want */
#ifdef MBS_SUPPORT
int gawk_mb_cur_max = 1;	/* MB_CUR_MAX value, see comment in main() */
#endif

int output_is_tty = FALSE;	/* control flushing of output */

extern const char *version_string;	/* current version, for printing */

#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
/* Filled in by init_groupset(); consumed and freed by load_procinfo(). */
GETGROUPS_T *groupset;		/* current group set */
int ngroups;			/* size of said set */
#endif

/* The parse tree is stored here.  */
NODE *expression_value;

/*
 * lintfunc is the routine used to report lint messages.  It starts out as
 * warning(); main() switches it to r_fatal for `--lint=fatal'.  The three
 * declarations differ only in how the variadic prototype is spelled for
 * the compiler at hand.
 */
#if _MSC_VER == 510
void (*lintfunc) P((va_list va_alist, ...)) = warning;
#else
#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
void (*lintfunc) P((const char *mesg, ...)) = warning;
#else
void (*lintfunc) () = warning;
#endif
#endif
163
/*
 * optab --- long option table handed to getopt_long() by main().
 *
 * Entries with a non-NULL flag pointer make getopt_long() store 1 in the
 * named variable and return 0 (main() ignores that return value).  Entries
 * with a NULL flag pointer make getopt_long() return the given letter,
 * which main() dispatches on in its option switch.
 */
static const struct option optab[] = {
	{ "compat",		no_argument,		& do_traditional,	1 },
	{ "traditional",	no_argument,		& do_traditional,	1 },
	{ "lint",		optional_argument,	NULL,		'l' },
	{ "lint-old",		no_argument,		& do_lint_old,	1 },
	{ "posix",		no_argument,		& do_posix,	1 },
	{ "nostalgia",		no_argument,		& do_nostalgia,	1 },
	{ "gen-po",		no_argument,		& do_intl,	1 },
	{ "non-decimal-data",	no_argument,		& do_non_decimal_data, 1 },
	{ "profile",		optional_argument,	NULL,		'p' },
	{ "copyleft",		no_argument,		NULL,		'C' },
	{ "copyright",		no_argument,		NULL,		'C' },
	{ "field-separator",	required_argument,	NULL,		'F' },
	{ "file",		required_argument,	NULL,		'f' },
	{ "re-interval",	no_argument,		& do_intervals,	1 },
	{ "source",		required_argument,	NULL,		's' },
	{ "dump-variables",	optional_argument,	NULL,		'd' },
	{ "assign",		required_argument,	NULL,		'v' },
	{ "version",		no_argument,		NULL,		'V' },
	{ "usage",		no_argument,		NULL,		'u' },
	{ "help",		no_argument,		NULL,		'u' },
#ifdef GAWKDEBUG
	{ "parsedebug",		no_argument,		NULL,		'D' },
#endif
	{ NULL, 0, NULL, '\0' }		/* end-of-table marker */
};

/*
 * With NO_LINT, every later test of do_lint / do_lint_old in this file
 * becomes the constant 0.  This must come after optab[], which needs the
 * address of the real do_lint_old variable.
 */
#ifdef NO_LINT
#define do_lint 0
#define do_lint_old 0
#endif
195
196/* main --- process args, parse program, run it, clean up */
197
198int
199main(int argc, char **argv)
200{
201	int c;
202	char *scan;
203	/* the + on the front tells GNU getopt not to rearrange argv */
204	const char *optlist = "+F:f:v:W;m:D";
205	int stopped_early = FALSE;
206	int old_optind;
207	extern int optind;
208	extern int opterr;
209	extern char *optarg;
210	int i;
211
212	/* do these checks early */
213	if (getenv("TIDYMEM") != NULL)
214		do_tidy_mem = TRUE;
215
216	if (getenv("WHINY_USERS") != NULL)
217		whiny_users = TRUE;
218
219#ifdef HAVE_MCHECK_H
220	if (do_tidy_mem)
221		mtrace();
222#endif /* HAVE_MCHECK_H */
223
224#if defined(LC_CTYPE)
225	setlocale(LC_CTYPE, "");
226#endif
227#if defined(LC_COLLATE)
228	setlocale(LC_COLLATE, "");
229#endif
230#if HAVE_LC_MESSAGES && defined(LC_MESSAGES)
231	setlocale(LC_MESSAGES, "");
232#endif
233#if defined(LC_NUMERIC)
234	/*
235	 * Force the issue here.  According to POSIX 2001, decimal
236	 * point is used for parsing source code and for command-line
237	 * assignments and the locale value for processing input,
238	 * number to string conversion, and printing output.
239	 */
240	setlocale(LC_NUMERIC, "C");
241#endif
242#if defined(LC_TIME)
243	setlocale(LC_TIME, "");
244#endif
245
246#ifdef MBS_SUPPORT
247	/*
248	 * In glibc, MB_CUR_MAX is actually a function.  This value is
249	 * tested *a lot* in many speed-critical places in gawk. Caching
250	 * this value once makes a speed difference.
251	 */
252	gawk_mb_cur_max = MB_CUR_MAX;
253#endif
254
255	bindtextdomain(PACKAGE, LOCALEDIR);
256	textdomain(PACKAGE);
257
258	(void) signal(SIGFPE, catchsig);
259	(void) signal(SIGSEGV, catchsig);
260#ifdef SIGBUS
261	(void) signal(SIGBUS, catchsig);
262#endif
263
264	myname = gawk_name(argv[0]);
265        argv[0] = (char *) myname;
266	os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
267
268	/* remove sccs gunk */
269	if (strncmp(version_string, "@(#)", 4) == 0)
270		version_string += 4;
271
272	if (argc < 2)
273		usage(1, stderr);
274
275	/* Robustness: check that file descriptors 0, 1, 2 are open */
276	init_fds();
277
278	/* init array handling. */
279	array_init();
280
281	/* we do error messages ourselves on invalid options */
282	opterr = FALSE;
283
284	/* option processing. ready, set, go! */
285	for (optopt = 0, old_optind = 1;
286	     (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
287	     optopt = 0, old_optind = optind) {
288		if (do_posix)
289			opterr = TRUE;
290
291		switch (c) {
292		case 'F':
293			preassigns_add(PRE_ASSIGN_FS, optarg);
294			break;
295
296		case 'f':
297			/*
298			 * a la MKS awk, allow multiple -f options.
299			 * this makes function libraries real easy.
300			 * most of the magic is in the scanner.
301			 *
302			 * The following is to allow for whitespace at the end
303			 * of a #! /bin/gawk line in an executable file
304			 */
305			scan = optarg;
306			if (argv[optind-1] != optarg)
307				while (ISSPACE(*scan))
308					scan++;
309			srcfiles_add(SOURCEFILE,
310				(*scan == '\0' ? argv[optind++] : optarg));
311			break;
312
313		case 'v':
314			preassigns_add(PRE_ASSIGN, optarg);
315			break;
316
317		case 'm':
318			/*
319			 * Research awk extension.
320			 *	-mf nnn		set # fields, gawk ignores
321			 *	-mr nnn		set record length, ditto
322			 */
323			if (do_lint)
324				lintwarn(_("`-m[fr]' option irrelevant in gawk"));
325			if (optarg[0] != 'r' && optarg[0] != 'f')
326				warning(_("-m option usage: `-m[fr] nnn'"));
327			/*
328			 * Set fixed length records for Tandem,
329			 * ignored on other platforms (see io.c:get_a_record).
330			 */
331			if (optarg[0] == 'r') {
332				if (ISDIGIT(optarg[1]))
333					MRL = atoi(optarg+1);
334				else {
335					MRL = atoi(argv[optind]);
336					optind++;
337				}
338			} else if (optarg[1] == '\0')
339				optind++;
340			break;
341
342		case 'W':       /* gawk specific options - now in getopt_long */
343			fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"),
344				argv[0], optarg);
345			break;
346
347		/* These can only come from long form options */
348		case 'C':
349			copyleft();
350			break;
351
352		case 'd':
353			do_dump_vars = TRUE;
354			if (optarg != NULL && optarg[0] != '\0')
355				varfile = optarg;
356			break;
357
358		case 'l':
359#ifndef NO_LINT
360			do_lint = LINT_ALL;
361			if (optarg != NULL) {
362				if (strcmp(optarg, "fatal") == 0)
363					lintfunc = r_fatal;
364				else if (strcmp(optarg, "invalid") == 0)
365					do_lint = LINT_INVALID;
366			}
367#endif
368			break;
369
370		case 'p':
371			do_profiling = TRUE;
372			if (optarg != NULL)
373				set_prof_file(optarg);
374			else
375				set_prof_file(DEFAULT_PROFILE);
376			break;
377
378		case 's':
379			if (optarg[0] == '\0')
380				warning(_("empty argument to `--source' ignored"));
381			else
382				srcfiles_add(CMDLINE, optarg);
383			break;
384
385		case 'u':
386			usage(0, stdout);	/* per coding stds */
387			break;
388
389		case 'V':
390			version();
391			break;
392
393		case 0:
394			/*
395			 * getopt_long found an option that sets a variable
396			 * instead of returning a letter. Do nothing, just
397			 * cycle around for the next one.
398			 */
399			break;
400
401		case 'D':
402#ifdef GAWKDEBUG
403			yydebug = 2;
404			break;
405#endif
406			/* if not debugging, fall through */
407
408		case '?':
409		default:
410			/*
411			 * New behavior.  If not posix, an unrecognized
412			 * option stops argument processing so that it can
413			 * go into ARGV for the awk program to see. This
414			 * makes use of ``#! /bin/gawk -f'' easier.
415			 *
416			 * However, it's never simple. If optopt is set,
417			 * an option that requires an argument didn't get the
418			 * argument. We care because if opterr is 0, then
419			 * getopt_long won't print the error message for us.
420			 */
421			if (! do_posix
422			    && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
423				/*
424				 * can't just do optind--. In case of an
425				 * option with >= 2 letters, getopt_long
426				 * won't have incremented optind.
427				 */
428				optind = old_optind;
429				stopped_early = TRUE;
430				goto out;
431			} else if (optopt != '\0')
432				/* Use 1003.2 required message format */
433				fprintf(stderr,
434					_("%s: option requires an argument -- %c\n"),
435					myname, optopt);
436			/* else
437				let getopt print error message for us */
438			break;
439		}
440	}
441out:
442
443	if (do_nostalgia)
444		nostalgia();
445
446	/* check for POSIXLY_CORRECT environment variable */
447	if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
448		do_posix = TRUE;
449		if (do_lint)
450			lintwarn(
451	_("environment variable `POSIXLY_CORRECT' set: turning on `--posix'"));
452	}
453
454	if (do_posix) {
455		if (do_traditional)	/* both on command line */
456			warning(_("`--posix' overrides `--traditional'"));
457		else
458			do_traditional = TRUE;
459			/*
460			 * POSIX compliance also implies
461			 * no GNU extensions either.
462			 */
463	}
464
465	if (do_traditional && do_non_decimal_data) {
466		do_non_decimal_data = FALSE;
467		warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'"));
468	}
469
470	if (do_lint && os_is_setuid())
471		warning(_("running %s setuid root may be a security problem"), myname);
472
473	/*
474	 * Force profiling if this is pgawk.
475	 * Don't bother if the command line already set profiling up.
476	 */
477	if (! do_profiling)
478		init_profiling(& do_profiling, DEFAULT_PROFILE);
479
480	/* load group set */
481	init_groupset();
482
483	/* initialize the null string */
484	Nnull_string = make_string("", 0);
485	Nnull_string->numbr = 0.0;
486	Nnull_string->type = Node_val;
487	Nnull_string->flags = (PERM|STRCUR|STRING|NUMCUR|NUMBER);
488
489	/*
490	 * Tell the regex routines how they should work.
491	 * Do this before initializing variables, since
492	 * they could want to do a regexp compile.
493	 */
494	resetup();
495
496	/* Set up the special variables */
497	init_vars();
498
499	/* Set up the field variables */
500	init_fields();
501
502	/* Now process the pre-assignments */
503	for (i = 0; i <= numassigns; i++)
504		if (preassigns[i].stype == PRE_ASSIGN)
505			(void) arg_assign(preassigns[i].val, TRUE);
506		else	/* PRE_ASSIGN_FS */
507			cmdline_fs(preassigns[i].val);
508	free(preassigns);
509#ifdef O_BINARY
510	if ((BINMODE & 1) != 0)
511		if (os_setbinmode(fileno(stdin), O_BINARY) == -1)
512			fatal(_("can't set binary mode on stdin (%s)"), strerror(errno));
513	if ((BINMODE & 2) != 0) {
514		if (os_setbinmode(fileno(stdout), O_BINARY) == -1)
515			fatal(_("can't set binary mode on stdout (%s)"), strerror(errno));
516		if (os_setbinmode(fileno(stderr), O_BINARY) == -1)
517			fatal(_("can't set binary mode on stderr (%s)"), strerror(errno));
518	}
519#endif
520#ifdef GAWKDEBUG
521	setbuf(stdout, (char *) NULL);	/* make debugging easier */
522#endif
523	if (isatty(fileno(stdout)))
524		output_is_tty = TRUE;
525	/* No -f or --source options, use next arg */
526	if (numfiles == -1) {
527		if (optind > argc - 1 || stopped_early) /* no args left or no program */
528			usage(1, stderr);
529		srcfiles_add(CMDLINE, argv[optind]);
530		optind++;
531	}
532
533	init_args(optind, argc, (char *) myname, argv);
534	(void) tokexpand();
535
536	/* Read in the program */
537	if (yyparse() != 0 || errcount != 0)
538		exit(1);
539
540	free(srcfiles);
541
542	if (do_intl)
543		exit(0);
544
545	if (do_lint && begin_block == NULL && expression_value == NULL
546	     && end_block == NULL)
547		lintwarn(_("no program text at all!"));
548
549	if (do_lint)
550		shadow_funcs();
551
552	init_profiling_signals();
553
554#if defined(LC_NUMERIC)
555	/* See comment above. */
556	setlocale(LC_NUMERIC, "");
557#endif
558
559	/* Whew. Finally, run the program. */
560	if (begin_block != NULL) {
561		in_begin_rule = TRUE;
562		(void) interpret(begin_block);
563	}
564	in_begin_rule = FALSE;
565	if (! exiting && (expression_value != NULL || end_block != NULL))
566		do_input();
567	if (end_block != NULL) {
568		in_end_rule = TRUE;
569		(void) interpret(end_block);
570	}
571	in_end_rule = FALSE;
572	if (close_io() != 0 && exit_val == 0)
573		exit_val = 1;
574
575	if (do_profiling) {
576		dump_prog(begin_block, expression_value, end_block);
577		dump_funcs();
578	}
579
580	if (do_dump_vars)
581		dump_vars(varfile);
582
583	if (do_tidy_mem)
584		release_all_vars();
585
586	exit(exit_val);		/* more portable */
587	return exit_val;	/* to suppress warnings */
588}
589
590/* add_src --- add one element to *srcfiles or *preassigns */
591
592static void
593add_src(struct src **data, long *num, long *alloc, enum srctype stype, char *val)
594{
595#define INIT_SRC 4
596
597	++*num;
598
599	if (*data == NULL) {
600		emalloc(*data, struct src *, INIT_SRC * sizeof(struct src), "add_src");
601		*alloc = INIT_SRC;
602	} else if (*num >= *alloc) {
603		(*alloc) *= 2;
604		erealloc(*data, struct src *, (*alloc) * sizeof(struct src), "add_src");
605	}
606
607	(*data)[*num].stype = stype;
608	(*data)[*num].val = val;
609
610#undef INIT_SRC
611}
612
613/* usage --- print usage information and exit */
614
615static void
616usage(int exitval, FILE *fp)
617{
618
619	/* Not factoring out common stuff makes it easier to translate. */
620	fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"),
621		myname);
622	fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"),
623		myname, quote, quote);
624
625	/* GNU long options info. This is too many options. */
626
627	fputs(_("POSIX options:\t\tGNU long options:\n"), fp);
628	fputs(_("\t-f progfile\t\t--file=progfile\n"), fp);
629	fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp);
630	fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp);
631	fputs(_("\t-m[fr] val\n"), fp);
632	fputs(_("\t-W compat\t\t--compat\n"), fp);
633	fputs(_("\t-W copyleft\t\t--copyleft\n"), fp);
634	fputs(_("\t-W copyright\t\t--copyright\n"), fp);
635	fputs(_("\t-W dump-variables[=file]\t--dump-variables[=file]\n"), fp);
636	fputs(_("\t-W gen-po\t\t--gen-po\n"), fp);
637	fputs(_("\t-W help\t\t\t--help\n"), fp);
638	fputs(_("\t-W lint[=fatal]\t\t--lint[=fatal]\n"), fp);
639	fputs(_("\t-W lint-old\t\t--lint-old\n"), fp);
640	fputs(_("\t-W non-decimal-data\t--non-decimal-data\n"), fp);
641#ifdef NOSTALGIA
642	fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp);
643#endif
644#ifdef GAWKDEBUG
645	fputs(_("\t-W parsedebug\t\t--parsedebug\n"), fp);
646#endif
647	fputs(_("\t-W profile[=file]\t--profile[=file]\n"), fp);
648	fputs(_("\t-W posix\t\t--posix\n"), fp);
649	fputs(_("\t-W re-interval\t\t--re-interval\n"), fp);
650	fputs(_("\t-W source=program-text\t--source=program-text\n"), fp);
651	fputs(_("\t-W traditional\t\t--traditional\n"), fp);
652	fputs(_("\t-W usage\t\t--usage\n"), fp);
653	fputs(_("\t-W version\t\t--version\n"), fp);
654
655
656	/* This is one string to make things easier on translators. */
657	fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info', which is\n\
658section `Reporting Problems and Bugs' in the printed version.\n\n"), fp);
659
660	/* ditto */
661	fputs(_("gawk is a pattern scanning and processing language.\n\
662By default it reads standard input and writes standard output.\n\n"), fp);
663
664	/* ditto */
665	fputs(_("Examples:\n\tgawk '{ sum += $1 }; END { print sum }' file\n\
666\tgawk -F: '{ print $1 }' /etc/passwd\n"), fp);
667
668	fflush(fp);
669
670	if (ferror(fp))
671		exit(1);
672
673	exit(exitval);
674}
675
676/* copyleft --- print out the short GNU copyright information */
677
678static void
679copyleft()
680{
681	static const char blurb_part1[] =
682	  N_("Copyright (C) 1989, 1991-%d Free Software Foundation.\n\
683\n\
684This program is free software; you can redistribute it and/or modify\n\
685it under the terms of the GNU General Public License as published by\n\
686the Free Software Foundation; either version 2 of the License, or\n\
687(at your option) any later version.\n\
688\n");
689	static const char blurb_part2[] =
690	  N_("This program is distributed in the hope that it will be useful,\n\
691but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
692MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
693GNU General Public License for more details.\n\
694\n");
695	static const char blurb_part3[] =
696	  N_("You should have received a copy of the GNU General Public License\n\
697along with this program; if not, write to the Free Software\n\
698Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.\n");
699
700	/* multiple blurbs are needed for some brain dead compilers. */
701	printf(_(blurb_part1), 2003);	/* Last update year */
702	fputs(_(blurb_part2), stdout);
703	fputs(_(blurb_part3), stdout);
704	fflush(stdout);
705
706	if (ferror(stdout))
707		exit(1);
708
709	exit(0);
710}
711
712/* cmdline_fs --- set FS from the command line */
713
714static void
715cmdline_fs(char *str)
716{
717	register NODE **tmp;
718
719	tmp = get_lhs(FS_node, (Func_ptr *) 0, FALSE);
720	unref(*tmp);
721	/*
722	 * Only if in full compatibility mode check for the stupid special
723	 * case so -F\t works as documented in awk book even though the shell
724	 * hands us -Ft.  Bleah!
725	 *
726	 * Thankfully, Posix didn't propagate this "feature".
727	 */
728	if (str[0] == 't' && str[1] == '\0') {
729		if (do_lint)
730			lintwarn(_("-Ft does not set FS to tab in POSIX awk"));
731		if (do_traditional && ! do_posix)
732			str[0] = '\t';
733	}
734	*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
735	set_FS();
736}
737
738/* init_args --- set up ARGV from stuff on the command line */
739
740static void
741init_args(int argc0, int argc, char *argv0, char **argv)
742{
743	int i, j;
744	NODE **aptr;
745
746	ARGV_node = install("ARGV", node((NODE *) NULL, Node_var_array, (NODE *) NULL));
747	aptr = assoc_lookup(ARGV_node, tmp_number(0.0), FALSE);
748	*aptr = make_string(argv0, strlen(argv0));
749	(*aptr)->flags |= MAYBE_NUM;
750	for (i = argc0, j = 1; i < argc; i++) {
751		aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j), FALSE);
752		*aptr = make_string(argv[i], strlen(argv[i]));
753		(*aptr)->flags |= MAYBE_NUM;
754		j++;
755	}
756	ARGC_node = install("ARGC",
757			node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
758}
759
/*
 * Set all the special variables to their initial values.
 * Note that some of the variables that have set_FOO routines should
 * *N*O*T* have those routines called upon initialization, and thus
 * they have NULL entries in that field. This is notably true of FS
 * and IGNORECASE.
 *
 * init_vars() walks this table in order; the entry with a NULL name
 * ends it.
 */
struct varinit {
	NODE **spec;		/* global that receives the installed node */
	const char *name;	/* name the variable is installed under */
	NODETYPE type;		/* node type given to the variable */
	const char *strval;	/* initial string value; NULL means use numval */
	AWKNUM numval;		/* initial numeric value when strval is NULL */
	Func_ptr assign;	/* called right after install, unless NULL */
};
static const struct varinit varinit[] = {
{&CONVFMT_node,	"CONVFMT",	Node_CONVFMT,		"%.6g",	0,  set_CONVFMT },
{&NF_node,	"NF",		Node_NF,		NULL,	-1, NULL },
{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS,	"",	0,  NULL },
{&NR_node,	"NR",		Node_NR,		NULL,	0,  set_NR },
{&FNR_node,	"FNR",		Node_FNR,		NULL,	0,  set_FNR },
{&FS_node,	"FS",		Node_FS,		" ",	0,  NULL },
{&RS_node,	"RS",		Node_RS,		"\n",	0,  set_RS },
{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE,	NULL,	0,  NULL },
{&FILENAME_node, "FILENAME",	Node_var,		"",	0,  NULL },
{&OFS_node,	"OFS",		Node_OFS,		" ",	0,  set_OFS },
{&ORS_node,	"ORS",		Node_ORS,		"\n",	0,  set_ORS },
{&OFMT_node,	"OFMT",		Node_OFMT,		"%.6g",	0,  set_OFMT },
{&RLENGTH_node, "RLENGTH",	Node_var,		NULL,	0,  NULL },
{&RSTART_node,	"RSTART",	Node_var,		NULL,	0,  NULL },
{&SUBSEP_node,	"SUBSEP",	Node_var,		"\034",	0,  NULL },
{&ARGIND_node,	"ARGIND",	Node_var,		NULL,	0,  NULL },
{&ERRNO_node,	"ERRNO",	Node_var,		NULL,	0,  NULL },
{&RT_node,	"RT",		Node_var,		"",	0,  NULL },
{&BINMODE_node,	"BINMODE",	Node_BINMODE,		NULL,	0,  NULL },
{&LINT_node,	"LINT",		Node_LINT,		NULL,	0,  NULL },
{&TEXTDOMAIN_node,	"TEXTDOMAIN",		Node_TEXTDOMAIN,	"messages",	0,  set_TEXTDOMAIN },
/* terminator: NULL name */
{0,		NULL,		Node_illegal,		NULL,	0,  NULL },
};
799
800/* init_vars --- actually initialize everything in the symbol table */
801
802static void
803init_vars()
804{
805	register const struct varinit *vp;
806
807	for (vp = varinit; vp->name; vp++) {
808		*(vp->spec) = install((char *) vp->name,
809		  node(vp->strval == NULL ? make_number(vp->numval)
810				: make_string((char *) vp->strval,
811					strlen(vp->strval)),
812		       vp->type, (NODE *) NULL));
813		if (vp->assign)
814			(*(vp->assign))();
815	}
816}
817
818/* load_environ --- populate the ENVIRON array */
819
820NODE *
821load_environ()
822{
823#if ! defined(TANDEM)
824#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
825	extern char **environ;
826#endif
827	register char *var, *val;
828	NODE **aptr;
829	register int i;
830#endif /* TANDEM */
831
832	ENVIRON_node = install("ENVIRON",
833			node((NODE *) NULL, Node_var_array, (NODE *) NULL));
834#if ! defined(TANDEM)
835	for (i = 0; environ[i] != NULL; i++) {
836		static char nullstr[] = "";
837
838		var = environ[i];
839		val = strchr(var, '=');
840		if (val != NULL)
841			*val++ = '\0';
842		else
843			val = nullstr;
844		aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)),
845				    FALSE);
846		*aptr = make_string(val, strlen(val));
847		(*aptr)->flags |= MAYBE_NUM;
848
849		/* restore '=' so that system() gets a valid environment */
850		if (val != nullstr)
851			*--val = '=';
852	}
853	/*
854	 * Put AWKPATH into ENVIRON if it's not there.
855	 * This allows querying it from within awk programs.
856	 */
857	if (getenv("AWKPATH") == NULL) {
858		aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7), FALSE);
859		*aptr = make_string(defpath, strlen(defpath));
860	}
861#endif /* TANDEM */
862	return ENVIRON_node;
863}
864
865/* load_procinfo --- populate the PROCINFO array */
866
867NODE *
868load_procinfo()
869{
870	int i;
871	NODE **aptr;
872	char name[100];
873	AWKNUM value;
874
875	PROCINFO_node = install("PROCINFO",
876			node((NODE *) NULL, Node_var_array, (NODE *) NULL));
877
878#ifdef GETPGRP_VOID
879#define getpgrp_arg() /* nothing */
880#else
881#define getpgrp_arg() getpid()
882#endif
883
884	value = getpgrp(getpgrp_arg());
885	aptr = assoc_lookup(PROCINFO_node, tmp_string("pgrpid", 6), FALSE);
886	*aptr = make_number(value);
887
888	/*
889	 * could put a lot of this into a table, but then there's
890	 * portability problems declaring all the functions. so just
891	 * do it the slow and stupid way. sigh.
892	 */
893
894	value = getpid();
895	aptr = assoc_lookup(PROCINFO_node, tmp_string("pid", 3), FALSE);
896	*aptr = make_number(value);
897
898	value = getppid();
899	aptr = assoc_lookup(PROCINFO_node, tmp_string("ppid", 4), FALSE);
900	*aptr = make_number(value);
901
902	value = getuid();
903	aptr = assoc_lookup(PROCINFO_node, tmp_string("uid", 3), FALSE);
904	*aptr = make_number(value);
905
906	value = geteuid();
907	aptr = assoc_lookup(PROCINFO_node, tmp_string("euid", 4), FALSE);
908	*aptr = make_number(value);
909
910	value = getgid();
911	aptr = assoc_lookup(PROCINFO_node, tmp_string("gid", 3), FALSE);
912	*aptr = make_number(value);
913
914	value = getegid();
915	aptr = assoc_lookup(PROCINFO_node, tmp_string("egid", 4), FALSE);
916	*aptr = make_number(value);
917
918	aptr = assoc_lookup(PROCINFO_node, tmp_string("FS", 2), FALSE);
919	*aptr = (using_fieldwidths() ? make_string("FIELDWIDTHS", 11) :
920				make_string("FS", 2) );
921
922#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
923	for (i = 0; i < ngroups; i++) {
924		sprintf(name, "group%d", i + 1);
925		value = groupset[i];
926		aptr = assoc_lookup(PROCINFO_node, tmp_string(name, strlen(name)), FALSE);
927		*aptr = make_number(value);
928	}
929	if (groupset) {
930		free(groupset);
931		groupset = NULL;
932	}
933#endif
934	return PROCINFO_node;
935}
936
937/* arg_assign --- process a command-line assignment */
938
939int
940arg_assign(char *arg, int initing)
941{
942	char *cp, *cp2;
943	int badvar;
944	Func_ptr after_assign = NULL;
945	NODE *var;
946	NODE *it;
947	NODE **lhs;
948
949	cp = strchr(arg, '=');
950
951	if (cp == NULL) {
952		if (! initing)
953			return FALSE;	/* This is file name, not assignment. */
954
955		fprintf(stderr,
956			_("%s: `%s' argument to `-v' not in `var=value' form\n\n"),
957			myname, arg);
958		usage(1, stderr);
959	}
960
961	*cp++ = '\0';
962
963	/* first check that the variable name has valid syntax */
964	badvar = FALSE;
965	if (! ISALPHA(arg[0]) && arg[0] != '_')
966		badvar = TRUE;
967	else
968		for (cp2 = arg+1; *cp2; cp2++)
969			if (! ISALNUM(*cp2) && *cp2 != '_') {
970				badvar = TRUE;
971				break;
972			}
973
974	if (badvar) {
975		if (initing)
976			fatal(_("`%s' is not a legal variable name"), arg);
977
978		if (do_lint)
979			lintwarn(_("`%s' is not a variable name, looking for file `%s=%s'"),
980				arg, arg, cp);
981	} else {
982		/*
983		 * Recent versions of nawk expand escapes inside assignments.
984		 * This makes sense, so we do it too.
985		 */
986		it = make_str_node(cp, strlen(cp), SCAN);
987		it->flags |= MAYBE_NUM;
988#ifdef LC_NUMERIC
989		setlocale(LC_NUMERIC, "C");
990		(void) force_number(it);
991		setlocale(LC_NUMERIC, "");
992#endif /* LC_NUMERIC */
993		var = variable(arg, FALSE, Node_var);
994		lhs = get_lhs(var, &after_assign, FALSE);
995		unref(*lhs);
996		*lhs = it;
997		if (after_assign != NULL)
998			(*after_assign)();
999	}
1000
1001	*--cp = '=';	/* restore original text of ARGV */
1002
1003	return ! badvar;
1004}
1005
1006/* catchsig --- catch signals */
1007
1008static RETSIGTYPE
1009catchsig(int sig)
1010{
1011	if (sig == SIGFPE) {
1012		fatal(_("floating point exception"));
1013	} else if (sig == SIGSEGV
1014#ifdef SIGBUS
1015	        || sig == SIGBUS
1016#endif
1017	) {
1018		set_loc(__FILE__, __LINE__);
1019		msg(_("fatal error: internal error"));
1020		/* fatal won't abort() if not compiled for debugging */
1021		abort();
1022	} else
1023		cant_happen();
1024	/* NOTREACHED */
1025}
1026
/*
 * nostalgia --- print the famous error message and die
 *
 * Writes the message to stderr and calls abort().  Never returns.
 */

static void
nostalgia(void)
{
	/*
	 * N.B.: This string is not gettextized, on purpose.
	 * So there.
	 */
	fputs("awk: bailing out near line 1\n", stderr);
	fflush(stderr);
	abort();
}
1040
1041/* version --- print version message */
1042
1043static void
1044version()
1045{
1046	printf("%s.%s\n", version_string, PATCHLEVEL);
1047	/*
1048	 * Per GNU coding standards, print copyright info,
1049	 * then exit successfully, do nothing else.
1050	 */
1051	copyleft();
1052	exit(0);
1053}
1054
/*
 * init_fds --- check for 0, 1, 2, open on /dev/null if possible
 *
 * For each of file descriptors 0, 1 and 2 that fstat() rejects, open
 * /dev/null through devopen().  Failures are reported only as lint
 * warnings, and only when MAKE_A_HEROIC_EFFORT is defined.
 * NOTE(review): this relies on devopen() handing back the lowest free
 * descriptor so that the missing slot gets filled -- confirm in io.c.
 */

static void
init_fds(void)
{
	struct stat sbuf;
	int fd;
	int newfd;

	/* maybe no stderr, don't bother with error mesg */
	for (fd = 0; fd <= 2; fd++) {
		if (fstat(fd, &sbuf) < 0) {
			/*
			 * Both diagnostics are guarded the same way so that
			 * they are always enabled or disabled together.
			 */
#ifdef MAKE_A_HEROIC_EFFORT
			if (do_lint)
				lintwarn(_("no pre-opened fd %d"), fd);
#endif
			newfd = devopen("/dev/null", "r+");
#ifdef MAKE_A_HEROIC_EFFORT
			if (do_lint && newfd < 0)
				lintwarn(_("could not pre-open /dev/null for fd %d"), fd);
#else
			(void) newfd;	/* result deliberately ignored */
#endif
		}
	}
}
1079
/*
 * init_groupset --- initialize groupset
 *
 * Where getgroups() is usable, stores the process's group IDs in the
 * global groupset and their count in ngroups.  groupset is left
 * untouched when the process has no groups; any getgroups() failure
 * is fatal.  Does nothing on other systems.
 */

static void
init_groupset(void)
{
#if defined(HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
#ifdef GETGROUPS_NOT_STANDARD
	/* For systems that aren't standards conformant, use old way. */
	ngroups = NGROUPS_MAX;
#else
	/*
	 * If called with 0 for both args, return value is
	 * total number of groups.
	 */
	ngroups = getgroups(0, NULL);
#endif
	if (ngroups == -1)
		fatal(_("could not find groups: %s"), strerror(errno));
	if (ngroups == 0)
		return;

	/* fill in groups */
	emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset");

	/* the second call reports how many entries were actually stored */
	ngroups = getgroups(ngroups, groupset);
	if (ngroups == -1)
		fatal(_("could not find groups: %s"), strerror(errno));
#endif
}
1109