1/*
2 * awkgram.y --- yacc/bison parser
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
24 */
25
26%{
27#ifdef GAWKDEBUG
28#define YYDEBUG 12
29#endif
30
31#include "awk.h"
32
33#define CAN_FREE	TRUE
34#define DONT_FREE	FALSE
35
36#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
37static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38#else
39static void yyerror(); /* va_alist */
40#endif
41static char *get_src_buf P((void));
42static int yylex P((void));
43static NODE *node_common P((NODETYPE op));
44static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
45static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
46static NODE *append_right P((NODE *list, NODE *new));
47static inline NODE *append_pattern P((NODE **list, NODE *patt));
48static void func_install P((NODE *params, NODE *def));
49static void pop_var P((NODE *np, int freeit));
50static void pop_params P((NODE *params));
51static NODE *make_param P((char *name));
52static NODE *mk_rexp P((NODE *exp));
53static int dup_parms P((NODE *func));
54static void param_sanity P((NODE *arglist));
55static int parms_shadow P((const char *fname, NODE *func));
56static int isnoeffect P((NODETYPE t));
57static int isassignable P((NODE *n));
58static void dumpintlstr P((const char *str, size_t len));
59static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
60static void count_args P((NODE *n));
61static int isarray P((NODE *n));
62
63enum defref { FUNC_DEFINE, FUNC_USE };
64static void func_use P((const char *name, enum defref how));
65static void check_funcs P((void));
66
67static int want_regexp;		/* lexical scanning kludge */
68static int can_return;		/* parsing kludge */
69static int begin_or_end_rule = FALSE;	/* parsing kludge */
70static int parsing_end_rule = FALSE; /* for warnings */
71static int in_print = FALSE;	/* lexical scanning kludge for print */
72static int in_parens = 0;	/* lexical scanning kludge for print */
73static char *lexptr;		/* pointer to next char during parsing */
74static char *lexend;
75static char *lexptr_begin;	/* keep track of where we were for error msgs */
76static char *lexeme;		/* beginning of lexeme for debugging */
77static char *thisline = NULL;
78#define YYDEBUG_LEXER_TEXT (lexeme)
79static int param_counter;
80static char *tokstart = NULL;
81static char *tok = NULL;
82static char *tokend;
83
84static long func_count;		/* total number of functions */
85
86#define HASHSIZE	1021	/* this constant only used here */
87NODE *variables[HASHSIZE];
88static int var_count;		/* total number of global variables */
89
90extern char *source;
91extern int sourceline;
92extern struct src *srcfiles;
93extern int numfiles;
94extern int errcount;
95extern NODE *begin_block;
96extern NODE *end_block;
97
98/*
99 * This string cannot occur as a real awk identifier.
100 * Use it as a special token to make function parsing
101 * uniform, but if it's seen, don't install the function.
102 * e.g.
103 * 	function split(x) { return x }
104 * 	function x(a) { return a }
105 * should only produce one error message, and not core dump.
106 */
107static char builtin_func[] = "@builtin";
108%}
109
110%union {
111	long lval;
112	AWKNUM fval;
113	NODE *nodeval;
114	NODETYPE nodetypeval;
115	char *sval;
116	NODE *(*ptrval) P((void));
117}
118
119%type <nodeval> function_prologue pattern action variable param_list
120%type <nodeval> exp common_exp
121%type <nodeval> simp_exp non_post_simp_exp
122%type <nodeval> expression_list opt_expression_list print_expression_list
123%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
124%type <nodeval> simple_stmt opt_simple_stmt
125%type <nodeval> opt_exp opt_variable regexp
126%type <nodeval> input_redir output_redir
127%type <nodetypeval> print
128%type <nodetypeval> assign_operator a_relop relop_or_less
129%type <sval> func_name
130%type <lval> lex_builtin
131
132%token <sval> FUNC_CALL NAME REGEXP
133%token <lval> ERROR
134%token <nodeval> YNUMBER YSTRING
135%token <nodetypeval> RELOP IO_OUT IO_IN
136%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
137%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
138%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
139%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
140%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
141%token <nodetypeval> LEX_IN
142%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
143%token <lval> LEX_BUILTIN LEX_LENGTH
144%token NEWLINE
145
146/* these are just yylval numbers */
147
148/* Lowest to highest */
149%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
150%right '?' ':'
151%left LEX_OR
152%left LEX_AND
153%left LEX_GETLINE
154%nonassoc LEX_IN
155%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
156%nonassoc ','
157%nonassoc MATCHOP
158%nonassoc RELOP '<' '>' IO_IN IO_OUT
159%left CONCAT_OP
160%left YSTRING YNUMBER
161%left '+' '-'
162%left '*' '/' '%'
163%right '!' UNARY
164%right '^'
165%left INCREMENT DECREMENT
166%left '$'
167%left '(' ')'
168%%
169
170start
171	: opt_nls program opt_nls
172		{
173			check_funcs();
174		}
175	;
176
177program
178	: /* empty */
179	| program rule
180	  {
181		begin_or_end_rule = parsing_end_rule = FALSE;
182		yyerrok;
183	  }
184	| program error
185  	  {
186		begin_or_end_rule = parsing_end_rule = FALSE;
187		/*
188		 * If errors, give up, don't produce an infinite
189		 * stream of syntax error messages.
190		 */
191  		/* yyerrok; */
192  	  }
193	;
194
rule
	: pattern action
	  {
		/* Hang the action body off the pattern node. */
		$1->rnode = $2;
	  }
	| pattern statement_term
	  {
		if ($1->lnode == NULL) {
			/* No pattern and no action: that is an error. */
			if (! begin_or_end_rule)
				warning(_("each rule must have a pattern or an action part"));
			else
				warning(_("%s blocks must have an action part"),
					(parsing_end_rule ? "END" : "BEGIN"));
			errcount++;
		} else {
			/* Non-empty pattern without an action: use Node_K_print_rec. */
			$1->rnode = node(NULL, Node_K_print_rec, NULL);
		}
	  }
	| function_prologue action
	  {
		/* The function body is finished; `return' is no longer valid. */
		can_return = FALSE;
		if ($1 != NULL)
			func_install($1, $2);
		yyerrok;
	  }
	;
223
pattern
	: /* empty */
	  {
		$$ = append_pattern(&expression_value, (NODE *) NULL);
	  }
	| exp
	  {
		$$ = append_pattern(&expression_value, $1);
	  }
	| exp ',' exp
	  {
		/* Range pattern: both endpoints are kept in a Node_cond_pair. */
		NODE *range;

		getnode(range);
		range->type = Node_line_range;
		range->condpair = node($1, Node_cond_pair, $3);
		range->triggered = FALSE;
		$$ = append_pattern(&expression_value, range);
	  }
	| LEX_BEGIN
	  {
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&begin_block, (NODE *) NULL);
	  }
	| LEX_END
	  {
		/* END sets both flags so diagnostics can name the block. */
		parsing_end_rule = TRUE;
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&end_block, (NODE *) NULL);
	  }
	;
254
255action
256	: l_brace statements r_brace opt_semi opt_nls
257		{ $$ = $2; }
258	;
259
260func_name
261	: NAME
262		{ $$ = $1; }
263	| FUNC_CALL
264		{ $$ = $1; }
265	| lex_builtin
266	  {
267		yyerror(_("`%s' is a built-in function, it cannot be redefined"),
268			tokstart);
269		errcount++;
270		$$ = builtin_func;
271		/* yyerrok; */
272	  }
273	;
274
275lex_builtin
276	: LEX_BUILTIN
277	| LEX_LENGTH
278	;
279
function_prologue
	: LEX_FUNCTION
		{
			/* Reset the parameter counter for the new function. */
			param_counter = 0;
		}
	  func_name '(' opt_param_list r_paren opt_nls
		{
			/*
			 * The function name becomes the head of the list and
			 * is flagged FUNC; the parameters follow it.
			 */
			NODE *fname_node = make_param($3);

			fname_node->flags |= FUNC;
			$$ = append_right(fname_node, $5);
			can_return = TRUE;
			/* Duplicate parameter names count as an error. */
			if (dup_parms($$))
				errcount++;
		}
	;
298
regexp
	/*
	 * Bumping want_regexp tells yylex that the next token is a
	 * regexp, so it should scan up to the closing slash.
	 */
	: a_slash
		{ want_regexp++; }
	  REGEXP	/* The terminating '/' is consumed by yylex(). */
		{
		  NODE *re_node;
		  size_t re_len = strlen($3);

		  /* A regexp that starts and ends with '*' may be a botched C comment. */
		  if (do_lint && ($3)[0] == '*' && ($3)[re_len - 1] == '*')
			lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);

		  getnode(re_node);
		  re_node->type = Node_regex;
		  re_node->re_exp = make_string($3, re_len);
		  re_node->re_reg = make_regexp($3, re_len, FALSE);
		  re_node->re_text = NULL;
		  re_node->re_flags = CONST;
		  $$ = re_node;
		}
	;
325
326a_slash
327	: '/'
328	| SLASH_BEFORE_EQUAL
329	;
330
statements
	: /* empty */
	  { $$ = NULL; }
	| statements statement
	  {
		/*
		 * Append the new statement to the list so far.  A null
		 * statement leaves the list untouched; a lone first statement
		 * is returned as is; otherwise both sides are wrapped in
		 * Node_statement_list nodes (unless already lists) and joined.
		 */
		NODE *list = $1;
		NODE *stmt = $2;

		if (stmt != NULL) {
			if (do_lint && isnoeffect(stmt->type))
				lintwarn(_("statement may have no effect"));
			if (list != NULL) {
				if (list->type != Node_statement_list)
					list = node(list, Node_statement_list, (NODE *) NULL);
				if (stmt->type != Node_statement_list)
					stmt = node(stmt, Node_statement_list, (NODE *) NULL);
				list = append_right(list, stmt);
			} else
				list = stmt;
		}
		$$ = list;
		yyerrok;
	  }
	| statements error
	  { $$ = NULL; }
	;
355
356statement_term
357	: nls
358	| semi opt_nls
359	;
360
statement
	: semi opt_nls
		{ $$ = NULL; }
	| l_brace statements r_brace
		{ $$ = $2; }
	| if_statement
		{ $$ = $1; }
	| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
		{ $$ = node($3, Node_K_switch, $7); }
	| LEX_WHILE '(' exp r_paren opt_nls statement
		{ $$ = node($3, Node_K_while, $6); }
	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
		{ $$ = node($6, Node_K_do, $3); }
	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
	  {
		/*
		 * Efficiency hack.  Recognize the special case of
		 *
		 * 	for (iggy in foo)
		 * 		delete foo[iggy]
		 *
		 * and treat it as if it were
		 *
		 * 	delete foo
		 *
		 * Check that the body is a `delete a[i]' statement with
		 * exactly one subscript, and that both the loop var and
		 * array names match.
		 *
		 * A body of plain `delete foo' has no subscript list at all
		 * (its rnode is null), so that must be ruled out before the
		 * subscript list is examined.  A multi-dimensional subscript
		 * such as `delete foo[iggy, x]' only removes some elements,
		 * so it must not be turned into a whole-array delete either.
		 */
		int optimized = FALSE;

		if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
			NODE *arr, *sub;

			assert($8->rnode->type == Node_expression_list);
			arr = $8->lnode;	/* array var */
			sub = $8->rnode->lnode;	/* index var */

			if (   $8->rnode->rnode == NULL	/* single subscript only */
			    && (arr->type == Node_var_new
				|| arr->type == Node_var_array
				|| arr->type == Node_param_list)
			    && (sub->type == Node_var_new
				|| sub->type == Node_var
				|| sub->type == Node_param_list)
			    && strcmp($3, sub->vname) == 0
			    && strcmp($5, arr->vname) == 0) {
				$8->type = Node_K_delete_loop;
				$$ = $8;
				/*
				 * The loop's own name strings are not handed
				 * to variable() on this path, so release them
				 * here instead of leaking them.
				 */
				free($3);
				free($5);
				optimized = TRUE;
			}
		}

		if (! optimized) {
			/* regular loop */
			$$ = node($8, Node_K_arrayfor,
				make_for_loop(variable($3, CAN_FREE, Node_var),
				(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
		}
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		/* No loop condition given: pass a null condition. */
		$$ = node($11, Node_K_for,
			(NODE *) make_for_loop($3, (NODE *) NULL, $8));
	  }
	| LEX_BREAK statement_term
	   /* for break, maybe we'll have to remember where to break to */
		{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	| LEX_CONTINUE statement_term
	   /* similarly */
		{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	| LEX_NEXT statement_term
		{ NODETYPE type;

		  if (begin_or_end_rule)
			yyerror(_("`%s' used in %s action"), "next",
				(parsing_end_rule ? "END" : "BEGIN"));
		  type = Node_K_next;
		  $$ = node((NODE *) NULL, type, (NODE *) NULL);
		}
	| LEX_NEXTFILE statement_term
		{
		  if (do_traditional) {
			/*
			 * can't use yyerror, since may have overshot
			 * the source line
			 */
			errcount++;
			error(_("`nextfile' is a gawk extension"));
		  }
		  if (do_lint)
			lintwarn(_("`nextfile' is a gawk extension"));
		  if (begin_or_end_rule) {
			/* same thing */
			errcount++;
			error(_("`%s' used in %s action"), "nextfile",
				(parsing_end_rule ? "END" : "BEGIN"));
		  }
		  $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
		}
	| LEX_EXIT opt_exp statement_term
		{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
	| LEX_RETURN
		{
		  if (! can_return)
			yyerror(_("`return' used outside function context"));
		}
	  opt_exp statement_term
		{ $$ = node($3, Node_K_return, (NODE *) NULL); }
	| simple_stmt statement_term
	;
471
472	/*
473	 * A simple_stmt exists to satisfy a constraint in the POSIX
474	 * grammar allowing them to occur as the 1st and 3rd parts
475	 * in a `for (...;...;...)' loop.  This is a historical oddity
476	 * inherited from Unix awk, not at all documented in the AK&W
477	 * awk book.  We support it, as this was reported as a bug.
478	 * We don't bother to document it though. So there.
479	 */
simple_stmt
	: print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
	  {
		/*
		 * Optimization: plain `print' (null expression list) and
		 * `print $0' (a one-element list whose only entry is a field
		 * spec for the constant 0) both become Node_K_print_rec,
		 * which is faster for these two cases.
		 */
		static int warned = FALSE;
		int whole_record = FALSE;

		if ($1 == Node_K_print) {
			if ($3 == NULL)
				whole_record = TRUE;
			else if ($3->type == Node_expression_list
				 && $3->rnode == NULL
				 && $3->lnode->type == Node_field_spec
				 && $3->lnode->lnode->type == Node_val
				 && $3->lnode->lnode->numbr == 0.0)
				whole_record = TRUE;
		}

		if (! whole_record) {
			$$ = node($3, $1, $4);
			if ($$->type == Node_K_printf)
				count_args($$);
		} else {
			$$ = node(NULL, Node_K_print_rec, $4);

			/* Warn only once per run about plain print in BEGIN/END. */
			if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
				warned = TRUE;
				lintwarn(
	_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
			}
		}
	  }
	| LEX_DELETE NAME '[' expression_list ']'
		{
		  NODE *array = variable($2, CAN_FREE, Node_var_array);

		  $$ = node(array, Node_K_delete, $4);
		}
	| LEX_DELETE NAME
		{
		  if (do_lint)
			lintwarn(_("`delete array' is a gawk extension"));
		  if (do_traditional) {
			/*
			 * yyerror is unsuitable here: the lexer may already
			 * be past the offending source line.
			 */
			errcount++;
			error(_("`delete array' is a gawk extension"));
		  }
		  /* Whole-array delete: no subscript list. */
		  $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
		}
	| LEX_DELETE '(' NAME ')'
		{
		  /* tawk compatibility; maybe the warnings should always be done. */
		  if (do_lint)
			lintwarn(_("`delete(array)' is a non-portable tawk extension"));
		  if (do_traditional) {
			/*
			 * yyerror is unsuitable here: the lexer may already
			 * be past the offending source line.
			 */
			errcount++;
			error(_("`delete(array)' is a non-portable tawk extension"));
		  }
		  $$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
		}
	| exp
		{ $$ = $1; }
	;
546
547opt_simple_stmt
548	: /* empty */
549		{ $$ = NULL; }
550	| simple_stmt
551		{ $$ = $1; }
552	;
553
switch_body
	: case_statements
	  {
		/*
		 * Walk the case list once, checking that every case value is
		 * unique (compared as strings) and that at most one `default'
		 * is present, then wrap the list in a Node_switch_body whose
		 * right side points at the default entry (or is null).
		 */
		if ($1 != NULL) {
			NODE *default_node = NULL;
			NODE *item;
			const char **seen = NULL;
			int capacity = 128;
			int nseen = 0;
			int k;

			emalloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
			for (item = $1; item != NULL; item = item->rnode) {
				NODE *entry = item->lnode;
				char *caseval;

				if (entry->type != Node_K_case) {
					/* Not a case: remember it as the default entry. */
					if (default_node != NULL)
						yyerror(_("Duplicate `default' detected in switch body"));
					default_node = item;
					continue;
				}

				/* Regexp cases use their source text; others their string value. */
				if (entry->lnode->type == Node_regex)
					caseval = entry->lnode->re_exp->stptr;
				else
					caseval = force_string(tree_eval(entry->lnode))->stptr;

				for (k = 0; k < nseen; k++) {
					if (strcmp(caseval, seen[k]) == 0)
						yyerror(_("duplicate case values in switch body: %s"), caseval);
				}

				/* Grow the table in steps of 128 entries. */
				if (nseen >= capacity) {
					capacity += 128;
					erealloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
				}
				seen[nseen++] = caseval;
			}

			free(seen);

			/* Create the switch body. */
			$$ = node($1, Node_switch_body, default_node);
		} else
			$$ = NULL;
	}
	;
605
case_statements
	: /* empty */
	  { $$ = NULL; }
	| case_statements case_statement
	  {
		/*
		 * Build up the list of cases.  The first case is always
		 * wrapped in a Node_case_list; later ones are wrapped as
		 * needed and appended on the right.
		 */
		NODE *list = $1;
		NODE *item = $2;

		if (item != NULL) {
			if (do_lint && isnoeffect(item->type))
				lintwarn(_("statement may have no effect"));
			if (list == NULL)
				list = node(item, Node_case_list, (NODE *) NULL);
			else {
				if (list->type != Node_case_list)
					list = node(list, Node_case_list, (NODE *) NULL);
				if (item->type != Node_case_list)
					item = node(item, Node_case_list, (NODE *) NULL);
				list = append_right(list, item);
			}
		}
		$$ = list;
		yyerrok;
	  }
	| case_statements error
	  { $$ = NULL; }
	;
629
630case_statement
631	: LEX_CASE case_value colon opt_nls statements
632		{ $$ = node($2, Node_K_case, $5); }
633	| LEX_DEFAULT colon opt_nls statements
634		{ $$ = node((NODE *) NULL, Node_K_default, $4); }
635	;
636
637case_value
638	: YNUMBER
639		{ $$ = $1; }
640	| '-' YNUMBER    %prec UNARY
641	  {
642		$2->numbr = -(force_number($2));
643		$$ = $2;
644	  }
645	| '+' YNUMBER    %prec UNARY
646		{ $$ = $2; }
647	| YSTRING
648		{ $$ = $1; }
649	| regexp
650		{ $$ = $1; }
651	;
652
653print
654	: LEX_PRINT
655	| LEX_PRINTF
656	;
657
658	/*
659	 * Note: ``print(x)'' is already parsed by the first rule,
660	 * so there is no good in covering it by the second one too.
661	 */
662print_expression_list
663	: opt_expression_list
664	| '(' exp comma expression_list r_paren
665		{ $$ = node($2, Node_expression_list, $4); }
666	;
667
output_redir
	: /* empty */
	  {
		/* No redirection: the print statement's expression list is done. */
		in_print = FALSE;
		in_parens = 0;
		$$ = NULL;
	  }
	| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
	  {
		$$ = node($3, $1, (NODE *) NULL);
		/*
		 * Reject `print ... |& cmd |& getline'.  A getline with no
		 * redirection of its own has a null rnode, so test for that
		 * before looking at the redirection type.
		 */
		if ($1 == Node_redirect_twoway
		    && $3->type == Node_K_getline
		    && $3->rnode != NULL
		    && $3->rnode->type == Node_redirect_twoway)
			yyerror(_("multistage two-way pipelines don't work"));
	  }
	;
684
685if_statement
686	: LEX_IF '(' exp r_paren opt_nls statement
687	  {
688		$$ = node($3, Node_K_if,
689			node($6, Node_if_branches, (NODE *) NULL));
690	  }
691	| LEX_IF '(' exp r_paren opt_nls statement
692	     LEX_ELSE opt_nls statement
693		{ $$ = node($3, Node_K_if,
694				node($6, Node_if_branches, $9)); }
695	;
696
697nls
698	: NEWLINE
699	| nls NEWLINE
700	;
701
702opt_nls
703	: /* empty */
704	| nls
705	;
706
707input_redir
708	: /* empty */
709		{ $$ = NULL; }
710	| '<' simp_exp
711		{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
712	;
713
714opt_param_list
715	: /* empty */
716		{ $$ = NULL; }
717	| param_list
718		{ $$ = $1; }
719	;
720
721param_list
722	: NAME
723		{ $$ = make_param($1); }
724	| param_list comma NAME
725		{ $$ = append_right($1, make_param($3)); yyerrok; }
726	| error
727		{ $$ = NULL; }
728	| param_list error
729		{ $$ = NULL; }
730	| param_list comma error
731		{ $$ = NULL; }
732	;
733
734/* optional expression, as in for loop */
735opt_exp
736	: /* empty */
737		{ $$ = NULL; }
738	| exp
739		{ $$ = $1; }
740	;
741
742opt_expression_list
743	: /* empty */
744		{ $$ = NULL; }
745	| expression_list
746		{ $$ = $1; }
747	;
748
749expression_list
750	: exp
751		{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
752	| expression_list comma exp
753		{
754			$$ = append_right($1,
755				node($3, Node_expression_list, (NODE *) NULL));
756			yyerrok;
757		}
758	| error
759		{ $$ = NULL; }
760	| expression_list error
761		{ $$ = NULL; }
762	| expression_list error exp
763		{ $$ = NULL; }
764	| expression_list comma error
765		{ $$ = NULL; }
766	;
767
768/* Expressions, not including the comma operator.  */
769exp	: variable assign_operator exp %prec ASSIGNOP
770		{
771		  if (do_lint && $3->type == Node_regex)
772			lintwarn(_("regular expression on right of assignment"));
773		  $$ = node($1, $2, $3);
774		}
775	| exp LEX_AND exp
776		{ $$ = node($1, Node_and, $3); }
777	| exp LEX_OR exp
778		{ $$ = node($1, Node_or, $3); }
779	| exp MATCHOP exp
780		{
781		  if ($1->type == Node_regex)
782			warning(_("regular expression on left of `~' or `!~' operator"));
783		  $$ = node($1, $2, mk_rexp($3));
784		}
785	| exp LEX_IN NAME
786		{ $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
787	| exp a_relop exp %prec RELOP
788		{
789		  if (do_lint && $3->type == Node_regex)
790			lintwarn(_("regular expression on right of comparison"));
791		  $$ = node($1, $2, $3);
792		}
793	| exp '?' exp ':' exp
794		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
795	| common_exp
796		{ $$ = $1; }
797	;
798
799assign_operator
800	: ASSIGN
801		{ $$ = $1; }
802	| ASSIGNOP
803		{ $$ = $1; }
804	| SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
805		{ $$ = Node_assign_quotient; }
806	;
807
808relop_or_less
809	: RELOP
810		{ $$ = $1; }
811	| '<'
812		{ $$ = Node_less; }
813	;
814a_relop
815	: relop_or_less
816	| '>'
817		{ $$ = Node_greater; }
818	;
819
820common_exp
821	: regexp
822		{ $$ = $1; }
823	| '!' regexp %prec UNARY
824		{
825		  $$ = node(node(make_number(0.0),
826				 Node_field_spec,
827				 (NODE *) NULL),
828		            Node_nomatch,
829			    $2);
830		}
831	| '(' expression_list r_paren LEX_IN NAME
832		{ $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
833	| simp_exp
834		{ $$ = $1; }
835	| common_exp simp_exp %prec CONCAT_OP
836		{ $$ = node($1, Node_concat, $2); }
837	;
838
839simp_exp
840	: non_post_simp_exp
841	/* Binary operators in order of decreasing precedence.  */
842	| simp_exp '^' simp_exp
843		{ $$ = node($1, Node_exp, $3); }
844	| simp_exp '*' simp_exp
845		{ $$ = node($1, Node_times, $3); }
846	| simp_exp '/' simp_exp
847		{ $$ = node($1, Node_quotient, $3); }
848	| simp_exp '%' simp_exp
849		{ $$ = node($1, Node_mod, $3); }
850	| simp_exp '+' simp_exp
851		{ $$ = node($1, Node_plus, $3); }
852	| simp_exp '-' simp_exp
853		{ $$ = node($1, Node_minus, $3); }
854	| LEX_GETLINE opt_variable input_redir
855		{
856		  if (do_lint && parsing_end_rule && $3 == NULL)
857			lintwarn(_("non-redirected `getline' undefined inside END action"));
858		  $$ = node($2, Node_K_getline, $3);
859		}
860	| simp_exp IO_IN LEX_GETLINE opt_variable
861		{
862		  $$ = node($4, Node_K_getline,
863			 node($1, $2, (NODE *) NULL));
864		}
865	| variable INCREMENT
866		{ $$ = node($1, Node_postincrement, (NODE *) NULL); }
867	| variable DECREMENT
868		{ $$ = node($1, Node_postdecrement, (NODE *) NULL); }
869	;
870
non_post_simp_exp
	: '!' simp_exp %prec UNARY
		{ $$ = node($2, Node_not, (NODE *) NULL); }
	| '(' exp r_paren
		{ $$ = $2; }
	| LEX_BUILTIN
	  '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH
	  {
		/* Bare `length': legal, but complain under lint and POSIX modes. */
		if (do_lint)
			lintwarn(_("call of `length' without parentheses is not portable"));
		$$ = snode((NODE *) NULL, Node_builtin, (int) $1);
		if (do_posix)
			warning(_("call of `length' without parentheses is deprecated by POSIX"));
	  }
	| FUNC_CALL '(' opt_expression_list r_paren
	  {
		/*
		 * User function call.  The name is copied into a string
		 * node, so the lexer's copy is released once it has been
		 * recorded by func_use.
		 */
		NODE *call = node($3, Node_func_call, make_string($1, strlen($1)));

		call->funcbody = NULL;
		$$ = call;
		func_use($1, FUNC_USE);
		param_sanity($3);
		free($1);
	  }
	| variable
	| INCREMENT variable
		{ $$ = node($2, Node_preincrement, (NODE *) NULL); }
	| DECREMENT variable
		{ $$ = node($2, Node_predecrement, (NODE *) NULL); }
	| YNUMBER
		{ $$ = $1; }
	| YSTRING
		{ $$ = $1; }

	| '-' simp_exp    %prec UNARY
		{
		  /* Negate a non-string constant in place; otherwise build a unary minus node. */
		  if ($2->type != Node_val || ($2->flags & (STRCUR|STRING)) != 0)
			$$ = node($2, Node_unary_minus, (NODE *) NULL);
		  else {
			$2->numbr = -(force_number($2));
			$$ = $2;
		  }
		}
	| '+' simp_exp    %prec UNARY
		{
		  /*
		   * POSIX semantics: unary plus forces a conversion to
		   * numeric type, so build `0 + operand' rather than
		   * passing the operand through unchanged.
		   */
		  $$ = node (make_number(0.0), Node_plus, $2);
		}
	;
924
925opt_variable
926	: /* empty */
927		{ $$ = NULL; }
928	| variable
929		{ $$ = $1; }
930	;
931
variable
	: NAME
		{ $$ = variable($1, CAN_FREE, Node_var_new); }
	| NAME '[' expression_list ']'
	  {
		NODE *n;

		if ((n = lookup($1)) != NULL && ! isarray(n)) {
			yyerror(_("use of non-array as array"));
			/*
			 * Still produce a subscript node.  Without this
			 * assignment the default action would copy the
			 * NAME string pointer into the result, which the
			 * rest of the parser would then treat as a NODE.
			 */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
		} else if ($3 == NULL) {
			fatal(_("invalid subscript expression"));
		} else if ($3->rnode == NULL) {
			/* Single subscript: use the expression itself, drop the list cell. */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
			freenode($3);
		} else
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
	  }
	| '$' non_post_simp_exp
		{ $$ = node($2, Node_field_spec, (NODE *) NULL); }
	;
952
953l_brace
954	: '{' opt_nls
955	;
956
957r_brace
958	: '}' opt_nls	{ yyerrok; }
959	;
960
961r_paren
962	: ')' { yyerrok; }
963	;
964
965opt_semi
966	: /* empty */
967	| semi
968	;
969
970semi
971	: ';'	{ yyerrok; }
972	;
973
974colon
975	: ':'	{ yyerrok; }
976	;
977
978comma	: ',' opt_nls	{ yyerrok; }
979	;
980
981%%
982
983struct token {
984	const char *operator;		/* text to match */
985	NODETYPE value;		/* node type */
986	int class;		/* lexical class */
987	unsigned flags;		/* # of args. allowed and compatability */
988#	define	ARGS	0xFF	/* 0, 1, 2, 3 args allowed (any combination */
989#	define	A(n)	(1<<(n))
990#	define	VERSION_MASK	0xFF00	/* old awk is zero */
991#	define	NOT_OLD		0x0100	/* feature not in old awk */
992#	define	NOT_POSIX	0x0200	/* feature not in POSIX */
993#	define	GAWKX		0x0400	/* gawk extension */
994#	define	RESX		0x0800	/* Bell Labs Research extension */
995	NODE *(*ptr) P((NODE *));	/* function that implements this keyword */
996};
997
/* Tokentab is sorted in ascending ASCII order, so it can be binary searched. */
999/* Function pointers come from declarations in awk.h. */
1000
1001static const struct token tokentab[] = {
1002{"BEGIN",	Node_illegal,	 LEX_BEGIN,	0,		0},
1003{"END",		Node_illegal,	 LEX_END,	0,		0},
1004#ifdef ARRAYDEBUG
1005{"adump",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_adump},
1006#endif
1007{"and",		Node_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_and},
1008{"asort",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2),	do_asort},
1009{"asorti",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2),	do_asorti},
1010{"atan2",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2),	do_atan2},
1011{"bindtextdomain",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2),	do_bindtextdomain},
1012{"break",	Node_K_break,	 LEX_BREAK,	0,		0},
1013#ifdef ALLOW_SWITCH
1014{"case",	Node_K_case,	 LEX_CASE,	GAWKX,		0},
1015#endif
1016{"close",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1)|A(2),	do_close},
1017{"compl",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_compl},
1018{"continue",	Node_K_continue, LEX_CONTINUE,	0,		0},
1019{"cos",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_cos},
1020{"dcgettext",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_dcgettext},
1021{"dcngettext",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3)|A(4)|A(5),	do_dcngettext},
1022#ifdef ALLOW_SWITCH
1023{"default",	Node_K_default,	 LEX_DEFAULT,	GAWKX,		0},
1024#endif
1025{"delete",	Node_K_delete,	 LEX_DELETE,	NOT_OLD,	0},
1026{"do",		Node_K_do,	 LEX_DO,	NOT_OLD,	0},
1027{"else",	Node_illegal,	 LEX_ELSE,	0,		0},
1028{"exit",	Node_K_exit,	 LEX_EXIT,	0,		0},
1029{"exp",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_exp},
1030{"extension",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(2),	do_ext},
1031{"fflush",	Node_builtin,	 LEX_BUILTIN,	RESX|A(0)|A(1), do_fflush},
1032{"for",		Node_K_for,	 LEX_FOR,	0,		0},
1033{"func",	Node_K_function, LEX_FUNCTION,	NOT_POSIX|NOT_OLD,	0},
1034{"function",	Node_K_function, LEX_FUNCTION,	NOT_OLD,	0},
1035{"gensub",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(3)|A(4), do_gensub},
1036{"getline",	Node_K_getline,	 LEX_GETLINE,	NOT_OLD,	0},
1037{"gsub",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_gsub},
1038{"if",		Node_K_if,	 LEX_IF,	0,		0},
1039{"in",		Node_illegal,	 LEX_IN,	0,		0},
1040{"index",	Node_builtin,	 LEX_BUILTIN,	A(2),		do_index},
1041{"int",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_int},
1042{"length",	Node_builtin,	 LEX_LENGTH,	A(0)|A(1),	do_length},
1043{"log",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_log},
1044{"lshift",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_lshift},
1045{"match",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_match},
1046{"mktime",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1),	do_mktime},
1047{"next",	Node_K_next,	 LEX_NEXT,	0,		0},
1048{"nextfile",	Node_K_nextfile, LEX_NEXTFILE,	GAWKX,		0},
1049{"or",		Node_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_or},
1050{"print",	Node_K_print,	 LEX_PRINT,	0,		0},
1051{"printf",	Node_K_printf,	 LEX_PRINTF,	0,		0},
1052{"rand",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0),	do_rand},
1053{"return",	Node_K_return,	 LEX_RETURN,	NOT_OLD,	0},
1054{"rshift",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_rshift},
1055{"sin",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_sin},
1056{"split",	Node_builtin,	 LEX_BUILTIN,	A(2)|A(3),	do_split},
1057{"sprintf",	Node_builtin,	 LEX_BUILTIN,	0,		do_sprintf},
1058{"sqrt",	Node_builtin,	 LEX_BUILTIN,	A(1),		do_sqrt},
1059{"srand",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0)|A(1), do_srand},
1060#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1061{"stopme",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(0),	stopme},
1062#endif
1063{"strftime",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(0)|A(1)|A(2), do_strftime},
1064{"strtonum",	Node_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_strtonum},
1065{"sub",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_sub},
1066{"substr",	Node_builtin,	 LEX_BUILTIN,	A(2)|A(3),	do_substr},
1067#ifdef ALLOW_SWITCH
1068{"switch",	Node_K_switch,	 LEX_SWITCH,	GAWKX,		0},
1069#endif
1070{"system",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_system},
1071{"systime",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(0),	do_systime},
1072{"tolower",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_tolower},
1073{"toupper",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_toupper},
1074{"while",	Node_K_while,	 LEX_WHILE,	0,		0},
1075{"xor",		Node_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_xor},
1076};
1077
#ifdef MBS_SUPPORT
/*
 * State used by nextc()/pushback() to keep track of multibyte
 * character boundaries while the lexer consumes the source one
 * byte at a time.
 */

/* Conversion (shift) state handed to mbrlen() by nextc().  */
static mbstate_t cur_mbstate;
/*
 * Ring buffer with one entry per recently read byte.  As filled in
 * by nextc(): 0 means "not classified yet", 1 means a single-byte
 * character or the first byte of a multibyte character, and larger
 * values mark the later bytes of a multibyte character.
 */
#define MAX_CHAR_IN_RING_BUFFER 8
#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
static char cur_char_ring[RING_BUFFER_SIZE];
/* Index into cur_char_ring of the byte most recently returned by nextc().  */
static int cur_ring_idx;
/* True when the last nextc() returned a single-byte character
   or the first byte of a multibyte character.  */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
#endif /* MBS_SUPPORT */
1091
1092/* getfname --- return name of a builtin function (for pretty printing) */
1093
1094const char *
1095getfname(register NODE *(*fptr)(NODE *))
1096{
1097	register int i, j;
1098
1099	j = sizeof(tokentab) / sizeof(tokentab[0]);
1100	/* linear search, no other way to do it */
1101	for (i = 0; i < j; i++)
1102		if (tokentab[i].ptr == fptr)
1103			return tokentab[i].operator;
1104
1105	return NULL;
1106}
1107
/*
 * yyerror --- print a syntax error message, show where
 *
 * Bumps errcount, prints the offending source line through msg(),
 * then builds a second line made of blanks/tabs up to the start of
 * the current lexeme followed by "^ " and the message text, and hands
 * that line plus the caller's varargs to err().
 *
 * m is a printf-style format; the remaining arguments are its values.
 */

/*
 * Function identifier purposely indented to avoid mangling
 * by ansi2knr.  Sigh.
 */

static void
#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
  yyerror(const char *m, ...)
#else
/* VARARGS0 */
  yyerror(va_alist)
  va_dcl
#endif
{
	va_list args;
	const char *mesg = NULL;
	register char *bp, *cp;
	char *scan;
	char *buf;
	int count;
	static char end_of_file_line[] = "(END OF FILE)";
	char save;

	errcount++;
	/* Find the current line in the input file */
	if (lexptr && lexeme) {
		if (thisline == NULL) {
			/* walk back from the lexeme to the start of its line */
			cp = lexeme;
			if (*cp == '\n') {
				/*
				 * The lexeme is itself a newline: step over it so the
				 * scan below finds the line that precedes it, and
				 * override the caller's message.
				 */
				cp--;
				mesg = _("unexpected newline or end of string");
			}
			for (; cp != lexptr_begin && *cp != '\n'; --cp)
				continue;
			if (*cp == '\n')
				cp++;
			thisline = cp;
		}
		/* NL isn't guaranteed */
		/* find the end of the line: newline, NUL, or end of buffer */
		bp = lexeme;
		while (bp < lexend && *bp && *bp != '\n')
			bp++;
	} else {
		/* no source buffer available: show a placeholder line */
		thisline = end_of_file_line;
		bp = thisline + strlen(thisline);
	}

	/*
	 * Saving and restoring *bp keeps valgrind happy,
	 * since the guts of glibc uses strlen, even though
	 * we're passing an explicit precision. Sigh.
	 */
	save = *bp;
	*bp = '\0';

	msg("%.*s", (int) (bp - thisline), thisline);

	*bp = save;

#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
	va_start(args, m);
	if (mesg == NULL)
		mesg = m;
#else
	/* old-style varargs: the format is the first variable argument */
	va_start(args);
	if (mesg == NULL)
		mesg = va_arg(args, char *);
#endif
	/*
	 * Room for the padding (at most the length of the line), the
	 * two-character "^ " marker, the message, and the trailing NUL.
	 */
	count = (bp - thisline) + strlen(mesg) + 2 + 1;
	emalloc(buf, char *, count, "yyerror");

	bp = buf;

	if (lexptr != NULL) {
		/* keep tabs as tabs so the caret lines up under the lexeme */
		scan = thisline;
		while (scan < lexeme)
			if (*scan++ == '\t')
				*bp++ = '\t';
			else
				*bp++ = ' ';
		*bp++ = '^';
		*bp++ = ' ';
	}
	strcpy(bp, mesg);
	/* buf now serves as the format string for the caller's arguments */
	err("", buf, args);
	va_end(args);
	free(buf);
}
1198
/*
 * get_src_buf --- read the next buffer of source program
 *
 * Walks srcfiles[] in order, keeping its position in static variables.
 * On success the lexer pointers lexptr / lexend / lexptr_begin describe
 * the new text and a non-NULL pointer is returned; NULL is returned
 * once every source has been consumed.
 *
 * Command-line program text (stype == CMDLINE) is scanned in place; a
 * private copy of the last line is made only when a trailing newline
 * has to be added.  Program files are read in chunks into a buffer
 * that has SLOP extra bytes in front, used to keep the current line
 * available for error messages.
 */

static char *
get_src_buf()
{
	static int samefile = FALSE;	/* TRUE while a program file is open */
	static int nextfile = 0;	/* index into srcfiles[] */
	static char *buf = NULL;
	static int fd;
	int n;
	register char *scan;
	static size_t len = 0;	/* 0 means "current source not started yet" */
	static int did_newline = FALSE;
	int newfile;
	struct stat sbuf;

#	define	SLOP	128	/* enough space to hold most source lines */

again:
	newfile = FALSE;
	if (nextfile > numfiles)
		return NULL;

	if (srcfiles[nextfile].stype == CMDLINE) {
		if (len == 0) {
			/* first visit: scan the program text where it is */
			len = strlen(srcfiles[nextfile].val);
			if (len == 0) {
				/*
				 * Yet Another Special case:
				 *	gawk '' /path/name
				 * Sigh.
				 */
				static int warned = FALSE;

				if (do_lint && ! warned) {
					warned = TRUE;
					lintwarn(_("empty program text on command line"));
				}
				++nextfile;
				goto again;
			}
			sourceline = 1;
			lexptr = lexptr_begin = srcfiles[nextfile].val;
			lexend = lexptr + len;
		} else if (! did_newline && *(lexptr-1) != '\n') {
			/*
			 * The following goop is to ensure that the source
			 * ends with a newline and that the entire current
			 * line is available for error messages.
			 */
			int offset;

			did_newline = TRUE;
			offset = lexptr - lexeme;
			/* find the start of the line holding the current lexeme */
			for (scan = lexeme; scan > lexptr_begin; scan--)
				if (*scan == '\n') {
					scan++;
					break;
				}
			/* copy that line and append the missing newline */
			len = lexptr - scan;
			emalloc(buf, char *, len+1, "get_src_buf");
			memcpy(buf, scan, len);
			thisline = buf;
			lexptr = buf + len;
			*lexptr = '\n';
			/* re-base the lexer pointers onto the copy */
			lexeme = lexptr - offset;
			lexptr_begin = buf;
			lexend = lexptr + 1;
		} else {
			/* this command-line text is finished */
			len = 0;
			lexeme = lexptr = lexptr_begin = NULL;
		}
		if (lexptr == NULL && ++nextfile <= numfiles)
			goto again;
		return lexptr;
	}
	if (! samefile) {
		/* starting a new program file */
		source = srcfiles[nextfile].val;
		if (source == NULL) {
			if (buf != NULL) {
				free(buf);
				buf = NULL;
			}
			len = 0;
			return lexeme = lexptr = lexptr_begin = NULL;
		}
		fd = pathopen(source);
		if (fd <= INVALID_HANDLE) {
			char *in;

			/* suppress file name and line no. in error mesg */
			in = source;
			source = NULL;
			fatal(_("can't open source file `%s' for reading (%s)"),
				in, strerror(errno));
		}
		len = optimal_bufsize(fd, & sbuf);
		newfile = TRUE;
		if (buf != NULL)
			free(buf);
		/* file data goes at buf + SLOP; the first SLOP bytes are spare */
		emalloc(buf, char *, len + SLOP, "get_src_buf");
		lexptr_begin = buf + SLOP;
		samefile = TRUE;
		sourceline = 1;
	} else {
		/*
		 * Here, we retain the current source line (up to length SLOP)
		 * in the beginning of the buffer that was overallocated above
		 */
		int offset;
		int linelen;

		offset = lexptr - lexeme;
		for (scan = lexeme; scan > lexptr_begin; scan--)
			if (*scan == '\n') {
				scan++;
				break;
			}
		linelen = lexptr - scan;
		if (linelen > SLOP)
			linelen = SLOP;
		/* place the saved text so that it ends exactly at buf + SLOP */
		thisline = buf + SLOP - linelen;
		memcpy(thisline, scan, linelen);
		lexeme = buf + SLOP - offset;
		lexptr_begin = thisline;
	}
	/* NOTE(review): read() returns ssize_t; n is an int here */
	n = read(fd, buf + SLOP, len);
	if (n == -1)
		fatal(_("can't read sourcefile `%s' (%s)"),
			source, strerror(errno));
	if (n == 0) {
		/* end of this file: warn if it was empty, then move on */
		if (newfile) {
			static int warned = FALSE;

			if (do_lint && ! warned) {
				warned = TRUE;
				lintwarn(_("source file `%s' is empty"), source);
			}
		}
		if (fd != fileno(stdin)) /* safety */
			close(fd);
		samefile = FALSE;
		nextfile++;
		if (lexeme)
			*lexeme = '\0';
		len = 0;
		goto again;
	}
	lexptr = buf + SLOP;
	lexend = lexptr + n;
	return buf;
}
1351
/*
 * tokadd --- add a character to the token buffer
 *
 * Stores x at *tok and advances tok; when tok reaches tokend the
 * buffer is enlarged via tokexpand().  x is evaluated once.
 */

#define	tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
1355
1356/* tokexpand --- grow the token buffer */
1357
1358char *
1359tokexpand()
1360{
1361	static int toksize = 60;
1362	int tokoffset;
1363
1364	tokoffset = tok - tokstart;
1365	toksize *= 2;
1366	if (tokstart != NULL)
1367		erealloc(tokstart, char *, toksize, "tokexpand");
1368	else
1369		emalloc(tokstart, char *, toksize, "tokexpand");
1370	tokend = tokstart + toksize;
1371	tok = tokstart + tokoffset;
1372	return tok;
1373}
1374
1375/* nextc --- get the next input character */
1376
1377#ifdef MBS_SUPPORT
1378
/*
 * Multibyte-aware version.  Returns the next source byte as an
 * unsigned char value, or EOF when get_src_buf() has nothing more.
 * When gawk_mb_cur_max > 1 it also records in cur_char_ring where
 * each byte sits inside its (possibly multibyte) character, which
 * is what nextc_is_1stbyte tests afterwards.
 */
static int
nextc(void)
{
	if (gawk_mb_cur_max > 1)	{
		/* Update the buffer index.  */
		cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
			cur_ring_idx + 1;

		/* Did we already check the current character?  */
		if (cur_char_ring[cur_ring_idx] == 0) {
			/* No, we need to check the next character on the buffer.  */
			int idx, work_ring_idx = cur_ring_idx;
			mbstate_t tmp_state;
			size_t mbclen;

			if (!lexptr || lexptr >= lexend)
				if (!get_src_buf()) {
					return EOF;
				}

			/*
			 * Feed mbrlen() one more byte on each pass until it
			 * reports a complete (or invalid) character, filling
			 * one ring entry per byte examined.
			 */
			for (idx = 0 ; lexptr + idx < lexend ; idx++) {
				/* always restart from the state saved before this character */
				tmp_state = cur_mbstate;
				mbclen = mbrlen(lexptr, idx + 1, &tmp_state);

				if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
					/* It is a singlebyte character, non-complete multibyte
					   character or EOF.  We treat it as a singlebyte
					   character.  */
					cur_char_ring[work_ring_idx] = 1;
					break;
				} else if (mbclen == (size_t)-2) {
					/* It is not a complete multibyte character.  */
					cur_char_ring[work_ring_idx] = idx + 1;
				} else {
					/* mbclen > 1 */
					cur_char_ring[work_ring_idx] = mbclen;
					break;
				}
				work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
					0 : work_ring_idx + 1;
			}
			cur_mbstate = tmp_state;

			/* Put a mark on the position on which we write next character.  */
			work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
				0 : work_ring_idx + 1;
			cur_char_ring[work_ring_idx] = 0;
		}

		return (int) (unsigned char) *lexptr++;
	}
	else {
		/* single-byte locale: same logic as the non-MBS nextc() */
		int c;

		if (lexptr && lexptr < lexend)
			c = (int) (unsigned char) *lexptr++;
		else if (get_src_buf())
			c = (int) (unsigned char) *lexptr++;
		else
			c = EOF;

		return c;
	}
}
1443
1444#else /* MBS_SUPPORT */
1445
1446#if GAWKDEBUG
1447int
1448nextc(void)
1449{
1450	int c;
1451
1452	if (lexptr && lexptr < lexend)
1453		c = (int) (unsigned char) *lexptr++;
1454	else if (get_src_buf())
1455		c = (int) (unsigned char) *lexptr++;
1456	else
1457		c = EOF;
1458
1459	return c;
1460}
1461#else
/*
 * Macro form: yields the next byte of the current buffer as an
 * unsigned char value, calling get_src_buf() when the buffer is
 * missing or exhausted; yields EOF when get_src_buf() returns NULL.
 */
#define	nextc()	((lexptr && lexptr < lexend) ? \
		    ((int) (unsigned char) *lexptr++) : \
		    (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \
		)
1466#endif
1467
1468#endif /* MBS_SUPPORT */
1469
1470/* pushback --- push a character back on the input */
1471
1472#ifdef MBS_SUPPORT
1473
1474static void
1475pushback(void)
1476{
1477	if (gawk_mb_cur_max > 1) {
1478		cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
1479			cur_ring_idx - 1;
1480		(lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1481	} else
1482		(lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1483}
1484
1485#else
1486
/* Step lexptr back one byte unless it is NULL or already at lexptr_begin. */
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
1488
1489#endif /* MBS_SUPPORT */
1490
1491/* allow_newline --- allow newline after &&, ||, ? and : */
1492
1493static void
1494allow_newline(void)
1495{
1496	int c;
1497
1498	for (;;) {
1499		c = nextc();
1500		if (c == EOF)
1501			break;
1502		if (c == '#') {
1503			while ((c = nextc()) != '\n' && c != EOF)
1504				continue;
1505			if (c == EOF)
1506				break;
1507		}
1508		if (c == '\n')
1509			sourceline++;
1510		if (! ISSPACE(c)) {
1511			pushback();
1512			break;
1513		}
1514	}
1515}
1516
1517/* yylex --- Read the input and turn it into tokens. */
1518
1519static int
1520yylex(void)
1521{
1522	register int c;
1523	int seen_e = FALSE;		/* These are for numbers */
1524	int seen_point = FALSE;
1525	int esc_seen;		/* for literal strings */
1526	int low, mid, high;
1527	static int did_newline = FALSE;
1528	char *tokkey;
1529	static int lasttok = 0, eof_warned = FALSE;
1530	int inhex = FALSE;
1531	int intlstr = FALSE;
1532
1533	if (nextc() == EOF) {
1534		if (lasttok != NEWLINE) {
1535			lasttok = NEWLINE;
1536			if (do_lint && ! eof_warned) {
1537				lintwarn(_("source file does not end in newline"));
1538				eof_warned = TRUE;
1539			}
1540			return NEWLINE;	/* fake it */
1541		}
1542		return 0;
1543	}
1544	pushback();
1545#if defined OS2 || defined __EMX__
1546	/*
1547	 * added for OS/2's extproc feature of cmd.exe
1548	 * (like #! in BSD sh)
1549	 */
1550	if (strncasecmp(lexptr, "extproc ", 8) == 0) {
1551		while (*lexptr && *lexptr != '\n')
1552			lexptr++;
1553	}
1554#endif
1555	lexeme = lexptr;
1556	thisline = NULL;
1557	if (want_regexp) {
1558		int in_brack = 0;	/* count brackets, [[:alnum:]] allowed */
1559		/*
1560		 * Counting brackets is non-trivial. [[] is ok,
1561		 * and so is [\]], with a point being that /[/]/ as a regexp
1562		 * constant has to work.
1563		 *
1564		 * Do not count [ or ] if either one is preceded by a \.
1565		 * A `[' should be counted if
1566		 *  a) it is the first one so far (in_brack == 0)
1567		 *  b) it is the `[' in `[:'
1568		 * A ']' should be counted if not preceded by a \, since
1569		 * it is either closing `:]' or just a plain list.
1570		 * According to POSIX, []] is how you put a ] into a set.
1571		 * Try to handle that too.
1572		 *
1573		 * The code for \ handles \[ and \].
1574		 */
1575
1576		want_regexp = FALSE;
1577		tok = tokstart;
1578		for (;;) {
1579			c = nextc();
1580#ifdef MBS_SUPPORT
1581			if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1582#endif
1583			switch (c) {
1584			case '[':
1585				/* one day check for `.' and `=' too */
1586				if (nextc() == ':' || in_brack == 0)
1587					in_brack++;
1588				pushback();
1589				break;
1590			case ']':
1591				if (tokstart[0] == '['
1592				    && (tok == tokstart + 1
1593					|| (tok == tokstart + 2
1594					    && tokstart[1] == '^')))
1595					/* do nothing */;
1596				else
1597					in_brack--;
1598				break;
1599			case '\\':
1600				if ((c = nextc()) == EOF) {
1601					yyerror(_("unterminated regexp ends with `\\' at end of file"));
1602					goto end_regexp; /* kludge */
1603				} else if (c == '\n') {
1604					sourceline++;
1605					continue;
1606				} else {
1607					tokadd('\\');
1608					tokadd(c);
1609					continue;
1610				}
1611				break;
1612			case '/':	/* end of the regexp */
1613				if (in_brack > 0)
1614					break;
1615end_regexp:
1616				tokadd('\0');
1617				yylval.sval = tokstart;
1618				return lasttok = REGEXP;
1619			case '\n':
1620				pushback();
1621				yyerror(_("unterminated regexp"));
1622				goto end_regexp;	/* kludge */
1623			case EOF:
1624				yyerror(_("unterminated regexp at end of file"));
1625				goto end_regexp;	/* kludge */
1626			}
1627			tokadd(c);
1628		}
1629	}
1630retry:
1631	while ((c = nextc()) == ' ' || c == '\t')
1632		continue;
1633
1634	lexeme = lexptr ? lexptr - 1 : lexptr;
1635	thisline = NULL;
1636	tok = tokstart;
1637	yylval.nodetypeval = Node_illegal;
1638
1639#ifdef MBS_SUPPORT
1640	if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1641#endif
1642	switch (c) {
1643	case EOF:
1644		if (lasttok != NEWLINE) {
1645			lasttok = NEWLINE;
1646			if (do_lint && ! eof_warned) {
1647				lintwarn(_("source file does not end in newline"));
1648				eof_warned = TRUE;
1649			}
1650			return NEWLINE;	/* fake it */
1651		}
1652		return 0;
1653
1654	case '\n':
1655		sourceline++;
1656		return lasttok = NEWLINE;
1657
1658	case '#':		/* it's a comment */
1659		while ((c = nextc()) != '\n') {
1660			if (c == EOF) {
1661				if (lasttok != NEWLINE) {
1662					lasttok = NEWLINE;
1663					if (do_lint && ! eof_warned) {
1664						lintwarn(
1665				_("source file does not end in newline"));
1666						eof_warned = TRUE;
1667					}
1668					return NEWLINE;	/* fake it */
1669				}
1670				return 0;
1671			}
1672		}
1673		sourceline++;
1674		return lasttok = NEWLINE;
1675
1676	case '\\':
1677#ifdef RELAXED_CONTINUATION
1678		/*
1679		 * This code puports to allow comments and/or whitespace
1680		 * after the `\' at the end of a line used for continuation.
1681		 * Use it at your own risk. We think it's a bad idea, which
1682		 * is why it's not on by default.
1683		 */
1684		if (! do_traditional) {
1685			/* strip trailing white-space and/or comment */
1686			while ((c = nextc()) == ' ' || c == '\t')
1687				continue;
1688			if (c == '#') {
1689				if (do_lint)
1690					lintwarn(
1691		_("use of `\\ #...' line continuation is not portable"));
1692				while ((c = nextc()) != '\n')
1693					if (c == EOF)
1694						break;
1695			}
1696			pushback();
1697		}
1698#endif /* RELAXED_CONTINUATION */
1699		if (nextc() == '\n') {
1700			sourceline++;
1701			goto retry;
1702		} else {
1703			yyerror(_("backslash not last character on line"));
1704			exit(1);
1705		}
1706		break;
1707
1708	case ':':
1709	case '?':
1710		if (! do_posix)
1711			allow_newline();
1712		return lasttok = c;
1713
1714		/*
1715		 * in_parens is undefined unless we are parsing a print
1716		 * statement (in_print), but why bother with a check?
1717		 */
1718	case ')':
1719		in_parens--;
1720		return lasttok = c;
1721
1722	case '(':
1723		in_parens++;
1724		/* FALL THROUGH */
1725	case '$':
1726	case ';':
1727	case '{':
1728	case ',':
1729	case '[':
1730	case ']':
1731		return lasttok = c;
1732
1733	case '*':
1734		if ((c = nextc()) == '=') {
1735			yylval.nodetypeval = Node_assign_times;
1736			return lasttok = ASSIGNOP;
1737		} else if (do_posix) {
1738			pushback();
1739			return lasttok = '*';
1740		} else if (c == '*') {
1741			/* make ** and **= aliases for ^ and ^= */
1742			static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1743
1744			if (nextc() == '=') {
1745				if (! did_warn_assgn) {
1746					did_warn_assgn = TRUE;
1747					if (do_lint)
1748						lintwarn(_("POSIX does not allow operator `**='"));
1749					if (do_lint_old)
1750						warning(_("old awk does not support operator `**='"));
1751				}
1752				yylval.nodetypeval = Node_assign_exp;
1753				return ASSIGNOP;
1754			} else {
1755				pushback();
1756				if (! did_warn_op) {
1757					did_warn_op = TRUE;
1758					if (do_lint)
1759						lintwarn(_("POSIX does not allow operator `**'"));
1760					if (do_lint_old)
1761						warning(_("old awk does not support operator `**'"));
1762				}
1763				return lasttok = '^';
1764			}
1765		}
1766		pushback();
1767		return lasttok = '*';
1768
1769	case '/':
1770		if (nextc() == '=') {
1771			pushback();
1772			return lasttok = SLASH_BEFORE_EQUAL;
1773		}
1774		pushback();
1775		return lasttok = '/';
1776
1777	case '%':
1778		if (nextc() == '=') {
1779			yylval.nodetypeval = Node_assign_mod;
1780			return lasttok = ASSIGNOP;
1781		}
1782		pushback();
1783		return lasttok = '%';
1784
1785	case '^':
1786	{
1787		static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1788
1789		if (nextc() == '=') {
1790			if (do_lint_old && ! did_warn_assgn) {
1791				did_warn_assgn = TRUE;
1792				warning(_("operator `^=' is not supported in old awk"));
1793			}
1794			yylval.nodetypeval = Node_assign_exp;
1795			return lasttok = ASSIGNOP;
1796		}
1797		pushback();
1798		if (do_lint_old && ! did_warn_op) {
1799			did_warn_op = TRUE;
1800			warning(_("operator `^' is not supported in old awk"));
1801		}
1802		return lasttok = '^';
1803	}
1804
1805	case '+':
1806		if ((c = nextc()) == '=') {
1807			yylval.nodetypeval = Node_assign_plus;
1808			return lasttok = ASSIGNOP;
1809		}
1810		if (c == '+')
1811			return lasttok = INCREMENT;
1812		pushback();
1813		return lasttok = '+';
1814
1815	case '!':
1816		if ((c = nextc()) == '=') {
1817			yylval.nodetypeval = Node_notequal;
1818			return lasttok = RELOP;
1819		}
1820		if (c == '~') {
1821			yylval.nodetypeval = Node_nomatch;
1822			return lasttok = MATCHOP;
1823		}
1824		pushback();
1825		return lasttok = '!';
1826
1827	case '<':
1828		if (nextc() == '=') {
1829			yylval.nodetypeval = Node_leq;
1830			return lasttok = RELOP;
1831		}
1832		yylval.nodetypeval = Node_less;
1833		pushback();
1834		return lasttok = '<';
1835
1836	case '=':
1837		if (nextc() == '=') {
1838			yylval.nodetypeval = Node_equal;
1839			return lasttok = RELOP;
1840		}
1841		yylval.nodetypeval = Node_assign;
1842		pushback();
1843		return lasttok = ASSIGN;
1844
1845	case '>':
1846		if ((c = nextc()) == '=') {
1847			yylval.nodetypeval = Node_geq;
1848			return lasttok = RELOP;
1849		} else if (c == '>') {
1850			yylval.nodetypeval = Node_redirect_append;
1851			return lasttok = IO_OUT;
1852		}
1853		pushback();
1854		if (in_print && in_parens == 0) {
1855			yylval.nodetypeval = Node_redirect_output;
1856			return lasttok = IO_OUT;
1857		}
1858		yylval.nodetypeval = Node_greater;
1859		return lasttok = '>';
1860
1861	case '~':
1862		yylval.nodetypeval = Node_match;
1863		return lasttok = MATCHOP;
1864
1865	case '}':
1866		/*
1867		 * Added did newline stuff.  Easier than
1868		 * hacking the grammar.
1869		 */
1870		if (did_newline) {
1871			did_newline = FALSE;
1872			return lasttok = c;
1873		}
1874		did_newline++;
1875		--lexptr;	/* pick up } next time */
1876		return lasttok = NEWLINE;
1877
1878	case '"':
1879	string:
1880		esc_seen = FALSE;
1881		while ((c = nextc()) != '"') {
1882			if (c == '\n') {
1883				pushback();
1884				yyerror(_("unterminated string"));
1885				exit(1);
1886			}
1887#ifdef MBS_SUPPORT
1888			if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1889#endif
1890			if (c == '\\') {
1891				c = nextc();
1892				if (c == '\n') {
1893					sourceline++;
1894					continue;
1895				}
1896				esc_seen = TRUE;
1897				tokadd('\\');
1898			}
1899			if (c == EOF) {
1900				pushback();
1901				yyerror(_("unterminated string"));
1902				exit(1);
1903			}
1904			tokadd(c);
1905		}
1906		yylval.nodeval = make_str_node(tokstart,
1907					tok - tokstart, esc_seen ? SCAN : 0);
1908		yylval.nodeval->flags |= PERM;
1909		if (intlstr) {
1910			yylval.nodeval->flags |= INTLSTR;
1911			intlstr = FALSE;
1912			if (do_intl)
1913				dumpintlstr(yylval.nodeval->stptr,
1914						yylval.nodeval->stlen);
1915 		}
1916		return lasttok = YSTRING;
1917
1918	case '-':
1919		if ((c = nextc()) == '=') {
1920			yylval.nodetypeval = Node_assign_minus;
1921			return lasttok = ASSIGNOP;
1922		}
1923		if (c == '-')
1924			return lasttok = DECREMENT;
1925		pushback();
1926		return lasttok = '-';
1927
1928	case '.':
1929		c = nextc();
1930		pushback();
1931		if (! ISDIGIT(c))
1932			return lasttok = '.';
1933		else
1934			c = '.';
1935		/* FALL THROUGH */
1936	case '0':
1937	case '1':
1938	case '2':
1939	case '3':
1940	case '4':
1941	case '5':
1942	case '6':
1943	case '7':
1944	case '8':
1945	case '9':
1946		/* It's a number */
1947		for (;;) {
1948			int gotnumber = FALSE;
1949
1950			tokadd(c);
1951			switch (c) {
1952			case 'x':
1953			case 'X':
1954				if (do_traditional)
1955					goto done;
1956				if (tok == tokstart + 2)
1957					inhex = TRUE;
1958				break;
1959			case '.':
1960				if (seen_point) {
1961					gotnumber = TRUE;
1962					break;
1963				}
1964				seen_point = TRUE;
1965				break;
1966			case 'e':
1967			case 'E':
1968				if (inhex)
1969					break;
1970				if (seen_e) {
1971					gotnumber = TRUE;
1972					break;
1973				}
1974				seen_e = TRUE;
1975				if ((c = nextc()) == '-' || c == '+')
1976					tokadd(c);
1977				else
1978					pushback();
1979				break;
1980			case 'a':
1981			case 'A':
1982			case 'b':
1983			case 'B':
1984			case 'c':
1985			case 'C':
1986			case 'D':
1987			case 'd':
1988			case 'f':
1989			case 'F':
1990				if (do_traditional || ! inhex)
1991					goto done;
1992				/* fall through */
1993			case '0':
1994			case '1':
1995			case '2':
1996			case '3':
1997			case '4':
1998			case '5':
1999			case '6':
2000			case '7':
2001			case '8':
2002			case '9':
2003				break;
2004			default:
2005			done:
2006				gotnumber = TRUE;
2007			}
2008			if (gotnumber)
2009				break;
2010			c = nextc();
2011		}
2012		if (c != EOF)
2013			pushback();
2014		else if (do_lint && ! eof_warned) {
2015			lintwarn(_("source file does not end in newline"));
2016			eof_warned = TRUE;
2017		}
2018		tokadd('\0');
2019		if (! do_traditional && isnondecimal(tokstart)) {
2020			static short warned = FALSE;
2021			if (do_lint && ! warned) {
2022				warned = TRUE;
2023				lintwarn("numeric constant `%.*s' treated as octal or hexadecimal",
2024					strlen(tokstart)-1, tokstart);
2025			}
2026			yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
2027		} else
2028			yylval.nodeval = make_number(atof(tokstart));
2029		yylval.nodeval->flags |= PERM;
2030		return lasttok = YNUMBER;
2031
2032	case '&':
2033		if ((c = nextc()) == '&') {
2034			yylval.nodetypeval = Node_and;
2035			allow_newline();
2036			return lasttok = LEX_AND;
2037		}
2038		pushback();
2039		return lasttok = '&';
2040
2041	case '|':
2042		if ((c = nextc()) == '|') {
2043			yylval.nodetypeval = Node_or;
2044			allow_newline();
2045			return lasttok = LEX_OR;
2046		} else if (! do_traditional && c == '&') {
2047			yylval.nodetypeval = Node_redirect_twoway;
2048			return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
2049		}
2050		pushback();
2051		if (in_print && in_parens == 0) {
2052			yylval.nodetypeval = Node_redirect_pipe;
2053			return lasttok = IO_OUT;
2054		} else {
2055			yylval.nodetypeval = Node_redirect_pipein;
2056			return lasttok = IO_IN;
2057		}
2058	}
2059
2060	if (c != '_' && ! ISALPHA(c)) {
2061		yyerror(_("invalid char '%c' in expression"), c);
2062		exit(1);
2063	}
2064
2065	/*
2066	 * Lots of fog here.  Consider:
2067	 *
2068	 * print "xyzzy"$_"foo"
2069	 *
2070	 * Without the check for ` lasttok != '$'' ', this is parsed as
2071	 *
2072	 * print "xxyzz" $(_"foo")
2073	 *
2074	 * With the check, it is "correctly" parsed as three
2075	 * string concatenations.  Sigh.  This seems to be
2076	 * "more correct", but this is definitely one of those
2077	 * occasions where the interactions are funny.
2078	 */
2079	if (! do_traditional && c == '_' && lasttok != '$') {
2080		if ((c = nextc()) == '"') {
2081			intlstr = TRUE;
2082			goto string;
2083		}
2084		pushback();
2085		c = '_';
2086	}
2087
2088	/* it's some type of name-type-thing.  Find its length. */
2089	tok = tokstart;
2090	while (is_identchar(c)) {
2091		tokadd(c);
2092		c = nextc();
2093	}
2094	tokadd('\0');
2095	emalloc(tokkey, char *, tok - tokstart, "yylex");
2096	memcpy(tokkey, tokstart, tok - tokstart);
2097	if (c != EOF)
2098		pushback();
2099	else if (do_lint && ! eof_warned) {
2100		lintwarn(_("source file does not end in newline"));
2101		eof_warned = TRUE;
2102	}
2103
2104	/* See if it is a special token. */
2105	low = 0;
2106	high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
2107	while (low <= high) {
2108		int i;
2109
2110		mid = (low + high) / 2;
2111		c = *tokstart - tokentab[mid].operator[0];
2112		i = c ? c : strcmp(tokstart, tokentab[mid].operator);
2113
2114		if (i < 0)		/* token < mid */
2115			high = mid - 1;
2116		else if (i > 0)		/* token > mid */
2117			low = mid + 1;
2118		else {
2119			if (do_lint) {
2120				if (tokentab[mid].flags & GAWKX)
2121					lintwarn(_("`%s' is a gawk extension"),
2122						tokentab[mid].operator);
2123				if (tokentab[mid].flags & RESX)
2124					lintwarn(_("`%s' is a Bell Labs extension"),
2125						tokentab[mid].operator);
2126				if (tokentab[mid].flags & NOT_POSIX)
2127					lintwarn(_("POSIX does not allow `%s'"),
2128						tokentab[mid].operator);
2129			}
2130			if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
2131				warning(_("`%s' is not supported in old awk"),
2132						tokentab[mid].operator);
2133			if ((do_traditional && (tokentab[mid].flags & GAWKX))
2134			    || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
2135				break;
2136			if (tokentab[mid].class == LEX_BUILTIN
2137			    || tokentab[mid].class == LEX_LENGTH
2138			   )
2139				yylval.lval = mid;
2140			else
2141				yylval.nodetypeval = tokentab[mid].value;
2142
2143			free(tokkey);
2144			return lasttok = tokentab[mid].class;
2145		}
2146	}
2147
2148	yylval.sval = tokkey;
2149	if (*lexptr == '(')
2150		return lasttok = FUNC_CALL;
2151	else {
2152		static short goto_warned = FALSE;
2153
2154#define SMART_ALECK	1
2155		if (SMART_ALECK && do_lint
2156		    && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
2157			goto_warned = TRUE;
2158			lintwarn(_("`goto' considered harmful!\n"));
2159		}
2160		return lasttok = NAME;
2161	}
2162}
2163
2164/* node_common --- common code for allocating a new node */
2165
2166static NODE *
2167node_common(NODETYPE op)
2168{
2169	register NODE *r;
2170
2171	getnode(r);
2172	r->type = op;
2173	r->flags = MALLOC;
2174	/* if lookahead is NL, lineno is 1 too high */
2175	if (lexeme && *lexeme == '\n')
2176		r->source_line = sourceline - 1;
2177	else
2178		r->source_line = sourceline;
2179	r->source_file = source;
2180	return r;
2181}
2182
2183/* node --- allocates a node with defined lnode and rnode. */
2184
2185NODE *
2186node(NODE *left, NODETYPE op, NODE *right)
2187{
2188	register NODE *r;
2189
2190	r = node_common(op);
2191	r->lnode = left;
2192	r->rnode = right;
2193	return r;
2194}
2195
2196/* snode ---	allocate a node with defined subnode and builtin for builtin
2197		functions. Checks for arg. count and supplies defaults where
2198		possible. */
2199
2200static NODE *
2201snode(NODE *subn, NODETYPE op, int idx)
2202{
2203	register NODE *r;
2204	register NODE *n;
2205	int nexp = 0;
2206	int args_allowed;
2207
2208	r = node_common(op);
2209
2210	/* traverse expression list to see how many args. given */
2211	for (n = subn; n != NULL; n = n->rnode) {
2212		nexp++;
2213		if (nexp > 5)
2214			break;
2215	}
2216
2217	/* check against how many args. are allowed for this builtin */
2218	args_allowed = tokentab[idx].flags & ARGS;
2219	if (args_allowed && (args_allowed & A(nexp)) == 0)
2220		fatal(_("%d is invalid as number of arguments for %s"),
2221				nexp, tokentab[idx].operator);
2222
2223	r->builtin = tokentab[idx].ptr;
2224
2225	/* special case processing for a few builtins */
2226	if (nexp == 0 && r->builtin == do_length) {
2227		subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
2228		            Node_expression_list,
2229			    (NODE *) NULL);
2230	} else if (r->builtin == do_match) {
2231		static short warned = FALSE;
2232
2233		if (subn->rnode->lnode->type != Node_regex)
2234			subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
2235
2236		if (subn->rnode->rnode != NULL) {	/* 3rd argument there */
2237			if (do_lint && ! warned) {
2238				warned = TRUE;
2239				lintwarn(_("match: third argument is a gawk extension"));
2240			}
2241			if (do_traditional)
2242				fatal(_("match: third argument is a gawk extension"));
2243		}
2244	} else if (r->builtin == do_sub || r->builtin == do_gsub) {
2245		if (subn->lnode->type != Node_regex)
2246			subn->lnode = mk_rexp(subn->lnode);
2247		if (nexp == 2)
2248			append_right(subn, node(node(make_number(0.0),
2249						     Node_field_spec,
2250						     (NODE *) NULL),
2251					        Node_expression_list,
2252						(NODE *) NULL));
2253		else if (subn->rnode->rnode->lnode->type == Node_val) {
2254			if (do_lint)
2255				lintwarn(_("%s: string literal as last arg of substitute has no effect"),
2256					(r->builtin == do_sub) ? "sub" : "gsub");
2257		} else if (! isassignable(subn->rnode->rnode->lnode)) {
2258			yyerror(_("%s third parameter is not a changeable object"),
2259				(r->builtin == do_sub) ? "sub" : "gsub");
2260		}
2261	} else if (r->builtin == do_gensub) {
2262		if (subn->lnode->type != Node_regex)
2263			subn->lnode = mk_rexp(subn->lnode);
2264		if (nexp == 3)
2265			append_right(subn, node(node(make_number(0.0),
2266						     Node_field_spec,
2267						     (NODE *) NULL),
2268					        Node_expression_list,
2269						(NODE *) NULL));
2270	} else if (r->builtin == do_split) {
2271		if (nexp == 2)
2272			append_right(subn,
2273			    node(FS_node, Node_expression_list, (NODE *) NULL));
2274		n = subn->rnode->rnode->lnode;
2275		if (n->type != Node_regex)
2276			subn->rnode->rnode->lnode = mk_rexp(n);
2277		if (nexp == 2)
2278			subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
2279	} else if (r->builtin == do_close) {
2280		static short warned = FALSE;
2281
2282		if ( nexp == 2) {
2283			if (do_lint && nexp == 2 && ! warned) {
2284				warned = TRUE;
2285				lintwarn(_("close: second argument is a gawk extension"));
2286			}
2287			if (do_traditional)
2288				fatal(_("close: second argument is a gawk extension"));
2289		}
2290	} else if (do_intl					/* --gen-po */
2291			&& r->builtin == do_dcgettext		/* dcgettext(...) */
2292			&& subn->lnode->type == Node_val	/* 1st arg is constant */
2293			&& (subn->lnode->flags & STRCUR) != 0) {	/* it's a string constant */
2294		/* ala xgettext, dcgettext("some string" ...) dumps the string */
2295		NODE *str = subn->lnode;
2296
2297		if ((str->flags & INTLSTR) != 0)
2298			warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
2299			/* don't dump it, the lexer already did */
2300		else
2301			dumpintlstr(str->stptr, str->stlen);
2302	} else if (do_intl					/* --gen-po */
2303			&& r->builtin == do_dcngettext		/* dcngettext(...) */
2304			&& subn->lnode->type == Node_val	/* 1st arg is constant */
2305			&& (subn->lnode->flags & STRCUR) != 0	/* it's a string constant */
2306			&& subn->rnode->lnode->type == Node_val	/* 2nd arg is constant too */
2307			&& (subn->rnode->lnode->flags & STRCUR) != 0) {	/* it's a string constant */
2308		/* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
2309		NODE *str1 = subn->lnode;
2310		NODE *str2 = subn->rnode->lnode;
2311
2312		if (((str1->flags | str2->flags) & INTLSTR) != 0)
2313			warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
2314		else
2315			dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
2316	}
2317
2318	r->subnode = subn;
2319	if (r->builtin == do_sprintf) {
2320		count_args(r);
2321		r->lnode->printf_count = r->printf_count; /* hack */
2322	}
2323	return r;
2324}
2325
2326/* make_for_loop --- build a for loop */
2327
2328static NODE *
2329make_for_loop(NODE *init, NODE *cond, NODE *incr)
2330{
2331	register FOR_LOOP_HEADER *r;
2332	NODE *n;
2333
2334	emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
2335	getnode(n);
2336	n->type = Node_illegal;
2337	r->init = init;
2338	r->cond = cond;
2339	r->incr = incr;
2340	n->sub.nodep.r.hd = r;
2341	return n;
2342}
2343
2344/* dup_parms --- return TRUE if there are duplicate parameters */
2345
2346static int
2347dup_parms(NODE *func)
2348{
2349	register NODE *np;
2350	const char *fname, **names;
2351	int count, i, j, dups;
2352	NODE *params;
2353
2354	if (func == NULL)	/* error earlier */
2355		return TRUE;
2356
2357	fname = func->param;
2358	count = func->param_cnt;
2359	params = func->rnode;
2360
2361	if (count == 0)		/* no args, no problem */
2362		return FALSE;
2363
2364	if (params == NULL)	/* error earlier */
2365		return TRUE;
2366
2367	emalloc(names, const char **, count * sizeof(char *), "dup_parms");
2368
2369	i = 0;
2370	for (np = params; np != NULL; np = np->rnode) {
2371		if (np->param == NULL) { /* error earlier, give up, go home */
2372			free(names);
2373			return TRUE;
2374		}
2375		names[i++] = np->param;
2376	}
2377
2378	dups = 0;
2379	for (i = 1; i < count; i++) {
2380		for (j = 0; j < i; j++) {
2381			if (strcmp(names[i], names[j]) == 0) {
2382				dups++;
2383				error(
2384	_("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
2385					fname, i+1, names[j], j+1);
2386			}
2387		}
2388	}
2389
2390	free(names);
2391	return (dups > 0 ? TRUE : FALSE);
2392}
2393
2394/* parms_shadow --- check if parameters shadow globals */
2395
2396static int
2397parms_shadow(const char *fname, NODE *func)
2398{
2399	int count, i;
2400	int ret = FALSE;
2401
2402	if (fname == NULL || func == NULL)	/* error earlier */
2403		return FALSE;
2404
2405	count = func->lnode->param_cnt;
2406
2407	if (count == 0)		/* no args, no problem */
2408		return FALSE;
2409
2410	/*
2411	 * Use warning() and not lintwarn() so that can warn
2412	 * about all shadowed parameters.
2413	 */
2414	for (i = 0; i < count; i++) {
2415		if (lookup(func->parmlist[i]) != NULL) {
2416			warning(
2417	_("function `%s': parameter `%s' shadows global variable"),
2418					fname, func->parmlist[i]);
2419			ret = TRUE;
2420		}
2421	}
2422
2423	return ret;
2424}
2425
2426/*
2427 * install:
2428 * Install a name in the symbol table, even if it is already there.
2429 * Caller must check against redefinition if that is desired.
2430 */
2431
2432NODE *
2433install(char *name, NODE *value)
2434{
2435	register NODE *hp;
2436	register size_t len;
2437	register int bucket;
2438
2439	var_count++;
2440	len = strlen(name);
2441	bucket = hash(name, len, (unsigned long) HASHSIZE);
2442	getnode(hp);
2443	hp->type = Node_hashnode;
2444	hp->hnext = variables[bucket];
2445	variables[bucket] = hp;
2446	hp->hlength = len;
2447	hp->hvalue = value;
2448	hp->hname = name;
2449	hp->hvalue->vname = name;
2450	return hp->hvalue;
2451}
2452
2453/* lookup --- find the most recent hash node for name installed by install */
2454
2455NODE *
2456lookup(const char *name)
2457{
2458	register NODE *bucket;
2459	register size_t len;
2460
2461	len = strlen(name);
2462	for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
2463			bucket != NULL; bucket = bucket->hnext)
2464		if (bucket->hlength == len && STREQN(bucket->hname, name, len))
2465			return bucket->hvalue;
2466
2467	return NULL;
2468}
2469
2470/* var_comp --- compare two variable names */
2471
2472static int
2473var_comp(const void *v1, const void *v2)
2474{
2475	const NODE *const *npp1, *const *npp2;
2476	const NODE *n1, *n2;
2477	int minlen;
2478
2479	npp1 = (const NODE *const *) v1;
2480	npp2 = (const NODE *const *) v2;
2481	n1 = *npp1;
2482	n2 = *npp2;
2483
2484	if (n1->hlength > n2->hlength)
2485		minlen = n1->hlength;
2486	else
2487		minlen = n2->hlength;
2488
2489	return strncmp(n1->hname, n2->hname, minlen);
2490}
2491
2492/* valinfo --- dump var info */
2493
2494static void
2495valinfo(NODE *n, FILE *fp)
2496{
2497	if (n->flags & STRING) {
2498		fprintf(fp, "string (");
2499		pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2500		fprintf(fp, ")\n");
2501	} else if (n->flags & NUMBER)
2502		fprintf(fp, "number (%.17g)\n", n->numbr);
2503	else if (n->flags & STRCUR) {
2504		fprintf(fp, "string value (");
2505		pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2506		fprintf(fp, ")\n");
2507	} else if (n->flags & NUMCUR)
2508		fprintf(fp, "number value (%.17g)\n", n->numbr);
2509	else
2510		fprintf(fp, "?? flags %s\n", flags2str(n->flags));
2511}
2512
2513
2514/* dump_vars --- dump the symbol table */
2515
2516void
2517dump_vars(const char *fname)
2518{
2519	int i, j;
2520	NODE **table;
2521	NODE *p;
2522	FILE *fp;
2523
2524	emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars");
2525
2526	if (fname == NULL)
2527		fp = stderr;
2528	else if ((fp = fopen(fname, "w")) == NULL) {
2529		warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
2530		warning(_("sending profile to standard error"));
2531		fp = stderr;
2532	}
2533
2534	for (i = j = 0; i < HASHSIZE; i++)
2535		for (p = variables[i]; p != NULL; p = p->hnext)
2536			table[j++] = p;
2537
2538	assert(j == var_count);
2539
2540	/* Shazzam! */
2541	qsort(table, j, sizeof(NODE *), var_comp);
2542
2543	for (i = 0; i < j; i++) {
2544		p = table[i];
2545		if (p->hvalue->type == Node_func)
2546			continue;
2547		fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
2548		if (p->hvalue->type == Node_var_array)
2549			fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
2550		else if (p->hvalue->type == Node_var_new)
2551			fprintf(fp, "unused variable\n");
2552		else if (p->hvalue->type == Node_var)
2553			valinfo(p->hvalue->var_value, fp);
2554		else {
2555			NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2556
2557			valinfo(*lhs, fp);
2558		}
2559	}
2560
2561	if (fp != stderr && fclose(fp) != 0)
2562		warning(_("%s: close failed (%s)"), fname, strerror(errno));
2563
2564	free(table);
2565}
2566
2567/* release_all_vars --- free all variable memory */
2568
2569void
2570release_all_vars()
2571{
2572	int i;
2573	NODE *p, *next;
2574
2575	for (i = 0; i < HASHSIZE; i++)
2576		for (p = variables[i]; p != NULL; p = next) {
2577			next = p->hnext;
2578
2579			if (p->hvalue->type == Node_func)
2580				continue;
2581			else if (p->hvalue->type == Node_var_array)
2582				assoc_clear(p->hvalue);
2583			else if (p->hvalue->type != Node_var_new) {
2584				NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2585
2586				unref(*lhs);
2587			}
2588			unref(p);
2589	}
2590}
2591
/* finfo --- for use in comparison and sorting of function names */

/*
 * One entry per function found in the symbol table; dump_funcs() and
 * shadow_funcs() fill arrays of these and sort them with fcompare().
 */

struct finfo {
	const char *name;	/* function name (taken from the hash node's hname) */
	size_t nlen;		/* length of name (taken from hlength) */
	NODE *func;		/* the Node_func node (taken from hvalue) */
};
2599
2600/* fcompare --- comparison function for qsort */
2601
2602static int
2603fcompare(const void *p1, const void *p2)
2604{
2605	const struct finfo *f1, *f2;
2606	int minlen;
2607
2608	f1 = (const struct finfo *) p1;
2609	f2 = (const struct finfo *) p2;
2610
2611	if (f1->nlen > f2->nlen)
2612		minlen = f2->nlen;
2613	else
2614		minlen = f1->nlen;
2615
2616	return strncmp(f1->name, f2->name, minlen);
2617}
2618
2619/* dump_funcs --- print all functions */
2620
2621void
2622dump_funcs()
2623{
2624	int i, j;
2625	NODE *p;
2626	static struct finfo *tab = NULL;
2627
2628	if (func_count == 0)
2629		return;
2630
2631	/*
2632	 * Walk through symbol table countng functions.
2633	 * Could be more than func_count if there are
2634	 * extension functions.
2635	 */
2636	for (i = j = 0; i < HASHSIZE; i++) {
2637		for (p = variables[i]; p != NULL; p = p->hnext) {
2638			if (p->hvalue->type == Node_func) {
2639				j++;
2640			}
2641		}
2642	}
2643
2644	if (tab == NULL)
2645		emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs");
2646
2647	/* now walk again, copying info */
2648	for (i = j = 0; i < HASHSIZE; i++) {
2649		for (p = variables[i]; p != NULL; p = p->hnext) {
2650			if (p->hvalue->type == Node_func) {
2651				tab[j].name = p->hname;
2652				tab[j].nlen = p->hlength;
2653				tab[j].func = p->hvalue;
2654				j++;
2655			}
2656		}
2657	}
2658
2659
2660	/* Shazzam! */
2661	qsort(tab, j, sizeof(struct finfo), fcompare);
2662
2663	for (i = 0; i < j; i++)
2664		pp_func(tab[i].name, tab[i].nlen, tab[i].func);
2665
2666	free(tab);
2667}
2668
2669/* shadow_funcs --- check all functions for parameters that shadow globals */
2670
2671void
2672shadow_funcs()
2673{
2674	int i, j;
2675	NODE *p;
2676	struct finfo *tab;
2677	static int calls = 0;
2678	int shadow = FALSE;
2679
2680	if (func_count == 0)
2681		return;
2682
2683	if (calls++ != 0)
2684		fatal(_("shadow_funcs() called twice!"));
2685
2686	emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs");
2687
2688	for (i = j = 0; i < HASHSIZE; i++) {
2689		for (p = variables[i]; p != NULL; p = p->hnext) {
2690			if (p->hvalue->type == Node_func) {
2691				tab[j].name = p->hname;
2692				tab[j].nlen = p->hlength;
2693				tab[j].func = p->hvalue;
2694				j++;
2695			}
2696		}
2697	}
2698
2699	assert(j == func_count);
2700
2701	/* Shazzam! */
2702	qsort(tab, func_count, sizeof(struct finfo), fcompare);
2703
2704	for (i = 0; i < j; i++)
2705		shadow |= parms_shadow(tab[i].name, tab[i].func);
2706
2707	free(tab);
2708
2709	/* End with fatal if the user requested it.  */
2710	if (shadow && lintfunc != warning)
2711		lintwarn(_("there were shadowed variables."));
2712}
2713
2714/*
2715 * append_right:
2716 * Add new to the rightmost branch of LIST.  This uses n^2 time, so we make
2717 * a simple attempt at optimizing it.
2718 */
2719
2720static NODE *
2721append_right(NODE *list, NODE *new)
2722{
2723	register NODE *oldlist;
2724	static NODE *savefront = NULL, *savetail = NULL;
2725
2726	if (list == NULL || new == NULL)
2727		return list;
2728
2729	oldlist = list;
2730	if (savefront == oldlist)
2731		list = savetail; /* Be careful: maybe list->rnode != NULL */
2732	else
2733		savefront = oldlist;
2734
2735	while (list->rnode != NULL)
2736		list = list->rnode;
2737	savetail = list->rnode = new;
2738	return oldlist;
2739}
2740
2741/*
2742 * append_pattern:
2743 * A wrapper around append_right, used for rule lists.
2744 */
2745static inline NODE *
2746append_pattern(NODE **list, NODE *patt)
2747{
2748	NODE *n = node(patt, Node_rule_node, (NODE *) NULL);
2749
2750	if (*list == NULL)
2751		*list = n;
2752	else {
2753		NODE *n1 = node(n, Node_rule_list, (NODE *) NULL);
2754		if ((*list)->type != Node_rule_list)
2755			*list = node(*list, Node_rule_list, n1);
2756		else
2757			(void) append_right(*list, n1);
2758	}
2759	return n;
2760}
2761
2762/*
2763 * func_install:
2764 * check if name is already installed;  if so, it had better have Null value,
2765 * in which case def is added as the value. Otherwise, install name with def
2766 * as value.
2767 *
2768 * Extra work, build up and save a list of the parameter names in a table
2769 * and hang it off params->parmlist. This is used to set the `vname' field
2770 * of each function parameter during a function call. See eval.c.
2771 */
2772
2773static void
2774func_install(NODE *params, NODE *def)
2775{
2776	NODE *r, *n, *thisfunc;
2777	char **pnames, *names, *sp;
2778	size_t pcount = 0, space = 0;
2779	int i;
2780
2781	/* check for function foo(foo) { ... }.  bleah. */
2782	for (n = params->rnode; n != NULL; n = n->rnode) {
2783		if (strcmp(n->param, params->param) == 0)
2784			fatal(_("function `%s': can't use function name as parameter name"),
2785					params->param);
2786	}
2787
2788	thisfunc = NULL;	/* turn off warnings */
2789
2790	/* symbol table managment */
2791	pop_var(params, FALSE);
2792	r = lookup(params->param);
2793	if (r != NULL) {
2794		fatal(_("function name `%s' previously defined"), params->param);
2795	} else if (params->param == builtin_func)	/* not a valid function name */
2796		goto remove_params;
2797
2798	/* install the function */
2799	thisfunc = node(params, Node_func, def);
2800	(void) install(params->param, thisfunc);
2801
2802	/* figure out amount of space to allocate for variable names */
2803	for (n = params->rnode; n != NULL; n = n->rnode) {
2804		pcount++;
2805		space += strlen(n->param) + 1;
2806	}
2807
2808	/* allocate it and fill it in */
2809	if (pcount != 0) {
2810		emalloc(names, char *, space, "func_install");
2811		emalloc(pnames, char **, pcount * sizeof(char *), "func_install");
2812		sp = names;
2813		for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
2814			pnames[i] = sp;
2815			strcpy(sp, n->param);
2816			sp += strlen(n->param) + 1;
2817		}
2818		thisfunc->parmlist = pnames;
2819	} else {
2820		thisfunc->parmlist = NULL;
2821	}
2822
2823	/* update lint table info */
2824	func_use(params->param, FUNC_DEFINE);
2825
2826	func_count++;	/* used by profiling / pretty printer */
2827
2828remove_params:
2829	/* remove params from symbol table */
2830	pop_params(params->rnode);
2831}
2832
2833/* pop_var --- remove a variable from the symbol table */
2834
2835static void
2836pop_var(NODE *np, int freeit)
2837{
2838	register NODE *bucket, **save;
2839	register size_t len;
2840	char *name;
2841
2842	name = np->param;
2843	len = strlen(name);
2844	save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
2845	for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
2846		if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
2847			var_count--;
2848			*save = bucket->hnext;
2849			freenode(bucket);
2850			if (freeit)
2851				free(np->param);
2852			return;
2853		}
2854		save = &(bucket->hnext);
2855	}
2856}
2857
2858/* pop_params --- remove list of function parameters from symbol table */
2859
2860/*
2861 * pop parameters out of the symbol table. do this in reverse order to
2862 * avoid reading freed memory if there were duplicated parameters.
2863 */
2864static void
2865pop_params(NODE *params)
2866{
2867	if (params == NULL)
2868		return;
2869	pop_params(params->rnode);
2870	pop_var(params, TRUE);
2871}
2872
2873/* make_param --- make NAME into a function parameter */
2874
2875static NODE *
2876make_param(char *name)
2877{
2878	NODE *r;
2879
2880	getnode(r);
2881	r->type = Node_param_list;
2882	r->rnode = NULL;
2883	r->param = name;
2884	r->param_cnt = param_counter++;
2885	return (install(name, r));
2886}
2887
/*
 * fdesc --- per-function record of definitions and uses.
 * ftable is a chained hash table of these, filled in by func_use()
 * and examined and freed by check_funcs().
 */
static struct fdesc {
	char *name;		/* private copy of the function name */
	short used;		/* bumped by func_use() for anything but FUNC_DEFINE */
	short defined;		/* bumped by func_use() for FUNC_DEFINE */
	struct fdesc *next;	/* next record on the same hash chain */
} *ftable[HASHSIZE];
2894
2895/* func_use --- track uses and definitions of functions */
2896
2897static void
2898func_use(const char *name, enum defref how)
2899{
2900	struct fdesc *fp;
2901	int len;
2902	int ind;
2903
2904	len = strlen(name);
2905	ind = hash(name, len, HASHSIZE);
2906
2907	for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
2908		if (strcmp(fp->name, name) == 0) {
2909			if (how == FUNC_DEFINE)
2910				fp->defined++;
2911			else
2912				fp->used++;
2913			return;
2914		}
2915	}
2916
2917	/* not in the table, fall through to allocate a new one */
2918
2919	emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
2920	memset(fp, '\0', sizeof(struct fdesc));
2921	emalloc(fp->name, char *, len + 1, "func_use");
2922	strcpy(fp->name, name);
2923	if (how == FUNC_DEFINE)
2924		fp->defined++;
2925	else
2926		fp->used++;
2927	fp->next = ftable[ind];
2928	ftable[ind] = fp;
2929}
2930
2931/* check_funcs --- verify functions that are called but not defined */
2932
2933static void
2934check_funcs()
2935{
2936	struct fdesc *fp, *next;
2937	int i;
2938
2939	for (i = 0; i < HASHSIZE; i++) {
2940		for (fp = ftable[i]; fp != NULL; fp = fp->next) {
2941#ifdef REALLYMEAN
2942			/* making this the default breaks old code. sigh. */
2943			if (fp->defined == 0) {
2944				error(
2945		_("function `%s' called but never defined"), fp->name);
2946				errcount++;
2947			}
2948#else
2949			if (do_lint && fp->defined == 0)
2950				lintwarn(
2951		_("function `%s' called but never defined"), fp->name);
2952#endif
2953			if (do_lint && fp->used == 0) {
2954				lintwarn(_("function `%s' defined but never called"),
2955					fp->name);
2956			}
2957		}
2958	}
2959
2960	/* now let's free all the memory */
2961	for (i = 0; i < HASHSIZE; i++) {
2962		for (fp = ftable[i]; fp != NULL; fp = next) {
2963			next = fp->next;
2964			free(fp->name);
2965			free(fp);
2966		}
2967	}
2968}
2969
2970/* param_sanity --- look for parameters that are regexp constants */
2971
2972static void
2973param_sanity(NODE *arglist)
2974{
2975	NODE *argp, *arg;
2976	int i;
2977
2978	for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
2979		arg = argp->lnode;
2980		if (arg->type == Node_regex)
2981			warning(_("regexp constant for parameter #%d yields boolean value"), i);
2982	}
2983}
2984
2985/* variable --- make sure NAME is in the symbol table */
2986
2987NODE *
2988variable(char *name, int can_free, NODETYPE type)
2989{
2990	register NODE *r;
2991
2992	if ((r = lookup(name)) != NULL) {
2993		if (r->type == Node_func)
2994			fatal(_("function `%s' called with space between name and `(',\n%s"),
2995				r->vname,
2996				_("or used as a variable or an array"));
2997	} else {
2998		/* not found */
2999		if (! do_traditional && STREQ(name, "PROCINFO"))
3000			r = load_procinfo();
3001		else if (STREQ(name, "ENVIRON"))
3002			r = load_environ();
3003		else {
3004			/*
3005			 * This is the only case in which we may not free the string.
3006			 */
3007			NODE *n;
3008
3009			if (type == Node_var)
3010				n = node(Nnull_string, type, (NODE *) NULL);
3011			else
3012				n = node((NODE *) NULL, type, (NODE *) NULL);
3013
3014			return install(name, n);
3015		}
3016	}
3017	if (can_free)
3018		free(name);
3019	return r;
3020}
3021
3022/* mk_rexp --- make a regular expression constant */
3023
3024static NODE *
3025mk_rexp(NODE *exp)
3026{
3027	NODE *n;
3028
3029	if (exp->type == Node_regex)
3030		return exp;
3031
3032	getnode(n);
3033	n->type = Node_dynregex;
3034	n->re_exp = exp;
3035	n->re_text = NULL;
3036	n->re_reg = NULL;
3037	n->re_flags = 0;
3038	return n;
3039}
3040
3041/* isnoeffect --- when used as a statement, has no side effects */
3042
3043/*
3044 * To be completely general, we should recursively walk the parse
3045 * tree, to make sure that all the subexpressions also have no effect.
3046 * Instead, we just weaken the actual warning that's printed, up above
3047 * in the grammar.
3048 */
3049
3050static int
3051isnoeffect(NODETYPE type)
3052{
3053	switch (type) {
3054	case Node_times:
3055	case Node_quotient:
3056	case Node_mod:
3057	case Node_plus:
3058	case Node_minus:
3059	case Node_subscript:
3060	case Node_concat:
3061	case Node_exp:
3062	case Node_unary_minus:
3063	case Node_field_spec:
3064	case Node_and:
3065	case Node_or:
3066	case Node_equal:
3067	case Node_notequal:
3068	case Node_less:
3069	case Node_greater:
3070	case Node_leq:
3071	case Node_geq:
3072	case Node_match:
3073	case Node_nomatch:
3074	case Node_not:
3075	case Node_val:
3076	case Node_in_array:
3077	case Node_NF:
3078	case Node_NR:
3079	case Node_FNR:
3080	case Node_FS:
3081	case Node_RS:
3082	case Node_FIELDWIDTHS:
3083	case Node_IGNORECASE:
3084	case Node_OFS:
3085	case Node_ORS:
3086	case Node_OFMT:
3087	case Node_CONVFMT:
3088	case Node_BINMODE:
3089	case Node_LINT:
3090	case Node_TEXTDOMAIN:
3091		return TRUE;
3092	default:
3093		break;	/* keeps gcc -Wall happy */
3094	}
3095
3096	return FALSE;
3097}
3098
3099/* isassignable --- can this node be assigned to? */
3100
3101static int
3102isassignable(register NODE *n)
3103{
3104	switch (n->type) {
3105	case Node_var_new:
3106	case Node_var:
3107	case Node_FIELDWIDTHS:
3108	case Node_RS:
3109	case Node_FS:
3110	case Node_FNR:
3111	case Node_NR:
3112	case Node_NF:
3113	case Node_IGNORECASE:
3114	case Node_OFMT:
3115	case Node_CONVFMT:
3116	case Node_ORS:
3117	case Node_OFS:
3118	case Node_LINT:
3119	case Node_BINMODE:
3120	case Node_TEXTDOMAIN:
3121	case Node_field_spec:
3122	case Node_subscript:
3123		return TRUE;
3124	case Node_param_list:
3125		return ((n->flags & FUNC) == 0);  /* ok if not func name */
3126	default:
3127		break;	/* keeps gcc -Wall happy */
3128	}
3129	return FALSE;
3130}
3131
3132/* stopme --- for debugging */
3133
3134NODE *
3135stopme(NODE *tree ATTRIBUTE_UNUSED)
3136{
3137	return 0;
3138}
3139
3140/* dumpintlstr --- write out an initial .po file entry for the string */
3141
3142static void
3143dumpintlstr(const char *str, size_t len)
3144{
3145	char *cp;
3146
3147	/* See the GNU gettext distribution for details on the file format */
3148
3149	if (source != NULL) {
3150		/* ala the gettext sources, remove leading `./'s */
3151		for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3152			continue;
3153		printf("#: %s:%d\n", cp, sourceline);
3154	}
3155
3156	printf("msgid ");
3157	pp_string_fp(stdout, str, len, '"', TRUE);
3158	putchar('\n');
3159	printf("msgstr \"\"\n\n");
3160	fflush(stdout);
3161}
3162
3163/* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
3164
3165static void
3166dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
3167{
3168	char *cp;
3169
3170	/* See the GNU gettext distribution for details on the file format */
3171
3172	if (source != NULL) {
3173		/* ala the gettext sources, remove leading `./'s */
3174		for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3175			continue;
3176		printf("#: %s:%d\n", cp, sourceline);
3177	}
3178
3179	printf("msgid ");
3180	pp_string_fp(stdout, str1, len1, '"', TRUE);
3181	putchar('\n');
3182	printf("msgid_plural ");
3183	pp_string_fp(stdout, str2, len2, '"', TRUE);
3184	putchar('\n');
3185	printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
3186	fflush(stdout);
3187}
3188
3189/* count_args --- count the number of printf arguments */
3190
3191static void
3192count_args(NODE *tree)
3193{
3194	size_t count = 0;
3195	NODE *save_tree;
3196
3197	assert(tree->type == Node_K_printf
3198		|| (tree->type == Node_builtin && tree->builtin == do_sprintf));
3199	save_tree = tree;
3200
3201	tree = tree->lnode;	/* printf format string */
3202
3203	for (count = 0; tree != NULL; tree = tree->rnode)
3204		count++;
3205
3206	save_tree->printf_count = count;
3207}
3208
3209/* isarray --- can this type be subscripted? */
3210
3211static int
3212isarray(NODE *n)
3213{
3214	switch (n->type) {
3215	case Node_var_new:
3216	case Node_var_array:
3217		return TRUE;
3218	case Node_param_list:
3219		return ((n->flags & FUNC) == 0);
3220	case Node_array_ref:
3221		cant_happen();
3222		break;
3223	default:
3224		break;	/* keeps gcc -Wall happy */
3225	}
3226
3227	return FALSE;
3228}
3229