1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
9
10#include "libbb.h"
11#include "xregex.h"
12#include <math.h>
13
14/* This is a NOEXEC applet. Be very careful! */
15
16
/* Debug tracing hooks. As written, both macros expand to an empty statement,
 * so every debug_printf_walker()/debug_printf_eval() call compiles to nothing.
 * If you comment out one of these below, it will be #defined later
 * (by the matching #ifndef fallback) to perform debug printfs to stderr: */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)  do {} while (0)

/* Fallbacks: only take effect when the no-op definition above was removed. */
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
28
29
30
/* Size limit passed to fmt_num() when getvar_s() formats a number into g_buf */
#define	MAXVARFMT       240
/* Minimum number of var slots in one nvblock (see nvalloc()) */
#define	MINNVBLOCK      64

/* variable flags (bits of var::type) */
#define	VF_NUMBER       0x0001	/* 1 = primary type is number */
#define	VF_ARRAY        0x0002	/* 1 = it's an array */

#define	VF_CACHED       0x0100	/* 1 = num/str value has cached str/num eq */
#define	VF_USER         0x0200	/* 1 = user input (may be numeric string) */
#define	VF_SPECIAL      0x0400	/* 1 = requires extra handling when changed */
#define	VF_WALK         0x0800	/* 1 = variable has alloc'd x.walker list */
#define	VF_FSTR         0x1000	/* 1 = var::string points to fstring buffer */
#define	VF_CHILD        0x2000	/* 1 = function arg; x.parent points to source */
#define	VF_DIRTY        0x4000	/* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed:
 * clrvar() keeps exactly these bits and copyvar() never copies them */
#define	VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
/* One entry of the list hung off var::x.walker (flag VF_WALK), used for
 * for..in array walks. nvfree() releases such a list by following prev. */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev;   /* previous list entry; nvfree() frees along this chain */
	char wbuf[1];               /* NOTE(review): presumably the start of a longer trailing
	                             * buffer - confirm against the allocation site */
} walker_list;
55
/* Variable: a value that may carry a number, a string, or both
 * (one cached from the other, see VF_CACHED), plus one role-specific pointer. */
typedef struct var_s {
	unsigned type;            /* flags (VF_xxx) */
	double number;            /* numeric value, or numeric cache of string */
	char *string;             /* string value; free()d by clrvar() unless VF_FSTR is set */
	union {
		int aidx;               /* func arg idx (for compilation stage) */
		struct xhash_s *array;  /* array ptr */
		struct var_s *parent;   /* for func args, ptr to actual parameter */
		walker_list *walker;    /* list of array elements (for..in) */
	} x;
} var;
68
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;       /* first node of the chain */
	struct node_s *last;        /* last node of the chain */
	const char *programname;    /* NOTE(review): presumably the program source name
	                             * the chain belongs to - not used in this part of the file */
} chain;
75
/* Function: entry stored in the functions hash (see newfunc()) */
typedef struct func_s {
	unsigned nargs;             /* argument count */
	struct chain_s body;        /* node chain of the function body */
} func;
81
/* I/O stream: entry stored in the redirect streams hash (see newfile()).
 * NOTE(review): the buffer bookkeeping fields (adv, size, pos) are not
 * exercised in this part of the file; confirm their meaning against
 * the record-reading code. */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	smallint is_pipe;
} rstream;
91
/* One hash entry: payload, chain link and the key stored inline.
 * "data" must stay the first member: hash_search() returns the address of
 * data and hash_find() treats that pointer as the hash_item itself. */
typedef struct hash_item_s {
	union {
		struct var_s v;         /* variable/array hash */
		struct rstream_s rs;    /* redirect streams hash */
		struct func_s f;        /* functions hash */
	} data;
	struct hash_item_s *next;       /* next in chain */
	char name[1];                   /* really it's longer: hash_find() allocates
	                                 * strlen(name) + 1 extra bytes for the key */
} hash_item;
101
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
	unsigned nel;           /* num of elements */
	unsigned csize;         /* current hash size (number of buckets in items[]) */
	unsigned nprime;        /* next hash size in PRIMES[] (index used by hash_rebuild()) */
	unsigned glen;          /* summary length of item names, each counted with its NUL */
	struct hash_item_s **items;     /* bucket heads */
} xhash;
109
/* Tree node of the parsed program. "info" holds the operation word
 * (OC_xxx class, OF_xxx flags, priority byte, low opcode bits);
 * which union member is valid depends on that operation. */
typedef struct node_s {
	uint32_t info;
	unsigned lineno;        /* source line, taken from g_lineno by new_node() */
	union {
		struct node_s *n;
		var *v;
		int aidx;
		char *new_progname;
		regex_t *re;        /* case-sensitive regex (set by mk_re_node()) */
	} l;
	union {
		struct node_s *n;
		regex_t *ire;       /* REG_ICASE regex (set by mk_re_node()) */
		func *f;
	} r;
	union {
		struct node_s *n;
	} a;
} node;
130
/* Block of temporary variables, managed by nvalloc()/nvfree().
 * Blocks form a doubly linked list; g_cb points at the block in use. */
typedef struct nvblock_s {
	int size;                   /* capacity of nv[] in vars */
	var *pos;                   /* first unused slot in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[];                   /* storage, allocated together with the header */
} nvblock;
139
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): the re[2] layout matches what mk_re_node() expects
 * (re[0] case-sensitive, re[1] REG_ICASE) - confirm at the call sites. */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
144
/* simple token classes */
/* Order and hex values are very important!!!  See next_token():
 * it starts with class bit 1 and shifts it left at every NTC marker
 * in tokenlist[], so bit N here must describe the Nth group there. */
#define	TC_SEQSTART	 1				/* ( */
#define	TC_SEQTERM	(1 << 1)		/* ) */
#define	TC_REGEXP	(1 << 2)		/* /.../ */
#define	TC_OUTRDR	(1 << 3)		/* | > >> */
#define	TC_UOPPOST	(1 << 4)		/* unary postfix operator */
#define	TC_UOPPRE1	(1 << 5)		/* unary prefix operator */
#define	TC_BINOPX	(1 << 6)		/* two-opnd operator */
#define	TC_IN		(1 << 7)
#define	TC_COMMA	(1 << 8)
#define	TC_PIPE		(1 << 9)		/* input redirection pipe */
#define	TC_UOPPRE2	(1 << 10)		/* unary prefix operator */
#define	TC_ARRTERM	(1 << 11)		/* ] */
#define	TC_GRPSTART	(1 << 12)		/* { */
#define	TC_GRPTERM	(1 << 13)		/* } */
#define	TC_SEMICOL	(1 << 14)
#define	TC_NEWLINE	(1 << 15)
#define	TC_STATX	(1 << 16)		/* ctl statement (for, next...) */
#define	TC_WHILE	(1 << 17)
#define	TC_ELSE		(1 << 18)
#define	TC_BUILTIN	(1 << 19)
#define	TC_GETLINE	(1 << 20)
#define	TC_FUNCDECL	(1 << 21)		/* `function' `func' */
#define	TC_BEGIN	(1 << 22)
#define	TC_END		(1 << 23)
/* The classes below have no entry in tokenlist[]; next_token() assigns them directly */
#define	TC_EOF		(1 << 24)
#define	TC_VARIABLE	(1 << 25)
#define	TC_ARRAY	(1 << 26)
#define	TC_FUNCTION	(1 << 27)
#define	TC_STRING	(1 << 28)
#define	TC_NUMBER	(1 << 29)

#define	TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define	TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define	TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define	TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define	TC_STATEMNT (TC_STATX | TC_WHILE)
#define	TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected
 * (next_token() always tries these, whatever the caller expects) */
#define	TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define	TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define	TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define	TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define	TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define	TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
/* Operator flags stored in the info word. The names suggest "resolve
 * operand 1/2" (RES), "as string" (STR) and "as number" (NUM);
 * NOTE(review): the code acting on them is not in this part of the file. */
#define	OF_RES1    0x010000
#define	OF_RES2    0x020000
#define	OF_STR1    0x040000
#define	OF_STR2    0x080000
#define	OF_NUM1    0x100000
#define	OF_CHECKED 0x200000

/* combined operator flags: first letter describes operand 1, second letter
 * operand 2 (x = no flag, V = RES, S = RES|STR, N = RES|NUM) */
#define	xx	0
#define	xV	OF_RES2
#define	xS	(OF_RES2 | OF_STR2)
#define	Vx	OF_RES1
#define	VV	(OF_RES1 | OF_RES2)
#define	Nx	(OF_RES1 | OF_NUM1)
#define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define	Sx	(OF_RES1 | OF_STR1)
#define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class (OC_xxx) and the low opcode bits of info */
#define	OPCLSMASK 0xFF00
#define	OPNMASK   0x007F
229
/* Operator priority occupies the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() performs the shift in uint32_t: builtin descriptors such as P(0x83)
 * have bit 7 set, and shifting them left by 24 as a signed int would move
 * a one into the sign bit, which is undefined behaviour. The argument is
 * parenthesized so that any expression can be passed safely.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
237
/* Operation classes: the value lives in bits 8..15 of node::info / t_info
 * (extract it with OPCLSMASK). */

/* Range markers within the OC_xxx numbering below.
 * NOTE(review): their users are outside this part of the file. */
#define	SHIFT_TIL_THIS	0x0600
#define	RECUR_FROM_THIS	0x1000

enum {
	OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

	OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
	OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
	OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
	OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	/* statement kinds used for if/do/for/while in tokeninfo[] */
	ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
264
/* simple builtins: low bits of an OC_FBLTIN info word. Per tokeninfo[]:
 * F_in=int, F_rn=rand, F_co=cos, F_ex=exp, F_lg=log, F_si=sin, F_sq=sqrt,
 * F_sr=srand, F_ti=systime, F_le=length, F_sy=system, F_ff=fflush, F_cl=close */
enum {
	F_in,	F_rn,	F_co,	F_ex,	F_lg,	F_si,	F_sq,	F_sr,
	F_ti,	F_le,	F_sy,	F_ff,	F_cl
};

/* builtins: low bits of an OC_BUILTIN info word. Per tokeninfo[]:
 * B_a2=atan2, B_ix=index, B_ma=match, B_sp=split, B_ss=substr, B_ti=strftime,
 * B_mt=mktime, B_lo=tolower, B_up=toupper, B_ge=gensub, B_gs=gsub, B_su=sub,
 * B_an=and, B_co=compl, B_ls=lshift, B_or=or, B_rs=rshift, B_xo=xor */
enum {
	B_a2,	B_ix,	B_ma,	B_sp,	B_ss,	B_ti,   B_mt,	B_lo,	B_up,
	B_ge,	B_gs,	B_su,
	B_an,	B_co,	B_ls,	B_or,	B_rs,	B_xo,
};
277
/* tokens and their corresponding info values */

#define	NTC     "\377"  /* switch to next token class (tc<<1) */
#define	NTCC    '\377'

#define	OC_B	OC_BUILTIN

/* Token table scanned by next_token(). Each entry is one length byte
 * (written as an octal escape) followed by that many characters of token
 * text. NTC ends a token class; groups appear in the same order as the
 * TC_xxx bits. The table ends at the first NUL length byte.
 * tokeninfo[] holds one value per token here, in the same order
 * (NTC markers have no tokeninfo entry). */
static const char tokenlist[] ALIGN1 =
	"\1("       NTC
	"\1)"       NTC
	"\1/"       NTC                                 /* REGEXP */
	"\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
	"\2++"      "\2--"      NTC                     /* UOPPOST */
	"\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
	"\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
	"\2*="      "\2/="      "\2%="      "\2^="
	"\1+"       "\1-"       "\3**="     "\2**"
	"\1/"       "\1%"       "\1^"       "\1*"
	"\2!="      "\2>="      "\2<="      "\1>"
	"\1<"       "\2!~"      "\1~"       "\2&&"
	"\2||"      "\1?"       "\1:"       NTC
	"\2in"      NTC
	"\1,"       NTC
	"\1|"       NTC
	"\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
	"\1]"       NTC
	"\1{"       NTC
	"\1}"       NTC
	"\1;"       NTC
	"\1\n"      NTC
	"\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
	"\10continue"           "\6delete"  "\5print"
	"\6printf"  "\4next"    "\10nextfile"
	"\6return"  "\4exit"    NTC
	"\5while"   NTC
	"\4else"    NTC

	/* everything up to the next NTC belongs to the BUILTIN class */
	"\3and"     "\5compl"   "\6lshift"  "\2or"
	"\6rshift"  "\3xor"
	"\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
	"\3cos"     "\3exp"     "\3int"     "\3log"
	"\4rand"    "\3sin"     "\4sqrt"    "\5srand"
	"\6gensub"  "\4gsub"    "\5index"   "\6length"
	"\5match"   "\5split"   "\7sprintf" "\3sub"
	"\6substr"  "\7systime" "\10strftime" "\6mktime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC
	"\4func"    "\10function"   NTC
	"\5BEGIN"   NTC
	"\3END"     "\0"
	;
329
/* Info word for every token of tokenlist[], in the same order.
 * next_token() advances through both tables in step and copies the
 * matching entry into t_info. */
static const uint32_t tokeninfo[] = {
	0,                                                      /* ( */
	0,                                                      /* ) */
	OC_REGEXP,                                              /* / */
	xS|'a',     xS|'w',     xS|'|',                         /* >> > | */
	OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',       /* postfix ++ -- */
	OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',       /* prefix ++ -- */
	    OC_FIELD|xV|P(5),                                   /* $ */
	OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),           /* == = */
	    OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',    /* += -= */
	OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',    /* *= /= */
	    OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',    /* %= ^= */
	OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',     /* + - */
	    OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',     /* **= ** */
	OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',     /* / % */
	    OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',     /* ^ * */
	OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,      /* != >= */
	    OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,      /* <= > */
	OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',      /* < !~ */
	    OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),           /* ~ && */
	OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',    /* || ? */
	    OC_COLON|xx|P(67)|':',                              /* : */
	OC_IN|SV|P(49),                                         /* in */
	OC_COMMA|SS|P(80),                                      /* , */
	OC_PGETLINE|SV|P(37),                                   /* | */
	OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',      /* unary + - */
	    OC_UNARY|xV|P(19)|'!',                              /* ! */
	0,                                                      /* ] */
	0,                                                      /* { */
	0,                                                      /* } */
	0,                                                      /* ; */
	0,                                                      /* \n */
	ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
	OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
	OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
	OC_RETURN|Vx,   OC_EXIT|Nx,
	ST_WHILE,
	0,                                                      /* else */

	/* builtins, same order as in tokenlist[]: and compl lshift or ... toupper */
	OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
	OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_GETLINE|SV|P(0),                                     /* getline */
	0,	0,                                                  /* func function */
	0,                                                      /* BEGIN */
	0                                                       /* END */
};
383
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indexes into intvar[]; the order must match the names in vNames[] */
enum {
	CONVFMT,    OFMT,       FS,         OFS,
	ORS,        RS,         RT,         FILENAME,
	SUBSEP,     F0,         ARGIND,     ARGC,
	ARGV,       ERRNO,      FNR,        NR,
	NF,         IGNORECASE,	ENVIRON,    NUM_INTERNAL_VARS
};

/* NUL-separated names, in enum order; a '*' right after a name's NUL marks
 * the variable as SPECIAL. The list ends with an empty name. */
static const char vNames[] ALIGN1 =
	"CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
	"ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
	"SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
	"ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
	"NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated initial string values for the first entries of vNames[]
 * (CONVFMT through $), followed by a "\377" byte.
 * NOTE(review): "\377" presumably tells the initialization code that the
 * remaining variables have no string default - confirm where this is read. */
static const char vValues[] ALIGN1 =
	"%.6g\0"    "%.6g\0"    " \0"       " \0"
	"\n\0"      "\n\0"      "\0"        "\0"
	"\034\0"    "\0"        "\377";
405
/* hash size may grow to these values: hash_init() starts a table at
 * FIRST_PRIME buckets, hash_rebuild() moves it to PRIMES[nprime] and
 * stops growing once every entry has been used */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
409
410
/* Globals. Split in two parts so that first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 *
 * This first part is reached through G1 (ptr_to_globals[-1]); the
 * same-named accessor macros further down hide the indirection.
 */
struct globals {
	double t_double;        /* value of the last TC_NUMBER token (set by next_token()) */
	chain beginseq, mainseq, endseq;
	chain *seq;
	node *break_ptr, *continue_ptr;
	rstream *iF;
	xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables (newvar), fdhash: streams
	                                         * (newfile), fnhash: functions (newfunc) */
	const char *g_progname; /* program name reported by syntax_error() */
	int g_lineno;           /* line number reported by syntax_error() and stored in new nodes */
	int nfields;
	int maxfields; /* used in fsrealloc() only */
	var *Fields;
	nvblock *g_cb;          /* current block of temporaries (nvalloc()/nvfree()) */
	char *g_pos;            /* read position of next_token() in the program text */
	char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
	smallint icase;
	smallint exiting;
	smallint nextrec;
	smallint nextfile;
	smallint is_f0_split;
};
/* Second part of the globals, reached through G (*ptr_to_globals).
 * The t_xxx members describe the token most recently produced by
 * next_token(). */
struct globals2 {
	uint32_t t_info; /* often used */
	uint32_t t_tclass;      /* class (TC_xxx) of the current token */
	char *t_string;         /* text of the current string/regexp/name token */
	int t_lineno;           /* line counter maintained while scanning */
	int t_rollback;         /* set by rollback_token(): next_token() returns the same token again */

	var *intvar[NUM_INTERNAL_VARS]; /* often used */

	/* former statics from various functions */
	char *split_f0__fstrings;

	uint32_t next_token__save_tclass;       /* real token held back while a concat op is returned */
	uint32_t next_token__save_info;
	uint32_t next_token__ltclass;           /* class of the previous token; INIT_G sets TC_OPTERM */
	smallint next_token__concat_inserted;

	smallint next_input_file__files_happen;
	rstream next_input_file__rsm;

	var *evaluate__fnargs;
	unsigned evaluate__seed;                /* INIT_G sets this to 1 */
	regex_t evaluate__sreg;

	var ptest__v;

	tsplitter exec_builtin__tspl;

	/* biggest and least used members go last */
	tsplitter fsplitter, rsplitter;
};
/* INIT_G() allocates both structs as one zeroed chunk and points
 * ptr_to_globals just past the first one, so struct globals sits directly
 * below the pointer (G1) and struct globals2 starts at it (G). */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand so that the rest of the file can use the bare member names */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate the globals (zero-filled) and set the two non-zero defaults */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
513
514
/* function prototypes: forward declarations for functions that are used
 * before their definition appears in the file */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
523
/* ---- error handling ---- */

/* Message texts; in this part of the file they are passed to syntax_error() */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only defined when the libm-backed math feature is configured out */
#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
#endif
538
539static void zero_out_var(var *vp)
540{
541	memset(vp, 0, sizeof(*vp));
542}
543
544static void syntax_error(const char *message) NORETURN;
545static void syntax_error(const char *message)
546{
547	bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
548}
549
550/* ---- hash stuff ---- */
551
/* Fold the bytes of NAME into an unsigned hash value. Every step computes
 * previous * 63 + byte (written as a shift and a subtraction), wrapping
 * in unsigned arithmetic. The result is not reduced to a bucket index;
 * callers apply "% csize" themselves. */
static unsigned hashidx(const char *name)
{
	unsigned acc = 0;
	const char *cursor;

	for (cursor = name; *cursor != '\0'; cursor++)
		acc = (acc << 6) - acc + *cursor;
	return acc;
}
560
561/* create new hash */
562static xhash *hash_init(void)
563{
564	xhash *newhash;
565
566	newhash = xzalloc(sizeof(*newhash));
567	newhash->csize = FIRST_PRIME;
568	newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569
570	return newhash;
571}
572
573/* find item in hash, return ptr to data, NULL if not found */
574static void *hash_search(xhash *hash, const char *name)
575{
576	hash_item *hi;
577
578	hi = hash->items[hashidx(name) % hash->csize];
579	while (hi) {
580		if (strcmp(hi->name, name) == 0)
581			return &hi->data;
582		hi = hi->next;
583	}
584	return NULL;
585}
586
587/* grow hash if it becomes too big */
588static void hash_rebuild(xhash *hash)
589{
590	unsigned newsize, i, idx;
591	hash_item **newitems, *hi, *thi;
592
593	if (hash->nprime == ARRAY_SIZE(PRIMES))
594		return;
595
596	newsize = PRIMES[hash->nprime++];
597	newitems = xzalloc(newsize * sizeof(newitems[0]));
598
599	for (i = 0; i < hash->csize; i++) {
600		hi = hash->items[i];
601		while (hi) {
602			thi = hi;
603			hi = thi->next;
604			idx = hashidx(thi->name) % newsize;
605			thi->next = newitems[idx];
606			newitems[idx] = thi;
607		}
608	}
609
610	free(hash->items);
611	hash->csize = newsize;
612	hash->items = newitems;
613}
614
615/* find item in hash, add it if necessary. Return ptr to data */
616static void *hash_find(xhash *hash, const char *name)
617{
618	hash_item *hi;
619	unsigned idx;
620	int l;
621
622	hi = hash_search(hash, name);
623	if (!hi) {
624		if (++hash->nel / hash->csize > 10)
625			hash_rebuild(hash);
626
627		l = strlen(name) + 1;
628		hi = xzalloc(sizeof(*hi) + l);
629		strcpy(hi->name, name);
630
631		idx = hashidx(name) % hash->csize;
632		hi->next = hash->items[idx];
633		hash->items[idx] = hi;
634		hash->glen += l;
635	}
636	return &hi->data;
637}
638
/* Typed wrappers around hash_find(): each returns the (possibly newly
 * created, zero-filled) entry's data viewed as the given type.
 * findvar() takes the table explicitly; the others use the global
 * variables, streams and functions tables respectively. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
643
644static void hash_remove(xhash *hash, const char *name)
645{
646	hash_item *hi, **phi;
647
648	phi = &hash->items[hashidx(name) % hash->csize];
649	while (*phi) {
650		hi = *phi;
651		if (strcmp(hi->name, name) == 0) {
652			hash->glen -= (strlen(name) + 1);
653			hash->nel--;
654			*phi = hi->next;
655			free(hi);
656			break;
657		}
658		phi = &hi->next;
659	}
660}
661
662/* ------ some useful functions ------ */
663
664static char *skip_spaces(char *p)
665{
666	while (1) {
667		if (*p == '\\' && p[1] == '\n') {
668			p++;
669			t_lineno++;
670		} else if (*p != ' ' && *p != '\t') {
671			break;
672		}
673		p++;
674	}
675	return p;
676}
677
/* Return the word *s currently points at and move *s to the byte that
 * follows the word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;
	char *scan = word;

	while (*scan != '\0')
		scan++;
	*s = scan + 1;
	return word;
}
686
/* Fetch the next character from *s, translating a backslash escape into
 * the character it denotes, and advance *s past what was consumed.
 *
 * An unrecognized escape "\z" yields z itself.
 * bb_process_escape_sequence() (libbb, not shown here) is expected to
 * return '\\' and leave *s untouched in that case - that is what the
 * "*s == pps" test detects.
 *
 * If the backslash is the last character before the terminating NUL, the
 * NUL is returned but *s is NOT moved past it, so the caller still sees
 * the end of the text instead of scanning beyond the buffer.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *(*s)++;
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) { /* unrecognized \z? */
		c = **s; /* yes, fetch z */
		if (c != '\0')
			(*s)++; /* advance unless z is the terminating NUL */
	}
	return c;
}
699
700static ALWAYS_INLINE int isalnum_(int c)
701{
702	return (isalnum(c) || c == '_');
703}
704
/* Convert the number at *pp to double and move *pp past the consumed text.
 * With ENABLE_DESKTOP, text starting with '0' followed by 'x'/'X' or a
 * digit is first read by strtoull() with base 0 (hex or octal); that result
 * is used unless an octal-looking prefix turns out to be followed by a
 * digit or '.', in which case the whole text is re-read by strtod(). */
static double my_strtod(char **pp)
{
	char *start = *pp;
#if ENABLE_DESKTOP
	if (start[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		char lowered = (start[1] | 0x20);
		int is_hex = (lowered == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ival = strtoull(start, pp, 0);
			char stop;

			if (is_hex)
				return ival;
			stop = **pp;
			if (stop != '.' && !isdigit(stop))
				return ival;
			/* Otherwise it may be a floating number, e.g.
			 * 009.123 (*pp points to '9') or
			 * 000.123 (*pp points to '.'):
			 * let strtod below parse it from the start.
			 */
		}
	}
#endif
	return strtod(start, pp);
}
729
730/* -------- working with variables (set/get/copy/etc) -------- */
731
732static xhash *iamarray(var *v)
733{
734	var *a = v;
735
736	while (a->type & VF_CHILD)
737		a = a->x.parent;
738
739	if (!(a->type & VF_ARRAY)) {
740		a->type |= VF_ARRAY;
741		a->x.array = hash_init();
742	}
743	return a->x.array;
744}
745
746static void clear_array(xhash *array)
747{
748	unsigned i;
749	hash_item *hi, *thi;
750
751	for (i = 0; i < array->csize; i++) {
752		hi = array->items[i];
753		while (hi) {
754			thi = hi;
755			hi = hi->next;
756			free(thi->data.v.string);
757			free(thi);
758		}
759		array->items[i] = NULL;
760	}
761	array->glen = array->nel = 0;
762}
763
764/* clear a variable */
765static var *clrvar(var *v)
766{
767	if (!(v->type & VF_FSTR))
768		free(v->string);
769
770	v->type &= VF_DONTTOUCH;
771	v->type |= VF_DIRTY;
772	v->string = NULL;
773	return v;
774}
775
776/* assign string value to variable */
777static var *setvar_p(var *v, char *value)
778{
779	clrvar(v);
780	v->string = value;
781	handle_special(v);
782	return v;
783}
784
785/* same as setvar_p but make a copy of string */
786static var *setvar_s(var *v, const char *value)
787{
788	return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
789}
790
791/* same as setvar_s but sets USER flag */
792static var *setvar_u(var *v, const char *value)
793{
794	v = setvar_s(v, value);
795	v->type |= VF_USER;
796	return v;
797}
798
799/* set array element to user string */
800static void setari_u(var *a, int idx, const char *s)
801{
802	var *v;
803
804	v = findvar(iamarray(a), itoa(idx));
805	setvar_u(v, s);
806}
807
808/* assign numeric value to variable */
809static var *setvar_i(var *v, double value)
810{
811	clrvar(v);
812	v->type |= VF_NUMBER;
813	v->number = value;
814	handle_special(v);
815	return v;
816}
817
818static const char *getvar_s(var *v)
819{
820	/* if v is numeric and has no cached string, convert it to string */
821	if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
822		fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
823		v->string = xstrdup(g_buf);
824		v->type |= VF_CACHED;
825	}
826	return (v->string == NULL) ? "" : v->string;
827}
828
829static double getvar_i(var *v)
830{
831	char *s;
832
833	if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
834		v->number = 0;
835		s = v->string;
836		if (s && *s) {
837			debug_printf_eval("getvar_i: '%s'->", s);
838			v->number = my_strtod(&s);
839			debug_printf_eval("%f (s:'%s')\n", v->number, s);
840			if (v->type & VF_USER) {
841				s = skip_spaces(s);
842				if (*s != '\0')
843					v->type &= ~VF_USER;
844			}
845		} else {
846			debug_printf_eval("getvar_i: '%s'->zero\n", s);
847			v->type &= ~VF_USER;
848		}
849		v->type |= VF_CACHED;
850	}
851	debug_printf_eval("getvar_i: %f\n", v->number);
852	return v->number;
853}
854
855/* Used for operands of bitwise ops */
856static unsigned long getvar_i_int(var *v)
857{
858	double d = getvar_i(v);
859
860	/* Casting doubles to longs is undefined for values outside
861	 * of target type range. Try to widen it as much as possible */
862	if (d >= 0)
863		return (unsigned long)d;
864	/* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
865	return - (long) (unsigned long) (-d);
866}
867
868static var *copyvar(var *dest, const var *src)
869{
870	if (dest != src) {
871		clrvar(dest);
872		dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
873		debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
874		dest->number = src->number;
875		if (src->string)
876			dest->string = xstrdup(src->string);
877	}
878	handle_special(dest);
879	return dest;
880}
881
882static var *incvar(var *v)
883{
884	return setvar_i(v, getvar_i(v) + 1.0);
885}
886
887/* return true if v is number or numeric string */
888static int is_numeric(var *v)
889{
890	getvar_i(v);
891	return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
892}
893
894/* return 1 when value of v corresponds to true, 0 otherwise */
895static int istrue(var *v)
896{
897	if (is_numeric(v))
898		return (v->number != 0);
899	return (v->string && v->string[0]);
900}
901
902/* temporary variables allocator. Last allocated should be first freed */
903static var *nvalloc(int n)
904{
905	nvblock *pb = NULL;
906	var *v, *r;
907	int size;
908
909	while (g_cb) {
910		pb = g_cb;
911		if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
912			break;
913		g_cb = g_cb->next;
914	}
915
916	if (!g_cb) {
917		size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
918		g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
919		g_cb->size = size;
920		g_cb->pos = g_cb->nv;
921		g_cb->prev = pb;
922		/*g_cb->next = NULL; - xzalloc did it */
923		if (pb)
924			pb->next = g_cb;
925	}
926
927	v = r = g_cb->pos;
928	g_cb->pos += n;
929
930	while (v < g_cb->pos) {
931		v->type = 0;
932		v->string = NULL;
933		v++;
934	}
935
936	return r;
937}
938
939static void nvfree(var *v)
940{
941	var *p;
942
943	if (v < g_cb->nv || v >= g_cb->pos)
944		syntax_error(EMSG_INTERNAL_ERROR);
945
946	for (p = v; p < g_cb->pos; p++) {
947		if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
948			clear_array(iamarray(p));
949			free(p->x.array->items);
950			free(p->x.array);
951		}
952		if (p->type & VF_WALK) {
953			walker_list *n;
954			walker_list *w = p->x.walker;
955			debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
956			p->x.walker = NULL;
957			while (w) {
958				n = w->prev;
959				debug_printf_walker(" free(%p)\n", w);
960				free(w);
961				w = n;
962			}
963		}
964		clrvar(p);
965	}
966
967	g_cb->pos = v;
968	while (g_cb->prev && g_cb->pos == g_cb->nv) {
969		g_cb = g_cb->prev;
970	}
971}
972
973/* ------- awk program text parsing ------- */
974
/* Parse next token pointed by global g_pos, place results into the global
 * token state (t_tclass, t_info, t_string, t_double).
 * "expected" is a mask of acceptable TC_xxx classes: if the token isn't
 * expected, give up with a syntax error. Return token class.
 *
 * The program text is modified in place: string and regexp tokens are
 * unescaped into their own storage, and names are NUL-terminated by
 * shifting them one byte to the left.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

	char *p, *s;
	const char *tl;
	uint32_t tc;
	const uint32_t *ti;

	if (t_rollback) {
		/* rollback_token() was called: hand out the current token again */
		t_rollback = FALSE;

	} else if (concat_inserted) {
		/* last call returned a synthetic concatenation operator;
		 * now deliver the real token that was held back */
		concat_inserted = FALSE;
		t_tclass = save_tclass;
		t_info = save_info;

	} else {
		p = g_pos;
 readnext:
		p = skip_spaces(p);
		g_lineno = t_lineno;
		/* a comment runs up to (not including) the end of line */
		if (*p == '#')
			while (*p != '\n' && *p != '\0')
				p++;

		if (*p == '\n')
			t_lineno++;

		if (*p == '\0') {
			tc = TC_EOF;

		} else if (*p == '\"') {
			/* it's a string */
			/* unescaped in place: s (write) never gets ahead of p (read) */
			t_string = s = ++p;
			while (*p != '\"') {
				char *pp = p;
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*s++ = nextchar(&pp);
				p = pp;
			}
			p++;
			*s = '\0';
			tc = TC_STRING;

		} else if ((expected & TC_REGEXP) && *p == '/') {
			/* it's regexp */
			/* '/' starts a regexp only where the caller expects one */
			t_string = s = ++p;
			while (*p != '/') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*s = *p++;
				if (*s++ == '\\') {
					char *pp = p;
					s[-1] = bb_process_escape_sequence((const char **)&pp);
					if (*p == '\\')
						*s++ = '\\';
					if (pp == p)
						*s++ = *p++;
					else
						p = pp;
				}
			}
			p++;
			*s = '\0';
			tc = TC_REGEXP;

		} else if (*p == '.' || isdigit(*p)) {
			/* it's a number */
			char *pp = p;
			t_double = my_strtod(&pp);
			p = pp;
			/* a '.' right after the number (e.g. a second dot) is an error */
			if (*p == '.')
				syntax_error(EMSG_UNEXP_TOKEN);
			tc = TC_NUMBER;

		} else {
			/* search for something known */
			/* tl walks tokenlist[], ti the parallel tokeninfo[] entry,
			 * tc is the class bit of the group being scanned */
			tl = tokenlist;
			tc = 0x00000001;
			ti = tokeninfo;
			while (*tl) {
				int l = (unsigned char) *tl++;
				if (l == (unsigned char) NTCC) {
					tc <<= 1;
					continue;
				}
				/* if token class is expected,
				 * token matches,
				 * and it's not a longer word,
				 */
				if ((tc & (expected | TC_WORD | TC_NEWLINE))
				 && strncmp(p, tl, l) == 0
				 && !((tc & TC_WORD) && isalnum_(p[l]))
				) {
					/* then this is what we are looking for */
					t_info = *ti;
					p += l;
					goto token_found;
				}
				ti++;
				tl += l;
			}
			/* not a known token */

			/* is it a name? (var/array/function) */
			if (!isalnum_(*p))
				syntax_error(EMSG_UNEXP_TOKEN); /* no */
			/* yes */
			/* Shift the name one byte to the left so that a NUL can
			 * terminate it without destroying the character that
			 * follows; this overwrites the byte preceding the name. */
			t_string = --p;
			while (isalnum_(*++p)) {
				p[-1] = *p;
			}
			p[-1] = '\0';
			tc = TC_VARIABLE;
			/* also consume whitespace between functionname and bracket */
			if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
				p = skip_spaces(p);
			if (*p == '(') {
				/* the '(' itself is left for the next call */
				tc = TC_FUNCTION;
			} else {
				if (*p == '[') {
					/* the '[' is consumed as part of the array token */
					p++;
					tc = TC_ARRAY;
				}
			}
 token_found: ;
		}
		g_pos = p;

		/* skipping newlines in some cases */
		/* (when the previous token belongs to TC_NOTERM) */
		if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
			goto readnext;

		/* insert concatenation operator when needed */
		/* the token just read is saved and delivered by the next call */
		if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
			concat_inserted = TRUE;
			save_tclass = tc;
			save_info = t_info;
			tc = TC_BINOP;
			t_info = OC_CONCAT | SS | P(35);
		}

		t_tclass = tc;
	}
	ltclass = t_tclass;

	/* Are we ready for this? */
	if (!(ltclass & expected))
		syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
				EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

	return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1141
/* Request a token rollback by raising the t_rollback flag.
 * NOTE(review): presumably next_token() tests this flag and hands the
 * previous token back again - its opening lines are not visible here. */
static void rollback_token(void)
{
	t_rollback = TRUE;
}
1146
1147static node *new_node(uint32_t info)
1148{
1149	node *n;
1150
1151	n = xzalloc(sizeof(node));
1152	n->info = info;
1153	n->lineno = g_lineno;
1154	return n;
1155}
1156
1157static void mk_re_node(const char *s, node *n, regex_t *re)
1158{
1159	n->info = OC_REGEXP;
1160	n->l.re = re;
1161	n->r.ire = re + 1;
1162	xregcomp(re, s, REG_EXTENDED);
1163	xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1164}
1165
1166static node *condition(void)
1167{
1168	next_token(TC_SEQSTART);
1169	return parse_expr(TC_SEQTERM);
1170}
1171
/* Parse an expression terminated by one of the token classes in iexp and
 * return a pointer to the built subtree. The terminator is eaten by
 * parse_expr.
 *
 * A stack-allocated dummy node (sn) serves as the root while the tree is
 * built: its info is set to PRIMASK and the finished expression ends up
 * in sn.r.n, which is what gets returned (NULL when the terminator is the
 * very first token). cn tracks the most recently added node and xtc is the
 * set of token classes passed to the next next_token() call.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, xtc;
	var *v;

	sn.info = PRIMASK;
	sn.r.n = glptr = NULL;
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	/* loop until a token belonging to the terminator set shows up */
	while (!((tc = next_token(xtc)) & iexp)) {

		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* input redirection (<) attached to glptr node */
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			/* for binary and postfix-unary operators, jump back over
			 * previous operators with higher priority */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
			) {
				vn = vn->a.n;
			}
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* splice the new operator node in between vn and vn's parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				/* binary operator: vn becomes the left operand */
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* it's a pipe */
					next_token(TC_GETLINE);
					/* give maximum priority to this pipe */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix operator: vn becomes the (right) operand */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			/* for operands and prefix-unary operators, attach them
			 * to last node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple tclasses should be used! */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					/* a name found in ahash becomes a function-argument
					 * reference (by index); anything else is a variable
					 * obtained through newvar() */
					cn->info = OC_VAR;
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.aidx = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* subscript expression, parsed up to TC_ARRTERM */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					/* literal: stored in a freshly allocated var */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					/* storage for both the plain and the icase regex */
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					/* function call: r.f is the function, l.n its
					 * parenthesised argument expression */
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					/* parenthesised subexpression replaces the node
					 * that was just created for the '(' token */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					/* remember the node so that a following '<' can be
					 * attached to it as input redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					/* builtin call: arguments parsed like a condition */
					cn->l.n = condition();
					break;
				}
			}
		}
	}
	return sn.r.n;
}
1291
1292/* add node to chain. Return ptr to alloc'd node */
1293static node *chain_node(uint32_t info)
1294{
1295	node *n;
1296
1297	if (!seq->first)
1298		seq->first = seq->last = new_node(0);
1299
1300	if (seq->programname != g_progname) {
1301		seq->programname = g_progname;
1302		n = chain_node(OC_NEWSOURCE);
1303		n->l.new_progname = xstrdup(g_progname);
1304	}
1305
1306	n = seq->last;
1307	n->info = info;
1308	seq->last = n->a.n = new_node(OC_DONE);
1309
1310	return n;
1311}
1312
1313static void chain_expr(uint32_t info)
1314{
1315	node *n;
1316
1317	n = chain_node(info);
1318	n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1319	if (t_tclass & TC_GRPTERM)
1320		rollback_token();
1321}
1322
1323static node *chain_loop(node *nn)
1324{
1325	node *n, *n2, *save_brk, *save_cont;
1326
1327	save_brk = break_ptr;
1328	save_cont = continue_ptr;
1329
1330	n = chain_node(OC_BR | Vx);
1331	continue_ptr = new_node(OC_EXEC);
1332	break_ptr = new_node(OC_EXEC);
1333	chain_group();
1334	n2 = chain_node(OC_EXEC | Vx);
1335	n2->l.n = nn;
1336	n2->a.n = n;
1337	continue_ptr->a.n = n2;
1338	break_ptr->a.n = n->r.n = seq->last;
1339
1340	continue_ptr = save_cont;
1341	break_ptr = save_brk;
1342
1343	return n;
1344}
1345
/* Parse one group and attach it to the current chain (seq).
 * A group is either a '{ ... }' block (handled by recursing for each
 * element), a plain expression statement, or a statement keyword
 * dispatched on (t_info & OPCLSMASK). Leading newlines are skipped.
 */
static void chain_group(void)
{
	uint32_t c;
	node *n, *n2, *n3;

	/* skip newlines in front of the group */
	do {
		c = next_token(TC_GRPSEQ);
	} while (c & TC_NEWLINE);

	if (c & TC_GRPSTART) {
		/* block: chain nested groups until the group terminator */
		while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
			if (t_tclass & TC_NEWLINE)
				continue;
			rollback_token();
			chain_group();
		}
	} else if (c & (TC_OPSEQ | TC_OPTERM)) {
		/* expression statement: re-read the token as part of it */
		rollback_token();
		chain_expr(OC_EXEC | Vx);
	} else {						/* TC_STATEMNT */
		switch (t_info & OPCLSMASK) {
		case ST_IF:
			/* n: branch on the condition; n2: jump chained after the
			 * "then" part, which is pointed past the "else" part
			 * when one is present */
			n = chain_node(OC_BR | Vx);
			n->l.n = condition();
			chain_group();
			n2 = chain_node(OC_EXEC);
			n->r.n = seq->last;
			if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
				chain_group();
				n2->a.n = seq->last;
			} else {
				rollback_token();
			}
			break;

		case ST_WHILE:
			/* condition is parsed first, then hooked onto the
			 * branch node created by chain_loop() */
			n2 = condition();
			n = chain_loop(NULL);
			n->l.n = n2;
			break;

		case ST_DO:
			/* n2 is chained ahead of the loop and redirected to the
			 * node following the branch node n, so the condition
			 * (parsed after the body) is bypassed on entry */
			n2 = chain_node(OC_EXEC);
			n = chain_loop(NULL);
			n2->a.n = n->a.n;
			next_token(TC_WHILE);
			n->l.n = condition();
			break;

		case ST_FOR:
			next_token(TC_SEQSTART);
			n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
			if (t_tclass & TC_SEQTERM) {	/* for-in */
				/* the parenthesised part must be an OC_IN expression */
				if ((n2->info & OPCLSMASK) != OC_IN)
					syntax_error(EMSG_UNEXP_TOKEN);
				n = chain_node(OC_WALKINIT | VV);
				n->l.n = n2->l.n;
				n->r.n = n2->r.n;
				n = chain_loop(NULL);
				/* the loop's branch node becomes the walk step */
				n->info = OC_WALKNEXT | Vx;
				n->l.n = n2->l.n;
			} else {			/* for (;;) */
				/* n2: init expr, then condition; n3: increment */
				n = chain_node(OC_EXEC | Vx);
				n->l.n = n2;
				n2 = parse_expr(TC_SEMICOL);
				n3 = parse_expr(TC_SEQTERM);
				n = chain_loop(n3);
				n->l.n = n2;
				/* no condition: the branch node is turned into OC_EXEC */
				if (!n2)
					n->info = OC_EXEC;
			}
			break;

		case OC_PRINT:
		case OC_PRINTF:
			n = chain_node(t_info);
			n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
			if (t_tclass & TC_OUTRDR) {
				/* output redirection: fold the redirection token's
				 * info into the node and parse the target into r.n */
				n->info |= t_info;
				n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
			}
			if (t_tclass & TC_GRPTERM)
				rollback_token();
			break;

		case OC_BREAK:
			/* jump through the current loop's break node */
			n = chain_node(OC_EXEC);
			n->a.n = break_ptr;
			break;

		case OC_CONTINUE:
			/* jump through the current loop's continue node */
			n = chain_node(OC_EXEC);
			n->a.n = continue_ptr;
			break;

		/* delete, next, nextfile, return, exit */
		default:
			chain_expr(t_info);
		}
	}
}
1448
1449static void parse_program(char *p)
1450{
1451	uint32_t tclass;
1452	node *cn;
1453	func *f;
1454	var *v;
1455
1456	g_pos = p;
1457	t_lineno = 1;
1458	while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1459			TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1460
1461		if (tclass & TC_OPTERM)
1462			continue;
1463
1464		seq = &mainseq;
1465		if (tclass & TC_BEGIN) {
1466			seq = &beginseq;
1467			chain_group();
1468
1469		} else if (tclass & TC_END) {
1470			seq = &endseq;
1471			chain_group();
1472
1473		} else if (tclass & TC_FUNCDECL) {
1474			next_token(TC_FUNCTION);
1475			g_pos++;
1476			f = newfunc(t_string);
1477			f->body.first = NULL;
1478			f->nargs = 0;
1479			while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1480				v = findvar(ahash, t_string);
1481				v->x.aidx = f->nargs++;
1482
1483				if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1484					break;
1485			}
1486			seq = &f->body;
1487			chain_group();
1488			clear_array(ahash);
1489
1490		} else if (tclass & TC_OPSEQ) {
1491			rollback_token();
1492			cn = chain_node(OC_TEST);
1493			cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1494			if (t_tclass & TC_GRPSTART) {
1495				rollback_token();
1496				chain_group();
1497			} else {
1498				chain_node(OC_PRINT);
1499			}
1500			cn->r.n = mainseq.last;
1501
1502		} else /* if (tclass & TC_GRPSTART) */ {
1503			rollback_token();
1504			chain_group();
1505		}
1506	}
1507}
1508
1509
1510/* -------- program execution part -------- */
1511
1512static node *mk_splitter(const char *s, tsplitter *spl)
1513{
1514	regex_t *re, *ire;
1515	node *n;
1516
1517	re = &spl->re[0];
1518	ire = &spl->re[1];
1519	n = &spl->n;
1520	if ((n->info & OPCLSMASK) == OC_REGEXP) {
1521		regfree(re);
1522		regfree(ire); // TODO: nuke ire, use re+1?
1523	}
1524	if (s[0] && s[1]) { /* strlen(s) > 1 */
1525		mk_re_node(s, n, re);
1526	} else {
1527		n->info = (uint32_t) s[0];
1528	}
1529
1530	return n;
1531}
1532
1533/* use node as a regular expression. Supplied with node ptr and regex_t
1534 * storage space. Return ptr to regex (if result points to preg, it should
1535 * be later regfree'd manually
1536 */
1537static regex_t *as_regex(node *op, regex_t *preg)
1538{
1539	int cflags;
1540	var *v;
1541	const char *s;
1542
1543	if ((op->info & OPCLSMASK) == OC_REGEXP) {
1544		return icase ? op->r.ire : op->l.re;
1545	}
1546	v = nvalloc(1);
1547	s = getvar_s(evaluate(op, v));
1548
1549	cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1550	/* Testcase where REG_EXTENDED fails (unpaired '{'):
1551	 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1552	 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1553	 * (maybe gsub is not supposed to use REG_EXTENDED?).
1554	 */
1555	if (regcomp(preg, s, cflags)) {
1556		cflags &= ~REG_EXTENDED;
1557		xregcomp(preg, s, cflags);
1558	}
1559	nvfree(v);
1560	return preg;
1561}
1562
/* Gradually growing buffer.
 * Makes sure b can hold more than n bytes: when b is NULL or n >= *size,
 * the buffer is reallocated to n + n/2 + 80 bytes and *size is updated.
 * Because reallocation also happens for n == *size, there is always at
 * least one byte beyond index n-1.
 * Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1575
1576/* resize field storage space */
1577static void fsrealloc(int size)
1578{
1579	int i;
1580
1581	if (size >= maxfields) {
1582		i = maxfields;
1583		maxfields = size + 16;
1584		Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1585		for (; i < maxfields; i++) {
1586			Fields[i].type = VF_SPECIAL;
1587			Fields[i].string = NULL;
1588		}
1589	}
1590	/* if size < nfields, clear extra field variables */
1591	for (i = size; i < nfields; i++) {
1592		clrvar(Fields + i);
1593	}
1594	nfields = size;
1595}
1596
/* Split string s into fields according to splitter node spl.
 * A buffer is allocated and stored in *slist; on return it holds the
 * fields as consecutive NUL-terminated strings. Callers in this file
 * free() that buffer themselves.
 * Returns the number of fields.
 *
 * Four modes, chosen from spl->info:
 *  - OC_REGEXP node: fields are separated by regex matches;
 *  - character 0: every character becomes its own field;
 *  - character other than ' ': fields are separated by that character;
 *  - ' ': fields are runs of non-whitespace.
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int l, n;
	char c[4];
	char *s1;
	regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

	/* in worst case, each char would be a separate field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c: the separator char twice, then (when RS is empty) a newline;
	 * c+2 alone is used as the stop set in regex mode */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	n = 0;
	if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
		if (!*s)
			return n; /* "": zero fields */
		n++; /* at least one field will be there */
		do {
			l = strcspn(s, c+2); /* len till next NUL or \n */
			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
			 && pmatch[0].rm_so <= l
			) {
				l = pmatch[0].rm_so;
				/* empty match at the very start: treat the first
				 * character as a field and step over it */
				if (pmatch[0].rm_eo == 0) {
					l++;
					pmatch[0].rm_eo++;
				}
				n++; /* we saw yet another delimiter */
			} else {
				/* no usable match: the field runs up to the stop
				 * char, which (if present) is skipped as well */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			memcpy(s1, s, l);
			/* make sure we remove *all* of the separator chars */
			do {
				s1[l] = '\0';
			} while (++l < pmatch[0].rm_eo);
			nextword(&s1);
			s += pmatch[0].rm_eo;
		} while (*s);
		return n;
	}
	if (c[0] == '\0') {  /* null split */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {  /* single-character split */
		/* with icase set, both cases of the separator match */
		if (icase) {
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* separators are overwritten in place in the copy of s */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
1677
1678static void split_f0(void)
1679{
1680/* static char *fstrings; */
1681#define fstrings (G.split_f0__fstrings)
1682
1683	int i, n;
1684	char *s;
1685
1686	if (is_f0_split)
1687		return;
1688
1689	is_f0_split = TRUE;
1690	free(fstrings);
1691	fsrealloc(0);
1692	n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1693	fsrealloc(n);
1694	s = fstrings;
1695	for (i = 0; i < n; i++) {
1696		Fields[i].string = nextword(&s);
1697		Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1698	}
1699
1700	/* set NF manually to avoid side effects */
1701	clrvar(intvar[NF]);
1702	intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1703	intvar[NF]->number = nfields;
1704#undef fstrings
1705}
1706
1707/* perform additional actions when some internal variables changed */
1708static void handle_special(var *v)
1709{
1710	int n;
1711	char *b;
1712	const char *sep, *s;
1713	int sl, l, len, i, bsize;
1714
1715	if (!(v->type & VF_SPECIAL))
1716		return;
1717
1718	if (v == intvar[NF]) {
1719		n = (int)getvar_i(v);
1720		fsrealloc(n);
1721
1722		/* recalculate $0 */
1723		sep = getvar_s(intvar[OFS]);
1724		sl = strlen(sep);
1725		b = NULL;
1726		len = 0;
1727		for (i = 0; i < n; i++) {
1728			s = getvar_s(&Fields[i]);
1729			l = strlen(s);
1730			if (b) {
1731				memcpy(b+len, sep, sl);
1732				len += sl;
1733			}
1734			b = qrealloc(b, len+l+sl, &bsize);
1735			memcpy(b+len, s, l);
1736			len += l;
1737		}
1738		if (b)
1739			b[len] = '\0';
1740		setvar_p(intvar[F0], b);
1741		is_f0_split = TRUE;
1742
1743	} else if (v == intvar[F0]) {
1744		is_f0_split = FALSE;
1745
1746	} else if (v == intvar[FS]) {
1747		mk_splitter(getvar_s(v), &fsplitter);
1748
1749	} else if (v == intvar[RS]) {
1750		mk_splitter(getvar_s(v), &rsplitter);
1751
1752	} else if (v == intvar[IGNORECASE]) {
1753		icase = istrue(v);
1754
1755	} else {				/* $n */
1756		n = getvar_i(intvar[NF]);
1757		setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1758		/* right here v is invalid. Just to note... */
1759	}
1760}
1761
1762/* step through func/builtin/etc arguments */
1763static node *nextarg(node **pn)
1764{
1765	node *n;
1766
1767	n = *pn;
1768	if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1769		*pn = n->r.n;
1770		n = n->l.n;
1771	} else {
1772		*pn = NULL;
1773	}
1774	return n;
1775}
1776
1777static void hashwalk_init(var *v, xhash *array)
1778{
1779	hash_item *hi;
1780	unsigned i;
1781	walker_list *w;
1782	walker_list *prev_walker;
1783
1784	if (v->type & VF_WALK) {
1785		prev_walker = v->x.walker;
1786	} else {
1787		v->type |= VF_WALK;
1788		prev_walker = NULL;
1789	}
1790	debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1791
1792	w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1793	debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1794	w->cur = w->end = w->wbuf;
1795	w->prev = prev_walker;
1796	for (i = 0; i < array->csize; i++) {
1797		hi = array->items[i];
1798		while (hi) {
1799			strcpy(w->end, hi->name);
1800			nextword(&w->end);
1801			hi = hi->next;
1802		}
1803	}
1804}
1805
1806static int hashwalk_next(var *v)
1807{
1808	walker_list *w = v->x.walker;
1809
1810	if (w->cur >= w->end) {
1811		walker_list *prev_walker = w->prev;
1812
1813		debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1814		free(w);
1815		v->x.walker = prev_walker;
1816		return FALSE;
1817	}
1818
1819	setvar_s(v, nextword(&w->cur));
1820	return TRUE;
1821}
1822
1823/* evaluate node, return 1 when result is true, 0 otherwise */
1824static int ptest(node *pattern)
1825{
1826	/* ptest__v is "static": to save stack space? */
1827	return istrue(evaluate(pattern, &G.ptest__v));
1828}
1829
/* Read the next record from stream rsm into variable v.
 * The record terminator is taken from rsplitter: a regex, a single
 * character, or (character 0) runs of blank lines.
 * On success v receives the record text (flagged VF_USER) and RT the
 * terminator text that was matched.
 * Returns 1 when a record was read, 0 when no data is left,
 * -1 when reading failed (ERRNO is set in that case).
 *
 * Buffer layout: m is the buffer start, a (rsm->adv) the offset of the
 * unconsumed data, b = m + a, p (rsm->pos) the offset in b where newly
 * read data is appended; b[p] is kept NUL.
 * so / eo are the offsets in b where the terminator starts / ends.
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[2];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	/* we're using our own buffer since we need access to accumulating
	 * characters
	 */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info;
	rp = 0;

	if (!m)
		m = qrealloc(m, 256, &size);

	/* look for a terminator in the buffered data; while none is found,
	 * read more and try again until the read adds nothing */
	do {
		b = m + a;
		so = eo = p;
		r = 1;
		if (p > 0) {
			if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* accept the match only if it ends before
					 * the end of the buffered data */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single-char terminator: search from pp, the
				 * value p had before the last read; a NUL byte
				 * inside the data ends the record too */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* empty terminator char: skip leading newlines
				 * (rp), then the record ends at "\n\n" and the
				 * terminator covers all following newlines */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* move the unconsumed data to the start of the buffer */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* safe_read() returned a negative value */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp);

	if (p == 0) {
		/* nothing buffered: 1 -> 0 (no more data), 0 -> -1 (error) */
		r--;
	} else {
		/* temporarily NUL-terminate the record, then the terminator,
		 * while copying them out */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* save the buffer state, advanced past the consumed record */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	return r;
}
1927
1928static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1929{
1930	int r = 0;
1931	char c;
1932	const char *s = format;
1933
1934	if (int_as_int && n == (int)n) {
1935		r = snprintf(b, size, "%d", (int)n);
1936	} else {
1937		do { c = *s; } while (c && *++s);
1938		if (strchr("diouxX", c)) {
1939			r = snprintf(b, size, format, (int)n);
1940		} else if (strchr("eEfgG", c)) {
1941			r = snprintf(b, size, format, n);
1942		} else {
1943			syntax_error(EMSG_INV_FMT);
1944		}
1945	}
1946	return r;
1947}
1948
1949/* formatted output into an allocated buffer, return ptr to buffer */
1950static char *awk_printf(node *n)
1951{
1952	char *b = NULL;
1953	char *fmt, *s, *f;
1954	const char *s1;
1955	int i, j, incr, bsize;
1956	char c, c1;
1957	var *v, *arg;
1958
1959	v = nvalloc(1);
1960	fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1961
1962	i = 0;
1963	while (*f) {
1964		s = f;
1965		while (*f && (*f != '%' || *++f == '%'))
1966			f++;
1967		while (*f && !isalpha(*f)) {
1968			if (*f == '*')
1969				syntax_error("%*x formats are not supported");
1970			f++;
1971		}
1972
1973		incr = (f - s) + MAXVARFMT;
1974		b = qrealloc(b, incr + i, &bsize);
1975		c = *f;
1976		if (c != '\0')
1977			f++;
1978		c1 = *f;
1979		*f = '\0';
1980		arg = evaluate(nextarg(&n), v);
1981
1982		j = i;
1983		if (c == 'c' || !c) {
1984			i += sprintf(b+i, s, is_numeric(arg) ?
1985					(char)getvar_i(arg) : *getvar_s(arg));
1986		} else if (c == 's') {
1987			s1 = getvar_s(arg);
1988			b = qrealloc(b, incr+i+strlen(s1), &bsize);
1989			i += sprintf(b+i, s, s1);
1990		} else {
1991			i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1992		}
1993		*f = c1;
1994
1995		/* if there was an error while sprintf, return value is negative */
1996		if (i < j)
1997			i = j;
1998	}
1999
2000	free(fmt);
2001	nvfree(v);
2002	b = xrealloc(b, i + 1);
2003	b[i] = '\0';
2004	return b;
2005}
2006
/* Common substitution routine.
 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
 * store result into (dest), return number of substitutions.
 * If nm = 0, replace all matches.
 * If src or dst is NULL, use $0.
 * If subexp != 0, enable subexpression matching (\1-\9).
 *
 * In repl, '&' inserts the matched text unless it is preceded by an odd
 * number of backslashes. With subexp set, a digit preceded by an odd
 * number of backslashes inserts the corresponding subexpression.
 * Note: the value returned is match_no, the number of matches seen.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;
	match_no = 0;
	regexec_flags = 0;
	/* regex == &sreg means it was compiled here and must be freed */
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
		/* copy everything up to the end of the match first */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;

			/* replace */
			/* back up over the matched text that was just copied */
			residx -= (eo - so);
			/* nbs: number of backslashes directly before *s */
			nbs = 0;
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* drop c itself and half (rounded up) of the
					 * backslashes in front of it */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						/* flips the parity test below for digits */
						nbs++;
					}
					if (nbs % 2) {
						/* literal character */
						resbuf[residx++] = c;
					} else {
						/* insert match (j == 0) or subexpression j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		regexec_flags = REG_NOTBOL;
		sp += eo;
		/* the requested match was replaced (never true for nm == 0) */
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match (e.g. "b*" will match anywhere).
			 * Advance by one char. */
//BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
//it should be able to do it correctly.
			/* Subtle: this is safe only because
			 * qrealloc allocated at least one extra byte */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append the unmatched remainder of the source string */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	//bb_error_msg("end sp:'%s'%p", sp,sp);
	setvar_p(dest ? dest : intvar[F0], resbuf);
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2104
2105static NOINLINE int do_mktime(const char *ds)
2106{
2107	struct tm then;
2108	int count;
2109
2110	/*memset(&then, 0, sizeof(then)); - not needed */
2111	then.tm_isdst = -1; /* default is unknown */
2112
2113	/* manpage of mktime says these fields are ints,
2114	 * so we can sscanf stuff directly into them */
2115	count = sscanf(ds, "%u %u %u %u %u %u %d",
2116		&then.tm_year, &then.tm_mon, &then.tm_mday,
2117		&then.tm_hour, &then.tm_min, &then.tm_sec,
2118		&then.tm_isdst);
2119
2120	if (count < 6
2121	 || (unsigned)then.tm_mon < 1
2122	 || (unsigned)then.tm_year < 1900
2123	) {
2124		return -1;
2125	}
2126
2127	then.tm_mon -= 1;
2128	then.tm_year -= 1900;
2129
2130	return mktime(&then);
2131}
2132
/* Execute the builtin function described by node op and store its result
 * in res (which is also returned).
 * Up to four arguments are collected from op->l.n: an[i] is the argument
 * node, av[i] its evaluated value and as[i] its string value. Which of
 * these are computed for argument i is controlled by bits of op->info
 * (see the loop below); the top two bits of info give the minimum
 * number of arguments. The builtin itself is selected by info & OPNMASK.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tv;
	node *an[4];
	var *av[4];
	const char *as[4];
	regmatch_t pmatch[2];
	regex_t sreg, *re;
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	/* scratch variables holding the evaluated arguments */
	tv = nvalloc(4);
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		/* isr is shifted right once per argument, so these two masks
		 * test a different pair of info bits for each argument */
		if (isr & 0x09000000)
			av[i] = evaluate(an[i], &tv[i]);
		if (isr & 0x08000000)
			as[i] = getvar_s(av[i]);
		isr >>= 1;
	}

	nargs = i;
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	/* arc tangent of arg0/arg1 */
	case B_a2:
#if ENABLE_FEATURE_AWK_LIBM
		setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
#else
		syntax_error(EMSG_NO_MATH);
#endif
		break;

	/* split string arg0 into array arg1; result is the element count */
	case B_sp: {
		char *s, *s1;

		/* separator: third argument (a regex node is used directly,
		 * anything else goes through mk_splitter), else the field
		 * splitter */
		if (nargs > 2) {
			spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
				an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;
		clear_array(iamarray(av[1]));
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	/* substring of arg0 starting at 1-based position arg1,
	 * at most arg2 characters long (to the end if arg2 is absent) */
	case B_ss: {
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp the start offset to [0, l] */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
	 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	/* case conversion of arg0; only the letters a-z / A-Z are touched */
	case B_lo:
	case B_up: {
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			//*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	/* 1-based position of string arg1 within arg0, 0 if not found
	 * (or if arg1 is empty); honours icase */
	case B_ix:
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* this piece of code is terribly slow and
				 * really should be rewritten
				 */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	/* format time arg1 (default: now) using format arg0
	 * (default: "%a %b %d %H:%M:%S %Z %Y") in local time */
	case B_ti:
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		//s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	/* date string arg0 to a time value, see do_mktime() */
	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	/* match arg0 against regex arg1: sets RSTART (1-based, 0 if no
	 * match) and RLENGTH (-1 if no match); result is RSTART */
	case B_ma:
		re = as_regex(an[1], &sreg);
		n = regexec(re, as[0], 1, pmatch, 0);
		if (n == 0) {
			pmatch[0].rm_so++;
			pmatch[0].rm_eo++;
		} else {
			pmatch[0].rm_so = 0;
			pmatch[0].rm_eo = -1;
		}
		setvar_i(newvar("RSTART"), pmatch[0].rm_so);
		setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
		setvar_i(res, pmatch[0].rm_so);
		if (re == &sreg)
			regfree(re);
		break;

	/* substitution with subexpressions enabled: replace match number
	 * arg2 (0 = all) in arg3 (or $0); the new string is the result */
	case B_ge:
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	/* replace all matches in arg2 (or $0) in place;
	 * result is awk_sub()'s return value */
	case B_gs:
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	/* same, but only the first match is replaced */
	case B_su:
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tv);
	return res;
#undef tspl
}
2330
2331/*
2332 * Evaluate node - the heart of the program. Supplied with subtree
2333 * and place where to store result. returns ptr to result.
2334 */
2335#define XC(n) ((n) >> 8)
2336
2337static var *evaluate(node *op, var *res)
2338{
2339/* This procedure is recursive so we should count every byte */
2340#define fnargs (G.evaluate__fnargs)
2341/* seed is initialized to 1 */
2342#define seed   (G.evaluate__seed)
2343#define	sreg   (G.evaluate__sreg)
2344
2345	var *v1;
2346
2347	if (!op)
2348		return setvar_s(res, NULL);
2349
2350	v1 = nvalloc(2);
2351
2352	while (op) {
2353		struct {
2354			var *v;
2355			const char *s;
2356		} L = L; /* for compiler */
2357		struct {
2358			var *v;
2359			const char *s;
2360		} R = R;
2361		double L_d = L_d;
2362		uint32_t opinfo;
2363		int opn;
2364		node *op1;
2365
2366		opinfo = op->info;
2367		opn = (opinfo & OPNMASK);
2368		g_lineno = op->lineno;
2369		op1 = op->l.n;
2370		debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
2371
2372		/* execute inevitable things */
2373		if (opinfo & OF_RES1)
2374			L.v = evaluate(op1, v1);
2375		if (opinfo & OF_RES2)
2376			R.v = evaluate(op->r.n, v1+1);
2377		if (opinfo & OF_STR1) {
2378			L.s = getvar_s(L.v);
2379			debug_printf_eval("L.s:'%s'\n", L.s);
2380		}
2381		if (opinfo & OF_STR2) {
2382			R.s = getvar_s(R.v);
2383			debug_printf_eval("R.s:'%s'\n", R.s);
2384		}
2385		if (opinfo & OF_NUM1) {
2386			L_d = getvar_i(L.v);
2387			debug_printf_eval("L_d:%f\n", L_d);
2388		}
2389
2390		switch (XC(opinfo & OPCLSMASK)) {
2391
2392		/* -- iterative node type -- */
2393
2394		/* test pattern */
2395		case XC( OC_TEST ):
2396			if ((op1->info & OPCLSMASK) == OC_COMMA) {
2397				/* it's range pattern */
2398				if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2399					op->info |= OF_CHECKED;
2400					if (ptest(op1->r.n))
2401						op->info &= ~OF_CHECKED;
2402					op = op->a.n;
2403				} else {
2404					op = op->r.n;
2405				}
2406			} else {
2407				op = ptest(op1) ? op->a.n : op->r.n;
2408			}
2409			break;
2410
2411		/* just evaluate an expression, also used as unconditional jump */
2412		case XC( OC_EXEC ):
2413			break;
2414
2415		/* branch, used in if-else and various loops */
2416		case XC( OC_BR ):
2417			op = istrue(L.v) ? op->a.n : op->r.n;
2418			break;
2419
2420		/* initialize for-in loop */
2421		case XC( OC_WALKINIT ):
2422			hashwalk_init(L.v, iamarray(R.v));
2423			break;
2424
2425		/* get next array item */
2426		case XC( OC_WALKNEXT ):
2427			op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2428			break;
2429
2430		case XC( OC_PRINT ):
2431		case XC( OC_PRINTF ): {
2432			FILE *F = stdout;
2433
2434			if (op->r.n) {
2435				rstream *rsm = newfile(R.s);
2436				if (!rsm->F) {
2437					if (opn == '|') {
2438						rsm->F = popen(R.s, "w");
2439						if (rsm->F == NULL)
2440							bb_perror_msg_and_die("popen");
2441						rsm->is_pipe = 1;
2442					} else {
2443						rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2444					}
2445				}
2446				F = rsm->F;
2447			}
2448
2449			if ((opinfo & OPCLSMASK) == OC_PRINT) {
2450				if (!op1) {
2451					fputs(getvar_s(intvar[F0]), F);
2452				} else {
2453					while (op1) {
2454						var *v = evaluate(nextarg(&op1), v1);
2455						if (v->type & VF_NUMBER) {
2456							fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2457									getvar_i(v), TRUE);
2458							fputs(g_buf, F);
2459						} else {
2460							fputs(getvar_s(v), F);
2461						}
2462
2463						if (op1)
2464							fputs(getvar_s(intvar[OFS]), F);
2465					}
2466				}
2467				fputs(getvar_s(intvar[ORS]), F);
2468
2469			} else {	/* OC_PRINTF */
2470				char *s = awk_printf(op1);
2471				fputs(s, F);
2472				free(s);
2473			}
2474			fflush(F);
2475			break;
2476		}
2477
2478		case XC( OC_DELETE ): {
2479			uint32_t info = op1->info & OPCLSMASK;
2480			var *v;
2481
2482			if (info == OC_VAR) {
2483				v = op1->l.v;
2484			} else if (info == OC_FNARG) {
2485				v = &fnargs[op1->l.aidx];
2486			} else {
2487				syntax_error(EMSG_NOT_ARRAY);
2488			}
2489
2490			if (op1->r.n) {
2491				const char *s;
2492				clrvar(L.v);
2493				s = getvar_s(evaluate(op1->r.n, v1));
2494				hash_remove(iamarray(v), s);
2495			} else {
2496				clear_array(iamarray(v));
2497			}
2498			break;
2499		}
2500
2501		case XC( OC_NEWSOURCE ):
2502			g_progname = op->l.new_progname;
2503			break;
2504
2505		case XC( OC_RETURN ):
2506			copyvar(res, L.v);
2507			break;
2508
2509		case XC( OC_NEXTFILE ):
2510			nextfile = TRUE;
2511		case XC( OC_NEXT ):
2512			nextrec = TRUE;
2513		case XC( OC_DONE ):
2514			clrvar(res);
2515			break;
2516
2517		case XC( OC_EXIT ):
2518			awk_exit(L_d);
2519
2520		/* -- recursive node type -- */
2521
2522		case XC( OC_VAR ):
2523			L.v = op->l.v;
2524			if (L.v == intvar[NF])
2525				split_f0();
2526			goto v_cont;
2527
2528		case XC( OC_FNARG ):
2529			L.v = &fnargs[op->l.aidx];
2530 v_cont:
2531			res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2532			break;
2533
2534		case XC( OC_IN ):
2535			setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2536			break;
2537
2538		case XC( OC_REGEXP ):
2539			op1 = op;
2540			L.s = getvar_s(intvar[F0]);
2541			goto re_cont;
2542
2543		case XC( OC_MATCH ):
2544			op1 = op->r.n;
2545 re_cont:
2546			{
2547				regex_t *re = as_regex(op1, &sreg);
2548				int i = regexec(re, L.s, 0, NULL, 0);
2549				if (re == &sreg)
2550					regfree(re);
2551				setvar_i(res, (i == 0) ^ (opn == '!'));
2552			}
2553			break;
2554
2555		case XC( OC_MOVE ):
2556			debug_printf_eval("MOVE\n");
2557			/* if source is a temporary string, jusk relink it to dest */
2558//Disabled: if R.v is numeric but happens to have cached R.v->string,
2559//then L.v ends up being a string, which is wrong
2560//			if (R.v == v1+1 && R.v->string) {
2561//				res = setvar_p(L.v, R.v->string);
2562//				R.v->string = NULL;
2563//			} else {
2564				res = copyvar(L.v, R.v);
2565//			}
2566			break;
2567
2568		case XC( OC_TERNARY ):
2569			if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2570				syntax_error(EMSG_POSSIBLE_ERROR);
2571			res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2572			break;
2573
2574		case XC( OC_FUNC ): {
2575			var *vbeg, *v;
2576			const char *sv_progname;
2577
2578			if (!op->r.f->body.first)
2579				syntax_error(EMSG_UNDEF_FUNC);
2580
2581			vbeg = v = nvalloc(op->r.f->nargs + 1);
2582			while (op1) {
2583				var *arg = evaluate(nextarg(&op1), v1);
2584				copyvar(v, arg);
2585				v->type |= VF_CHILD;
2586				v->x.parent = arg;
2587				if (++v - vbeg >= op->r.f->nargs)
2588					break;
2589			}
2590
2591			v = fnargs;
2592			fnargs = vbeg;
2593			sv_progname = g_progname;
2594
2595			res = evaluate(op->r.f->body.first, res);
2596
2597			g_progname = sv_progname;
2598			nvfree(fnargs);
2599			fnargs = v;
2600
2601			break;
2602		}
2603
2604		case XC( OC_GETLINE ):
2605		case XC( OC_PGETLINE ): {
2606			rstream *rsm;
2607			int i;
2608
2609			if (op1) {
2610				rsm = newfile(L.s);
2611				if (!rsm->F) {
2612					if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2613						rsm->F = popen(L.s, "r");
2614						rsm->is_pipe = TRUE;
2615					} else {
2616						rsm->F = fopen_for_read(L.s);		/* not xfopen! */
2617					}
2618				}
2619			} else {
2620				if (!iF)
2621					iF = next_input_file();
2622				rsm = iF;
2623			}
2624
2625			if (!rsm->F) {
2626				setvar_i(intvar[ERRNO], errno);
2627				setvar_i(res, -1);
2628				break;
2629			}
2630
2631			if (!op->r.n)
2632				R.v = intvar[F0];
2633
2634			i = awk_getline(rsm, R.v);
2635			if (i > 0 && !op1) {
2636				incvar(intvar[FNR]);
2637				incvar(intvar[NR]);
2638			}
2639			setvar_i(res, i);
2640			break;
2641		}
2642
2643		/* simple builtins */
2644		case XC( OC_FBLTIN ): {
2645			int i;
2646			rstream *rsm;
2647			double R_d = R_d; /* for compiler */
2648
2649			switch (opn) {
2650			case F_in:
2651				R_d = (int)L_d;
2652				break;
2653
2654			case F_rn:
2655				R_d = (double)rand() / (double)RAND_MAX;
2656				break;
2657#if ENABLE_FEATURE_AWK_LIBM
2658			case F_co:
2659				R_d = cos(L_d);
2660				break;
2661
2662			case F_ex:
2663				R_d = exp(L_d);
2664				break;
2665
2666			case F_lg:
2667				R_d = log(L_d);
2668				break;
2669
2670			case F_si:
2671				R_d = sin(L_d);
2672				break;
2673
2674			case F_sq:
2675				R_d = sqrt(L_d);
2676				break;
2677#else
2678			case F_co:
2679			case F_ex:
2680			case F_lg:
2681			case F_si:
2682			case F_sq:
2683				syntax_error(EMSG_NO_MATH);
2684				break;
2685#endif
2686			case F_sr:
2687				R_d = (double)seed;
2688				seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2689				srand(seed);
2690				break;
2691
2692			case F_ti:
2693				R_d = time(NULL);
2694				break;
2695
2696			case F_le:
2697				if (!op1)
2698					L.s = getvar_s(intvar[F0]);
2699				R_d = strlen(L.s);
2700				break;
2701
2702			case F_sy:
2703				fflush_all();
2704				R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2705						? (system(L.s) >> 8) : 0;
2706				break;
2707
2708			case F_ff:
2709				if (!op1) {
2710					fflush(stdout);
2711				} else if (L.s && *L.s) {
2712					rsm = newfile(L.s);
2713					fflush(rsm->F);
2714				} else {
2715					fflush_all();
2716				}
2717				break;
2718
2719			case F_cl:
2720				i = 0;
2721				rsm = (rstream *)hash_search(fdhash, L.s);
2722				if (rsm) {
2723					i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2724					free(rsm->buffer);
2725					hash_remove(fdhash, L.s);
2726				}
2727				if (i != 0)
2728					setvar_i(intvar[ERRNO], errno);
2729				R_d = (double)i;
2730				break;
2731			}
2732			setvar_i(res, R_d);
2733			break;
2734		}
2735
2736		case XC( OC_BUILTIN ):
2737			res = exec_builtin(op, res);
2738			break;
2739
2740		case XC( OC_SPRINTF ):
2741			setvar_p(res, awk_printf(op1));
2742			break;
2743
2744		case XC( OC_UNARY ): {
2745			double Ld, R_d;
2746
2747			Ld = R_d = getvar_i(R.v);
2748			switch (opn) {
2749			case 'P':
2750				Ld = ++R_d;
2751				goto r_op_change;
2752			case 'p':
2753				R_d++;
2754				goto r_op_change;
2755			case 'M':
2756				Ld = --R_d;
2757				goto r_op_change;
2758			case 'm':
2759				R_d--;
2760 r_op_change:
2761				setvar_i(R.v, R_d);
2762				break;
2763			case '!':
2764				Ld = !istrue(R.v);
2765				break;
2766			case '-':
2767				Ld = -R_d;
2768				break;
2769			}
2770			setvar_i(res, Ld);
2771			break;
2772		}
2773
2774		case XC( OC_FIELD ): {
2775			int i = (int)getvar_i(R.v);
2776			if (i == 0) {
2777				res = intvar[F0];
2778			} else {
2779				split_f0();
2780				if (i > nfields)
2781					fsrealloc(i);
2782				res = &Fields[i - 1];
2783			}
2784			break;
2785		}
2786
2787		/* concatenation (" ") and index joining (",") */
2788		case XC( OC_CONCAT ):
2789		case XC( OC_COMMA ): {
2790			const char *sep = "";
2791			if ((opinfo & OPCLSMASK) == OC_COMMA)
2792				sep = getvar_s(intvar[SUBSEP]);
2793			setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2794			break;
2795		}
2796
2797		case XC( OC_LAND ):
2798			setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2799			break;
2800
2801		case XC( OC_LOR ):
2802			setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2803			break;
2804
2805		case XC( OC_BINARY ):
2806		case XC( OC_REPLACE ): {
2807			double R_d = getvar_i(R.v);
2808			debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2809			switch (opn) {
2810			case '+':
2811				L_d += R_d;
2812				break;
2813			case '-':
2814				L_d -= R_d;
2815				break;
2816			case '*':
2817				L_d *= R_d;
2818				break;
2819			case '/':
2820				if (R_d == 0)
2821					syntax_error(EMSG_DIV_BY_ZERO);
2822				L_d /= R_d;
2823				break;
2824			case '&':
2825#if ENABLE_FEATURE_AWK_LIBM
2826				L_d = pow(L_d, R_d);
2827#else
2828				syntax_error(EMSG_NO_MATH);
2829#endif
2830				break;
2831			case '%':
2832				if (R_d == 0)
2833					syntax_error(EMSG_DIV_BY_ZERO);
2834				L_d -= (int)(L_d / R_d) * R_d;
2835				break;
2836			}
2837			debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2838			res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2839			break;
2840		}
2841
2842		case XC( OC_COMPARE ): {
2843			int i = i; /* for compiler */
2844			double Ld;
2845
2846			if (is_numeric(L.v) && is_numeric(R.v)) {
2847				Ld = getvar_i(L.v) - getvar_i(R.v);
2848			} else {
2849				const char *l = getvar_s(L.v);
2850				const char *r = getvar_s(R.v);
2851				Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2852			}
2853			switch (opn & 0xfe) {
2854			case 0:
2855				i = (Ld > 0);
2856				break;
2857			case 2:
2858				i = (Ld >= 0);
2859				break;
2860			case 4:
2861				i = (Ld == 0);
2862				break;
2863			}
2864			setvar_i(res, (i == 0) ^ (opn & 1));
2865			break;
2866		}
2867
2868		default:
2869			syntax_error(EMSG_POSSIBLE_ERROR);
2870		}
2871		if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2872			op = op->a.n;
2873		if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2874			break;
2875		if (nextrec)
2876			break;
2877	} /* while (op) */
2878
2879	nvfree(v1);
2880	return res;
2881#undef fnargs
2882#undef seed
2883#undef sreg
2884}
2885
2886
2887/* -------- main & co. -------- */
2888
2889static int awk_exit(int r)
2890{
2891	var tv;
2892	unsigned i;
2893	hash_item *hi;
2894
2895	zero_out_var(&tv);
2896
2897	if (!exiting) {
2898		exiting = TRUE;
2899		nextrec = FALSE;
2900		evaluate(endseq.first, &tv);
2901	}
2902
2903	/* waiting for children */
2904	for (i = 0; i < fdhash->csize; i++) {
2905		hi = fdhash->items[i];
2906		while (hi) {
2907			if (hi->data.rs.F && hi->data.rs.is_pipe)
2908				pclose(hi->data.rs.F);
2909			hi = hi->next;
2910		}
2911	}
2912
2913	exit(r);
2914}
2915
2916/* if expr looks like "var=value", perform assignment and return 1,
2917 * otherwise return 0 */
2918static int is_assignment(const char *expr)
2919{
2920	char *exprc, *s, *s0, *s1;
2921
2922	exprc = xstrdup(expr);
2923	if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2924		free(exprc);
2925		return FALSE;
2926	}
2927
2928	*s++ = '\0';
2929	s0 = s1 = s;
2930	while (*s)
2931		*s1++ = nextchar(&s);
2932
2933	*s1 = '\0';
2934	setvar_u(newvar(exprc), s0);
2935	free(exprc);
2936	return TRUE;
2937}
2938
2939/* switch to next input file */
2940static rstream *next_input_file(void)
2941{
2942#define rsm          (G.next_input_file__rsm)
2943#define files_happen (G.next_input_file__files_happen)
2944
2945	FILE *F = NULL;
2946	const char *fname, *ind;
2947
2948	if (rsm.F)
2949		fclose(rsm.F);
2950	rsm.F = NULL;
2951	rsm.pos = rsm.adv = 0;
2952
2953	do {
2954		if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2955			if (files_happen)
2956				return NULL;
2957			fname = "-";
2958			F = stdin;
2959		} else {
2960			ind = getvar_s(incvar(intvar[ARGIND]));
2961			fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2962			if (fname && *fname && !is_assignment(fname))
2963				F = xfopen_stdin(fname);
2964		}
2965	} while (!F);
2966
2967	files_happen = TRUE;
2968	setvar_s(intvar[FILENAME], fname);
2969	rsm.F = F;
2970	return &rsm;
2971#undef rsm
2972#undef files_happen
2973}
2974
2975int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2976int awk_main(int argc, char **argv)
2977{
2978	unsigned opt;
2979	char *opt_F, *opt_W;
2980	llist_t *list_v = NULL;
2981	llist_t *list_f = NULL;
2982	int i, j;
2983	var *v;
2984	var tv;
2985	char **envp;
2986	char *vnames = (char *)vNames; /* cheat */
2987	char *vvalues = (char *)vValues;
2988
2989	INIT_G();
2990
2991	/* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2992	 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2993	if (ENABLE_LOCALE_SUPPORT)
2994		setlocale(LC_NUMERIC, "C");
2995
2996	zero_out_var(&tv);
2997
2998	/* allocate global buffer */
2999	g_buf = xmalloc(MAXVARFMT + 1);
3000
3001	vhash = hash_init();
3002	ahash = hash_init();
3003	fdhash = hash_init();
3004	fnhash = hash_init();
3005
3006	/* initialize variables */
3007	for (i = 0; *vnames; i++) {
3008		intvar[i] = v = newvar(nextword(&vnames));
3009		if (*vvalues != '\377')
3010			setvar_s(v, nextword(&vvalues));
3011		else
3012			setvar_i(v, 0);
3013
3014		if (*vnames == '*') {
3015			v->type |= VF_SPECIAL;
3016			vnames++;
3017		}
3018	}
3019
3020	handle_special(intvar[FS]);
3021	handle_special(intvar[RS]);
3022
3023	newfile("/dev/stdin")->F = stdin;
3024	newfile("/dev/stdout")->F = stdout;
3025	newfile("/dev/stderr")->F = stderr;
3026
3027	/* Huh, people report that sometimes environ is NULL. Oh well. */
3028	if (environ) for (envp = environ; *envp; envp++) {
3029		/* environ is writable, thus we don't strdup it needlessly */
3030		char *s = *envp;
3031		char *s1 = strchr(s, '=');
3032		if (s1) {
3033			*s1 = '\0';
3034			/* Both findvar and setvar_u take const char*
3035			 * as 2nd arg -> environment is not trashed */
3036			setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3037			*s1 = '=';
3038		}
3039	}
3040	opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3041	opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3042	argv += optind;
3043	argc -= optind;
3044	if (opt & 0x1)
3045		setvar_s(intvar[FS], opt_F); // -F
3046	while (list_v) { /* -v */
3047		if (!is_assignment(llist_pop(&list_v)))
3048			bb_show_usage();
3049	}
3050	if (list_f) { /* -f */
3051		do {
3052			char *s = NULL;
3053			FILE *from_file;
3054
3055			g_progname = llist_pop(&list_f);
3056			from_file = xfopen_stdin(g_progname);
3057			/* one byte is reserved for some trick in next_token */
3058			for (i = j = 1; j > 0; i += j) {
3059				s = xrealloc(s, i + 4096);
3060				j = fread(s + i, 1, 4094, from_file);
3061			}
3062			s[i] = '\0';
3063			fclose(from_file);
3064			parse_program(s + 1);
3065			free(s);
3066		} while (list_f);
3067		argc++;
3068	} else { // no -f: take program from 1st parameter
3069		if (!argc)
3070			bb_show_usage();
3071		g_progname = "cmd. line";
3072		parse_program(*argv++);
3073	}
3074	if (opt & 0x8) // -W
3075		bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3076
3077	/* fill in ARGV array */
3078	setvar_i(intvar[ARGC], argc);
3079	setari_u(intvar[ARGV], 0, "awk");
3080	i = 0;
3081	while (*argv)
3082		setari_u(intvar[ARGV], ++i, *argv++);
3083
3084	evaluate(beginseq.first, &tv);
3085	if (!mainseq.first && !endseq.first)
3086		awk_exit(EXIT_SUCCESS);
3087
3088	/* input file could already be opened in BEGIN block */
3089	if (!iF)
3090		iF = next_input_file();
3091
3092	/* passing through input files */
3093	while (iF) {
3094		nextfile = FALSE;
3095		setvar_i(intvar[FNR], 0);
3096
3097		while ((i = awk_getline(iF, intvar[F0])) > 0) {
3098			nextrec = FALSE;
3099			incvar(intvar[NR]);
3100			incvar(intvar[FNR]);
3101			evaluate(mainseq.first, &tv);
3102
3103			if (nextfile)
3104				break;
3105		}
3106
3107		if (i < 0)
3108			syntax_error(strerror(errno));
3109
3110		iF = next_input_file();
3111	}
3112
3113	awk_exit(EXIT_SUCCESS);
3114	/*return 0;*/
3115}
3116