1/* vi: set sw=4 ts=4: */ 2/* 3 * awk implementation for busybox 4 * 5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua> 6 * 7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. 8 */ 9 10#include "libbb.h" 11#include "xregex.h" 12#include <math.h> 13 14/* This is a NOEXEC applet. Be very careful! */ 15 16 17/* If you comment out one of these below, it will be #defined later 18 * to perform debug printfs to stderr: */ 19#define debug_printf_walker(...) do {} while (0) 20#define debug_printf_eval(...) do {} while (0) 21 22#ifndef debug_printf_walker 23# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__)) 24#endif 25#ifndef debug_printf_eval 26# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__)) 27#endif 28 29 30 31#define MAXVARFMT 240 32#define MINNVBLOCK 64 33 34/* variable flags */ 35#define VF_NUMBER 0x0001 /* 1 = primary type is number */ 36#define VF_ARRAY 0x0002 /* 1 = it's an array */ 37 38#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */ 39#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ 40#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ 41#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ 42#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ 43#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ 44#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ 45 46/* these flags are static, don't change them when value is changed */ 47#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY) 48 49typedef struct walker_list { 50 char *end; 51 char *cur; 52 struct walker_list *prev; 53 char wbuf[1]; 54} walker_list; 55 56/* Variable */ 57typedef struct var_s { 58 unsigned type; /* flags */ 59 double number; 60 char *string; 61 union { 62 int aidx; /* func arg idx (for compilation stage) */ 63 struct xhash_s *array; /* array ptr */ 64 struct var_s *parent; /* for 
func args, ptr to actual parameter */ 65 walker_list *walker; /* list of array elements (for..in) */ 66 } x; 67} var; 68 69/* Node chain (pattern-action chain, BEGIN, END, function bodies) */ 70typedef struct chain_s { 71 struct node_s *first; 72 struct node_s *last; 73 const char *programname; 74} chain; 75 76/* Function */ 77typedef struct func_s { 78 unsigned nargs; 79 struct chain_s body; 80} func; 81 82/* I/O stream */ 83typedef struct rstream_s { 84 FILE *F; 85 char *buffer; 86 int adv; 87 int size; 88 int pos; 89 smallint is_pipe; 90} rstream; 91 92typedef struct hash_item_s { 93 union { 94 struct var_s v; /* variable/array hash */ 95 struct rstream_s rs; /* redirect streams hash */ 96 struct func_s f; /* functions hash */ 97 } data; 98 struct hash_item_s *next; /* next in chain */ 99 char name[1]; /* really it's longer */ 100} hash_item; 101 102typedef struct xhash_s { 103 unsigned nel; /* num of elements */ 104 unsigned csize; /* current hash size */ 105 unsigned nprime; /* next hash size in PRIMES[] */ 106 unsigned glen; /* summary length of item names */ 107 struct hash_item_s **items; 108} xhash; 109 110/* Tree node */ 111typedef struct node_s { 112 uint32_t info; 113 unsigned lineno; 114 union { 115 struct node_s *n; 116 var *v; 117 int aidx; 118 char *new_progname; 119 regex_t *re; 120 } l; 121 union { 122 struct node_s *n; 123 regex_t *ire; 124 func *f; 125 } r; 126 union { 127 struct node_s *n; 128 } a; 129} node; 130 131/* Block of temporary variables */ 132typedef struct nvblock_s { 133 int size; 134 var *pos; 135 struct nvblock_s *prev; 136 struct nvblock_s *next; 137 var nv[]; 138} nvblock; 139 140typedef struct tsplitter_s { 141 node n; 142 regex_t re[2]; 143} tsplitter; 144 145/* simple token classes */ 146/* Order and hex values are very important!!! 
See next_token() */ 147#define TC_SEQSTART 1 /* ( */ 148#define TC_SEQTERM (1 << 1) /* ) */ 149#define TC_REGEXP (1 << 2) /* /.../ */ 150#define TC_OUTRDR (1 << 3) /* | > >> */ 151#define TC_UOPPOST (1 << 4) /* unary postfix operator */ 152#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */ 153#define TC_BINOPX (1 << 6) /* two-opnd operator */ 154#define TC_IN (1 << 7) 155#define TC_COMMA (1 << 8) 156#define TC_PIPE (1 << 9) /* input redirection pipe */ 157#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */ 158#define TC_ARRTERM (1 << 11) /* ] */ 159#define TC_GRPSTART (1 << 12) /* { */ 160#define TC_GRPTERM (1 << 13) /* } */ 161#define TC_SEMICOL (1 << 14) 162#define TC_NEWLINE (1 << 15) 163#define TC_STATX (1 << 16) /* ctl statement (for, next...) */ 164#define TC_WHILE (1 << 17) 165#define TC_ELSE (1 << 18) 166#define TC_BUILTIN (1 << 19) 167#define TC_GETLINE (1 << 20) 168#define TC_FUNCDECL (1 << 21) /* `function' `func' */ 169#define TC_BEGIN (1 << 22) 170#define TC_END (1 << 23) 171#define TC_EOF (1 << 24) 172#define TC_VARIABLE (1 << 25) 173#define TC_ARRAY (1 << 26) 174#define TC_FUNCTION (1 << 27) 175#define TC_STRING (1 << 28) 176#define TC_NUMBER (1 << 29) 177 178#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) 179 180/* combined token classes */ 181#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) 182#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) 183#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ 184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) 185 186#define TC_STATEMNT (TC_STATX | TC_WHILE) 187#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) 188 189/* word tokens, cannot mean something else if not expected */ 190#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ 191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) 192 193/* discard newlines after these */ 194#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ 195 | TC_BINOP | TC_OPTERM) 196 197/* what can expression begin with */ 198#define TC_OPSEQ 
(TC_OPERAND | TC_UOPPRE | TC_REGEXP) 199/* what can group begin with */ 200#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) 201 202/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ 203/* operator is inserted between them */ 204#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ 205 | TC_STRING | TC_NUMBER | TC_UOPPOST) 206#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) 207 208#define OF_RES1 0x010000 209#define OF_RES2 0x020000 210#define OF_STR1 0x040000 211#define OF_STR2 0x080000 212#define OF_NUM1 0x100000 213#define OF_CHECKED 0x200000 214 215/* combined operator flags */ 216#define xx 0 217#define xV OF_RES2 218#define xS (OF_RES2 | OF_STR2) 219#define Vx OF_RES1 220#define VV (OF_RES1 | OF_RES2) 221#define Nx (OF_RES1 | OF_NUM1) 222#define NV (OF_RES1 | OF_NUM1 | OF_RES2) 223#define Sx (OF_RES1 | OF_STR1) 224#define SV (OF_RES1 | OF_STR1 | OF_RES2) 225#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2) 226 227#define OPCLSMASK 0xFF00 228#define OPNMASK 0x007F 229 230/* operator priority is a highest byte (even: r->l, odd: l->r grouping) 231 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, 232 * n - min. 
number of args, vN - resolve Nth arg to var, sN - resolve to string 233 */ 234#define P(x) (x << 24) 235#define PRIMASK 0x7F000000 236#define PRIMASK2 0x7E000000 237 238/* Operation classes */ 239 240#define SHIFT_TIL_THIS 0x0600 241#define RECUR_FROM_THIS 0x1000 242 243enum { 244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300, 245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600, 246 247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900, 248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00, 249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00, 250 251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200, 252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500, 253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800, 254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00, 255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00, 256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100, 257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400, 258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700, 259 OC_DONE = 0x2800, 260 261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200, 262 ST_WHILE = 0x3300 263}; 264 265/* simple builtins */ 266enum { 267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr, 268 F_ti, F_le, F_sy, F_ff, F_cl 269}; 270 271/* builtins */ 272enum { 273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up, 274 B_ge, B_gs, B_su, 275 B_an, B_co, B_ls, B_or, B_rs, B_xo, 276}; 277 278/* tokens and their corresponding info values */ 279 280#define NTC "\377" /* switch to next token class (tc<<1) */ 281#define NTCC '\377' 282 283#define OC_B OC_BUILTIN 284 285static const char tokenlist[] ALIGN1 = 286 "\1(" NTC 287 "\1)" NTC 288 "\1/" NTC /* REGEXP */ 289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */ 290 "\2++" "\2--" NTC /* UOPPOST */ 291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ 292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ 293 "\2*=" "\2/=" "\2%=" "\2^=" 294 "\1+" "\1-" "\3**=" "\2**" 295 "\1/" "\1%" 
"\1^" "\1*" 296 "\2!=" "\2>=" "\2<=" "\1>" 297 "\1<" "\2!~" "\1~" "\2&&" 298 "\2||" "\1?" "\1:" NTC 299 "\2in" NTC 300 "\1," NTC 301 "\1|" NTC 302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ 303 "\1]" NTC 304 "\1{" NTC 305 "\1}" NTC 306 "\1;" NTC 307 "\1\n" NTC 308 "\2if" "\2do" "\3for" "\5break" /* STATX */ 309 "\10continue" "\6delete" "\5print" 310 "\6printf" "\4next" "\10nextfile" 311 "\6return" "\4exit" NTC 312 "\5while" NTC 313 "\4else" NTC 314 315 "\3and" "\5compl" "\6lshift" "\2or" 316 "\6rshift" "\3xor" 317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ 318 "\3cos" "\3exp" "\3int" "\3log" 319 "\4rand" "\3sin" "\4sqrt" "\5srand" 320 "\6gensub" "\4gsub" "\5index" "\6length" 321 "\5match" "\5split" "\7sprintf" "\3sub" 322 "\6substr" "\7systime" "\10strftime" "\6mktime" 323 "\7tolower" "\7toupper" NTC 324 "\7getline" NTC 325 "\4func" "\10function" NTC 326 "\5BEGIN" NTC 327 "\3END" "\0" 328 ; 329 330static const uint32_t tokeninfo[] = { 331 0, 332 0, 333 OC_REGEXP, 334 xS|'a', xS|'w', xS|'|', 335 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', 336 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', 337 OC_FIELD|xV|P(5), 338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), 339 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', 340 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', 341 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', 342 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', 343 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', 344 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', 345 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', 346 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, 347 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, 348 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', 349 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), 350 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', 351 OC_COLON|xx|P(67)|':', 352 OC_IN|SV|P(49), 353 OC_COMMA|SS|P(80), 354 OC_PGETLINE|SV|P(37), 355 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', 356 OC_UNARY|xV|P(19)|'!', 357 0, 358 0, 359 0, 360 0, 361 0, 362 
ST_IF, ST_DO, ST_FOR, OC_BREAK, 363 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, 364 OC_PRINTF, OC_NEXT, OC_NEXTFILE, 365 OC_RETURN|Vx, OC_EXIT|Nx, 366 ST_WHILE, 367 0, 368 369 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), 370 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), 371 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), 372 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, 373 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, 374 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, 375 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), 376 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), 377 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), 378 OC_GETLINE|SV|P(0), 379 0, 0, 380 0, 381 0 382}; 383 384/* internal variable names and their initial values */ 385/* asterisk marks SPECIAL vars; $ is just no-named Field0 */ 386enum { 387 CONVFMT, OFMT, FS, OFS, 388 ORS, RS, RT, FILENAME, 389 SUBSEP, F0, ARGIND, ARGC, 390 ARGV, ERRNO, FNR, NR, 391 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS 392}; 393 394static const char vNames[] ALIGN1 = 395 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0" 396 "ORS\0" "RS\0*" "RT\0" "FILENAME\0" 397 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0" 398 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0" 399 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0"; 400 401static const char vValues[] ALIGN1 = 402 "%.6g\0" "%.6g\0" " \0" " \0" 403 "\n\0" "\n\0" "\0" "\0" 404 "\034\0" "\0" "\377"; 405 406/* hash size may grow to these values */ 407#define FIRST_PRIME 61 408static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 }; 409 410 411/* Globals. Split in two parts so that first one is addressed 412 * with (mostly short) negative offsets. 413 * NB: it's unsafe to put members of type "double" 414 * into globals2 (gcc may fail to align them). 
415 */ 416struct globals { 417 double t_double; 418 chain beginseq, mainseq, endseq; 419 chain *seq; 420 node *break_ptr, *continue_ptr; 421 rstream *iF; 422 xhash *vhash, *ahash, *fdhash, *fnhash; 423 const char *g_progname; 424 int g_lineno; 425 int nfields; 426 int maxfields; /* used in fsrealloc() only */ 427 var *Fields; 428 nvblock *g_cb; 429 char *g_pos; 430 char *g_buf; 431 smallint icase; 432 smallint exiting; 433 smallint nextrec; 434 smallint nextfile; 435 smallint is_f0_split; 436}; 437struct globals2 { 438 uint32_t t_info; /* often used */ 439 uint32_t t_tclass; 440 char *t_string; 441 int t_lineno; 442 int t_rollback; 443 444 var *intvar[NUM_INTERNAL_VARS]; /* often used */ 445 446 /* former statics from various functions */ 447 char *split_f0__fstrings; 448 449 uint32_t next_token__save_tclass; 450 uint32_t next_token__save_info; 451 uint32_t next_token__ltclass; 452 smallint next_token__concat_inserted; 453 454 smallint next_input_file__files_happen; 455 rstream next_input_file__rsm; 456 457 var *evaluate__fnargs; 458 unsigned evaluate__seed; 459 regex_t evaluate__sreg; 460 461 var ptest__v; 462 463 tsplitter exec_builtin__tspl; 464 465 /* biggest and least used members go last */ 466 tsplitter fsplitter, rsplitter; 467}; 468#define G1 (ptr_to_globals[-1]) 469#define G (*(struct globals2 *)ptr_to_globals) 470/* For debug. 
nm --size-sort awk.o | grep -vi ' [tr] ' */ 471/*char G1size[sizeof(G1)]; - 0x74 */ 472/*char Gsize[sizeof(G)]; - 0x1c4 */ 473/* Trying to keep most of members accessible with short offsets: */ 474/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ 475#define t_double (G1.t_double ) 476#define beginseq (G1.beginseq ) 477#define mainseq (G1.mainseq ) 478#define endseq (G1.endseq ) 479#define seq (G1.seq ) 480#define break_ptr (G1.break_ptr ) 481#define continue_ptr (G1.continue_ptr) 482#define iF (G1.iF ) 483#define vhash (G1.vhash ) 484#define ahash (G1.ahash ) 485#define fdhash (G1.fdhash ) 486#define fnhash (G1.fnhash ) 487#define g_progname (G1.g_progname ) 488#define g_lineno (G1.g_lineno ) 489#define nfields (G1.nfields ) 490#define maxfields (G1.maxfields ) 491#define Fields (G1.Fields ) 492#define g_cb (G1.g_cb ) 493#define g_pos (G1.g_pos ) 494#define g_buf (G1.g_buf ) 495#define icase (G1.icase ) 496#define exiting (G1.exiting ) 497#define nextrec (G1.nextrec ) 498#define nextfile (G1.nextfile ) 499#define is_f0_split (G1.is_f0_split ) 500#define t_info (G.t_info ) 501#define t_tclass (G.t_tclass ) 502#define t_string (G.t_string ) 503#define t_lineno (G.t_lineno ) 504#define t_rollback (G.t_rollback ) 505#define intvar (G.intvar ) 506#define fsplitter (G.fsplitter ) 507#define rsplitter (G.rsplitter ) 508#define INIT_G() do { \ 509 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ 510 G.next_token__ltclass = TC_OPTERM; \ 511 G.evaluate__seed = 1; \ 512} while (0) 513 514 515/* function prototypes */ 516static void handle_special(var *); 517static node *parse_expr(uint32_t); 518static void chain_group(void); 519static var *evaluate(node *, var *); 520static rstream *next_input_file(void); 521static int fmt_num(char *, int, const char *, double, int); 522static int awk_exit(int) NORETURN; 523 524/* ---- error handling ---- */ 525 526static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; 527static const 
char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; 528static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; 529static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; 530static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier"; 531static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin"; 532static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array"; 533static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error"; 534static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; 535#if !ENABLE_FEATURE_AWK_LIBM 536static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; 537#endif 538 539static void zero_out_var(var *vp) 540{ 541 memset(vp, 0, sizeof(*vp)); 542} 543 544static void syntax_error(const char *message) NORETURN; 545static void syntax_error(const char *message) 546{ 547 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); 548} 549 550/* ---- hash stuff ---- */ 551 552static unsigned hashidx(const char *name) 553{ 554 unsigned idx = 0; 555 556 while (*name) 557 idx = *name++ + (idx << 6) - idx; 558 return idx; 559} 560 561/* create new hash */ 562static xhash *hash_init(void) 563{ 564 xhash *newhash; 565 566 newhash = xzalloc(sizeof(*newhash)); 567 newhash->csize = FIRST_PRIME; 568 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0])); 569 570 return newhash; 571} 572 573/* find item in hash, return ptr to data, NULL if not found */ 574static void *hash_search(xhash *hash, const char *name) 575{ 576 hash_item *hi; 577 578 hi = hash->items[hashidx(name) % hash->csize]; 579 while (hi) { 580 if (strcmp(hi->name, name) == 0) 581 return &hi->data; 582 hi = hi->next; 583 } 584 return NULL; 585} 586 587/* grow hash if it becomes too big */ 588static void hash_rebuild(xhash *hash) 589{ 590 unsigned newsize, i, idx; 591 hash_item **newitems, *hi, *thi; 592 593 if (hash->nprime == ARRAY_SIZE(PRIMES)) 594 return; 595 596 newsize = 
PRIMES[hash->nprime++]; 597 newitems = xzalloc(newsize * sizeof(newitems[0])); 598 599 for (i = 0; i < hash->csize; i++) { 600 hi = hash->items[i]; 601 while (hi) { 602 thi = hi; 603 hi = thi->next; 604 idx = hashidx(thi->name) % newsize; 605 thi->next = newitems[idx]; 606 newitems[idx] = thi; 607 } 608 } 609 610 free(hash->items); 611 hash->csize = newsize; 612 hash->items = newitems; 613} 614 615/* find item in hash, add it if necessary. Return ptr to data */ 616static void *hash_find(xhash *hash, const char *name) 617{ 618 hash_item *hi; 619 unsigned idx; 620 int l; 621 622 hi = hash_search(hash, name); 623 if (!hi) { 624 if (++hash->nel / hash->csize > 10) 625 hash_rebuild(hash); 626 627 l = strlen(name) + 1; 628 hi = xzalloc(sizeof(*hi) + l); 629 strcpy(hi->name, name); 630 631 idx = hashidx(name) % hash->csize; 632 hi->next = hash->items[idx]; 633 hash->items[idx] = hi; 634 hash->glen += l; 635 } 636 return &hi->data; 637} 638 639#define findvar(hash, name) ((var*) hash_find((hash), (name))) 640#define newvar(name) ((var*) hash_find(vhash, (name))) 641#define newfile(name) ((rstream*)hash_find(fdhash, (name))) 642#define newfunc(name) ((func*) hash_find(fnhash, (name))) 643 644static void hash_remove(xhash *hash, const char *name) 645{ 646 hash_item *hi, **phi; 647 648 phi = &hash->items[hashidx(name) % hash->csize]; 649 while (*phi) { 650 hi = *phi; 651 if (strcmp(hi->name, name) == 0) { 652 hash->glen -= (strlen(name) + 1); 653 hash->nel--; 654 *phi = hi->next; 655 free(hi); 656 break; 657 } 658 phi = &hi->next; 659 } 660} 661 662/* ------ some useful functions ------ */ 663 664static char *skip_spaces(char *p) 665{ 666 while (1) { 667 if (*p == '\\' && p[1] == '\n') { 668 p++; 669 t_lineno++; 670 } else if (*p != ' ' && *p != '\t') { 671 break; 672 } 673 p++; 674 } 675 return p; 676} 677 678/* returns old *s, advances *s past word and terminating NUL */ 679static char *nextword(char **s) 680{ 681 char *p = *s; 682 while (*(*s)++ != '\0') 683 continue; 684 
return p; 685} 686 687static char nextchar(char **s) 688{ 689 char c, *pps; 690 691 c = *(*s)++; 692 pps = *s; 693 if (c == '\\') 694 c = bb_process_escape_sequence((const char**)s); 695 if (c == '\\' && *s == pps) 696 c = *(*s)++; 697 return c; 698} 699 700static ALWAYS_INLINE int isalnum_(int c) 701{ 702 return (isalnum(c) || c == '_'); 703} 704 705static double my_strtod(char **pp) 706{ 707 char *cp = *pp; 708#if ENABLE_DESKTOP 709 if (cp[0] == '0') { 710 /* Might be hex or octal integer: 0x123abc or 07777 */ 711 char c = (cp[1] | 0x20); 712 if (c == 'x' || isdigit(cp[1])) { 713 unsigned long long ull = strtoull(cp, pp, 0); 714 if (c == 'x') 715 return ull; 716 c = **pp; 717 if (!isdigit(c) && c != '.') 718 return ull; 719 /* else: it may be a floating number. Examples: 720 * 009.123 (*pp points to '9') 721 * 000.123 (*pp points to '.') 722 * fall through to strtod. 723 */ 724 } 725 } 726#endif 727 return strtod(cp, pp); 728} 729 730/* -------- working with variables (set/get/copy/etc) -------- */ 731 732static xhash *iamarray(var *v) 733{ 734 var *a = v; 735 736 while (a->type & VF_CHILD) 737 a = a->x.parent; 738 739 if (!(a->type & VF_ARRAY)) { 740 a->type |= VF_ARRAY; 741 a->x.array = hash_init(); 742 } 743 return a->x.array; 744} 745 746static void clear_array(xhash *array) 747{ 748 unsigned i; 749 hash_item *hi, *thi; 750 751 for (i = 0; i < array->csize; i++) { 752 hi = array->items[i]; 753 while (hi) { 754 thi = hi; 755 hi = hi->next; 756 free(thi->data.v.string); 757 free(thi); 758 } 759 array->items[i] = NULL; 760 } 761 array->glen = array->nel = 0; 762} 763 764/* clear a variable */ 765static var *clrvar(var *v) 766{ 767 if (!(v->type & VF_FSTR)) 768 free(v->string); 769 770 v->type &= VF_DONTTOUCH; 771 v->type |= VF_DIRTY; 772 v->string = NULL; 773 return v; 774} 775 776/* assign string value to variable */ 777static var *setvar_p(var *v, char *value) 778{ 779 clrvar(v); 780 v->string = value; 781 handle_special(v); 782 return v; 783} 784 785/* same 
as setvar_p but make a copy of string */ 786static var *setvar_s(var *v, const char *value) 787{ 788 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL); 789} 790 791/* same as setvar_s but sets USER flag */ 792static var *setvar_u(var *v, const char *value) 793{ 794 v = setvar_s(v, value); 795 v->type |= VF_USER; 796 return v; 797} 798 799/* set array element to user string */ 800static void setari_u(var *a, int idx, const char *s) 801{ 802 var *v; 803 804 v = findvar(iamarray(a), itoa(idx)); 805 setvar_u(v, s); 806} 807 808/* assign numeric value to variable */ 809static var *setvar_i(var *v, double value) 810{ 811 clrvar(v); 812 v->type |= VF_NUMBER; 813 v->number = value; 814 handle_special(v); 815 return v; 816} 817 818static const char *getvar_s(var *v) 819{ 820 /* if v is numeric and has no cached string, convert it to string */ 821 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { 822 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); 823 v->string = xstrdup(g_buf); 824 v->type |= VF_CACHED; 825 } 826 return (v->string == NULL) ? "" : v->string; 827} 828 829static double getvar_i(var *v) 830{ 831 char *s; 832 833 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) { 834 v->number = 0; 835 s = v->string; 836 if (s && *s) { 837 debug_printf_eval("getvar_i: '%s'->", s); 838 v->number = my_strtod(&s); 839 debug_printf_eval("%f (s:'%s')\n", v->number, s); 840 if (v->type & VF_USER) { 841 s = skip_spaces(s); 842 if (*s != '\0') 843 v->type &= ~VF_USER; 844 } 845 } else { 846 debug_printf_eval("getvar_i: '%s'->zero\n", s); 847 v->type &= ~VF_USER; 848 } 849 v->type |= VF_CACHED; 850 } 851 debug_printf_eval("getvar_i: %f\n", v->number); 852 return v->number; 853} 854 855/* Used for operands of bitwise ops */ 856static unsigned long getvar_i_int(var *v) 857{ 858 double d = getvar_i(v); 859 860 /* Casting doubles to longs is undefined for values outside 861 * of target type range. 
Try to widen it as much as possible */ 862 if (d >= 0) 863 return (unsigned long)d; 864 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */ 865 return - (long) (unsigned long) (-d); 866} 867 868static var *copyvar(var *dest, const var *src) 869{ 870 if (dest != src) { 871 clrvar(dest); 872 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR)); 873 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string); 874 dest->number = src->number; 875 if (src->string) 876 dest->string = xstrdup(src->string); 877 } 878 handle_special(dest); 879 return dest; 880} 881 882static var *incvar(var *v) 883{ 884 return setvar_i(v, getvar_i(v) + 1.0); 885} 886 887/* return true if v is number or numeric string */ 888static int is_numeric(var *v) 889{ 890 getvar_i(v); 891 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY)); 892} 893 894/* return 1 when value of v corresponds to true, 0 otherwise */ 895static int istrue(var *v) 896{ 897 if (is_numeric(v)) 898 return (v->number != 0); 899 return (v->string && v->string[0]); 900} 901 902/* temporary variables allocator. Last allocated should be first freed */ 903static var *nvalloc(int n) 904{ 905 nvblock *pb = NULL; 906 var *v, *r; 907 int size; 908 909 while (g_cb) { 910 pb = g_cb; 911 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) 912 break; 913 g_cb = g_cb->next; 914 } 915 916 if (!g_cb) { 917 size = (n <= MINNVBLOCK) ? 
MINNVBLOCK : n; 918 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); 919 g_cb->size = size; 920 g_cb->pos = g_cb->nv; 921 g_cb->prev = pb; 922 /*g_cb->next = NULL; - xzalloc did it */ 923 if (pb) 924 pb->next = g_cb; 925 } 926 927 v = r = g_cb->pos; 928 g_cb->pos += n; 929 930 while (v < g_cb->pos) { 931 v->type = 0; 932 v->string = NULL; 933 v++; 934 } 935 936 return r; 937} 938 939static void nvfree(var *v) 940{ 941 var *p; 942 943 if (v < g_cb->nv || v >= g_cb->pos) 944 syntax_error(EMSG_INTERNAL_ERROR); 945 946 for (p = v; p < g_cb->pos; p++) { 947 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { 948 clear_array(iamarray(p)); 949 free(p->x.array->items); 950 free(p->x.array); 951 } 952 if (p->type & VF_WALK) { 953 walker_list *n; 954 walker_list *w = p->x.walker; 955 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); 956 p->x.walker = NULL; 957 while (w) { 958 n = w->prev; 959 debug_printf_walker(" free(%p)\n", w); 960 free(w); 961 w = n; 962 } 963 } 964 clrvar(p); 965 } 966 967 g_cb->pos = v; 968 while (g_cb->prev && g_cb->pos == g_cb->nv) { 969 g_cb = g_cb->prev; 970 } 971} 972 973/* ------- awk program text parsing ------- */ 974 975/* Parse next token pointed by global pos, place results into global ttt. 976 * If token isn't expected, give away. 
Return token class 977 */ 978static uint32_t next_token(uint32_t expected) 979{ 980#define concat_inserted (G.next_token__concat_inserted) 981#define save_tclass (G.next_token__save_tclass) 982#define save_info (G.next_token__save_info) 983/* Initialized to TC_OPTERM: */ 984#define ltclass (G.next_token__ltclass) 985 986 char *p, *s; 987 const char *tl; 988 uint32_t tc; 989 const uint32_t *ti; 990 991 if (t_rollback) { 992 t_rollback = FALSE; 993 994 } else if (concat_inserted) { 995 concat_inserted = FALSE; 996 t_tclass = save_tclass; 997 t_info = save_info; 998 999 } else { 1000 p = g_pos; 1001 readnext: 1002 p = skip_spaces(p); 1003 g_lineno = t_lineno; 1004 if (*p == '#') 1005 while (*p != '\n' && *p != '\0') 1006 p++; 1007 1008 if (*p == '\n') 1009 t_lineno++; 1010 1011 if (*p == '\0') { 1012 tc = TC_EOF; 1013 1014 } else if (*p == '\"') { 1015 /* it's a string */ 1016 t_string = s = ++p; 1017 while (*p != '\"') { 1018 char *pp = p; 1019 if (*p == '\0' || *p == '\n') 1020 syntax_error(EMSG_UNEXP_EOS); 1021 *s++ = nextchar(&pp); 1022 p = pp; 1023 } 1024 p++; 1025 *s = '\0'; 1026 tc = TC_STRING; 1027 1028 } else if ((expected & TC_REGEXP) && *p == '/') { 1029 /* it's regexp */ 1030 t_string = s = ++p; 1031 while (*p != '/') { 1032 if (*p == '\0' || *p == '\n') 1033 syntax_error(EMSG_UNEXP_EOS); 1034 *s = *p++; 1035 if (*s++ == '\\') { 1036 char *pp = p; 1037 s[-1] = bb_process_escape_sequence((const char **)&pp); 1038 if (*p == '\\') 1039 *s++ = '\\'; 1040 if (pp == p) 1041 *s++ = *p++; 1042 else 1043 p = pp; 1044 } 1045 } 1046 p++; 1047 *s = '\0'; 1048 tc = TC_REGEXP; 1049 1050 } else if (*p == '.' 
|| isdigit(*p)) { 1051 /* it's a number */ 1052 char *pp = p; 1053 t_double = my_strtod(&pp); 1054 p = pp; 1055 if (*p == '.') 1056 syntax_error(EMSG_UNEXP_TOKEN); 1057 tc = TC_NUMBER; 1058 1059 } else { 1060 /* search for something known */ 1061 tl = tokenlist; 1062 tc = 0x00000001; 1063 ti = tokeninfo; 1064 while (*tl) { 1065 int l = (unsigned char) *tl++; 1066 if (l == (unsigned char) NTCC) { 1067 tc <<= 1; 1068 continue; 1069 } 1070 /* if token class is expected, 1071 * token matches, 1072 * and it's not a longer word, 1073 */ 1074 if ((tc & (expected | TC_WORD | TC_NEWLINE)) 1075 && strncmp(p, tl, l) == 0 1076 && !((tc & TC_WORD) && isalnum_(p[l])) 1077 ) { 1078 /* then this is what we are looking for */ 1079 t_info = *ti; 1080 p += l; 1081 goto token_found; 1082 } 1083 ti++; 1084 tl += l; 1085 } 1086 /* not a known token */ 1087 1088 /* is it a name? (var/array/function) */ 1089 if (!isalnum_(*p)) 1090 syntax_error(EMSG_UNEXP_TOKEN); /* no */ 1091 /* yes */ 1092 t_string = --p; 1093 while (isalnum_(*++p)) { 1094 p[-1] = *p; 1095 } 1096 p[-1] = '\0'; 1097 tc = TC_VARIABLE; 1098 /* also consume whitespace between functionname and bracket */ 1099 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) 1100 p = skip_spaces(p); 1101 if (*p == '(') { 1102 tc = TC_FUNCTION; 1103 } else { 1104 if (*p == '[') { 1105 p++; 1106 tc = TC_ARRAY; 1107 } 1108 } 1109 token_found: ; 1110 } 1111 g_pos = p; 1112 1113 /* skipping newlines in some cases */ 1114 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) 1115 goto readnext; 1116 1117 /* insert concatenation operator when needed */ 1118 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) { 1119 concat_inserted = TRUE; 1120 save_tclass = tc; 1121 save_info = t_info; 1122 tc = TC_BINOP; 1123 t_info = OC_CONCAT | SS | P(35); 1124 } 1125 1126 t_tclass = tc; 1127 } 1128 ltclass = t_tclass; 1129 1130 /* Are we ready for this? 
*/ 1131 if (!(ltclass & expected)) 1132 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? 1133 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); 1134 1135 return ltclass; 1136#undef concat_inserted 1137#undef save_tclass 1138#undef save_info 1139#undef ltclass 1140} 1141 1142static void rollback_token(void) 1143{ 1144 t_rollback = TRUE; 1145} 1146 1147static node *new_node(uint32_t info) 1148{ 1149 node *n; 1150 1151 n = xzalloc(sizeof(node)); 1152 n->info = info; 1153 n->lineno = g_lineno; 1154 return n; 1155} 1156 1157static void mk_re_node(const char *s, node *n, regex_t *re) 1158{ 1159 n->info = OC_REGEXP; 1160 n->l.re = re; 1161 n->r.ire = re + 1; 1162 xregcomp(re, s, REG_EXTENDED); 1163 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); 1164} 1165 1166static node *condition(void) 1167{ 1168 next_token(TC_SEQSTART); 1169 return parse_expr(TC_SEQTERM); 1170} 1171 1172/* parse expression terminated by given argument, return ptr 1173 * to built subtree. Terminator is eaten by parse_expr */ 1174static node *parse_expr(uint32_t iexp) 1175{ 1176 node sn; 1177 node *cn = &sn; 1178 node *vn, *glptr; 1179 uint32_t tc, xtc; 1180 var *v; 1181 1182 sn.info = PRIMASK; 1183 sn.r.n = glptr = NULL; 1184 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; 1185 1186 while (!((tc = next_token(xtc)) & iexp)) { 1187 1188 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { 1189 /* input redirection (<) attached to glptr node */ 1190 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); 1191 cn->a.n = glptr; 1192 xtc = TC_OPERAND | TC_UOPPRE; 1193 glptr = NULL; 1194 1195 } else if (tc & (TC_BINOP | TC_UOPPOST)) { 1196 /* for binary and postfix-unary operators, jump back over 1197 * previous operators with higher priority */ 1198 vn = cn; 1199 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) 1200 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) 1201 ) { 1202 vn = vn->a.n; 1203 } 1204 if ((t_info & OPCLSMASK) == OC_TERNARY) 1205 t_info += P(6); 1206 cn = vn->a.n->r.n = 
new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				/* binary operator: what was parsed so far is its left operand */
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* it's a pipe */
					next_token(TC_GETLINE);
					/* give maximum priority to this pipe */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix unary operator: its operand goes on the right */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			/* for operands and prefix-unary operators, attach them
			 * to last node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple tclasses should be used! */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					/* a name present in ahash is referenced by its
					 * stored argument index (OC_FNARG); any other
					 * name goes through newvar() */
					cn->info = OC_VAR;
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.aidx = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* subscript expression runs up to TC_ARRTERM */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					/* literal: stored in a freshly allocated variable */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					/* two regex_t: case-sensitive and ignore-case variants */
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					/* user function call: argument list parsed as "( expr )" */
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					/* parenthesized subexpression replaces the node just made */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					/* remember it: a following '<' is input redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					cn->l.n = condition();
					break;
				}
}
		}
	}
	/* the finished tree hangs off the dummy root's right link */
	return sn.r.n;
}

/* add node to chain. Return ptr to alloc'd node */
/* The chain always ends in a placeholder node: the call converts the
 * current placeholder into the requested node (sets its info) and appends
 * a fresh OC_DONE placeholder after it. If g_progname differs from the one
 * recorded in the chain, an OC_NEWSOURCE node carrying a copy of the name
 * is emitted first. */
static node *chain_node(uint32_t info)
{
	node *n;

	if (!seq->first)
		seq->first = seq->last = new_node(0);

	if (seq->programname != g_progname) {
		/* update programname before recursing so the nested call
		 * does not take this branch again */
		seq->programname = g_progname;
		n = chain_node(OC_NEWSOURCE);
		n->l.new_progname = xstrdup(g_progname);
	}

	n = seq->last;
	n->info = info;
	seq->last = n->a.n = new_node(OC_DONE);

	return n;
}

/* Chain a node of type 'info' whose left operand is an expression parsed
 * up to TC_OPTERM or TC_GRPTERM. A group terminator is rolled back so the
 * caller sees it too. */
static void chain_expr(uint32_t info)
{
	node *n;

	n = chain_node(info);
	n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
	if (t_tclass & TC_GRPTERM)
		rollback_token();
}

/* Chain a loop: an OC_BR node, the loop body (parsed by chain_group), and
 * an OC_EXEC node that jumps back to the OC_BR node. 'nn' is stored as the
 * left operand of that back-jump node. break_ptr/continue_ptr are replaced
 * with fresh targets while the body is parsed and restored afterwards.
 * Returns the OC_BR node; callers fill in its condition (l.n). */
static node *chain_loop(node *nn)
{
	node *n, *n2, *save_brk, *save_cont;

	save_brk = break_ptr;
	save_cont = continue_ptr;

	n = chain_node(OC_BR | Vx);
	continue_ptr = new_node(OC_EXEC);
	break_ptr = new_node(OC_EXEC);
	chain_group();
	n2 = chain_node(OC_EXEC | Vx);
	n2->l.n = nn;
	n2->a.n = n;
	/* "continue" goes to the back-jump node, "break" (and a false
	 * condition, via r.n) goes to whatever follows the loop */
	continue_ptr->a.n = n2;
	break_ptr->a.n = n->r.n = seq->last;

	continue_ptr = save_cont;
	break_ptr = save_brk;

	return n;
}

/* parse group and attach it to chain */
static void chain_group(void)
{
	uint32_t c;
	node *n, *n2, *n3;

	/* skip newlines in front of the group */
	do {
		c = next_token(TC_GRPSEQ);
	} while (c & TC_NEWLINE);

	if (c & TC_GRPSTART) {
		/* braced block: chain nested groups until the group terminator */
		while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
			if (t_tclass & TC_NEWLINE)
				continue;
			rollback_token();
			chain_group();
		}
	} else if (c & (TC_OPSEQ | TC_OPTERM)) {
		/* plain expression statement */
		rollback_token();
		chain_expr(OC_EXEC | Vx);
	} else {						/* TC_STATEMNT */
		switch (t_info & OPCLSMASK) {
		case ST_IF:
			/* n: the branch; n2: jump executed after the "then" part */
			n = chain_node(OC_BR | Vx);
			n->l.n = condition();
			chain_group();
			n2 = chain_node(OC_EXEC);
			n->r.n = seq->last;
			if
(next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
				/* after the "else" part is chained, make the jump
				 * at the end of the "then" part skip over it */
				chain_group();
				n2->a.n = seq->last;
			} else {
				rollback_token();
			}
			break;

		case ST_WHILE:
			/* condition is parsed first, then attached to the loop's branch node */
			n2 = condition();
			n = chain_loop(NULL);
			n->l.n = n2;
			break;

		case ST_DO:
			/* n2 jumps straight to the node following the loop's
			 * branch node; the condition is parsed after the body */
			n2 = chain_node(OC_EXEC);
			n = chain_loop(NULL);
			n2->a.n = n->a.n;
			next_token(TC_WHILE);
			n->l.n = condition();
			break;

		case ST_FOR:
			next_token(TC_SEQSTART);
			n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
			if (t_tclass & TC_SEQTERM) {	/* for-in */
				/* "for (x in arr)": the parenthesized expression
				 * must be a single "in" operator */
				if ((n2->info & OPCLSMASK) != OC_IN)
					syntax_error(EMSG_UNEXP_TOKEN);
				n = chain_node(OC_WALKINIT | VV);
				n->l.n = n2->l.n;
				n->r.n = n2->r.n;
				n = chain_loop(NULL);
				n->info = OC_WALKNEXT | Vx;
				n->l.n = n2->l.n;
			} else {			/* for (;;) */
				/* initializer runs once, then condition (n2) and
				 * increment (n3) are wired into the loop */
				n = chain_node(OC_EXEC | Vx);
				n->l.n = n2;
				n2 = parse_expr(TC_SEMICOL);
				n3 = parse_expr(TC_SEQTERM);
				n = chain_loop(n3);
				n->l.n = n2;
				/* empty condition: branch node becomes a plain OC_EXEC */
				if (!n2)
					n->info = OC_EXEC;
			}
			break;

		case OC_PRINT:
		case OC_PRINTF:
			n = chain_node(t_info);
			n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
			if (t_tclass & TC_OUTRDR) {
				/* output redirection: merge the redirection token's
				 * info bits and parse the target expression */
				n->info |= t_info;
				n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
			}
			if (t_tclass & TC_GRPTERM)
				rollback_token();
			break;

		case OC_BREAK:
			/* NOTE(review): break_ptr is only set by chain_loop();
			 * outside of a loop it holds whatever it was initialized
			 * to - no check is made here */
			n = chain_node(OC_EXEC);
			n->a.n = break_ptr;
			break;

		case OC_CONTINUE:
			/* NOTE(review): same caveat as for break_ptr above */
			n = chain_node(OC_EXEC);
			n->a.n = continue_ptr;
			break;

		/* delete, next, nextfile, return, exit */
		default:
			chain_expr(t_info);
		}
	}
}

/* Parse the whole program text 'p' and build the node chains
 * (beginseq, endseq, mainseq and function bodies). */
static void parse_program(char *p)
{
	uint32_t tclass;
	node *cn;
	func *f;
	var *v;

	g_pos = p;
	t_lineno = 1;
	/* one top-level item (rule, BEGIN/END block, function) per iteration */
	while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
			TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {

		if
(tclass & TC_OPTERM)
			continue;

		seq = &mainseq;
		if (tclass & TC_BEGIN) {
			seq = &beginseq;
			chain_group();

		} else if (tclass & TC_END) {
			seq = &endseq;
			chain_group();

		} else if (tclass & TC_FUNCDECL) {
			next_token(TC_FUNCTION);
			g_pos++;
			f = newfunc(t_string);
			f->body.first = NULL;
			f->nargs = 0;
			/* register each parameter name in ahash together with its
			 * position; parse_expr() looks names up there */
			while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
				v = findvar(ahash, t_string);
				v->x.aidx = f->nargs++;

				if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
					break;
			}
			seq = &f->body;
			chain_group();
			/* parameter names are only valid inside this function */
			clear_array(ahash);

		} else if (tclass & TC_OPSEQ) {
			/* "pattern" or "pattern { action }" */
			rollback_token();
			cn = chain_node(OC_TEST);
			cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
			if (t_tclass & TC_GRPSTART) {
				rollback_token();
				chain_group();
			} else {
				/* pattern without action: chain a bare print */
				chain_node(OC_PRINT);
			}
			/* where to continue when the pattern does not match */
			cn->r.n = mainseq.last;

		} else /* if (tclass & TC_GRPSTART) */ {
			rollback_token();
			chain_group();
		}
	}
}


/* -------- program execution part -------- */

/* (Re)initialize splitter 'spl' from separator string 's'.
 * A previously compiled regex pair is freed first. Strings longer than one
 * character are compiled as a regex; otherwise the single character (or 0
 * for an empty string) is stored directly in the node's info word.
 * Returns the splitter's node. */
static node *mk_splitter(const char *s, tsplitter *spl)
{
	regex_t *re, *ire;
	node *n;

	re = &spl->re[0];
	ire = &spl->re[1];
	n = &spl->n;
	if ((n->info & OPCLSMASK) == OC_REGEXP) {
		regfree(re);
		regfree(ire); // TODO: nuke ire, use re+1?
	}
	if (s[0] && s[1]) { /* strlen(s) > 1 */
		mk_re_node(s, n, re);
	} else {
		n->info = (uint32_t) s[0];
	}

	return n;
}

/* use node as a regular expression. Supplied with node ptr and regex_t
 * storage space. Return ptr to regex (if result points to preg, it should
 * be later regfree'd manually)
 */
static regex_t *as_regex(node *op, regex_t *preg)
{
	int cflags;
	var *v;
	const char *s;

	/* a regex literal is already compiled: pick the variant matching
	 * the current case-sensitivity setting */
	if ((op->info & OPCLSMASK) == OC_REGEXP) {
		return icase ?
op->r.ire : op->l.re;
	}
	/* otherwise evaluate the node to a string and compile it into preg */
	v = nvalloc(1);
	s = getvar_s(evaluate(op, v));

	cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
	/* Testcase where REG_EXTENDED fails (unpaired '{'):
	 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
	 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
	 * (maybe gsub is not supposed to use REG_EXTENDED?).
	 */
	if (regcomp(preg, s, cflags)) {
		cflags &= ~REG_EXTENDED;
		xregcomp(preg, s, cflags);
	}
	nvfree(v);
	return preg;
}

/* gradually increasing buffer.
 * note that we reallocate even if n == old_size,
 * and thus there is at least one extra allocated byte.
 */
/* b:    current buffer, may be NULL (then *size is not read)
 * n:    number of bytes the caller needs
 * size: in/out, allocated size of b
 * Returns the (possibly moved) buffer; on return *size > n. */
static char* qrealloc(char *b, int n, int *size)
{
	if (!b || n >= *size) {
		*size = n + (n>>1) + 80;
		b = xrealloc(b, *size);
	}
	return b;
}

/* resize field storage space */
/* Grows Fields[] when 'size' reaches maxfields (new slots are initialized
 * as empty VF_SPECIAL variables), clears fields at index >= size, and sets
 * nfields = size.
 * NOTE(review): a negative 'size' would make the clearing loop start before
 * Fields[0]; callers must not pass one. */
static void fsrealloc(int size)
{
	int i;

	if (size >= maxfields) {
		i = maxfields;
		maxfields = size + 16;
		Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
		for (; i < maxfields; i++) {
			Fields[i].type = VF_SPECIAL;
			Fields[i].string = NULL;
		}
	}
	/* if size < nfields, clear extra field variables */
	for (i = size; i < nfields; i++) {
		clrvar(Fields + i);
	}
	nfields = size;
}

/* Split string 's' into fields according to splitter node 'spl'.
 * The fields are stored back to back, each NUL-terminated, in a newly
 * allocated buffer returned through *slist (callers in this file free it).
 * Returns the number of fields. */
static int awk_split(const char *s, node *spl, char **slist)
{
	int l, n;
	char c[4];
	char *s1;
	regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
/* in worst case, each char would be a separate field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[0..1]: the separator character (two slots so that an upper and a
	 * lower case variant can be used below); c[2..3]: a second, separately
	 * NUL-terminated string which holds "\n" when RS is empty */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	n = 0;
	if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
		if (!*s)
			return n; /* "": zero fields */
		n++; /* at least one field will be there */
		do {
			l = strcspn(s, c+2); /* len till next NUL or \n */
			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
			 && pmatch[0].rm_so <= l
			) {
				l = pmatch[0].rm_so;
				/* empty match at the very start: take one char
				 * anyway so that the loop makes progress */
				if (pmatch[0].rm_eo == 0) {
					l++;
					pmatch[0].rm_eo++;
				}
				n++; /* we saw yet another delimiter */
			} else {
				/* no usable match: field runs to the end of line/string */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			memcpy(s1, s, l);
			/* make sure we remove *all* of the separator chars */
			do {
				s1[l] = '\0';
			} while (++l < pmatch[0].rm_eo);
			nextword(&s1);
			s += pmatch[0].rm_eo;
		} while (*s);
		return n;
	}
	if (c[0] == '\0') {  /* null split */
		/* every character becomes a field of its own */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {  /* single-character split */
		if (icase) {
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* overwrite each separator in the copy with NUL */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split */
	/* fields are runs of non-whitespace; leading, trailing and repeated
	 * whitespace produces no empty fields */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}

/* Split $0 into Fields[] using fsplitter, unless that was already done
 * (is_f0_split), and update NF to match. */
static void split_f0(void)
{
/* static char *fstrings; */
#define fstrings (G.split_f0__fstrings)

	int i, n;
	char *s;

	if (is_f0_split)
		return;

	is_f0_split = TRUE;
	/* drop the previous split result before producing a new one */
	free(fstrings);
	fsrealloc(0);
	n = awk_split(getvar_s(intvar[F0]),
&fsplitter.n, &fstrings); 1693 fsrealloc(n); 1694 s = fstrings; 1695 for (i = 0; i < n; i++) { 1696 Fields[i].string = nextword(&s); 1697 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY); 1698 } 1699 1700 /* set NF manually to avoid side effects */ 1701 clrvar(intvar[NF]); 1702 intvar[NF]->type = VF_NUMBER | VF_SPECIAL; 1703 intvar[NF]->number = nfields; 1704#undef fstrings 1705} 1706 1707/* perform additional actions when some internal variables changed */ 1708static void handle_special(var *v) 1709{ 1710 int n; 1711 char *b; 1712 const char *sep, *s; 1713 int sl, l, len, i, bsize; 1714 1715 if (!(v->type & VF_SPECIAL)) 1716 return; 1717 1718 if (v == intvar[NF]) { 1719 n = (int)getvar_i(v); 1720 fsrealloc(n); 1721 1722 /* recalculate $0 */ 1723 sep = getvar_s(intvar[OFS]); 1724 sl = strlen(sep); 1725 b = NULL; 1726 len = 0; 1727 for (i = 0; i < n; i++) { 1728 s = getvar_s(&Fields[i]); 1729 l = strlen(s); 1730 if (b) { 1731 memcpy(b+len, sep, sl); 1732 len += sl; 1733 } 1734 b = qrealloc(b, len+l+sl, &bsize); 1735 memcpy(b+len, s, l); 1736 len += l; 1737 } 1738 if (b) 1739 b[len] = '\0'; 1740 setvar_p(intvar[F0], b); 1741 is_f0_split = TRUE; 1742 1743 } else if (v == intvar[F0]) { 1744 is_f0_split = FALSE; 1745 1746 } else if (v == intvar[FS]) { 1747 mk_splitter(getvar_s(v), &fsplitter); 1748 1749 } else if (v == intvar[RS]) { 1750 mk_splitter(getvar_s(v), &rsplitter); 1751 1752 } else if (v == intvar[IGNORECASE]) { 1753 icase = istrue(v); 1754 1755 } else { /* $n */ 1756 n = getvar_i(intvar[NF]); 1757 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1); 1758 /* right here v is invalid. Just to note... 
*/
	}
}

/* step through func/builtin/etc arguments */
/* *pn is the remaining argument list: either a chain of OC_COMMA nodes
 * (l.n = this argument, r.n = rest) or the single last argument.
 * Returns the next argument (NULL if the list is empty) and advances *pn;
 * *pn becomes NULL once the last argument has been handed out. */
static node *nextarg(node **pn)
{
	node *n;

	n = *pn;
	if (n && (n->info & OPCLSMASK) == OC_COMMA) {
		*pn = n->r.n;
		n = n->l.n;
	} else {
		*pn = NULL;
	}
	return n;
}

/* Start a for-in walk of 'array' with loop variable 'v': copy the names of
 * all items into a new walker_list attached to v->x.walker. A walker that
 * is already attached (VF_WALK set) is kept reachable through w->prev. */
static void hashwalk_init(var *v, xhash *array)
{
	hash_item *hi;
	unsigned i;
	walker_list *w;
	walker_list *prev_walker;

	if (v->type & VF_WALK) {
		prev_walker = v->x.walker;
	} else {
		v->type |= VF_WALK;
		prev_walker = NULL;
	}
	debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);

	/* array->glen is the summed length of all item names */
	w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
	debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
	w->cur = w->end = w->wbuf;
	w->prev = prev_walker;
	/* visit every bucket and every item chained in it */
	for (i = 0; i < array->csize; i++) {
		hi = array->items[i];
		while (hi) {
			strcpy(w->end, hi->name);
			/* nextword() is defined elsewhere; presumably it moves
			 * the pointer past the string just copied - confirm */
			nextword(&w->end);
			hi = hi->next;
		}
	}
}

/* Advance the walk attached to 'v'. Stores the next saved name into 'v'
 * and returns TRUE; when the names are exhausted, frees the walker,
 * reinstates the previous one and returns FALSE. */
static int hashwalk_next(var *v)
{
	walker_list *w = v->x.walker;

	if (w->cur >= w->end) {
		walker_list *prev_walker = w->prev;

		debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
		free(w);
		v->x.walker = prev_walker;
		return FALSE;
	}

	setvar_s(v, nextword(&w->cur));
	return TRUE;
}

/* evaluate node, return 1 when result is true, 0 otherwise */
static int ptest(node *pattern)
{
	/* ptest__v is "static": to save stack space?
*/
	return istrue(evaluate(pattern, &G.ptest__v));
}

/* read next record from stream rsm into a variable v */
/* The record separator comes from the global rsplitter (regex, single
 * character, or - when its character is NUL - blank-line separated
 * paragraphs). The text matched as separator is stored into RT.
 * Returns 1 if a record was read, 0 at end of input, -1 on read error
 * (ERRNO is set in that case). */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[2];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	/* we're using our own buffer since we need access to accumulating
	 * characters
	 */
	fd = fileno(rsm->F);
	m = rsm->buffer;	/* start of the allocation */
	a = rsm->adv;		/* offset of unconsumed data inside m */
	p = rsm->pos;		/* number of unconsumed bytes */
	size = rsm->size;	/* allocated size of m */
	c = (char) rsplitter.n.info;
	rp = 0;			/* offset of the record start within b */

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		/* so/eo: start and end offset of the separator in b;
		 * default is "no separator, record is everything buffered" */
		so = eo = p;
		r = 1;
		if (p > 0) {
			if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match touching the end of the buffered
					 * data might grow with more input: only
					 * accept it if something follows */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single-char separator; only the bytes added by
				 * the last read (from pp on) need scanning */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* separator char is NUL: skip leading newlines,
				 * record ends at a run of two or more newlines */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move the pending data to the start
		 * of the buffer, grow it and read more */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* safe_read() returned a negative value */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp);	/* stop when the read delivered nothing new */

	if (p == 0) {
		/* nothing buffered: 1 -> 0 (end of input), 0 -> -1 (error) */
		r--;
	} else {
		/* temporarily NUL-terminate the record, then the separator,
		 * to copy them out; the original bytes are put back */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* remember buffer state; the consumed record is skipped via adv/pos */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	return r;
}
1927 1928static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) 1929{ 1930 int r = 0; 1931 char c; 1932 const char *s = format; 1933 1934 if (int_as_int && n == (int)n) { 1935 r = snprintf(b, size, "%d", (int)n); 1936 } else { 1937 do { c = *s; } while (c && *++s); 1938 if (strchr("diouxX", c)) { 1939 r = snprintf(b, size, format, (int)n); 1940 } else if (strchr("eEfgG", c)) { 1941 r = snprintf(b, size, format, n); 1942 } else { 1943 syntax_error(EMSG_INV_FMT); 1944 } 1945 } 1946 return r; 1947} 1948 1949/* formatted output into an allocated buffer, return ptr to buffer */ 1950static char *awk_printf(node *n) 1951{ 1952 char *b = NULL; 1953 char *fmt, *s, *f; 1954 const char *s1; 1955 int i, j, incr, bsize; 1956 char c, c1; 1957 var *v, *arg; 1958 1959 v = nvalloc(1); 1960 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); 1961 1962 i = 0; 1963 while (*f) { 1964 s = f; 1965 while (*f && (*f != '%' || *++f == '%')) 1966 f++; 1967 while (*f && !isalpha(*f)) { 1968 if (*f == '*') 1969 syntax_error("%*x formats are not supported"); 1970 f++; 1971 } 1972 1973 incr = (f - s) + MAXVARFMT; 1974 b = qrealloc(b, incr + i, &bsize); 1975 c = *f; 1976 if (c != '\0') 1977 f++; 1978 c1 = *f; 1979 *f = '\0'; 1980 arg = evaluate(nextarg(&n), v); 1981 1982 j = i; 1983 if (c == 'c' || !c) { 1984 i += sprintf(b+i, s, is_numeric(arg) ? 1985 (char)getvar_i(arg) : *getvar_s(arg)); 1986 } else if (c == 's') { 1987 s1 = getvar_s(arg); 1988 b = qrealloc(b, incr+i+strlen(s1), &bsize); 1989 i += sprintf(b+i, s, s1); 1990 } else { 1991 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE); 1992 } 1993 *f = c1; 1994 1995 /* if there was an error while sprintf, return value is negative */ 1996 if (i < j) 1997 i = j; 1998 } 1999 2000 free(fmt); 2001 nvfree(v); 2002 b = xrealloc(b, i + 1); 2003 b[i] = '\0'; 2004 return b; 2005} 2006 2007/* Common substitution routine. 
* Replace (nm)'th substring of (src) that matches (rn) with (repl),
 * store result into (dest), return number of substitutions.
 * If nm = 0, replace all matches.
 * If src or dest is NULL, use $0.
 * If subexp != 0, enable subexpression matching (\1-\9).
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;		/* write position in resbuf */
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
		/* copy everything up to and including the match ... */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;	/* number of backslashes seen in a row */

			/* replace */
			/* ... then step back over the matched text and write
			 * the replacement in its place */
			residx -= (eo - so);
			nbs = 0;
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* drop this char and half of the
					 * backslashes copied in front of it */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						nbs++;
					}
					if (nbs % 2) {
						/* escaped: emit the character itself */
						resbuf[residx++] = c;
					} else {
						/* insert matched text number j
						 * (0 = the whole match) */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		/* continue after the match; it is no longer line start */
		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match (e.g. "b*" will match anywhere).
			 * Advance by one char.
*/
//BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
//it should be able to do it correctly.
			/* Subtle: this is safe only because
			 * qrealloc allocated at least one extra byte */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append the unmatched remainder of the source string */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	//bb_error_msg("end sp:'%s'%p", sp,sp);
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* as_regex() compiled into our local storage: release it */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}

/* Convert a date string of the form "YYYY MM DD HH MM SS [DST]" to a
 * timestamp with mktime(). Returns -1 if fewer than six numbers could be
 * read, or if the month is below 1 or the year below 1900 (both compared
 * as unsigned values). */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm then;
	int count;

	/*memset(&then, 0, sizeof(then)); - not needed */
	then.tm_isdst = -1; /* default is unknown */

	/* manpage of mktime says these fields are ints,
	 * so we can sscanf stuff directly into them */
	count = sscanf(ds, "%u %u %u %u %u %u %d",
		&then.tm_year, &then.tm_mon, &then.tm_mday,
		&then.tm_hour, &then.tm_min, &then.tm_sec,
		&then.tm_isdst);

	if (count < 6
	 || (unsigned)then.tm_mon < 1
	 || (unsigned)then.tm_year < 1900
	) {
		return -1;
	}

	/* struct tm counts months from 0 and years from 1900 */
	then.tm_mon -= 1;
	then.tm_year -= 1900;

	return mktime(&then);
}

/* Execute the builtin function described by node 'op' (op->info selects
 * the function, op->l.n is its argument list) and store the result in
 * 'res'. Returns 'res'. */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tv;			/* temporaries for evaluated arguments */
	node *an[4];			/* argument nodes */
	var *av[4];			/* evaluated arguments */
	const char *as[4];		/* arguments as strings */
	regmatch_t pmatch[2];
	regex_t sreg, *re;
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	tv = nvalloc(4);
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	/* collect up to four arguments; isr is shifted right once per
	 * argument, so the two masks below test per-argument flag bits that
	 * decide whether it is evaluated and whether its string form is
	 * fetched */
	for (i = 0; i < 4 && op; i++) {
an[i] = nextarg(&op);
		if (isr & 0x09000000)
			av[i] = evaluate(an[i], &tv[i]);
		if (isr & 0x08000000)
			as[i] = getvar_s(av[i]);
		isr >>= 1;
	}

	nargs = i;
	/* the top two bits of info hold the minimum argument count */
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	case B_a2:
#if ENABLE_FEATURE_AWK_LIBM
		setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
#else
		syntax_error(EMSG_NO_MATH);
#endif
		break;

	case B_sp: {
		char *s, *s1;

		/* separator: third argument if given (a regex literal is used
		 * as is, anything else is turned into a splitter), else the
		 * field splitter */
		if (nargs > 2) {
			spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
				an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;		/* keep the start: nextword() advances s */
		clear_array(iamarray(av[1]));
		/* array elements are numbered from 1 */
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	case B_ss: {
		char *s;

		l = strlen(as[0]);
		/* start position is 1-based; clamp it into [0, l] */
		i = getvar_i(av[1]) - 1;
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		/* without a length argument, take the rest of the string */
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise ops must assume that operands are unsigned.
GNU Awk 3.1.5:
	 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	case B_lo:
	case B_up: {
		/* ASCII-only case conversion on a private copy of the string */
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			//*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
			/* true for 'a'..'z' and 'A'..'Z'; bit 0x20 is the
			 * difference between the two cases */
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	case B_ix:
		/* result: 1-based position of as[1] inside as[0], 0 if absent
		 * (or if as[1] is empty) */
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* this piece of code is terribly slow and
				 * really should be rewritten
				 */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	case B_ti:
		/* time to format: second argument if given, else "now" */
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		//s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ?
as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	case B_ma:
		re = as_regex(an[1], &sreg);
		n = regexec(re, as[0], 1, pmatch, 0);
		if (n == 0) {
			/* convert 0-based offsets to awk's 1-based positions */
			pmatch[0].rm_so++;
			pmatch[0].rm_eo++;
		} else {
			/* no match: yields RSTART = 0 and RLENGTH = -1 below */
			pmatch[0].rm_so = 0;
			pmatch[0].rm_eo = -1;
		}
		setvar_i(newvar("RSTART"), pmatch[0].rm_so);
		setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
		setvar_i(res, pmatch[0].rm_so);
		if (re == &sreg)
			regfree(re);
		break;

	case B_ge:
		/* result string goes to res; subexpression references enabled */
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	case B_gs:
		/* replace all matches in place, return their count */
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	case B_su:
		/* replace the first match in place, return the count */
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tv);
	return res;
#undef tspl
}

/*
 * Evaluate node - the heart of the program. Supplied with subtree
 * and place where to store result. returns ptr to result.
2334 */ 2335#define XC(n) ((n) >> 8) 2336 2337static var *evaluate(node *op, var *res) 2338{ 2339/* This procedure is recursive so we should count every byte */ 2340#define fnargs (G.evaluate__fnargs) 2341/* seed is initialized to 1 */ 2342#define seed (G.evaluate__seed) 2343#define sreg (G.evaluate__sreg) 2344 2345 var *v1; 2346 2347 if (!op) 2348 return setvar_s(res, NULL); 2349 2350 v1 = nvalloc(2); 2351 2352 while (op) { 2353 struct { 2354 var *v; 2355 const char *s; 2356 } L = L; /* for compiler */ 2357 struct { 2358 var *v; 2359 const char *s; 2360 } R = R; 2361 double L_d = L_d; 2362 uint32_t opinfo; 2363 int opn; 2364 node *op1; 2365 2366 opinfo = op->info; 2367 opn = (opinfo & OPNMASK); 2368 g_lineno = op->lineno; 2369 op1 = op->l.n; 2370 debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK)); 2371 2372 /* execute inevitable things */ 2373 if (opinfo & OF_RES1) 2374 L.v = evaluate(op1, v1); 2375 if (opinfo & OF_RES2) 2376 R.v = evaluate(op->r.n, v1+1); 2377 if (opinfo & OF_STR1) { 2378 L.s = getvar_s(L.v); 2379 debug_printf_eval("L.s:'%s'\n", L.s); 2380 } 2381 if (opinfo & OF_STR2) { 2382 R.s = getvar_s(R.v); 2383 debug_printf_eval("R.s:'%s'\n", R.s); 2384 } 2385 if (opinfo & OF_NUM1) { 2386 L_d = getvar_i(L.v); 2387 debug_printf_eval("L_d:%f\n", L_d); 2388 } 2389 2390 switch (XC(opinfo & OPCLSMASK)) { 2391 2392 /* -- iterative node type -- */ 2393 2394 /* test pattern */ 2395 case XC( OC_TEST ): 2396 if ((op1->info & OPCLSMASK) == OC_COMMA) { 2397 /* it's range pattern */ 2398 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { 2399 op->info |= OF_CHECKED; 2400 if (ptest(op1->r.n)) 2401 op->info &= ~OF_CHECKED; 2402 op = op->a.n; 2403 } else { 2404 op = op->r.n; 2405 } 2406 } else { 2407 op = ptest(op1) ? 
op->a.n : op->r.n; 2408 } 2409 break; 2410 2411 /* just evaluate an expression, also used as unconditional jump */ 2412 case XC( OC_EXEC ): 2413 break; 2414 2415 /* branch, used in if-else and various loops */ 2416 case XC( OC_BR ): 2417 op = istrue(L.v) ? op->a.n : op->r.n; 2418 break; 2419 2420 /* initialize for-in loop */ 2421 case XC( OC_WALKINIT ): 2422 hashwalk_init(L.v, iamarray(R.v)); 2423 break; 2424 2425 /* get next array item */ 2426 case XC( OC_WALKNEXT ): 2427 op = hashwalk_next(L.v) ? op->a.n : op->r.n; 2428 break; 2429 2430 case XC( OC_PRINT ): 2431 case XC( OC_PRINTF ): { 2432 FILE *F = stdout; 2433 2434 if (op->r.n) { 2435 rstream *rsm = newfile(R.s); 2436 if (!rsm->F) { 2437 if (opn == '|') { 2438 rsm->F = popen(R.s, "w"); 2439 if (rsm->F == NULL) 2440 bb_perror_msg_and_die("popen"); 2441 rsm->is_pipe = 1; 2442 } else { 2443 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a"); 2444 } 2445 } 2446 F = rsm->F; 2447 } 2448 2449 if ((opinfo & OPCLSMASK) == OC_PRINT) { 2450 if (!op1) { 2451 fputs(getvar_s(intvar[F0]), F); 2452 } else { 2453 while (op1) { 2454 var *v = evaluate(nextarg(&op1), v1); 2455 if (v->type & VF_NUMBER) { 2456 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), 2457 getvar_i(v), TRUE); 2458 fputs(g_buf, F); 2459 } else { 2460 fputs(getvar_s(v), F); 2461 } 2462 2463 if (op1) 2464 fputs(getvar_s(intvar[OFS]), F); 2465 } 2466 } 2467 fputs(getvar_s(intvar[ORS]), F); 2468 2469 } else { /* OC_PRINTF */ 2470 char *s = awk_printf(op1); 2471 fputs(s, F); 2472 free(s); 2473 } 2474 fflush(F); 2475 break; 2476 } 2477 2478 case XC( OC_DELETE ): { 2479 uint32_t info = op1->info & OPCLSMASK; 2480 var *v; 2481 2482 if (info == OC_VAR) { 2483 v = op1->l.v; 2484 } else if (info == OC_FNARG) { 2485 v = &fnargs[op1->l.aidx]; 2486 } else { 2487 syntax_error(EMSG_NOT_ARRAY); 2488 } 2489 2490 if (op1->r.n) { 2491 const char *s; 2492 clrvar(L.v); 2493 s = getvar_s(evaluate(op1->r.n, v1)); 2494 hash_remove(iamarray(v), s); 2495 } else { 2496 
clear_array(iamarray(v)); 2497 } 2498 break; 2499 } 2500 2501 case XC( OC_NEWSOURCE ): 2502 g_progname = op->l.new_progname; 2503 break; 2504 2505 case XC( OC_RETURN ): 2506 copyvar(res, L.v); 2507 break; 2508 2509 case XC( OC_NEXTFILE ): 2510 nextfile = TRUE; 2511 case XC( OC_NEXT ): 2512 nextrec = TRUE; 2513 case XC( OC_DONE ): 2514 clrvar(res); 2515 break; 2516 2517 case XC( OC_EXIT ): 2518 awk_exit(L_d); 2519 2520 /* -- recursive node type -- */ 2521 2522 case XC( OC_VAR ): 2523 L.v = op->l.v; 2524 if (L.v == intvar[NF]) 2525 split_f0(); 2526 goto v_cont; 2527 2528 case XC( OC_FNARG ): 2529 L.v = &fnargs[op->l.aidx]; 2530 v_cont: 2531 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v; 2532 break; 2533 2534 case XC( OC_IN ): 2535 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0); 2536 break; 2537 2538 case XC( OC_REGEXP ): 2539 op1 = op; 2540 L.s = getvar_s(intvar[F0]); 2541 goto re_cont; 2542 2543 case XC( OC_MATCH ): 2544 op1 = op->r.n; 2545 re_cont: 2546 { 2547 regex_t *re = as_regex(op1, &sreg); 2548 int i = regexec(re, L.s, 0, NULL, 0); 2549 if (re == &sreg) 2550 regfree(re); 2551 setvar_i(res, (i == 0) ^ (opn == '!')); 2552 } 2553 break; 2554 2555 case XC( OC_MOVE ): 2556 debug_printf_eval("MOVE\n"); 2557 /* if source is a temporary string, jusk relink it to dest */ 2558//Disabled: if R.v is numeric but happens to have cached R.v->string, 2559//then L.v ends up being a string, which is wrong 2560// if (R.v == v1+1 && R.v->string) { 2561// res = setvar_p(L.v, R.v->string); 2562// R.v->string = NULL; 2563// } else { 2564 res = copyvar(L.v, R.v); 2565// } 2566 break; 2567 2568 case XC( OC_TERNARY ): 2569 if ((op->r.n->info & OPCLSMASK) != OC_COLON) 2570 syntax_error(EMSG_POSSIBLE_ERROR); 2571 res = evaluate(istrue(L.v) ? 
op->r.n->l.n : op->r.n->r.n, res); 2572 break; 2573 2574 case XC( OC_FUNC ): { 2575 var *vbeg, *v; 2576 const char *sv_progname; 2577 2578 if (!op->r.f->body.first) 2579 syntax_error(EMSG_UNDEF_FUNC); 2580 2581 vbeg = v = nvalloc(op->r.f->nargs + 1); 2582 while (op1) { 2583 var *arg = evaluate(nextarg(&op1), v1); 2584 copyvar(v, arg); 2585 v->type |= VF_CHILD; 2586 v->x.parent = arg; 2587 if (++v - vbeg >= op->r.f->nargs) 2588 break; 2589 } 2590 2591 v = fnargs; 2592 fnargs = vbeg; 2593 sv_progname = g_progname; 2594 2595 res = evaluate(op->r.f->body.first, res); 2596 2597 g_progname = sv_progname; 2598 nvfree(fnargs); 2599 fnargs = v; 2600 2601 break; 2602 } 2603 2604 case XC( OC_GETLINE ): 2605 case XC( OC_PGETLINE ): { 2606 rstream *rsm; 2607 int i; 2608 2609 if (op1) { 2610 rsm = newfile(L.s); 2611 if (!rsm->F) { 2612 if ((opinfo & OPCLSMASK) == OC_PGETLINE) { 2613 rsm->F = popen(L.s, "r"); 2614 rsm->is_pipe = TRUE; 2615 } else { 2616 rsm->F = fopen_for_read(L.s); /* not xfopen! */ 2617 } 2618 } 2619 } else { 2620 if (!iF) 2621 iF = next_input_file(); 2622 rsm = iF; 2623 } 2624 2625 if (!rsm->F) { 2626 setvar_i(intvar[ERRNO], errno); 2627 setvar_i(res, -1); 2628 break; 2629 } 2630 2631 if (!op->r.n) 2632 R.v = intvar[F0]; 2633 2634 i = awk_getline(rsm, R.v); 2635 if (i > 0 && !op1) { 2636 incvar(intvar[FNR]); 2637 incvar(intvar[NR]); 2638 } 2639 setvar_i(res, i); 2640 break; 2641 } 2642 2643 /* simple builtins */ 2644 case XC( OC_FBLTIN ): { 2645 int i; 2646 rstream *rsm; 2647 double R_d = R_d; /* for compiler */ 2648 2649 switch (opn) { 2650 case F_in: 2651 R_d = (int)L_d; 2652 break; 2653 2654 case F_rn: 2655 R_d = (double)rand() / (double)RAND_MAX; 2656 break; 2657#if ENABLE_FEATURE_AWK_LIBM 2658 case F_co: 2659 R_d = cos(L_d); 2660 break; 2661 2662 case F_ex: 2663 R_d = exp(L_d); 2664 break; 2665 2666 case F_lg: 2667 R_d = log(L_d); 2668 break; 2669 2670 case F_si: 2671 R_d = sin(L_d); 2672 break; 2673 2674 case F_sq: 2675 R_d = sqrt(L_d); 2676 break; 
2677#else 2678 case F_co: 2679 case F_ex: 2680 case F_lg: 2681 case F_si: 2682 case F_sq: 2683 syntax_error(EMSG_NO_MATH); 2684 break; 2685#endif 2686 case F_sr: 2687 R_d = (double)seed; 2688 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL); 2689 srand(seed); 2690 break; 2691 2692 case F_ti: 2693 R_d = time(NULL); 2694 break; 2695 2696 case F_le: 2697 if (!op1) 2698 L.s = getvar_s(intvar[F0]); 2699 R_d = strlen(L.s); 2700 break; 2701 2702 case F_sy: 2703 fflush_all(); 2704 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s) 2705 ? (system(L.s) >> 8) : 0; 2706 break; 2707 2708 case F_ff: 2709 if (!op1) { 2710 fflush(stdout); 2711 } else if (L.s && *L.s) { 2712 rsm = newfile(L.s); 2713 fflush(rsm->F); 2714 } else { 2715 fflush_all(); 2716 } 2717 break; 2718 2719 case F_cl: 2720 i = 0; 2721 rsm = (rstream *)hash_search(fdhash, L.s); 2722 if (rsm) { 2723 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); 2724 free(rsm->buffer); 2725 hash_remove(fdhash, L.s); 2726 } 2727 if (i != 0) 2728 setvar_i(intvar[ERRNO], errno); 2729 R_d = (double)i; 2730 break; 2731 } 2732 setvar_i(res, R_d); 2733 break; 2734 } 2735 2736 case XC( OC_BUILTIN ): 2737 res = exec_builtin(op, res); 2738 break; 2739 2740 case XC( OC_SPRINTF ): 2741 setvar_p(res, awk_printf(op1)); 2742 break; 2743 2744 case XC( OC_UNARY ): { 2745 double Ld, R_d; 2746 2747 Ld = R_d = getvar_i(R.v); 2748 switch (opn) { 2749 case 'P': 2750 Ld = ++R_d; 2751 goto r_op_change; 2752 case 'p': 2753 R_d++; 2754 goto r_op_change; 2755 case 'M': 2756 Ld = --R_d; 2757 goto r_op_change; 2758 case 'm': 2759 R_d--; 2760 r_op_change: 2761 setvar_i(R.v, R_d); 2762 break; 2763 case '!': 2764 Ld = !istrue(R.v); 2765 break; 2766 case '-': 2767 Ld = -R_d; 2768 break; 2769 } 2770 setvar_i(res, Ld); 2771 break; 2772 } 2773 2774 case XC( OC_FIELD ): { 2775 int i = (int)getvar_i(R.v); 2776 if (i == 0) { 2777 res = intvar[F0]; 2778 } else { 2779 split_f0(); 2780 if (i > nfields) 2781 fsrealloc(i); 2782 res = &Fields[i - 1]; 2783 } 2784 break; 
2785 } 2786 2787 /* concatenation (" ") and index joining (",") */ 2788 case XC( OC_CONCAT ): 2789 case XC( OC_COMMA ): { 2790 const char *sep = ""; 2791 if ((opinfo & OPCLSMASK) == OC_COMMA) 2792 sep = getvar_s(intvar[SUBSEP]); 2793 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); 2794 break; 2795 } 2796 2797 case XC( OC_LAND ): 2798 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0); 2799 break; 2800 2801 case XC( OC_LOR ): 2802 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n)); 2803 break; 2804 2805 case XC( OC_BINARY ): 2806 case XC( OC_REPLACE ): { 2807 double R_d = getvar_i(R.v); 2808 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn); 2809 switch (opn) { 2810 case '+': 2811 L_d += R_d; 2812 break; 2813 case '-': 2814 L_d -= R_d; 2815 break; 2816 case '*': 2817 L_d *= R_d; 2818 break; 2819 case '/': 2820 if (R_d == 0) 2821 syntax_error(EMSG_DIV_BY_ZERO); 2822 L_d /= R_d; 2823 break; 2824 case '&': 2825#if ENABLE_FEATURE_AWK_LIBM 2826 L_d = pow(L_d, R_d); 2827#else 2828 syntax_error(EMSG_NO_MATH); 2829#endif 2830 break; 2831 case '%': 2832 if (R_d == 0) 2833 syntax_error(EMSG_DIV_BY_ZERO); 2834 L_d -= (int)(L_d / R_d) * R_d; 2835 break; 2836 } 2837 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d); 2838 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d); 2839 break; 2840 } 2841 2842 case XC( OC_COMPARE ): { 2843 int i = i; /* for compiler */ 2844 double Ld; 2845 2846 if (is_numeric(L.v) && is_numeric(R.v)) { 2847 Ld = getvar_i(L.v) - getvar_i(R.v); 2848 } else { 2849 const char *l = getvar_s(L.v); 2850 const char *r = getvar_s(R.v); 2851 Ld = icase ? 
strcasecmp(l, r) : strcmp(l, r); 2852 } 2853 switch (opn & 0xfe) { 2854 case 0: 2855 i = (Ld > 0); 2856 break; 2857 case 2: 2858 i = (Ld >= 0); 2859 break; 2860 case 4: 2861 i = (Ld == 0); 2862 break; 2863 } 2864 setvar_i(res, (i == 0) ^ (opn & 1)); 2865 break; 2866 } 2867 2868 default: 2869 syntax_error(EMSG_POSSIBLE_ERROR); 2870 } 2871 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) 2872 op = op->a.n; 2873 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) 2874 break; 2875 if (nextrec) 2876 break; 2877 } /* while (op) */ 2878 2879 nvfree(v1); 2880 return res; 2881#undef fnargs 2882#undef seed 2883#undef sreg 2884} 2885 2886 2887/* -------- main & co. -------- */ 2888 2889static int awk_exit(int r) 2890{ 2891 var tv; 2892 unsigned i; 2893 hash_item *hi; 2894 2895 zero_out_var(&tv); 2896 2897 if (!exiting) { 2898 exiting = TRUE; 2899 nextrec = FALSE; 2900 evaluate(endseq.first, &tv); 2901 } 2902 2903 /* waiting for children */ 2904 for (i = 0; i < fdhash->csize; i++) { 2905 hi = fdhash->items[i]; 2906 while (hi) { 2907 if (hi->data.rs.F && hi->data.rs.is_pipe) 2908 pclose(hi->data.rs.F); 2909 hi = hi->next; 2910 } 2911 } 2912 2913 exit(r); 2914} 2915 2916/* if expr looks like "var=value", perform assignment and return 1, 2917 * otherwise return 0 */ 2918static int is_assignment(const char *expr) 2919{ 2920 char *exprc, *s, *s0, *s1; 2921 2922 exprc = xstrdup(expr); 2923 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { 2924 free(exprc); 2925 return FALSE; 2926 } 2927 2928 *s++ = '\0'; 2929 s0 = s1 = s; 2930 while (*s) 2931 *s1++ = nextchar(&s); 2932 2933 *s1 = '\0'; 2934 setvar_u(newvar(exprc), s0); 2935 free(exprc); 2936 return TRUE; 2937} 2938 2939/* switch to next input file */ 2940static rstream *next_input_file(void) 2941{ 2942#define rsm (G.next_input_file__rsm) 2943#define files_happen (G.next_input_file__files_happen) 2944 2945 FILE *F = NULL; 2946 const char *fname, *ind; 2947 2948 if (rsm.F) 2949 fclose(rsm.F); 2950 rsm.F = NULL; 2951 rsm.pos = rsm.adv = 
0; 2952 2953 do { 2954 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { 2955 if (files_happen) 2956 return NULL; 2957 fname = "-"; 2958 F = stdin; 2959 } else { 2960 ind = getvar_s(incvar(intvar[ARGIND])); 2961 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); 2962 if (fname && *fname && !is_assignment(fname)) 2963 F = xfopen_stdin(fname); 2964 } 2965 } while (!F); 2966 2967 files_happen = TRUE; 2968 setvar_s(intvar[FILENAME], fname); 2969 rsm.F = F; 2970 return &rsm; 2971#undef rsm 2972#undef files_happen 2973} 2974 2975int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 2976int awk_main(int argc, char **argv) 2977{ 2978 unsigned opt; 2979 char *opt_F, *opt_W; 2980 llist_t *list_v = NULL; 2981 llist_t *list_f = NULL; 2982 int i, j; 2983 var *v; 2984 var tv; 2985 char **envp; 2986 char *vnames = (char *)vNames; /* cheat */ 2987 char *vvalues = (char *)vValues; 2988 2989 INIT_G(); 2990 2991 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing: 2992 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */ 2993 if (ENABLE_LOCALE_SUPPORT) 2994 setlocale(LC_NUMERIC, "C"); 2995 2996 zero_out_var(&tv); 2997 2998 /* allocate global buffer */ 2999 g_buf = xmalloc(MAXVARFMT + 1); 3000 3001 vhash = hash_init(); 3002 ahash = hash_init(); 3003 fdhash = hash_init(); 3004 fnhash = hash_init(); 3005 3006 /* initialize variables */ 3007 for (i = 0; *vnames; i++) { 3008 intvar[i] = v = newvar(nextword(&vnames)); 3009 if (*vvalues != '\377') 3010 setvar_s(v, nextword(&vvalues)); 3011 else 3012 setvar_i(v, 0); 3013 3014 if (*vnames == '*') { 3015 v->type |= VF_SPECIAL; 3016 vnames++; 3017 } 3018 } 3019 3020 handle_special(intvar[FS]); 3021 handle_special(intvar[RS]); 3022 3023 newfile("/dev/stdin")->F = stdin; 3024 newfile("/dev/stdout")->F = stdout; 3025 newfile("/dev/stderr")->F = stderr; 3026 3027 /* Huh, people report that sometimes environ is NULL. Oh well. 
*/ 3028 if (environ) for (envp = environ; *envp; envp++) { 3029 /* environ is writable, thus we don't strdup it needlessly */ 3030 char *s = *envp; 3031 char *s1 = strchr(s, '='); 3032 if (s1) { 3033 *s1 = '\0'; 3034 /* Both findvar and setvar_u take const char* 3035 * as 2nd arg -> environment is not trashed */ 3036 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); 3037 *s1 = '='; 3038 } 3039 } 3040 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */ 3041 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W); 3042 argv += optind; 3043 argc -= optind; 3044 if (opt & 0x1) 3045 setvar_s(intvar[FS], opt_F); // -F 3046 while (list_v) { /* -v */ 3047 if (!is_assignment(llist_pop(&list_v))) 3048 bb_show_usage(); 3049 } 3050 if (list_f) { /* -f */ 3051 do { 3052 char *s = NULL; 3053 FILE *from_file; 3054 3055 g_progname = llist_pop(&list_f); 3056 from_file = xfopen_stdin(g_progname); 3057 /* one byte is reserved for some trick in next_token */ 3058 for (i = j = 1; j > 0; i += j) { 3059 s = xrealloc(s, i + 4096); 3060 j = fread(s + i, 1, 4094, from_file); 3061 } 3062 s[i] = '\0'; 3063 fclose(from_file); 3064 parse_program(s + 1); 3065 free(s); 3066 } while (list_f); 3067 argc++; 3068 } else { // no -f: take program from 1st parameter 3069 if (!argc) 3070 bb_show_usage(); 3071 g_progname = "cmd. 
line"; 3072 parse_program(*argv++); 3073 } 3074 if (opt & 0x8) // -W 3075 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W); 3076 3077 /* fill in ARGV array */ 3078 setvar_i(intvar[ARGC], argc); 3079 setari_u(intvar[ARGV], 0, "awk"); 3080 i = 0; 3081 while (*argv) 3082 setari_u(intvar[ARGV], ++i, *argv++); 3083 3084 evaluate(beginseq.first, &tv); 3085 if (!mainseq.first && !endseq.first) 3086 awk_exit(EXIT_SUCCESS); 3087 3088 /* input file could already be opened in BEGIN block */ 3089 if (!iF) 3090 iF = next_input_file(); 3091 3092 /* passing through input files */ 3093 while (iF) { 3094 nextfile = FALSE; 3095 setvar_i(intvar[FNR], 0); 3096 3097 while ((i = awk_getline(iF, intvar[F0])) > 0) { 3098 nextrec = FALSE; 3099 incvar(intvar[NR]); 3100 incvar(intvar[FNR]); 3101 evaluate(mainseq.first, &tv); 3102 3103 if (nextfile) 3104 break; 3105 } 3106 3107 if (i < 0) 3108 syntax_error(strerror(errno)); 3109 3110 iF = next_input_file(); 3111 } 3112 3113 awk_exit(EXIT_SUCCESS); 3114 /*return 0;*/ 3115} 3116