1/**************************************************************** 2Copyright (C) Lucent Technologies 1997 3All Rights Reserved 4 5Permission to use, copy, modify, and distribute this software and 6its documentation for any purpose and without fee is hereby 7granted, provided that the above copyright notice appear in all 8copies and that both that the copyright notice and this 9permission notice and warranty disclaimer appear in supporting 10documentation, and that the name Lucent Technologies or any of 11its entities not be used in advertising or publicity pertaining 12to distribution of the software without specific, written prior 13permission. 14 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22THIS SOFTWARE. 23****************************************************************/ 24 25#if HAVE_NBTOOL_CONFIG_H 26#include "nbtool_config.h" 27#endif 28 29#define DEBUG 30#include <stdio.h> 31#include <ctype.h> 32#include <wchar.h> 33#include <wctype.h> 34#include <fcntl.h> 35#include <setjmp.h> 36#include <limits.h> 37#include <math.h> 38#include <string.h> 39#include <stdlib.h> 40#include <time.h> 41#include <sys/types.h> 42#include <sys/wait.h> 43#include "awk.h" 44#include "awkgram.h" 45 46static void stdinit(void); 47static void flush_all(void); 48 49#if 1 50#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 51#else 52void tempfree(Cell *p) { 53 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 54 WARNING("bad csub %d in Cell %d %s", 55 p->csub, p->ctype, p->sval); 56 } 57 if (istemp(p)) 58 tfree(p); 59} 60#endif 61 62/* do we really need these? */ 63/* #ifdef _NFILE */ 64/* #ifndef FOPEN_MAX */ 65/* #define FOPEN_MAX _NFILE */ 66/* #endif */ 67/* #endif */ 68/* */ 69/* #ifndef FOPEN_MAX */ 70/* #define FOPEN_MAX 40 */ /* max number of open files */ 71/* #endif */ 72/* */ 73/* #ifndef RAND_MAX */ 74/* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 75/* #endif */ 76 77jmp_buf env; 78extern int pairstack[]; 79extern Awkfloat srand_seed; 80 81Node *winner = NULL; /* root of parse tree */ 82Cell *tmps; /* free temporary cells for execution */ 83 84static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 85Cell *True = &truecell; 86static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 87Cell *False = &falsecell; 88static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 89Cell *jbreak = &breakcell; 90static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 91Cell *jcont = &contcell; 92static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 93Cell *jnext = &nextcell; 94static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 95Cell *jnextfile = &nextfilecell; 96static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 97Cell *jexit = &exitcell; 98static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 99Cell *jret = &retcell; 100static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 101 102Node *curnode = NULL; /* the node being executed, for debugging */ 103 104/* buffer memory management */ 105int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 106 const char *whatrtn) 107/* pbuf: address of pointer to buffer being managed 108 * psiz: address of buffer size variable 109 * minlen: minimum length of buffer needed 110 * quantum: buffer size quantum 111 * pbptr: address of movable pointer into buffer, or 0 if none 112 * whatrtn: name of the calling routine if failure should cause fatal error 113 * 114 * return 0 for realloc failure, !=0 for success 115 */ 116{ 117 if (minlen > *psiz) { 118 char *tbuf; 119 int rminlen = quantum ? minlen % quantum : 0; 120 int boff = pbptr ? *pbptr - *pbuf : 0; 121 /* round up to next multiple of quantum */ 122 if (rminlen) 123 minlen += quantum - rminlen; 124 tbuf = realloc(*pbuf, minlen); 125 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); 126 if (tbuf == NULL) { 127 if (whatrtn) 128 FATAL("out of memory in %s", whatrtn); 129 return 0; 130 } 131 *pbuf = tbuf; 132 *psiz = minlen; 133 if (pbptr) 134 *pbptr = tbuf + boff; 135 } 136 return 1; 137} 138 139void run(Node *a) /* execution of parse tree starts here */ 140{ 141 142 stdinit(); 143 execute(a); 144 closeall(); 145} 146 147Cell *execute(Node *u) /* execute a node of the parse tree */ 148{ 149 Cell *(*proc)(Node **, int); 150 Cell *x; 151 Node *a; 152 153 if (u == NULL) 154 return(True); 155 for (a = u; ; a = a->nnext) { 156 curnode = a; 157 if (isvalue(a)) { 158 x = (Cell *) (a->narg[0]); 159 if (isfld(x) && !donefld) 160 fldbld(); 161 else if (isrec(x) && !donerec) 162 recbld(); 163 return(x); 164 } 165 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 166 FATAL("illegal statement"); 167 proc = proctab[a->nobj-FIRSTTOKEN]; 168 x = (*proc)(a->narg, a->nobj); 169 if (isfld(x) && !donefld) 170 fldbld(); 171 else if (isrec(x) && !donerec) 172 recbld(); 173 if (isexpr(a)) 174 return(x); 175 if (isjump(x)) 176 return(x); 177 if (a->nnext == NULL) 178 return(x); 179 tempfree(x); 180 } 181} 182 183 184Cell *program(Node **a, int n) /* execute an awk program */ 185{ /* a[0] = BEGIN, a[1] = body, a[2] = END */ 186 Cell *x; 187 188 if (setjmp(env) != 0) 189 goto ex; 190 if (a[0]) { /* BEGIN */ 191 x = execute(a[0]); 192 if (isexit(x)) 193 return(True); 194 if (isjump(x)) 195 FATAL("illegal break, continue, next or nextfile from BEGIN"); 196 tempfree(x); 197 } 198 if (a[1] || a[2]) 199 while (getrec(&record, &recsize, true) > 0) { 200 x = execute(a[1]); 201 if (isexit(x)) 202 break; 203 tempfree(x); 204 } 205 ex: 206 if (setjmp(env) != 0) /* handles exit within END */ 207 goto ex1; 208 if (a[2]) { /* END */ 209 x = execute(a[2]); 210 if (isbreak(x) || isnext(x) || iscont(x)) 211 FATAL("illegal break, continue, next or nextfile from END"); 212 tempfree(x); 213 } 214 ex1: 215 return(True); 216} 217 218struct Frame { /* stack frame for awk function calls */ 219 int nargs; /* number of arguments in this call */ 220 Cell *fcncell; /* pointer to Cell for function */ 221 Cell **args; /* pointer to array of arguments after execute */ 222 Cell *retval; /* return value */ 223}; 224 225#define NARGS 50 /* max args in a call */ 226 227struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 228int nframe = 0; /* number of frames allocated */ 229struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 230 231Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 232{ 233 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 234 int i, ncall, ndef; 235 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 236 Node *x; 237 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 238 Cell *y, *z, *fcn; 239 char *s; 240 241 fcn = execute(a[0]); /* the function itself */ 242 s = fcn->nval; 243 if (!isfcn(fcn)) 244 FATAL("calling undefined function %s", s); 245 if (frame == NULL) { 246 frp = frame = calloc(nframe += 100, sizeof(*frame)); 247 if (frame == NULL) 248 FATAL("out of space for stack frames calling %s", s); 249 } 250 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 251 ncall++; 252 ndef = (int) fcn->fval; /* args in defn */ 253 dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); 254 if (ncall > ndef) 255 WARNING("function %s called with %d args, uses only %d", 256 s, ncall, ndef); 257 if (ncall + ndef > NARGS) 258 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 259 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 260 dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); 261 y = execute(x); 262 oargs[i] = y; 263 dprintf( ("args[%d]: %s %f <%s>, t=%o\n", 264 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) ); 265 if (isfcn(y)) 266 FATAL("can't use function %s as argument in %s", y->nval, s); 267 if (isarr(y)) 268 args[i] = y; /* arrays by ref */ 269 else 270 args[i] = copycell(y); 271 tempfree(y); 272 } 273 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 274 args[i] = gettemp(); 275 *args[i] = newcopycell; 276 } 277 frp++; /* now ok to up frame */ 278 if (frp >= frame + nframe) { 279 int dfp = frp - frame; /* old index */ 280 frame = realloc(frame, (nframe += 100) * sizeof(*frame)); 281 if (frame == NULL) 282 FATAL("out of space for stack frames in %s", s); 283 frp = frame + dfp; 284 } 285 frp->fcncell = fcn; 286 frp->args = args; 287 frp->nargs = ndef; /* number defined with (excess are locals) */ 288 frp->retval = gettemp(); 289 290 dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); 291 y = execute((Node *)(fcn->sval)); /* execute body */ 292 dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); 293 294 for (i = 0; i < ndef; i++) { 295 Cell *t = frp->args[i]; 296 if (isarr(t)) { 297 if (t->csub == CCOPY) { 298 if (i >= ncall) { 299 freesymtab(t); 300 t->csub = CTEMP; 301 tempfree(t); 302 } else { 303 oargs[i]->tval = t->tval; 304 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 305 oargs[i]->sval = t->sval; 306 tempfree(t); 307 } 308 } 309 } else if (t != y) { /* kludge to prevent freeing twice */ 310 t->csub = CTEMP; 311 tempfree(t); 312 } else if (t == y && t->csub == CCOPY) { 313 t->csub = CTEMP; 314 tempfree(t); 315 freed = 1; 316 } 317 } 318 tempfree(fcn); 319 if (isexit(y) || isnext(y)) 320 return y; 321 if (freed == 0) { 322 tempfree(y); /* don't free twice! */ 323 } 324 z = frp->retval; /* return value */ 325 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); 326 frp--; 327 return(z); 328} 329 330Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 331{ 332 Cell *y; 333 334 /* copy is not constant or field */ 335 336 y = gettemp(); 337 y->tval = x->tval & ~(CON|FLD|REC); 338 y->csub = CCOPY; /* prevents freeing until call is over */ 339 y->nval = x->nval; /* BUG? */ 340 if (isstr(x) /* || x->ctype == OCELL */) { 341 y->sval = tostring(x->sval); 342 y->tval &= ~DONTFREE; 343 } else 344 y->tval |= DONTFREE; 345 y->fval = x->fval; 346 return y; 347} 348 349Cell *arg(Node **a, int n) /* nth argument of a function */ 350{ 351 352 n = ptoi(a[0]); /* argument number, counting from 0 */ 353 dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); 354 if (n+1 > frp->nargs) 355 FATAL("argument #%d of function %s was not supplied", 356 n+1, frp->fcncell->nval); 357 return frp->args[n]; 358} 359 360Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 361{ 362 Cell *y; 363 364 switch (n) { 365 case EXIT: 366 if (a[0] != NULL) { 367 y = execute(a[0]); 368 errorflag = (int) getfval(y); 369 tempfree(y); 370 } 371 longjmp(env, 1); 372 case RETURN: 373 if (a[0] != NULL) { 374 y = execute(a[0]); 375 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 376 setsval(frp->retval, getsval(y)); 377 frp->retval->fval = getfval(y); 378 frp->retval->tval |= NUM; 379 } 380 else if (y->tval & STR) 381 setsval(frp->retval, getsval(y)); 382 else if (y->tval & NUM) 383 setfval(frp->retval, getfval(y)); 384 else /* can't happen */ 385 FATAL("bad type variable %d", y->tval); 386 tempfree(y); 387 } 388 return(jret); 389 case NEXT: 390 return(jnext); 391 case NEXTFILE: 392 nextfile(); 393 return(jnextfile); 394 case BREAK: 395 return(jbreak); 396 case CONTINUE: 397 return(jcont); 398 default: /* can't happen */ 399 FATAL("illegal jump type %d", n); 400 } 401 return 0; /* not reached */ 402} 403 404Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 405{ /* a[0] is variable, a[1] is operator, a[2] is filename */ 406 Cell *r, *x; 407 extern Cell **fldtab; 408 FILE *fp; 409 char *buf; 410 int bufsize = recsize; 411 int mode; 412 bool newflag; 413 414 if ((buf = malloc(bufsize)) == NULL) 415 FATAL("out of memory in getline"); 416 417 fflush(stdout); /* in case someone is waiting for a prompt */ 418 r = gettemp(); 419 if (a[1] != NULL) { /* getline < file */ 420 x = execute(a[2]); /* filename */ 421 mode = ptoi(a[1]); 422 if (mode == '|') /* input pipe */ 423 mode = LE; /* arbitrary flag */ 424 fp = openfile(mode, getsval(x), &newflag); 425 tempfree(x); 426 if (fp == NULL) 427 n = -1; 428 else 429 n = readrec(&buf, &bufsize, fp, newflag); 430 if (n <= 0) { 431 ; 432 } else if (a[0] != NULL) { /* getline var <file */ 433 x = execute(a[0]); 434 setsval(x, buf); 435 check_number(x); 436 tempfree(x); 437 } else { /* getline <file */ 438 setsval(fldtab[0], buf); 439 check_number(fldtab[0]); 440 } 441 } else { /* bare getline; use current input */ 442 if (a[0] == NULL) /* getline */ 443 n = getrec(&record, &recsize, true); 444 else { /* getline var */ 445 n = getrec(&buf, &bufsize, false); 446 x = execute(a[0]); 447 setsval(x, buf); 448 check_number(x); 449 tempfree(x); 450 } 451 } 452 setfval(r, (Awkfloat) n); 453 free(buf); 454 return r; 455} 456 457Cell *getnf(Node **a, int n) /* get NF */ 458{ 459 if (!donefld) 460 fldbld(); 461 return (Cell *) a[0]; 462} 463 464static char * 465makearraystring(Node *p, const char *func) 466{ 467 char *buf; 468 int bufsz = recsize; 469 size_t blen, seplen; 470 471 if ((buf = malloc(bufsz)) == NULL) { 472 FATAL("%s: out of memory", func); 473 } 474 475 blen = 0; 476 buf[blen] = '\0'; 477 seplen = strlen(getsval(subseploc)); 478 479 for (; p; p = p->nnext) { 480 Cell *x = execute(p); /* expr */ 481 char *s = getsval(x); 482 size_t nsub = p->nnext ? seplen : 0; 483 size_t slen = strlen(s); 484 size_t tlen = blen + slen + nsub; 485 486 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 487 FATAL("%s: out of memory %s[%s...]", 488 func, x->nval, buf); 489 } 490 memcpy(buf + blen, s, slen); 491 if (nsub) { 492 memcpy(buf + blen + slen, *SUBSEP, nsub); 493 } 494 buf[tlen] = '\0'; 495 blen = tlen; 496 tempfree(x); 497 } 498 return buf; 499} 500 501Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 502{ 503 Cell *x, *z; 504 char *buf; 505 506 x = execute(a[0]); /* Cell* for symbol table */ 507 buf = makearraystring(a[1], __func__); 508 if (!isarr(x)) { 509 dprintf( ("making %s into an array\n", NN(x->nval)) ); 510 if (freeable(x)) 511 xfree(x->sval); 512 x->tval &= ~(STR|NUM|DONTFREE); 513 x->tval |= ARR; 514 x->sval = (char *) makesymtab(NSYMTAB); 515 } 516 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 517 z->ctype = OCELL; 518 z->csub = CVAR; 519 tempfree(x); 520 free(buf); 521 return(z); 522} 523 524Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 525{ 526 Cell *x; 527 528 x = execute(a[0]); /* Cell* for symbol table */ 529 if (x == symtabloc) { 530 FATAL("cannot delete SYMTAB or its elements"); 531 } 532 if (!isarr(x)) 533 return True; 534 if (a[1] == NULL) { /* delete the elements, not the table */ 535 freesymtab(x); 536 x->tval &= ~STR; 537 x->tval |= ARR; 538 x->sval = (char *) makesymtab(NSYMTAB); 539 } else { 540 char *buf = makearraystring(a[1], __func__); 541 freeelem(x, buf); 542 free(buf); 543 } 544 tempfree(x); 545 return True; 546} 547 548Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 549{ 550 Cell *ap, *k; 551 char *buf; 552 553 ap = execute(a[1]); /* array name */ 554 if (!isarr(ap)) { 555 dprintf( ("making %s into an array\n", ap->nval) ); 556 if (freeable(ap)) 557 xfree(ap->sval); 558 ap->tval &= ~(STR|NUM|DONTFREE); 559 ap->tval |= ARR; 560 ap->sval = (char *) makesymtab(NSYMTAB); 561 } 562 buf = makearraystring(a[0], __func__); 563 k = lookup(buf, (Array *) ap->sval); 564 tempfree(ap); 565 free(buf); 566 if (k == NULL) 567 return(False); 568 else 569 return(True); 570} 571 572 573Cell *matchop(Node **a, int n) /* ~ and match() */ 574{ 575 Cell *x, *y; 576 char *s, *t; 577 int i; 578 fa *pfa; 579 int (*mf)(fa *, const char *) = match, mode = 0; 580 581 if (n == MATCHFCN) { 582 mf = pmatch; 583 mode = 1; 584 } 585 x = execute(a[1]); /* a[1] = target text */ 586 s = getsval(x); 587 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 588 i = (*mf)((fa *) a[2], s); 589 else { 590 y = execute(a[2]); /* a[2] = regular expr */ 591 t = getsval(y); 592 pfa = makedfa(t, mode); 593 i = (*mf)(pfa, s); 594 tempfree(y); 595 } 596 tempfree(x); 597 if (n == MATCHFCN) { 598 int start = patbeg - s + 1; 599 if (patlen < 0) 600 start = 0; 601 setfval(rstartloc, (Awkfloat) start); 602 setfval(rlengthloc, (Awkfloat) patlen); 603 x = gettemp(); 604 x->tval = NUM; 605 x->fval = start; 606 return x; 607 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 608 return(True); 609 else 610 return(False); 611} 612 613 614Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 615{ 616 Cell *x, *y; 617 int i; 618 619 x = execute(a[0]); 620 i = istrue(x); 621 tempfree(x); 622 switch (n) { 623 case BOR: 624 if (i) return(True); 625 y = execute(a[1]); 626 i = istrue(y); 627 tempfree(y); 628 if (i) return(True); 629 else return(False); 630 case AND: 631 if ( !i ) return(False); 632 y = execute(a[1]); 633 i = istrue(y); 634 tempfree(y); 635 if (i) return(True); 636 else return(False); 637 case NOT: 638 if (i) return(False); 639 else return(True); 640 default: /* can't happen */ 641 FATAL("unknown boolean operator %d", n); 642 } 643 return 0; /*NOTREACHED*/ 644} 645 646Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 647{ 648 int i; 649 Cell *x, *y; 650 Awkfloat j; 651 652 x = execute(a[0]); 653 y = execute(a[1]); 654 if (x->tval&NUM && y->tval&NUM) { 655 j = x->fval - y->fval; 656 i = j<0? -1: (j>0? 1: 0); 657 } else { 658 i = strcmp(getsval(x), getsval(y)); 659 } 660 tempfree(x); 661 tempfree(y); 662 switch (n) { 663 case LT: if (i<0) return(True); 664 else return(False); 665 case LE: if (i<=0) return(True); 666 else return(False); 667 case NE: if (i!=0) return(True); 668 else return(False); 669 case EQ: if (i == 0) return(True); 670 else return(False); 671 case GE: if (i>=0) return(True); 672 else return(False); 673 case GT: if (i>0) return(True); 674 else return(False); 675 default: /* can't happen */ 676 FATAL("unknown relational operator %d", n); 677 } 678 return 0; /*NOTREACHED*/ 679} 680 681void tfree(Cell *a) /* free a tempcell */ 682{ 683 if (freeable(a)) { 684 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); 685 xfree(a->sval); 686 } 687 if (a == tmps) 688 FATAL("tempcell list is curdled"); 689 a->cnext = tmps; 690 tmps = a; 691} 692 693Cell *gettemp(void) /* get a tempcell */ 694{ int i; 695 Cell *x; 696 697 if (!tmps) { 698 tmps = calloc(100, sizeof(*tmps)); 699 if (!tmps) 700 FATAL("out of space for temporaries"); 701 for (i = 1; i < 100; i++) 702 tmps[i-1].cnext = &tmps[i]; 703 tmps[i-1].cnext = NULL; 704 } 705 x = tmps; 706 tmps = x->cnext; 707 *x = tempcell; 708 return(x); 709} 710 711Cell *indirect(Node **a, int n) /* $( a[0] ) */ 712{ 713 Awkfloat val; 714 Cell *x; 715 int m; 716 char *s; 717 718 x = execute(a[0]); 719 val = getfval(x); /* freebsd: defend against super large field numbers */ 720 if ((Awkfloat)INT_MAX < val) 721 FATAL("trying to access out of range field %s", x->nval); 722 m = (int) val; 723 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ 724 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 725 /* BUG: can x->nval ever be null??? */ 726 tempfree(x); 727 x = fieldadr(m); 728 x->ctype = OCELL; /* BUG? why are these needed? */ 729 x->csub = CFLD; 730 return(x); 731} 732 733Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 734{ 735 int k, m, n; 736 char *s; 737 int temp; 738 Cell *x, *y, *z = NULL; 739 740 x = execute(a[0]); 741 y = execute(a[1]); 742 if (a[2] != NULL) 743 z = execute(a[2]); 744 s = getsval(x); 745 k = strlen(s) + 1; 746 if (k <= 1) { 747 tempfree(x); 748 tempfree(y); 749 if (a[2] != NULL) { 750 tempfree(z); 751 } 752 x = gettemp(); 753 setsval(x, ""); 754 return(x); 755 } 756 m = (int) getfval(y); 757 if (m <= 0) 758 m = 1; 759 else if (m > k) 760 m = k; 761 tempfree(y); 762 if (a[2] != NULL) { 763 n = (int) getfval(z); 764 tempfree(z); 765 } else 766 n = k - 1; 767 if (n < 0) 768 n = 0; 769 else if (n > k - m) 770 n = k - m; 771 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); 772 y = gettemp(); 773 temp = s[n+m-1]; /* with thanks to John Linderman */ 774 s[n+m-1] = '\0'; 775 setsval(y, s + m - 1); 776 s[n+m-1] = temp; 777 tempfree(x); 778 return(y); 779} 780 781Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 782{ 783 Cell *x, *y, *z; 784 char *s1, *s2, *p1, *p2, *q; 785 Awkfloat v = 0.0; 786 787 x = execute(a[0]); 788 s1 = getsval(x); 789 y = execute(a[1]); 790 s2 = getsval(y); 791 792 z = gettemp(); 793 for (p1 = s1; *p1 != '\0'; p1++) { 794 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 795 continue; 796 if (*p2 == '\0') { 797 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 798 break; 799 } 800 } 801 tempfree(x); 802 tempfree(y); 803 setfval(z, v); 804 return(z); 805} 806 807#define MAXNUMSIZE 50 808 809int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 810{ 811 char *fmt; 812 char *p, *t; 813 const char *os; 814 Cell *x; 815 int flag = 0, n; 816 int fmtwd; /* format width */ 817 int fmtsz = recsize; 818 char *buf = *pbuf; 819 int bufsize = *pbufsize; 820#define FMTSZ(a) (fmtsz - ((a) - fmt)) 821#define BUFSZ(a) (bufsize - ((a) - buf)) 822 823 static bool first = true; 824 static bool have_a_format = false; 825 826 if (first) { 827 char xbuf[100]; 828 829 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 830 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 831 first = false; 832 } 833 834 os = s; 835 p = buf; 836 if ((fmt = malloc(fmtsz)) == NULL) 837 FATAL("out of memory in format()"); 838 while (*s) { 839 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 840 if (*s != '%') { 841 *p++ = *s++; 842 continue; 843 } 844 if (*(s+1) == '%') { 845 *p++ = '%'; 846 s += 2; 847 continue; 848 } 849 /* have to be real careful in case this is a huge number, eg, %100000d */ 850 fmtwd = atoi(s+1); 851 if (fmtwd < 0) 852 fmtwd = -fmtwd; 853 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 854 for (t = fmt; (*t++ = *s) != '\0'; s++) { 855 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 856 FATAL("format item %.30s... ran format() out of memory", os); 857 /* Ignore size specifiers */ 858 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 859 t--; 860 continue; 861 } 862 if (isalpha((uschar)*s)) 863 break; 864 if (*s == '$') { 865 FATAL("'$' not permitted in awk formats"); 866 } 867 if (*s == '*') { 868 if (a == NULL) { 869 FATAL("not enough args in printf(%s)", os); 870 } 871 x = execute(a); 872 a = a->nnext; 873 snprintf(t - 1, FMTSZ(t - 1), 874 "%d", fmtwd=(int) getfval(x)); 875 if (fmtwd < 0) 876 fmtwd = -fmtwd; 877 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 878 t = fmt + strlen(fmt); 879 tempfree(x); 880 } 881 } 882 *t = '\0'; 883 if (fmtwd < 0) 884 fmtwd = -fmtwd; 885 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 886 switch (*s) { 887 case 'a': case 'A': 888 if (have_a_format) 889 flag = *s; 890 else 891 flag = 'f'; 892 break; 893 case 'f': case 'e': case 'g': case 'E': case 'G': 894 flag = 'f'; 895 break; 896 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 897 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 898 *(t-1) = 'j'; 899 *t = *s; 900 *++t = '\0'; 901 break; 902 case 's': 903 flag = 's'; 904 break; 905 case 'c': 906 flag = 'c'; 907 break; 908 default: 909 WARNING("weird printf conversion %s", fmt); 910 flag = '?'; 911 break; 912 } 913 if (a == NULL) 914 FATAL("not enough args in printf(%s)", os); 915 x = execute(a); 916 a = a->nnext; 917 n = MAXNUMSIZE; 918 if (fmtwd > n) 919 n = fmtwd; 920 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 921 switch (flag) { 922 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 923 t = getsval(x); 924 n = strlen(t); 925 if (fmtwd > n) 926 n = fmtwd; 927 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 928 p += strlen(p); 929 snprintf(p, BUFSZ(p), "%s", t); 930 break; 931 case 'a': 932 case 'A': 933 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 934 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 935 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 936 case 's': 937 t = getsval(x); 938 n = strlen(t); 939 if (fmtwd > n) 940 n = fmtwd; 941 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 942 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 943 snprintf(p, BUFSZ(p), fmt, t); 944 break; 945 case 'c': 946 if (isnum(x)) { 947 if ((int)getfval(x)) 948 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 949 else { 950 *p++ = '\0'; /* explicit null byte */ 951 *p = '\0'; /* next output will start here */ 952 } 953 } else 954 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 955 break; 956 default: 957 FATAL("can't happen: bad conversion %c in format()", flag); 958 } 959 tempfree(x); 960 p += strlen(p); 961 s++; 962 } 963 *p = '\0'; 964 free(fmt); 965 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 966 execute(a); 967 *pbuf = buf; 968 *pbufsize = bufsize; 969 return p - buf; 970} 971 972Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 973{ 974 Cell *x; 975 Node *y; 976 char *buf; 977 int bufsz=3*recsize; 978 979 if ((buf = malloc(bufsz)) == NULL) 980 FATAL("out of memory in awksprintf"); 981 y = a[0]->nnext; 982 x = execute(a[0]); 983 if (format(&buf, &bufsz, getsval(x), y) == -1) 984 FATAL("sprintf string %.30s... too long. can't happen.", buf); 985 tempfree(x); 986 x = gettemp(); 987 x->sval = buf; 988 x->tval = STR; 989 return(x); 990} 991 992Cell *awkprintf(Node **a, int n) /* printf */ 993{ /* a[0] is list of args, starting with format string */ 994 /* a[1] is redirection operator, a[2] is redirection file */ 995 FILE *fp; 996 Cell *x; 997 Node *y; 998 char *buf; 999 int len; 1000 int bufsz=3*recsize; 1001 1002 if ((buf = malloc(bufsz)) == NULL) 1003 FATAL("out of memory in awkprintf"); 1004 y = a[0]->nnext; 1005 x = execute(a[0]); 1006 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1007 FATAL("printf string %.30s... too long. can't happen.", buf); 1008 tempfree(x); 1009 if (a[1] == NULL) { 1010 /* fputs(buf, stdout); */ 1011 fwrite(buf, len, 1, stdout); 1012 if (ferror(stdout)) 1013 FATAL("write error on stdout"); 1014 } else { 1015 fp = redirect(ptoi(a[1]), a[2]); 1016 /* fputs(buf, fp); */ 1017 fwrite(buf, len, 1, fp); 1018 fflush(fp); 1019 if (ferror(fp)) 1020 FATAL("write error on %s", filename(fp)); 1021 } 1022 free(buf); 1023 return(True); 1024} 1025 1026Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1027{ 1028 Awkfloat i, j = 0; 1029 double v; 1030 Cell *x, *y, *z; 1031 1032 x = execute(a[0]); 1033 i = getfval(x); 1034 tempfree(x); 1035 if (n != UMINUS && n != UPLUS) { 1036 y = execute(a[1]); 1037 j = getfval(y); 1038 tempfree(y); 1039 } 1040 z = gettemp(); 1041 switch (n) { 1042 case ADD: 1043 i += j; 1044 break; 1045 case MINUS: 1046 i -= j; 1047 break; 1048 case MULT: 1049 i *= j; 1050 break; 1051 case DIVIDE: 1052 if (j == 0) 1053 FATAL("division by zero"); 1054 i /= j; 1055 break; 1056 case MOD: 1057 if (j == 0) 1058 FATAL("division by zero in mod"); 1059 modf(i/j, &v); 1060 i = i - j * v; 1061 break; 1062 case UMINUS: 1063 i = -i; 1064 break; 1065 case UPLUS: /* handled by getfval(), above */ 1066 break; 1067 case POWER: 1068 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1069 i = ipow(i, (int) j); 1070 else 1071 i = errcheck(pow(i, j), "pow"); 1072 break; 1073 default: /* can't happen */ 1074 FATAL("illegal arithmetic operator %d", n); 1075 } 1076 setfval(z, i); 1077 return(z); 1078} 1079 1080double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1081{ 1082 double v; 1083 1084 if (n <= 0) 1085 return 1; 1086 v = ipow(x, n/2); 1087 if (n % 2 == 0) 1088 return v * v; 1089 else 1090 return x * v * v; 1091} 1092 1093Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1094{ 1095 Cell *x, *z; 1096 int k; 1097 Awkfloat xf; 1098 1099 x = execute(a[0]); 1100 xf = getfval(x); 1101 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1102 if (n == PREINCR || n == PREDECR) { 1103 setfval(x, xf + k); 1104 return(x); 1105 } 1106 z = gettemp(); 1107 setfval(z, xf); 1108 setfval(x, xf + k); 1109 tempfree(x); 1110 return(z); 1111} 1112 1113Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1114{ /* this is subtle; don't muck with it. */ 1115 Cell *x, *y; 1116 Awkfloat xf, yf; 1117 double v; 1118 1119 y = execute(a[1]); 1120 x = execute(a[0]); 1121 if (n == ASSIGN) { /* ordinary assignment */ 1122 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1123 ; /* self-assignment: leave alone unless it's a field or NF */ 1124 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1125 setsval(x, getsval(y)); 1126 x->fval = getfval(y); 1127 x->tval |= NUM; 1128 } 1129 else if (isstr(y)) 1130 setsval(x, getsval(y)); 1131 else if (isnum(y)) 1132 setfval(x, getfval(y)); 1133 else 1134 funnyvar(y, "read value of"); 1135 tempfree(y); 1136 return(x); 1137 } 1138 xf = getfval(x); 1139 yf = getfval(y); 1140 switch (n) { 1141 case ADDEQ: 1142 xf += yf; 1143 break; 1144 case SUBEQ: 1145 xf -= yf; 1146 break; 1147 case MULTEQ: 1148 xf *= yf; 1149 break; 1150 case DIVEQ: 1151 if (yf == 0) 1152 FATAL("division by zero in /="); 1153 xf /= yf; 1154 break; 1155 case MODEQ: 1156 if (yf == 0) 1157 FATAL("division by zero in %%="); 1158 modf(xf/yf, &v); 1159 xf = xf - yf * v; 1160 break; 1161 case POWEQ: 1162 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1163 xf = ipow(xf, (int) yf); 1164 else 1165 xf = errcheck(pow(xf, yf), "pow"); 1166 break; 1167 default: 1168 FATAL("illegal assignment operator %d", n); 1169 break; 1170 } 1171 tempfree(y); 1172 setfval(x, xf); 1173 return(x); 1174} 1175 1176Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1177{ 1178 Cell *x, *y, *z; 1179 int n1, n2; 1180 char *s = NULL; 1181 int ssz = 0; 1182 1183 x = execute(a[0]); 1184 n1 = strlen(getsval(x)); 1185 1186 y = execute(a[1]); 1187 n2 = strlen(getsval(y)); 1188 1189 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat"); 1190 memcpy(s, x->sval, n1); 1191 memcpy(s + n1, y->sval, n2); 1192 s[n1 + n2] = '\0'; 1193 1194 tempfree(x); 1195 tempfree(y); 1196 1197 z = gettemp(); 1198 z->sval = s; 1199 z->tval = STR; 1200 1201 return(z); 1202} 1203 1204Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1205{ 1206 Cell *x; 1207 1208 if (a[0] == NULL) 1209 x = execute(a[1]); 1210 else { 1211 x = execute(a[0]); 1212 if (istrue(x)) { 1213 tempfree(x); 1214 x = execute(a[1]); 1215 } 1216 } 1217 return x; 1218} 1219 1220Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1221{ 1222 Cell *x; 1223 int pair; 1224 1225 pair = ptoi(a[3]); 1226 if (pairstack[pair] == 0) { 1227 x = execute(a[0]); 1228 if (istrue(x)) 1229 pairstack[pair] = 1; 1230 tempfree(x); 1231 } 1232 if (pairstack[pair] == 1) { 1233 x = execute(a[1]); 1234 if (istrue(x)) 1235 pairstack[pair] = 0; 1236 tempfree(x); 1237 x = execute(a[2]); 1238 return(x); 1239 } 1240 return(False); 1241} 1242 1243Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1244{ 1245 Cell *x = NULL, *y, *ap; 1246 const char *s, *origs, *t; 1247 const char *fs = NULL; 1248 char *origfs = NULL; 1249 int sep; 1250 char temp, num[50]; 1251 int n, tempstat, arg3type; 1252 1253 y = execute(a[0]); /* source string */ 1254 origs = s = strdup(getsval(y)); 1255 arg3type = ptoi(a[3]); 1256 if (a[2] == NULL) /* fs string */ 1257 fs = getsval(fsloc); 1258 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1259 x = execute(a[2]); 1260 fs = origfs = strdup(getsval(x)); 1261 tempfree(x); 1262 } else if (arg3type == REGEXPR) 1263 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1264 else 1265 FATAL("illegal type of split"); 1266 sep = *fs; 1267 ap = execute(a[1]); /* array name */ 1268 freesymtab(ap); 1269 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); 1270 ap->tval &= ~STR; 1271 ap->tval |= ARR; 1272 ap->sval = (char *) makesymtab(NSYMTAB); 1273 1274 n = 0; 1275 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1276 /* split(s, a, //); have to arrange that it looks like empty sep */ 1277 arg3type = 0; 1278 fs = ""; 1279 sep = 0; 1280 } 1281 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1282 fa *pfa; 1283 if (arg3type == REGEXPR) { /* it's ready already */ 1284 pfa = (fa *) a[2]; 1285 } else { 1286 pfa = makedfa(fs, 1); 1287 } 1288 if (nematch(pfa,s)) { 1289 tempstat = pfa->initstat; 1290 pfa->initstat = 2; 1291 do { 1292 n++; 1293 snprintf(num, sizeof(num), "%d", n); 1294 temp = *patbeg; 1295 setptr(patbeg, '\0'); 1296 if (is_number(s)) 1297 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1298 else 1299 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1300 setptr(patbeg, temp); 1301 s = patbeg + patlen; 1302 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1303 n++; 1304 snprintf(num, sizeof(num), "%d", n); 1305 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1306 pfa->initstat = tempstat; 1307 goto spdone; 1308 } 1309 } while (nematch(pfa,s)); 1310 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1311 /* cf gsub and refldbld */ 1312 } 1313 n++; 1314 snprintf(num, sizeof(num), "%d", n); 1315 if (is_number(s)) 1316 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1317 else 1318 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1319 spdone: 1320 pfa = NULL; 1321 } else if (sep == ' ') { 1322 for (n = 0; ; ) { 1323#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1324 while (ISWS(*s)) 1325 s++; 1326 if (*s == '\0') 1327 break; 1328 n++; 1329 t = s; 1330 do 1331 s++; 1332 while (*s != '\0' && !ISWS(*s)); 1333 temp = *s; 1334 setptr(s, '\0'); 1335 snprintf(num, sizeof(num), "%d", n); 1336 if (is_number(t)) 1337 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1338 else 1339 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1340 setptr(s, temp); 1341 if (*s != '\0') 1342 s++; 1343 } 1344 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1345 for (n = 0; *s != '\0'; s++) { 1346 char buf[2]; 1347 n++; 1348 snprintf(num, sizeof(num), "%d", n); 1349 buf[0] = *s; 1350 buf[1] = '\0'; 1351 if (isdigit((uschar)buf[0])) 1352 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1353 else 1354 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1355 } 1356 } else if (*s != '\0') { 1357 for (;;) { 1358 n++; 1359 t = s; 1360 while (*s != sep && *s != '\n' && *s != '\0') 1361 s++; 1362 temp = *s; 1363 setptr(s, '\0'); 1364 snprintf(num, sizeof(num), "%d", n); 1365 if (is_number(t)) 1366 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1367 else 1368 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1369 setptr(s, temp); 1370 if (*s++ == '\0') 1371 break; 1372 } 1373 } 1374 tempfree(ap); 1375 tempfree(y); 1376 xfree(origs); 1377 xfree(origfs); 1378 x = gettemp(); 1379 x->tval = NUM; 1380 x->fval = n; 1381 return(x); 1382} 1383 1384Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1385{ 1386 Cell *x; 1387 1388 x = execute(a[0]); 1389 if (istrue(x)) { 1390 tempfree(x); 1391 x = execute(a[1]); 1392 } else { 1393 tempfree(x); 1394 x = execute(a[2]); 1395 } 1396 return(x); 1397} 1398 1399Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1400{ 1401 Cell *x; 1402 1403 x = execute(a[0]); 1404 if (istrue(x)) { 1405 tempfree(x); 1406 x = execute(a[1]); 1407 } else if (a[2] != NULL) { 1408 tempfree(x); 1409 x = execute(a[2]); 1410 } 1411 return(x); 1412} 1413 1414Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1415{ 1416 Cell *x; 1417 1418 for (;;) { 1419 x = execute(a[0]); 1420 if (!istrue(x)) 1421 return(x); 1422 tempfree(x); 1423 x = execute(a[1]); 1424 if (isbreak(x)) { 1425 x = True; 1426 return(x); 1427 } 1428 if (isnext(x) || isexit(x) || isret(x)) 1429 return(x); 1430 tempfree(x); 1431 } 1432} 1433 1434Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1435{ 1436 Cell *x; 1437 1438 for (;;) { 1439 x = execute(a[0]); 1440 if (isbreak(x)) 1441 return True; 1442 if (isnext(x) || isexit(x) || isret(x)) 1443 return(x); 1444 tempfree(x); 1445 x = execute(a[1]); 1446 if (!istrue(x)) 1447 return(x); 1448 tempfree(x); 1449 } 1450} 1451 1452Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1453{ 1454 Cell *x; 1455 1456 x = execute(a[0]); 1457 tempfree(x); 1458 for (;;) { 1459 if (a[1]!=NULL) { 1460 x = execute(a[1]); 1461 if (!istrue(x)) return(x); 1462 else tempfree(x); 1463 } 1464 x = execute(a[3]); 1465 if (isbreak(x)) /* turn off break */ 1466 return True; 1467 if (isnext(x) || isexit(x) || isret(x)) 1468 return(x); 1469 tempfree(x); 1470 x = execute(a[2]); 1471 tempfree(x); 1472 } 1473} 1474 1475Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1476{ 1477 Cell *x, *vp, *arrayp, *cp, *ncp; 1478 Array *tp; 1479 int i; 1480 1481 vp = execute(a[0]); 1482 arrayp = execute(a[1]); 1483 if (!isarr(arrayp)) { 1484 return True; 1485 } 1486 tp = (Array *) arrayp->sval; 1487 tempfree(arrayp); 1488 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1489 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1490 setsval(vp, cp->nval); 1491 ncp = cp->cnext; 1492 x = execute(a[2]); 1493 if (isbreak(x)) { 1494 tempfree(vp); 1495 return True; 1496 } 1497 if (isnext(x) || isexit(x) || isret(x)) { 1498 tempfree(vp); 1499 return(x); 1500 } 1501 tempfree(x); 1502 } 1503 } 1504 return True; 1505} 1506 1507static char *nawk_convert(const char *s, int (*fun_c)(int), 1508 wint_t (*fun_wc)(wint_t)) 1509{ 1510 char *buf = NULL; 1511 char *pbuf = NULL; 1512 const char *ps = NULL; 1513 size_t n = 0; 1514 mbstate_t mbs, mbs2; 1515 wchar_t wc; 1516 size_t sz = MB_CUR_MAX; 1517 1518 if (sz == 1) { 1519 buf = tostring(s); 1520 1521 for (pbuf = buf; *pbuf; pbuf++) 1522 *pbuf = fun_c((uschar)*pbuf); 1523 1524 return buf; 1525 } else { 1526 /* upper/lower character may be shorter/longer */ 1527 buf = tostringN(s, strlen(s) * sz + 1); 1528 1529 memset(&mbs, 0, sizeof(mbs)); 1530 memset(&mbs2, 0, sizeof(mbs2)); 1531 1532 ps = s; 1533 pbuf = buf; 1534 while (n = mbrtowc(&wc, ps, sz, &mbs), 1535 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1536 { 1537 ps += n; 1538 1539 n = wcrtomb(pbuf, fun_wc(wc), &mbs2); 1540 if (n == (size_t)-1) 1541 FATAL("illegal wide character %s", s); 1542 1543 pbuf += n; 1544 } 1545 1546 *pbuf = '\0'; 1547 1548 if (n) 1549 FATAL("illegal byte sequence %s", s); 1550 1551 return buf; 1552 } 1553} 1554 1555static char *nawk_toupper(const char *s) 1556{ 1557 return nawk_convert(s, toupper, towupper); 1558} 1559 1560static char *nawk_tolower(const char *s) 1561{ 1562 return nawk_convert(s, tolower, towlower); 1563} 1564 1565Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1566{ 1567 Cell *x, *y; 1568 Awkfloat u; 1569 int t, sz; 1570 Awkfloat tmp; 1571 char *buf, *fmt; 1572 Node *nextarg; 1573 FILE *fp; 1574 int status = 0; 1575 time_t tv; 1576 struct tm *tm; 1577 1578 t = ptoi(a[0]); 1579 x = execute(a[1]); 1580 nextarg = a[1]->nnext; 1581 switch (t) { 1582 case FLENGTH: 1583 if (isarr(x)) 1584 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1585 else 1586 u = strlen(getsval(x)); 1587 break; 1588 case FLOG: 1589 u = errcheck(log(getfval(x)), "log"); break; 1590 case FINT: 1591 modf(getfval(x), &u); break; 1592 case FEXP: 1593 u = errcheck(exp(getfval(x)), "exp"); break; 1594 case FSQRT: 1595 u = errcheck(sqrt(getfval(x)), "sqrt"); break; 1596 case FSIN: 1597 u = sin(getfval(x)); break; 1598 case FCOS: 1599 u = cos(getfval(x)); break; 1600 case FATAN: 1601 if (nextarg == NULL) { 1602 WARNING("atan2 requires two arguments; returning 1.0"); 1603 u = 1.0; 1604 } else { 1605 y = execute(a[1]->nnext); 1606 u = atan2(getfval(x), getfval(y)); 1607 tempfree(y); 1608 nextarg = nextarg->nnext; 1609 } 1610 break; 1611 case FCOMPL: 1612 u = ~((int)getfval(x)); 1613 break; 1614 case FAND: 1615 if (nextarg == 0) { 1616 WARNING("and requires two arguments; returning 0"); 1617 u = 0; 1618 break; 1619 } 1620 y = execute(a[1]->nnext); 1621 u = ((int)getfval(x)) & ((int)getfval(y)); 1622 tempfree(y); 1623 nextarg = nextarg->nnext; 1624 break; 1625 case FFOR: 1626 if (nextarg == 0) { 1627 WARNING("or requires two arguments; returning 0"); 1628 u = 0; 1629 break; 1630 } 1631 y = execute(a[1]->nnext); 1632 u = ((int)getfval(x)) | ((int)getfval(y)); 1633 tempfree(y); 1634 nextarg = nextarg->nnext; 1635 break; 1636 case FXOR: 1637 if (nextarg == 0) { 1638 WARNING("xor requires two arguments; returning 0"); 1639 u = 0; 1640 break; 1641 } 1642 y = execute(a[1]->nnext); 1643 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1644 tempfree(y); 1645 nextarg = nextarg->nnext; 1646 break; 1647 case FLSHIFT: 1648 if (nextarg == 0) { 1649 WARNING("lshift requires two arguments; returning 0"); 1650 u = 0; 1651 break; 1652 } 1653 y = execute(a[1]->nnext); 1654 u = ((int)getfval(x)) << ((int)getfval(y)); 1655 tempfree(y); 1656 nextarg = nextarg->nnext; 1657 break; 1658 case FRSHIFT: 1659 if (nextarg == 0) { 1660 WARNING("rshift requires two arguments; returning 0"); 1661 u = 0; 1662 break; 1663 } 1664 y = execute(a[1]->nnext); 1665 u = ((int)getfval(x)) >> ((int)getfval(y)); 1666 tempfree(y); 1667 nextarg = nextarg->nnext; 1668 break; 1669 case FSYSTEM: 1670 fflush(stdout); /* in case something is buffered already */ 1671 status = system(getsval(x)); 1672 u = status; 1673 if (status != -1) { 1674 if (WIFEXITED(status)) { 1675 u = WEXITSTATUS(status); 1676 } else if (WIFSIGNALED(status)) { 1677 u = WTERMSIG(status) + 256; 1678#ifdef WCOREDUMP 1679 if (WCOREDUMP(status)) 1680 u += 256; 1681#endif 1682 } else /* something else?!? */ 1683 u = 0; 1684 } 1685 break; 1686 case FRAND: 1687 /* random() returns numbers in [0..2^31-1] 1688 * in order to get a number in [0, 1), divide it by 2^31 1689 */ 1690 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1691 break; 1692 case FSRAND: 1693 if (isrec(x)) /* no argument provided */ 1694 u = time((time_t *)0); 1695 else 1696 u = getfval(x); 1697 tmp = u; 1698 srandom((unsigned long) u); 1699 u = srand_seed; 1700 srand_seed = tmp; 1701 break; 1702 case FTOUPPER: 1703 case FTOLOWER: 1704 if (t == FTOUPPER) 1705 buf = nawk_toupper(getsval(x)); 1706 else 1707 buf = nawk_tolower(getsval(x)); 1708 tempfree(x); 1709 x = gettemp(); 1710 setsval(x, buf); 1711 free(buf); 1712 return x; 1713 case FFLUSH: 1714 if (isrec(x) || strlen(getsval(x)) == 0) { 1715 flush_all(); /* fflush() or fflush("") -> all */ 1716 u = 0; 1717 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1718 u = EOF; 1719 else 1720 u = fflush(fp); 1721 break; 1722 case FSYSTIME: 1723 u = time((time_t *) 0); 1724 break; 1725 case FSTRFTIME: 1726 /* strftime([format [,timestamp]]) */ 1727 if (nextarg) { 1728 y = execute(nextarg); 1729 nextarg = nextarg->nnext; 1730 tv = (time_t) getfval(y); 1731 tempfree(y); 1732 } else 1733 tv = time((time_t *) 0); 1734 tm = localtime(&tv); 1735 if (tm == NULL) 1736 FATAL("bad time %ld", (long)tv); 1737 1738 if (isrec(x)) { 1739 /* format argument not provided, use default */ 1740 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1741 } else 1742 fmt = tostring(getsval(x)); 1743 1744 sz = 32; 1745 buf = NULL; 1746 do { 1747 if ((buf = realloc(buf, (sz *= 2))) == NULL) 1748 FATAL("out of memory in strftime"); 1749 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1750 1751 y = gettemp(); 1752 setsval(y, buf); 1753 free(fmt); 1754 free(buf); 1755 1756 return y; 1757 default: /* can't happen */ 1758 FATAL("illegal function type %d", t); 1759 break; 1760 } 1761 tempfree(x); 1762 x = gettemp(); 1763 setfval(x, u); 1764 if (nextarg != NULL) { 1765 WARNING("warning: function has too many arguments"); 1766 for ( ; nextarg; nextarg = nextarg->nnext) 1767 execute(nextarg); 1768 } 1769 return(x); 1770} 1771 1772Cell *printstat(Node **a, int n) /* print a[0] */ 1773{ 1774 Node *x; 1775 Cell *y; 1776 FILE *fp; 1777 1778 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1779 fp = stdout; 1780 else 1781 fp = redirect(ptoi(a[1]), a[2]); 1782 for (x = a[0]; x != NULL; x = x->nnext) { 1783 y = execute(x); 1784 fputs(getpssval(y), fp); 1785 tempfree(y); 1786 if (x->nnext == NULL) 1787 fputs(getsval(orsloc), fp); 1788 else 1789 fputs(getsval(ofsloc), fp); 1790 } 1791 if (a[1] != NULL) 1792 fflush(fp); 1793 if (ferror(fp)) 1794 FATAL("write error on %s", filename(fp)); 1795 return(True); 1796} 1797 1798Cell *nullproc(Node **a, int n) 1799{ 1800 return 0; 1801} 1802 1803 1804FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1805{ 1806 FILE *fp; 1807 Cell *x; 1808 char *fname; 1809 1810 x = execute(b); 1811 fname = getsval(x); 1812 fp = openfile(a, fname, NULL); 1813 if (fp == NULL) 1814 FATAL("can't open file %s", fname); 1815 tempfree(x); 1816 return fp; 1817} 1818 1819struct files { 1820 FILE *fp; 1821 const char *fname; 1822 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1823} *files; 1824 1825size_t nfiles; 1826 1827static void stdinit(void) /* in case stdin, etc., are not constants */ 1828{ 1829 nfiles = FOPEN_MAX; 1830 files = calloc(nfiles, sizeof(*files)); 1831 if (files == NULL) 1832 FATAL("can't allocate file memory for %zu files", nfiles); 1833 files[0].fp = stdin; 1834 files[0].fname = "/dev/stdin"; 1835 files[0].mode = LT; 1836 files[1].fp = stdout; 1837 files[1].fname = "/dev/stdout"; 1838 files[1].mode = GT; 1839 files[2].fp = stderr; 1840 files[2].fname = "/dev/stderr"; 1841 files[2].mode = GT; 1842} 1843 1844FILE *openfile(int a, const char *us, bool *pnewflag) 1845{ 1846 const char *s = us; 1847 size_t i; 1848 int m; 1849 FILE *fp = NULL; 1850 1851 if (*s == '\0') 1852 FATAL("null file name in print or getline"); 1853 for (i = 0; i < nfiles; i++) 1854 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1855 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1856 a == FFLUSH)) { 1857 if (pnewflag) 1858 *pnewflag = false; 1859 return files[i].fp; 1860 } 1861 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1862 return NULL; 1863 1864 for (i = 0; i < nfiles; i++) 1865 if (files[i].fp == NULL) 1866 break; 1867 if (i >= nfiles) { 1868 struct files *nf; 1869 size_t nnf = nfiles + FOPEN_MAX; 1870 nf = realloc(files, nnf * sizeof(*nf)); 1871 if (nf == NULL) 1872 FATAL("cannot grow files for %s and %zu files", s, nnf); 1873 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1874 nfiles = nnf; 1875 files = nf; 1876 } 1877 fflush(stdout); /* force a semblance of order */ 1878 m = a; 1879 if (a == GT) { 1880 fp = fopen(s, "w"); 1881 } else if (a == APPEND) { 1882 fp = fopen(s, "a"); 1883 m = GT; /* so can mix > and >> */ 1884 } else if (a == '|') { /* output pipe */ 1885 fp = popen(s, "w"); 1886 } else if (a == LE) { /* input pipe */ 1887 fp = popen(s, "r"); 1888 } else if (a == LT) { /* getline <file */ 1889 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1890 } else /* can't happen */ 1891 FATAL("illegal redirection %d", a); 1892 if (fp != NULL) { 1893 files[i].fname = tostring(s); 1894 files[i].fp = fp; 1895 files[i].mode = m; 1896 if (pnewflag) 1897 *pnewflag = true; 1898 if (fp != stdin && fp != stdout && fp != stderr) 1899 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1900 } 1901 return fp; 1902} 1903 1904const char *filename(FILE *fp) 1905{ 1906 size_t i; 1907 1908 for (i = 0; i < nfiles; i++) 1909 if (fp == files[i].fp) 1910 return files[i].fname; 1911 return "???"; 1912} 1913 1914 Cell *closefile(Node **a, int n) 1915 { 1916 Cell *x; 1917 size_t i; 1918 bool stat; 1919 1920 x = execute(a[0]); 1921 getsval(x); 1922 stat = true; 1923 for (i = 0; i < nfiles; i++) { 1924 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1925 continue; 1926 if (ferror(files[i].fp)) 1927 FATAL("i/o error occurred on %s", files[i].fname); 1928 if (files[i].mode == '|' || files[i].mode == LE) 1929 stat = pclose(files[i].fp) == -1; 1930 else 1931 stat = fclose(files[i].fp) == EOF; 1932 if (stat) 1933 FATAL("i/o error occurred closing %s", files[i].fname); 1934 if (i > 2) /* don't do /dev/std... */ 1935 xfree(files[i].fname); 1936 files[i].fname = NULL; /* watch out for ref thru this */ 1937 files[i].fp = NULL; 1938 } 1939 tempfree(x); 1940 x = gettemp(); 1941 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1942 return(x); 1943 } 1944 1945void closeall(void) 1946{ 1947 size_t i; 1948 bool stat = false; 1949 1950 for (i = 0; i < nfiles; i++) { 1951 if (! files[i].fp) 1952 continue; 1953 if (ferror(files[i].fp)) 1954 FATAL( "i/o error occurred on %s", files[i].fname ); 1955 if (files[i].mode == '|' || files[i].mode == LE) 1956 stat = pclose(files[i].fp) == -1; 1957 else 1958 stat = fclose(files[i].fp) == EOF; 1959 if (stat) 1960 FATAL( "i/o error occurred while closing %s", files[i].fname ); 1961 } 1962} 1963 1964static void flush_all(void) 1965{ 1966 size_t i; 1967 1968 for (i = 0; i < nfiles; i++) 1969 if (files[i].fp) 1970 fflush(files[i].fp); 1971} 1972 1973void backsub(char **pb_ptr, const char **sptr_ptr); 1974 1975Cell *sub(Node **a, int nnn) /* substitute command */ 1976{ 1977 const char *sptr, *q; 1978 Cell *x, *y, *result; 1979 char *t, *buf, *pb; 1980 fa *pfa; 1981 int bufsz = recsize; 1982 1983 if ((buf = malloc(bufsz)) == NULL) 1984 FATAL("out of memory in sub"); 1985 x = execute(a[3]); /* target string */ 1986 t = getsval(x); 1987 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 1988 pfa = (fa *) a[1]; /* regular expression */ 1989 else { 1990 y = execute(a[1]); 1991 pfa = makedfa(getsval(y), 1); 1992 tempfree(y); 1993 } 1994 y = execute(a[2]); /* replacement string */ 1995 result = False; 1996 if (pmatch(pfa, t)) { 1997 sptr = t; 1998 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 1999 pb = buf; 2000 while (sptr < patbeg) 2001 *pb++ = *sptr++; 2002 sptr = getsval(y); 2003 while (*sptr != '\0') { 2004 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2005 if (*sptr == '\\') { 2006 backsub(&pb, &sptr); 2007 } else if (*sptr == '&') { 2008 sptr++; 2009 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2010 for (q = patbeg; q < patbeg+patlen; ) 2011 *pb++ = *q++; 2012 } else 2013 *pb++ = *sptr++; 2014 } 2015 *pb = '\0'; 2016 if (pb > buf + bufsz) 2017 FATAL("sub result1 %.30s too big; can't happen", buf); 2018 sptr = patbeg + patlen; 2019 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2020 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2021 while ((*pb++ = *sptr++) != '\0') 2022 continue; 2023 } 2024 if (pb > buf + bufsz) 2025 FATAL("sub result2 %.30s too big; can't happen", buf); 2026 setsval(x, buf); /* BUG: should be able to avoid copy */ 2027 result = True; 2028 } 2029 tempfree(x); 2030 tempfree(y); 2031 free(buf); 2032 return result; 2033} 2034 2035Cell *gsub(Node **a, int nnn) /* global substitute */ 2036{ 2037 Cell *x, *y; 2038 char *rptr, *pb; 2039 const char *q, *t, *sptr; 2040 char *buf; 2041 fa *pfa; 2042 int mflag, tempstat, num; 2043 int bufsz = recsize; 2044 2045 if ((buf = malloc(bufsz)) == NULL) 2046 FATAL("out of memory in gsub"); 2047 mflag = 0; /* if mflag == 0, can replace empty string */ 2048 num = 0; 2049 x = execute(a[3]); /* target string */ 2050 t = getsval(x); 2051 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2052 pfa = (fa *) a[1]; /* regular expression */ 2053 else { 2054 y = execute(a[1]); 2055 pfa = makedfa(getsval(y), 1); 2056 tempfree(y); 2057 } 2058 y = execute(a[2]); /* replacement string */ 2059 if (pmatch(pfa, t)) { 2060 tempstat = pfa->initstat; 2061 pfa->initstat = 2; 2062 pb = buf; 2063 rptr = getsval(y); 2064 do { 2065 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2066 if (mflag == 0) { /* can replace empty */ 2067 num++; 2068 sptr = rptr; 2069 while (*sptr != '\0') { 2070 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2071 if (*sptr == '\\') { 2072 backsub(&pb, &sptr); 2073 } else if (*sptr == '&') { 2074 sptr++; 2075 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2076 for (q = patbeg; q < patbeg+patlen; ) 2077 *pb++ = *q++; 2078 } else 2079 *pb++ = *sptr++; 2080 } 2081 } 2082 if (*t == '\0') /* at end */ 2083 goto done; 2084 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2085 *pb++ = *t++; 2086 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2087 FATAL("gsub result0 %.30s too big; can't happen", buf); 2088 mflag = 0; 2089 } 2090 else { /* matched nonempty string */ 2091 num++; 2092 sptr = t; 2093 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2094 while (sptr < patbeg) 2095 *pb++ = *sptr++; 2096 sptr = rptr; 2097 while (*sptr != '\0') { 2098 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2099 if (*sptr == '\\') { 2100 backsub(&pb, &sptr); 2101 } else if (*sptr == '&') { 2102 sptr++; 2103 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2104 for (q = patbeg; q < patbeg+patlen; ) 2105 *pb++ = *q++; 2106 } else 2107 *pb++ = *sptr++; 2108 } 2109 t = patbeg + patlen; 2110 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2111 goto done; 2112 if (pb > buf + bufsz) 2113 FATAL("gsub result1 %.30s too big; can't happen", buf); 2114 mflag = 1; 2115 } 2116 } while (pmatch(pfa,t)); 2117 sptr = t; 2118 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2119 while ((*pb++ = *sptr++) != '\0') 2120 continue; 2121 done: if (pb < buf + bufsz) 2122 *pb = '\0'; 2123 else if (*(pb-1) != '\0') 2124 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2125 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2126 pfa->initstat = tempstat; 2127 } 2128 tempfree(x); 2129 tempfree(y); 2130 x = gettemp(); 2131 x->tval = NUM; 2132 x->fval = num; 2133 free(buf); 2134 return(x); 2135} 2136 2137Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2138 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2139{ 2140 Cell *x, *y, *res, *h; 2141 char *rptr; 2142 const char *sptr; 2143 char *buf, *pb; 2144 const char *t, *q; 2145 fa *pfa; 2146 int mflag, tempstat, num, whichm; 2147 int bufsz = recsize; 2148 2149 if ((buf = malloc(bufsz)) == NULL) 2150 FATAL("out of memory in gensub"); 2151 mflag = 0; /* if mflag == 0, can replace empty string */ 2152 num = 0; 2153 x = execute(a[4]); /* source string */ 2154 t = getsval(x); 2155 res = copycell(x); /* target string - initially copy of source */ 2156 res->csub = CTEMP; /* result values are temporary */ 2157 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2158 pfa = (fa *) a[1]; /* regular expression */ 2159 else { 2160 y = execute(a[1]); 2161 pfa = makedfa(getsval(y), 1); 2162 tempfree(y); 2163 } 2164 y = execute(a[2]); /* replacement string */ 2165 h = execute(a[3]); /* which matches should be replaced */ 2166 sptr = getsval(h); 2167 if (sptr[0] == 'g' || sptr[0] == 'G') 2168 whichm = -1; 2169 else { 2170 /* 2171 * The specified number is index of replacement, starting 2172 * from 1. GNU awk treats index lower than 0 same as 2173 * 1, we do same for compatibility. 2174 */ 2175 whichm = (int) getfval(h) - 1; 2176 if (whichm < 0) 2177 whichm = 0; 2178 } 2179 tempfree(h); 2180 2181 if (pmatch(pfa, t)) { 2182 char *sl; 2183 2184 tempstat = pfa->initstat; 2185 pfa->initstat = 2; 2186 pb = buf; 2187 rptr = getsval(y); 2188 /* 2189 * XXX if there are any backreferences in subst string, 2190 * complain now. 2191 */ 2192 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2193 if (strchr("0123456789", sl[1])) { 2194 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2195 } 2196 } 2197 2198 do { 2199 if (whichm >= 0 && whichm != num) { 2200 num++; 2201 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2202 2203 /* copy the part of string up to and including 2204 * match to output buffer */ 2205 while (t < patbeg + patlen) 2206 *pb++ = *t++; 2207 continue; 2208 } 2209 2210 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2211 if (mflag == 0) { /* can replace empty */ 2212 num++; 2213 sptr = rptr; 2214 while (*sptr != 0) { 2215 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2216 if (*sptr == '\\') { 2217 backsub(&pb, &sptr); 2218 } else if (*sptr == '&') { 2219 sptr++; 2220 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2221 for (q = patbeg; q < patbeg+patlen; ) 2222 *pb++ = *q++; 2223 } else 2224 *pb++ = *sptr++; 2225 } 2226 } 2227 if (*t == 0) /* at end */ 2228 goto done; 2229 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2230 *pb++ = *t++; 2231 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2232 FATAL("gensub result0 %.30s too big; can't happen", buf); 2233 mflag = 0; 2234 } 2235 else { /* matched nonempty string */ 2236 num++; 2237 sptr = t; 2238 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2239 while (sptr < patbeg) 2240 *pb++ = *sptr++; 2241 sptr = rptr; 2242 while (*sptr != 0) { 2243 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2244 if (*sptr == '\\') { 2245 backsub(&pb, &sptr); 2246 } else if (*sptr == '&') { 2247 sptr++; 2248 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2249 for (q = patbeg; q < patbeg+patlen; ) 2250 *pb++ = *q++; 2251 } else 2252 *pb++ = *sptr++; 2253 } 2254 t = patbeg + patlen; 2255 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2256 goto done; 2257 if (pb > buf + bufsz) 2258 FATAL("gensub result1 %.30s too big; can't happen", buf); 2259 mflag = 1; 2260 } 2261 } while (pmatch(pfa,t)); 2262 sptr = t; 2263 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2264 while ((*pb++ = *sptr++) != 0) 2265 ; 2266 done: if (pb > buf + bufsz) 2267 FATAL("gensub result2 %.30s too big; can't happen", buf); 2268 *pb = '\0'; 2269 setsval(res, buf); 2270 pfa->initstat = tempstat; 2271 } 2272 tempfree(x); 2273 tempfree(y); 2274 free(buf); 2275 return(res); 2276} 2277 2278void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2279{ /* sptr[0] == '\\' */ 2280 char *pb = *pb_ptr; 2281 const char *sptr = *sptr_ptr; 2282 static bool first = true; 2283 static bool do_posix = false; 2284 2285 if (first) { 2286 first = false; 2287 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2288 } 2289 2290 if (sptr[1] == '\\') { 2291 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2292 *pb++ = '\\'; 2293 *pb++ = '&'; 2294 sptr += 4; 2295 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2296 *pb++ = '\\'; 2297 sptr += 2; 2298 } else if (do_posix) { /* \\x -> \x */ 2299 sptr++; 2300 *pb++ = *sptr++; 2301 } else { /* \\x -> \\x */ 2302 *pb++ = *sptr++; 2303 *pb++ = *sptr++; 2304 } 2305 } else if (sptr[1] == '&') { /* literal & */ 2306 sptr++; 2307 *pb++ = *sptr++; 2308 } else /* literal \ */ 2309 *pb++ = *sptr++; 2310 2311 *pb_ptr = pb; 2312 *sptr_ptr = sptr; 2313} 2314