1/* 2 * subst.c - various substitutions 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 1992-1997 Paul Falstad 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Paul Falstad or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Paul Falstad and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Paul Falstad and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Paul Falstad and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30#include "zsh.mdh" 31#include "subst.pro" 32 33#define LF_ARRAY 1 34 35/**/ 36char nulstring[] = {Nularg, '\0'}; 37 38/* Do substitutions before fork. These are: 39 * - Process substitution: <(...), >(...), =(...) 
40 * - Parameter substitution 41 * - Command substitution 42 * Followed by 43 * - Quote removal 44 * - Brace expansion 45 * - Tilde and equals substitution 46 * 47 * PREFORK_* flags are defined in zsh.h 48 */ 49 50/**/ 51mod_export void 52prefork(LinkList list, int flags) 53{ 54 LinkNode node, stop = 0; 55 int keep = 0, asssub = (flags & PREFORK_TYPESET) && isset(KSHTYPESET); 56 57 queue_signals(); 58 for (node = firstnode(list); node; incnode(node)) { 59 if (isset(SHFILEEXPANSION)) { 60 /* 61 * Here and below we avoid taking the address 62 * of a void * and then pretending it's a char ** 63 * instead of a void ** by a little inefficiency. 64 * This could be avoided with some extra linked list 65 * machinery, but that would need quite a lot of work 66 * to ensure consistency. What we really need is 67 * templates... 68 */ 69 char *cptr = (char *)getdata(node); 70 filesub(&cptr, flags & (PREFORK_TYPESET|PREFORK_ASSIGN)); 71 /* 72 * The assignment is so simple it's not worth 73 * testing if cptr changed... 74 */ 75 setdata(node, cptr); 76 } 77 if (!(node = stringsubst(list, node, 78 flags & (PREFORK_SINGLE|PREFORK_SPLIT| 79 PREFORK_SHWORDSPLIT| 80 PREFORK_NOSHWORDSPLIT), 81 asssub))) { 82 unqueue_signals(); 83 return; 84 } 85 } 86 for (node = firstnode(list); node; incnode(node)) { 87 if (node == stop) 88 keep = 0; 89 if (*(char *)getdata(node)) { 90 remnulargs(getdata(node)); 91 if (unset(IGNOREBRACES) && !(flags & PREFORK_SINGLE)) { 92 if (!keep) 93 stop = nextnode(node); 94 while (hasbraces(getdata(node))) { 95 keep = 1; 96 xpandbraces(list, &node); 97 } 98 } 99 if (unset(SHFILEEXPANSION)) { 100 char *cptr = (char *)getdata(node); 101 filesub(&cptr, flags & (PREFORK_TYPESET|PREFORK_ASSIGN)); 102 setdata(node, cptr); 103 } 104 } else if (!(flags & PREFORK_SINGLE) && !keep) 105 uremnode(list, node); 106 if (errflag) { 107 unqueue_signals(); 108 return; 109 } 110 } 111 unqueue_signals(); 112} 113 114/* 115 * Perform $'...' quoting. 
The arguments are 116 * strstart The start of the string 117 * pstrdpos Initially, *pstrdpos is the position where the $ of the $' 118 * occurs. It will be updated to the next character after the 119 * last ' of the $'...'. 120 * The return value is the entire allocated string from strstart on the heap. 121 * Note the original string may be modified in the process. 122 */ 123/**/ 124static char * 125stringsubstquote(char *strstart, char **pstrdpos) 126{ 127 int len; 128 char *strdpos = *pstrdpos, *strsub, *strret; 129 130 strsub = getkeystring(strdpos+2, &len, 131 GETKEYS_DOLLARS_QUOTE, NULL); 132 len += 2; /* measured from strdpos */ 133 134 if (strstart != strdpos) { 135 *strdpos = '\0'; 136 if (strdpos[len]) 137 strret = zhtricat(strstart, strsub, strdpos + len); 138 else 139 strret = dyncat(strstart, strsub); 140 } else if (strdpos[len]) 141 strret = dyncat(strsub, strdpos + len); 142 else 143 strret = strsub; 144 145 *pstrdpos = strret + (strdpos - strstart) + strlen(strsub); 146 147 return strret; 148} 149 150/**/ 151static LinkNode 152stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) 153{ 154 int qt; 155 char *str3 = (char *)getdata(node); 156 char *str = str3, c; 157 158 while (!errflag && (c = *str)) { 159 if (((c = *str) == Inang || c == OutangProc || 160 (str == str3 && c == Equals)) 161 && str[1] == Inpar) { 162 char *subst, *rest, *snew, *sptr; 163 int str3len = str - str3, sublen, restlen; 164 165 if (c == Inang || c == OutangProc) 166 subst = getproc(str, &rest); /* <(...) or >(...) */ 167 else 168 subst = getoutputfile(str, &rest); /* =(...) 
*/ 169 if (errflag) 170 return NULL; 171 if (!subst) 172 subst = ""; 173 174 sublen = strlen(subst); 175 restlen = strlen(rest); 176 sptr = snew = hcalloc(str3len + sublen + restlen + 1); 177 if (str3len) { 178 memcpy(sptr, str3, str3len); 179 sptr += str3len; 180 } 181 if (sublen) { 182 memcpy(sptr, subst, sublen); 183 sptr += sublen; 184 } 185 if (restlen) 186 memcpy(sptr, rest, restlen); 187 sptr[restlen] = '\0'; 188 str3 = snew; 189 str = snew + str3len + sublen; 190 setdata(node, str3); 191 } else 192 str++; 193 } 194 str = str3; 195 196 while (!errflag && (c = *str)) { 197 if ((qt = c == Qstring) || c == String) { 198 if ((c = str[1]) == Inpar) { 199 if (!qt) 200 list->list.flags |= LF_ARRAY; 201 str++; 202 goto comsub; 203 } else if (c == Inbrack) { 204 /* $[...] */ 205 char *str2 = str; 206 str2++; 207 if (skipparens(Inbrack, Outbrack, &str2)) { 208 zerr("closing bracket missing"); 209 return NULL; 210 } 211 str2[-1] = *str = '\0'; 212 str = arithsubst(str + 2, &str3, str2); 213 setdata(node, (void *) str3); 214 continue; 215 } else if (c == Snull) { 216 str3 = stringsubstquote(str3, &str); 217 setdata(node, (void *) str3); 218 continue; 219 } else { 220 /* 221 * To avoid setting and unsetting the SHWORDSPLIT 222 * option, we pass flags if we need to control it for 223 * recursive expansion via multsub() 224 * If PREFORK_NOSHWORDSPLIT is set, the option is 225 * disregarded; otherwise, use it if set. 226 * If PREFORK_SPLIT is set, splitting is forced, 227 * regardless of the option 228 * If PREFORK_SHWORDSPLIT is already set, or used by the 229 * previous two to signal paramsubst(), we'll do 230 * sh-style wordsplitting on parameters. 
231 */ 232 if ((isset(SHWORDSPLIT) && 233 !(pf_flags & PREFORK_NOSHWORDSPLIT)) || 234 (pf_flags & PREFORK_SPLIT)) 235 pf_flags |= PREFORK_SHWORDSPLIT; 236 node = paramsubst( 237 list, node, &str, qt, 238 pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT)); 239 if (errflag || !node) 240 return NULL; 241 str3 = (char *)getdata(node); 242 continue; 243 } 244 } else if ((qt = c == Qtick) || (c == Tick ? (list->list.flags |= LF_ARRAY) : 0)) 245 comsub: { 246 LinkList pl; 247 char *s, *str2 = str; 248 char endchar; 249 int l1, l2; 250 251 if (c == Inpar) { 252 endchar = Outpar; 253 str[-1] = '\0'; 254#ifdef DEBUG 255 if (skipparens(Inpar, Outpar, &str)) 256 dputs("BUG: parse error in command substitution"); 257#else 258 skipparens(Inpar, Outpar, &str); 259#endif 260 str--; 261 } else { 262 endchar = c; 263 *str = '\0'; 264 265 while (*++str != endchar) 266 DPUTS(!*str, "BUG: parse error in command substitution"); 267 } 268 *str++ = '\0'; 269 if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') { 270 /* Math substitution of the form $((...)) */ 271 str[-2] = '\0'; 272 if (isset(EXECOPT)) 273 str = arithsubst(str2 + 2, &str3, str); 274 else 275 strncpy(str3, str2, 1); 276 setdata(node, (void *) str3); 277 continue; 278 } 279 280 /* It is a command substitution, which will be parsed again * 281 * by the lexer, so we untokenize it first, but we cannot use * 282 * untokenize() since in the case of `...` some Bnulls should * 283 * be left unchanged. Note that the lexer doesn't tokenize * 284 * the body of a command substitution so if there are some * 285 * tokens here they are from a ${(e)~...} substitution. 
*/ 286 for (str = str2; (c = *++str); ) 287 if (itok(c) && c != Nularg && 288 !(endchar != Outpar && c == Bnull && 289 (str[1] == '$' || str[1] == '\\' || str[1] == '`' || 290 (qt && str[1] == '"')))) 291 *str = ztokens[c - Pound]; 292 str++; 293 if (!(pl = getoutput(str2 + 1, qt || 294 (pf_flags & PREFORK_SINGLE)))) { 295 zerr("parse error in command substitution"); 296 return NULL; 297 } 298 if (endchar == Outpar) 299 str2--; 300 if (!(s = (char *) ugetnode(pl))) { 301 str = strcpy(str2, str); 302 continue; 303 } 304 if (!qt && (pf_flags & PREFORK_SINGLE) && isset(GLOBSUBST)) 305 shtokenize(s); 306 l1 = str2 - str3; 307 l2 = strlen(s); 308 if (nonempty(pl)) { 309 LinkNode n = lastnode(pl); 310 str2 = (char *) hcalloc(l1 + l2 + 1); 311 strcpy(str2, str3); 312 strcpy(str2 + l1, s); 313 setdata(node, str2); 314 insertlinklist(pl, node, list); 315 s = (char *) getdata(node = n); 316 l1 = 0; 317 l2 = strlen(s); 318 } 319 str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1); 320 if (l1) 321 strcpy(str2, str3); 322 strcpy(str2 + l1, s); 323 str = strcpy(str2 + l1 + l2, str); 324 str3 = str2; 325 setdata(node, str3); 326 continue; 327 } else if (asssub && ((c == '=') || c == Equals) && str != str3) { 328 /* 329 * We are in a normal argument which looks like an assignment 330 * and is to be treated like one, with no word splitting. 331 */ 332 pf_flags |= PREFORK_SINGLE; 333 } 334 str++; 335 } 336 return errflag ? NULL : node; 337} 338 339/* 340 * Simplified version of the prefork/singsub processing where 341 * we only do substitutions appropriate to quoting. Currently 342 * this means only the expansions in $'....'. This is used 343 * for the end tag for here documents. As we are not doing 344 * `...` expansions, we just use those for quoting. However, 345 * they stay in the text. This is weird, but that's not 346 * my fault. 347 * 348 * The remnulargs() makes this consistent with the other forms 349 * of substitution, indicating that quotes have been fully 350 * processed. 
351 * 352 * The fully processed string is returned. 353 */ 354 355/**/ 356char * 357quotesubst(char *str) 358{ 359 char *s = str; 360 361 while (*s) { 362 if (*s == String && s[1] == Snull) { 363 str = stringsubstquote(str, &s); 364 } else { 365 s++; 366 } 367 } 368 remnulargs(str); 369 return str; 370} 371 372/**/ 373mod_export void 374globlist(LinkList list, int nountok) 375{ 376 LinkNode node, next; 377 378 badcshglob = 0; 379 for (node = firstnode(list); !errflag && node; node = next) { 380 next = nextnode(node); 381 zglob(list, node, nountok); 382 } 383 if (badcshglob == 1) 384 zerr("no match"); 385} 386 387/* perform substitution on a single word */ 388 389/**/ 390mod_export void 391singsub(char **s) 392{ 393 local_list1(foo); 394 395 init_list1(foo, *s); 396 397 prefork(&foo, PREFORK_SINGLE); 398 if (errflag) 399 return; 400 *s = (char *) ugetnode(&foo); 401 DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!"); 402} 403 404/* Perform substitution on a single word, *s. Unlike with singsub(), the 405 * result can be more than one word. If split is non-zero, the string is 406 * first word-split using IFS, but only for non-quoted "whitespace" (as 407 * indicated by Dnull, Snull, Tick, Bnull, Inpar, and Outpar). 408 * 409 * If arg "a" was non-NULL and we got an array as a result of the parsing, 410 * the strings are stored in *a (even for a 1-element array) and *isarr is 411 * set to 1. Otherwise, *isarr is set to 0, and the result is put into *s, 412 * with any necessary joining of multiple elements using sep (which can be 413 * NULL to use IFS). The return value is true iff the expansion resulted 414 * in an empty list. */ 415 416/**/ 417static int 418multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep) 419{ 420 int l; 421 char **r, **p, *x = *s; 422 local_list1(foo); 423 424 if (pf_flags & PREFORK_SPLIT) { 425 /* 426 * This doesn't handle multibyte characters, but we're 427 * looking for whitespace separators which must be ASCII. 
428 */ 429 for ( ; *x; x += l) { 430 char c = (l = *x == Meta) ? x[1] ^ 32 : *x; 431 l++; 432 if (!iwsep(STOUC(c))) 433 break; 434 } 435 } 436 437 init_list1(foo, x); 438 439 if (pf_flags & PREFORK_SPLIT) { 440 LinkNode n = firstnode(&foo); 441 int inq = 0, inp = 0; 442 MB_METACHARINIT(); 443 for ( ; *x; x += l) { 444 int rawc = -1; 445 convchar_t c; 446 if (itok(STOUC(*x))) { 447 /* token, can't be separator, must be single byte */ 448 rawc = *x; 449 l = 1; 450 } else { 451 l = MB_METACHARLENCONV(x, &c); 452 if (!inq && !inp && WC_ZISTYPE(c, ISEP)) { 453 *x = '\0'; 454 for (x += l; *x; x += l) { 455 if (itok(STOUC(*x))) { 456 /* as above */ 457 rawc = *x; 458 l = 1; 459 break; 460 } 461 l = MB_METACHARLENCONV(x, &c); 462 if (!WC_ZISTYPE(c, ISEP)) 463 break; 464 } 465 if (!*x) 466 break; 467 insertlinknode(&foo, n, (void *)x), incnode(n); 468 } 469 } 470 switch (rawc) { 471 case Dnull: /* " */ 472 case Snull: /* ' */ 473 case Tick: /* ` (note: no Qtick!) */ 474 /* These always occur in unnested pairs. */ 475 inq = !inq; 476 break; 477 case Inpar: /* ( */ 478 inp++; 479 break; 480 case Outpar: /* ) */ 481 inp--; 482 break; 483 case Bnull: /* \ */ 484 case Bnullkeep: 485 /* The parser verified the following char's existence. */ 486 x += l; 487 l = MB_METACHARLEN(x); 488 break; 489 } 490 } 491 } 492 493 prefork(&foo, pf_flags); 494 if (errflag) { 495 if (isarr) 496 *isarr = 0; 497 return 0; 498 } 499 500 if ((l = countlinknodes(&foo)) > 1 || (foo.list.flags & LF_ARRAY && a)) { 501 p = r = hcalloc((l + 1) * sizeof(char*)); 502 while (nonempty(&foo)) 503 *p++ = (char *)ugetnode(&foo); 504 *p = NULL; 505 /* We need a way to figure out if a one-item result was a scalar 506 * or a single-item array. The parser will have set LF_ARRAY 507 * in the latter case, allowing us to return it as an array to 508 * our caller (if they provided for that result). 
*/ 509 if (a && (l > 1 || foo.list.flags & LF_ARRAY)) { 510 *a = r; 511 *isarr = SCANPM_MATCHMANY; 512 return 0; 513 } 514 *s = sepjoin(r, sep, 1); 515 if (isarr) 516 *isarr = 0; 517 return 0; 518 } 519 if (l) 520 *s = (char *) ugetnode(&foo); 521 else 522 *s = dupstring(""); 523 if (isarr) 524 *isarr = 0; 525 return !l; 526} 527 528/* 529 * ~, = subs: assign & PREFORK_TYPESET => typeset or magic equals 530 * assign & PREFORK_ASSIGN => normal assignment 531 */ 532 533/**/ 534mod_export void 535filesub(char **namptr, int assign) 536{ 537 char *eql = NULL, *sub = NULL, *str, *ptr; 538 int len; 539 540 filesubstr(namptr, assign); 541 542 if (!assign) 543 return; 544 545 if (assign & PREFORK_TYPESET) { 546 if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) { 547 str = sub + 1; 548 if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) { 549 sub[1] = '\0'; 550 *namptr = dyncat(*namptr, str); 551 } 552 } else 553 return; 554 } 555 556 ptr = *namptr; 557 while ((sub = strchr(ptr, ':'))) { 558 str = sub + 1; 559 len = sub - *namptr; 560 if (sub > eql && 561 (sub[1] == Tilde || sub[1] == Equals) && 562 filesubstr(&str, assign)) { 563 sub[1] = '\0'; 564 *namptr = dyncat(*namptr, str); 565 } 566 ptr = *namptr + len + 1; 567 } 568} 569 570#define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') ) 571#define isend2(c) ( !(c) || (c)==Inpar || (assign && (c)==':') ) 572 573/* 574 * do =foo substitution, or equivalent. 575 * on entry, str should point to the "foo". 576 * if assign, this is in an assignment 577 * if nomatch, report hard error on failure. 578 * if successful, returns the expansion, else NULL. 
579 */ 580 581/**/ 582char * 583equalsubstr(char *str, int assign, int nomatch) 584{ 585 char *pp, *cnam, *cmdstr, *ret; 586 587 for (pp = str; !isend2(*pp); pp++) 588 ; 589 cmdstr = dupstrpfx(str, pp-str); 590 untokenize(cmdstr); 591 remnulargs(cmdstr); 592 if (!(cnam = findcmd(cmdstr, 1))) { 593 if (nomatch) 594 zerr("%s not found", cmdstr); 595 return NULL; 596 } 597 ret = dupstring(cnam); 598 if (*pp) 599 ret = dyncat(ret, pp); 600 return ret; 601} 602 603/**/ 604mod_export int 605filesubstr(char **namptr, int assign) 606{ 607 char *str = *namptr; 608 609 if (*str == Tilde && str[1] != '=' && str[1] != Equals) { 610 char *ptr, *tmp, *res, *ptr2; 611 int val; 612 613 val = zstrtol(str + 1, &ptr, 10); 614 if (isend(str[1])) { /* ~ */ 615 *namptr = dyncat(home ? home : "", str + 1); 616 return 1; 617 } else if (str[1] == '+' && isend(str[2])) { /* ~+ */ 618 *namptr = dyncat(pwd, str + 2); 619 return 1; 620 } else if (str[1] == '-' && isend(str[2])) { /* ~- */ 621 *namptr = dyncat((tmp = oldpwd) ? tmp : pwd, str + 2); 622 return 1; 623 } else if (str[1] == Inbrack && 624 (ptr2 = strchr(str+2, Outbrack))) { 625 char **arr; 626 untokenize(tmp = dupstrpfx(str+2, ptr2 - (str+2))); 627 remnulargs(tmp); 628 arr = subst_string_by_hook("zsh_directory_name", "n", tmp); 629 res = arr ? 
*arr : NULL; 630 if (res) { 631 *namptr = dyncat(res, ptr2+1); 632 return 1; 633 } 634 if (isset(NOMATCH)) 635 zerr("no directory expansion: ~[%s]", tmp); 636 return 0; 637 } else if (!inblank(str[1]) && isend(*ptr) && 638 (!idigit(str[1]) || (ptr - str < 4))) { 639 char *ds; 640 641 if (val < 0) 642 val = -val; 643 ds = dstackent(str[1], val); 644 if (!ds) 645 return 0; 646 *namptr = dyncat(ds, ptr); 647 return 1; 648 } else if ((ptr = itype_end(str+1, IUSER, 0)) != str+1) { /* ~foo */ 649 char *hom, save; 650 651 save = *ptr; 652 if (!isend(save)) 653 return 0; 654 *ptr = 0; 655 if (!(hom = getnameddir(++str))) { 656 if (isset(NOMATCH)) 657 zerr("no such user or named directory: %s", str); 658 *ptr = save; 659 return 0; 660 } 661 *ptr = save; 662 *namptr = dyncat(hom, ptr); 663 return 1; 664 } 665 } else if (*str == Equals && isset(EQUALS) && str[1]) { /* =foo */ 666 char *expn = equalsubstr(str+1, assign, isset(NOMATCH)); 667 if (expn) { 668 *namptr = expn; 669 return 1; 670 } 671 } 672 return 0; 673} 674 675#undef isend 676#undef isend2 677 678/**/ 679static char * 680strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub, 681 int copied) 682{ 683 char *dest; 684 int pl = pe - pb; 685 686 if (!pl && (!s || !*s)) { 687 *d = dest = (copied ? src : dupstring(src)); 688 if (glbsub) 689 shtokenize(dest); 690 } else { 691 *d = dest = hcalloc(pl + l + (s ? strlen(s) : 0) + 1); 692 strncpy(dest, pb, pl); 693 dest += pl; 694 strcpy(dest, src); 695 if (glbsub) 696 shtokenize(dest); 697 dest += l; 698 if (s) 699 strcpy(dest, s); 700 } 701 return dest; 702} 703 704#ifdef MULTIBYTE_SUPPORT 705#define WCPADWIDTH(cchar, mw) wcpadwidth(cchar, mw) 706 707/* 708 * Width of character for padding purposes. 709 * 0: all characters count 1. 710 * 1: use width of multibyte character. 711 * 2: non-zero width characters count 1, zero width 0. 
 */
static int
wcpadwidth(wchar_t wc, int multi_width)
{
    int width;

    switch (multi_width)
    {
    case 0:
        return 1;

    case 1:
        width = WCWIDTH(wc);
        /* A negative width is counted as zero. */
        if (width >= 0)
            return width;
        return 0;

    default:
        return WCWIDTH(wc) > 0 ? 1 : 0;
    }
}

#else
#define WCPADWIDTH(cchar, mw)   (1)
#endif

/*
 * Pad the string str, returning a result from the heap (or str itself,
 * if it didn't need padding).  If str is too large, it will be truncated.
 * Calculations are in terms of width if MULTIBYTE is in effect and
 * multi_width is non-zero, else characters.
 *
 * prenum and postnum are the width to which the string needs padding
 * on the left and right.
 *
 * preone and postone are string to insert once only before and after
 * str.  They will be truncated on the left or right, respectively,
 * if necessary to fit the width.  Either or both may be NULL in which
 * case they will not be used.
 *
 * premul and postmul are the padding strings to be repeated before
 * on the left (if prenum is non-zero) and right (if postnum is non-zero).  If
 * NULL the first character of IFS (typically but not necessarily a space)
 * will be used.
 *
 * multi_width (only present with MULTIBYTE_SUPPORT) selects how widths
 * are counted; it is passed to MB_METASTRLEN2() and WCPADWIDTH().
 *
 * An empty (as opposed to NULL) preone or postone is replaced by the
 * same default as is used for premul and postmul.
 *
 * NOTE(review): the size of the result buffer is computed in an int
 * from strlen() * prenum / postnum; presumably callers keep the widths
 * small enough for that not to overflow -- confirm.
 */

static char *
dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
          char *premul, char *postmul
#ifdef MULTIBYTE_SUPPORT
          , int multi_width
#endif
    )
{
    char *def, *ret, *t, *r;
    int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl;
    convchar_t cchar;

    MB_METACHARINIT();
    /* Default padding: first character of IFS, or nothing if IFS is empty. */
    if (!ifs || *ifs) {
        char *tmpifs = ifs ? ifs : DEFAULT_IFS;
        def = dupstrpfx(tmpifs, MB_METACHARLEN(tmpifs));
    } else
        def = "";
    if (preone && !*preone)
        preone = def;
    if (postone && !*postone)
        postone = def;
    if (!premul || !*premul)
        premul = def;
    if (!postmul || !*postmul)
        postmul = def;

    /* Widths (not byte lengths) of the string and of the padding strings. */
    ls = MB_METASTRLEN2(str, multi_width);
    lpreone = preone ? MB_METASTRLEN2(preone, multi_width) : 0;
    lpostone = postone ? MB_METASTRLEN2(postone, multi_width) : 0;
    lpremul = MB_METASTRLEN2(premul, multi_width);
    lpostmul = MB_METASTRLEN2(postmul, multi_width);

    /* Already exactly the requested width: nothing to do. */
    if (prenum + postnum == ls)
        return str;

    /*
     * Try to be careful with allocated lengths.  The following
     * is a maximum, in case we need the entire repeated string
     * for each repetition.  We probably don't, but in case the user
     * has given us something pathological which doesn't convert
     * easily into a width we'd better be safe.
     */
    lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum;
    /*
     * Same logic for preone and postone, except those may be NULL.
     */
    if (preone)
        lr += strlen(preone);
    if (postone)
        lr += strlen(postone);
    /* r is the write position, ret the start of the result. */
    r = ret = (char *)zhalloc(lr + 1);

    if (prenum) {
        /*
         * Pad on the left.
         */
        if (postnum) {
            /*
             * Pad on both right and left.
             * The strategy is to divide the string into two halves.
             * The first half is dealt with by the left hand padding
             * code, the second by the right hand.
             */
            ls2 = ls / 2;

            /* The width left to pad for the first half. */
            f = prenum - ls2;
            if (f <= 0) {
                /* First half doesn't fit.  Skip the first -f width. */
                f = -f;
                MB_METACHARINIT();
                while (f > 0) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    str += cl;
                    f -= WCPADWIDTH(cchar, multi_width);
                }
                /* Now finish the first half. */
                for (c = prenum; c > 0; ) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    while (cl--)
                        *r++ = *str++;
                    c -= WCPADWIDTH(cchar, multi_width);
                }
            } else {
                if (f <= lpreone) {
                    if (preone) {
                        /*
                         * The unrepeated string doesn't fit.
                         */
                        MB_METACHARINIT();
                        /* The width we need to skip */
                        f = lpreone - f;
                        /* So skip. */
                        for (t = preone; f > 0; ) {
                            cl = MB_METACHARLENCONV(t, &cchar);
                            if (!cl)
                                break;
                            t += cl;
                            f -= WCPADWIDTH(cchar, multi_width);
                        }
                        /* Then copy the entire remainder. */
                        while (*t)
                            *r++ = *t++;
                    }
                } else {
                    f -= lpreone;
                    if (lpremul) {
                        if ((m = f % lpremul)) {
                            /*
                             * Left over fraction of repeated string.
                             */
                            MB_METACHARINIT();
                            /* Skip this much. */
                            m = lpremul - m;
                            for (t = premul; m > 0; ) {
                                cl = MB_METACHARLENCONV(t, &cchar);
                                if (!cl)
                                    break;
                                t += cl;
                                m -= WCPADWIDTH(cchar, multi_width);
                            }
                            /* Output the rest. */
                            while (*t)
                                *r++ = *t++;
                        }
                        for (cc = f / lpremul; cc--;) {
                            /* Repeat the repeated string */
                            MB_METACHARINIT();
                            for (c = lpremul, t = premul; c > 0; ) {
                                cl = MB_METACHARLENCONV(t, &cchar);
                                if (!cl)
                                    break;
                                while (cl--)
                                    *r++ = *t++;
                                c -= WCPADWIDTH(cchar, multi_width);
                            }
                        }
                    }
                    if (preone) {
                        /* Output the full unrepeated string */
                        while (*preone)
                            *r++ = *preone++;
                    }
                }
                /* Output the first half width of the original string. */
                for (c = ls2; c > 0; ) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    c -= WCPADWIDTH(cchar, multi_width);
                    while (cl--)
                        *r++ = *str++;
                }
            }
            /* Other half.  In case the string had an odd length... */
            ls2 = ls - ls2;
            /* Width that needs padding... */
            f = postnum - ls2;
            if (f <= 0) {
                /* ...is negative, truncate original string */
                MB_METACHARINIT();
                for (c = postnum; c > 0; ) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    c -= WCPADWIDTH(cchar, multi_width);
                    while (cl--)
                        *r++ = *str++;
                }
            } else {
                /* Rest of original string fits, output it complete */
                while (*str)
                    *r++ = *str++;
                if (f <= lpostone) {
                    if (postone) {
                        /* Can't fit unrepeated string, truncate it */
                        for (c = f; c > 0; ) {
                            cl = MB_METACHARLENCONV(postone, &cchar);
                            if (!cl)
                                break;
                            c -= WCPADWIDTH(cchar, multi_width);
                            while (cl--)
                                *r++ = *postone++;
                        }
                    }
                } else {
                    if (postone) {
                        f -= lpostone;
                        /* Output entire unrepeated string */
                        while (*postone)
                            *r++ = *postone++;
                    }
                    if (lpostmul) {
                        for (cc = f / lpostmul; cc--;) {
                            /* Begin the beguine */
                            for (t = postmul; *t; )
                                *r++ = *t++;
                        }
                        if ((m = f % lpostmul)) {
                            /* Fill leftovers with chunk of repeated string */
                            MB_METACHARINIT();
                            while (m > 0) {
                                cl = MB_METACHARLENCONV(postmul, &cchar);
                                if (!cl)
                                    break;
                                m -= WCPADWIDTH(cchar, multi_width);
                                while (cl--)
                                    *r++ = *postmul++;
                            }
                        }
                    }
                }
            }
        } else {
            /*
             * Pad only on the left.
             */
            f = prenum - ls;
            if (f <= 0) {
                /*
                 * Original string is at least as wide as padding.
                 * Truncate original string to width.
                 * Truncate on left, so skip the characters we
                 * don't need.
                 */
                f = -f;
                MB_METACHARINIT();
                while (f > 0) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    str += cl;
                    f -= WCPADWIDTH(cchar, multi_width);
                }
                /* Copy the rest of the original string */
                for (c = prenum; c > 0; ) {
                    cl = MB_METACHARLENCONV(str, &cchar);
                    if (!cl)
                        break;
                    while (cl--)
                        *r++ = *str++;
                    c -= WCPADWIDTH(cchar, multi_width);
                }
            } else {
                /*
                 * We can fit the entire string...
                 */
                if (f <= lpreone) {
                    if (preone) {
                        /*
                         * ...with some fraction of the unrepeated string.
                         */
                        /*
                         * We need this width of characters.
                         * (c is not read again in this branch: the
                         * remainder of preone is copied whole below.)
                         */
                        c = f;
                        /*
                         * We therefore need to skip this width of
                         * characters.
                         */
                        f = lpreone - f;
                        MB_METACHARINIT();
                        for (t = preone; f > 0; ) {
                            cl = MB_METACHARLENCONV(t, &cchar);
                            if (!cl)
                                break;
                            t += cl;
                            f -= WCPADWIDTH(cchar, multi_width);
                        }
                        /* Copy the rest of preone */
                        while (*t)
                            *r++ = *t++;
                    }
                } else {
                    /*
                     * We can fit the whole of preone, needing this width
                     * first
                     */
                    f -= lpreone;
                    if (lpremul) {
                        if ((m = f % lpremul)) {
                            /*
                             * Some fraction of the repeated string needed.
                             */
                            /* Need this much... */
                            c = m;
                            /* ...skipping this much first. */
                            m = lpremul - m;
                            MB_METACHARINIT();
                            for (t = premul; m > 0; ) {
                                cl = MB_METACHARLENCONV(t, &cchar);
                                if (!cl)
                                    break;
                                t += cl;
                                m -= WCPADWIDTH(cchar, multi_width);
                            }
                            /* Now the rest of the repeated string. */
                            while (c > 0) {
                                cl = MB_METACHARLENCONV(t, &cchar);
                                if (!cl)
                                    break;
                                while (cl--)
                                    *r++ = *t++;
                                c -= WCPADWIDTH(cchar, multi_width);
                            }
                        }
                        for (cc = f / lpremul; cc--;) {
                            /*
                             * Repeat the repeated string.
                             */
                            MB_METACHARINIT();
                            for (c = lpremul, t = premul; c > 0; ) {
                                cl = MB_METACHARLENCONV(t, &cchar);
                                if (!cl)
                                    break;
                                while (cl--)
                                    *r++ = *t++;
                                c -= WCPADWIDTH(cchar, multi_width);
                            }
                        }
                    }
                    if (preone) {
                        /*
                         * Now the entire unrepeated string.  Don't
                         * count the width, just dump it.  This is
                         * significant if there are special characters
                         * in this string.  It's sort of a historical
                         * accident that this worked, but there's nothing
                         * to stop us just dumping the thing out and assuming
                         * the user knows what they're doing.
                         */
                        while (*preone)
                            *r++ = *preone++;
                    }
                }
                /* Now the string being padded */
                while (*str)
                    *r++ = *str++;
            }
        }
    } else if (postnum) {
        /*
         * Pad on the right.
         */
        f = postnum - ls;
        MB_METACHARINIT();
        if (f <= 0) {
            /*
             * Original string is at least as wide as padding.
             * Truncate original string to width.
             */
            for (c = postnum; c > 0; ) {
                cl = MB_METACHARLENCONV(str, &cchar);
                if (!cl)
                    break;
                while (cl--)
                    *r++ = *str++;
                c -= WCPADWIDTH(cchar, multi_width);
            }
        } else {
            /*
             * There's some space to fill.  First copy the original
             * string, counting the width.  Make sure we copy the
             * entire string.
             */
            for (c = ls; *str; ) {
                cl = MB_METACHARLENCONV(str, &cchar);
                if (!cl)
                    break;
                while (cl--)
                    *r++ = *str++;
                c -= WCPADWIDTH(cchar, multi_width);
            }
            MB_METACHARINIT();
            if (f <= lpostone) {
                if (postone) {
                    /*
                     * Not enough or only just enough space to fit
                     * the unrepeated string.  Truncate as necessary.
                     */
                    for (c = f; c > 0; ) {
                        cl = MB_METACHARLENCONV(postone, &cchar);
                        if (!cl)
                            break;
                        while (cl--)
                            *r++ = *postone++;
                        c -= WCPADWIDTH(cchar, multi_width);
                    }
                }
            } else {
                if (postone) {
                    f -= lpostone;
                    /* Copy the entire unrepeated string */
                    for (c = lpostone; *postone; ) {
                        cl = MB_METACHARLENCONV(postone, &cchar);
                        if (!cl)
                            break;
                        while (cl--)
                            *r++ = *postone++;
                        c -= WCPADWIDTH(cchar, multi_width);
                    }
                }
                if (lpostmul) {
                    /* Repeat the repeated string */
                    for (cc = f / lpostmul; cc--;) {
                        MB_METACHARINIT();
                        for (c = lpostmul, t = postmul; *t; ) {
                            cl = MB_METACHARLENCONV(t, &cchar);
                            if (!cl)
                                break;
                            while (cl--)
                                *r++ = *t++;
                            c -= WCPADWIDTH(cchar, multi_width);
                        }
                    }
                    /*
                     * See if there's any fraction of the repeated
                     * string needed to fill up the remaining space.
                     */
                    if ((m = f % lpostmul)) {
                        MB_METACHARINIT();
                        while (m > 0) {
                            cl = MB_METACHARLENCONV(postmul, &cchar);
                            if (!cl)
                                break;
                            while (cl--)
                                *r++ = *postmul++;
                            m -= WCPADWIDTH(cchar, multi_width);
                        }
                    }
                }
            }
        }
    }
    /* If neither prenum nor postnum is set, the result is empty. */
    *r = '\0';

    return ret;
}


/*
 * Look for a delimited portion of a string.  The first (possibly
 * multibyte) character at s is the delimiter.  Various forms
 * of brackets are treated separately, as documented.
 *
 * Returns a pointer to the final delimiter.  Sets *len to the
 * length of the final delimiter; a NULL causes *len to be set
 * to zero since we shouldn't advance past it.  (The string is
 * tokenized, so a NULL is a real end of string.)
1210 */ 1211 1212/**/ 1213char * 1214get_strarg(char *s, int *lenp) 1215{ 1216 convchar_t del; 1217 int len; 1218 char ctok = 0; 1219 1220 MB_METACHARINIT(); 1221 len = MB_METACHARLENCONV(s, &del); 1222 if (!len) { 1223 *lenp = 0; 1224 return s; 1225 } 1226 1227#ifdef MULTIBYTE_SUPPORT 1228 if (del == WEOF) 1229 del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s); 1230#endif 1231 s += len; 1232 switch (del) { 1233 case ZWC('('): 1234 del = ZWC(')'); 1235 break; 1236 case '[': 1237 del = ZWC(']'); 1238 break; 1239 case '{': 1240 del = ZWC('}'); 1241 break; 1242 case '<': 1243 del = ZWC('>'); 1244 break; 1245 case Inpar: 1246 ctok = Outpar; 1247 break; 1248 case Inang: 1249 ctok = Outang; 1250 break; 1251 case Inbrace: 1252 ctok = Outbrace; 1253 break; 1254 case Inbrack: 1255 ctok = Outbrack; 1256 break; 1257 } 1258 1259 if (ctok) { 1260 /* 1261 * Looking for a matching token; we want the literal byte, 1262 * not a decoded multibyte character, so search specially. 1263 */ 1264 while (*s && *s != ctok) 1265 s++; 1266 } else { 1267 convchar_t del2; 1268 len = 0; 1269 while (*s) { 1270 len = MB_METACHARLENCONV(s, &del2); 1271#ifdef MULTIBYTE_SUPPORT 1272 if (del2 == WEOF) 1273 del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s); 1274#endif 1275 if (del == del2) 1276 break; 1277 s += len; 1278 } 1279 } 1280 1281 *lenp = len; 1282 return s; 1283} 1284 1285/* 1286 * Get an integer argument; update *s to the end of the 1287 * final delimiter. *delmatchp is set to the length of the 1288 * matched delimiter if we have matching, delimiters and there was no error in 1289 * the evaluation, else 0. 
1290 */ 1291 1292/**/ 1293static int 1294get_intarg(char **s, int *delmatchp) 1295{ 1296 int arglen; 1297 char *t = get_strarg(*s, &arglen); 1298 char *p, sav; 1299 zlong ret; 1300 1301 *delmatchp = 0; 1302 if (!*t) 1303 return -1; 1304 sav = *t; 1305 *t = '\0'; 1306 p = dupstring(*s + arglen); 1307 *s = t + arglen; 1308 *t = sav; 1309 if (parsestr(p)) 1310 return -1; 1311 singsub(&p); 1312 if (errflag) 1313 return -1; 1314 ret = mathevali(p); 1315 if (errflag) 1316 return -1; 1317 if (ret < 0) 1318 ret = -ret; 1319 *delmatchp = arglen; 1320 return ret < 0 ? -ret : ret; 1321} 1322 1323/* Parsing for the (e) flag. */ 1324 1325static int 1326subst_parse_str(char **sp, int single, int err) 1327{ 1328 char *s; 1329 1330 *sp = s = dupstring(*sp); 1331 1332 if (!(err ? parsestr(s) : parsestrnoerr(s))) { 1333 if (!single) { 1334 int qt = 0; 1335 1336 for (; *s; s++) 1337 if (!qt) { 1338 if (*s == Qstring) 1339 *s = String; 1340 else if (*s == Qtick) 1341 *s = Tick; 1342 } else if (*s == Dnull) 1343 qt = !qt; 1344 } 1345 return 0; 1346 } 1347 return 1; 1348} 1349 1350/* Evaluation for (#) flag */ 1351 1352static char * 1353substevalchar(char *ptr) 1354{ 1355 zlong ires = mathevali(ptr); 1356 int len = 0; 1357 1358 if (errflag) 1359 return NULL; 1360#ifdef MULTIBYTE_SUPPORT 1361 if (isset(MULTIBYTE) && ires > 127) { 1362 /* '\\' + 'U' + 8 bytes of character + '\0' */ 1363 char buf[11]; 1364 1365 /* inefficient: should separate out \U handling from getkeystring */ 1366 sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu); 1367 ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL); 1368 } 1369 if (len == 0) 1370#endif 1371 { 1372 ptr = zhalloc(2); 1373 len = 1; 1374 sprintf(ptr, "%c", (int)ires); 1375 } 1376 return metafy(ptr, len, META_USEHEAP); 1377} 1378 1379/* 1380 * Helper function for arguments to parameter flags which 1381 * handles the (p) and (~) flags as escapes and tok_arg respectively. 
1382 */ 1383 1384static char * 1385untok_and_escape(char *s, int escapes, int tok_arg) 1386{ 1387 int klen; 1388 char *dst; 1389 1390 untokenize(dst = dupstring(s)); 1391 if (escapes) { 1392 dst = getkeystring(dst, &klen, GETKEYS_SEP, NULL); 1393 dst = metafy(dst, klen, META_HREALLOC); 1394 } 1395 if (tok_arg) 1396 shtokenize(dst); 1397 return dst; 1398} 1399 1400/* 1401 * See if an argument str looks like a subscript or length following 1402 * a colon and parse it. It must be followed by a ':' or nothing. 1403 * If this succeeds, expand and return the evaulated expression if 1404 * found, else return NULL. 1405 * 1406 * We assume this is what is meant if the first character is not 1407 * an alphabetic character or '&', which signify modifiers. 1408 * 1409 * Set *endp to point to the next character following. 1410 */ 1411static char * 1412check_colon_subscript(char *str, char **endp) 1413{ 1414 int sav; 1415 1416 /* Could this be a modifier (or empty)? */ 1417 if (!*str || ialpha(*str) || *str == '&') 1418 return NULL; 1419 1420 *endp = parse_subscript(str, 0, ':'); 1421 if (!*endp) { 1422 /* No trailing colon? */ 1423 *endp = parse_subscript(str, 0, '\0'); 1424 if (!*endp) 1425 return NULL; 1426 } 1427 sav = **endp; 1428 **endp = '\0'; 1429 if (parsestr(str = dupstring(str))) 1430 return NULL; 1431 singsub(&str); 1432 remnulargs(str); 1433 untokenize(str); 1434 1435 **endp = sav; 1436 return str; 1437} 1438 1439/* parameter substitution */ 1440 1441#define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring) 1442#define isbrack(c) ((c) == '[' || (char)(c) == Inbrack) 1443 1444/* 1445 * Given a linked list l with node n, perform parameter substitution 1446 * starting from *str. Return the node with the substitutuion performed 1447 * or NULL if it failed. 1448 * 1449 * If qt is true, the `$' was quoted. TODO: why can't we just look 1450 * to see if the first character was String or Qstring? 
1451 * 1452 * If ssub is true, we are being called via singsubst(), which means 1453 * the result will be a single word. TODO: can we generate the 1454 * single word at the end? TODO: if not, or maybe in any case, 1455 * can we pass down the ssub flag from prefork with the other flags 1456 * instead of pushing it into different arguments? (How exactly 1457 * to qt and ssub differ? Are both necessary, if so is there some 1458 * better way of separating the two?) 1459 */ 1460 1461/**/ 1462static LinkNode 1463paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) 1464{ 1465 char *aptr = *str, c, cc; 1466 char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); 1467 int colf; /* != 0 means we found a colon after the name */ 1468 /* 1469 * There are far too many flags. They need to be grouped 1470 * together into some structure which ties them to where they 1471 * came from. 1472 * 1473 * Some flags have a an obscure relationship to their effect which 1474 * depends on incrementing them to particular values in particular 1475 * ways. 1476 */ 1477 /* 1478 * Whether the value is an array (in aval) or not (in val). There's 1479 * a movement from storing the value in the stuff read from the 1480 * parameter (the value v) to storing them in val and aval. 1481 * However, sometimes you find v reappearing temporarily. 1482 * 1483 * The values -1 and 2 are special to isarr. The value -1 is used 1484 * to force us to keep an empty array. It's tested in the YUK chunk 1485 * (I mean the one explicitly marked as such). The value 2 1486 * indicates an array has come from splitting a scalar. We use 1487 * that to override the usual rule that in double quotes we don't 1488 * remove empty elements (so "${(s.:):-foo::bar}" produces two 1489 * words). This seems to me to be quite the wrong thing to do, 1490 * but it looks like code may be relying on it. 
So we require (@) 1491 * as well before we keep the empty fields (look for assignments 1492 * like "isarr = nojoin ? 1 : 2"). 1493 */ 1494 int isarr = 0; 1495 /* 1496 * This is just the setting of the option except we need to 1497 * take account of ^ and ^^. 1498 */ 1499 int plan9 = isset(RCEXPANDPARAM); 1500 /* 1501 * Likwise, but with ~ and ~~. Also, we turn it off later 1502 * on if qt is passed down. The value can go to 2 if we 1503 * use ~ to force this on. 1504 */ 1505 int globsubst = isset(GLOBSUBST); 1506 /* 1507 * Indicates ${(#)...}. 1508 */ 1509 int evalchar = 0; 1510 /* 1511 * Indicates ${#pm}, massaged by whichlen which is set by 1512 * the (c), (w), and (W) flags to indicate how we take the length. 1513 */ 1514 int getlen = 0; 1515 int whichlen = 0; 1516 /* 1517 * Indicates ${+pm}: a simple boolean for once. 1518 */ 1519 int chkset = 0; 1520 /* 1521 * Indicates we have tried to get a value in v but that was 1522 * unset. I don't quite understand why (v == NULL) isn't 1523 * good enough, but there are places where we seem to need 1524 * to second guess whether a value is a real value or not. 1525 */ 1526 int vunset = 0; 1527 /* 1528 * Indicates (t) flag, i.e. print out types. The code for 1529 * this actually isn't too horrifically inbred compared with 1530 * that for (P). 1531 */ 1532 int wantt = 0; 1533 /* 1534 * Indicates spliting a string into an array. There aren't 1535 * actually that many special cases for this --- which may 1536 * be why it doesn't work properly; we split in some cases 1537 * where we shouldn't, in particular on the multsubs for 1538 * handling embedded values for ${...=...} and the like. 1539 */ 1540 int spbreak = (pf_flags & PREFORK_SHWORDSPLIT) && 1541 !(pf_flags & PREFORK_SINGLE) && !qt; 1542 /* Scalar and array value, see isarr above */ 1543 char *val = NULL, **aval = NULL; 1544 /* 1545 * vbuf and v are both used to retrieve parameter values; this 1546 * is a kludge, we pass down vbuf and it may or may not return v. 
1547 */ 1548 struct value vbuf; 1549 Value v = NULL; 1550 /* 1551 * This expressive name refers to the set of flags which 1552 * is applied to matching for #, %, / and their doubled variants: 1553 * (M), (R), (B), (E), (N), (S). 1554 */ 1555 int flags = 0; 1556 /* Value from (I) flag, used for ditto. */ 1557 int flnum = 0; 1558 /* 1559 * sortit is to be passed to strmetasort(). 1560 * indord is the (a) flag, which for consistency doesn't get 1561 * combined into sortit. 1562 */ 1563 int sortit = SORTIT_ANYOLDHOW, indord = 0; 1564 /* (u): straightforward. */ 1565 int unique = 0; 1566 /* combination of (L), (U) and (C) flags. */ 1567 int casmod = CASMOD_NONE; 1568 /* 1569 * quotemod says we are doing either (q) (positive), (Q) (negative) 1570 * or not (0). quotetype counts the q's for the first case. 1571 * quoterr is simply (X) but gets passed around a lot because the 1572 * combination (eX) needs it. 1573 */ 1574 int quotemod = 0, quotetype = QT_NONE, quoteerr = 0; 1575 /* 1576 * Various fairly straightforward modifications, except that as with so 1577 * many flags it's not easy to decide where to put them in the order. 1578 * bit 0: (D) flag. 1579 * bit 1: (V) flag. 1580 */ 1581 int mods = 0; 1582 /* 1583 * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied 1584 * spbreak, see above; fairly straighforward in use but c.f. 1585 * the comment for mods. 1586 * 1587 * This gets set to one of the LEXFLAGS_* values. 1588 */ 1589 int shsplit = 0; 1590 /* 1591 * "ssub" is true when we are called from singsub (via prefork): 1592 * it means that we must join arrays and should not split words. 1593 */ 1594 int ssub = (pf_flags & PREFORK_SINGLE); 1595 /* 1596 * The separator from (j) and (s) respectively, or (F) and (f) 1597 * respectively (hardwired to "\n" in that case). Slightly 1598 * confusingly also used for ${#pm}, thought that's at least 1599 * documented in the manual 1600 */ 1601 char *sep = NULL, *spsep = NULL; 1602 /* 1603 * Padding strings. 
The left and right padding strings which 1604 * are repeated, then the ones which only occur once, for 1605 * the (l) and (r) flags. 1606 */ 1607 char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL; 1608 /* Replacement string for /orig/repl and //orig/repl */ 1609 char *replstr = NULL; 1610 /* The numbers for (l) and (r) */ 1611 zlong prenum = 0, postnum = 0; 1612#ifdef MULTIBYTE_SUPPORT 1613 /* The (m) flag: use width of multibyte characters */ 1614 int multi_width = 0; 1615#endif 1616 /* 1617 * Whether the value has been copied. Optimisation: if we 1618 * are modifying an expression, we only need to copy it the 1619 * first time, and if we don't modify it we can just use the 1620 * value from the parameter or input. 1621 */ 1622 int copied = 0; 1623 /* 1624 * The (A) flag for array assignment, with consequences for 1625 * splitting and joining; (AA) gives arrasg == 2 for associative 1626 * arrays. 1627 */ 1628 int arrasg = 0; 1629 /* 1630 * The (e) flag. As we need to do extra work not quite 1631 * at the end, the effect of this is kludged in several places. 1632 */ 1633 int eval = 0; 1634 /* 1635 * The (P) flag. This interacts a bit obscurely with whether 1636 * or not we are dealing with a sub expression (subexp). 1637 */ 1638 int aspar = 0; 1639 /* 1640 * The (%) flag, c.f. mods again. 1641 */ 1642 int presc = 0; 1643 /* 1644 * The (g) flag. Process escape sequences with various GETKEY_ flags. 1645 */ 1646 int getkeys = -1; 1647 /* 1648 * The (@) flag; interacts obscurely with qt and isarr. 1649 * This is one of the things that decides whether multsub 1650 * will produce an array, but in an extremely indirect fashion. 1651 */ 1652 int nojoin = (pf_flags & PREFORK_SHWORDSPLIT) ? !(ifs && *ifs) && !qt : 0; 1653 /* 1654 * != 0 means ${...}, otherwise $... What works without braces 1655 * is largely a historical artefact (everything works with braces, 1656 * I sincerely hope). 1657 */ 1658 char inbrace = 0; 1659 /* 1660 * Use for the (k) flag. 
Goes down into the parameter code, 1661 * sometimes. 1662 */ 1663 char hkeys = 0; 1664 /* 1665 * Used for the (v) flag, ditto. Not quite sure why they're 1666 * separate, but the tradition seems to be that things only 1667 * get combined when that makes the result more obscure rather 1668 * than less. 1669 */ 1670 char hvals = 0; 1671 /* 1672 * Whether we had to evaluate a subexpression, i.e. an 1673 * internal ${...} or $(...) or plain $pm. We almost don't 1674 * need to remember this (which would be neater), but the (P) 1675 * flag means the subexp and !subexp code is obscurely combined, 1676 * and the argument passing to fetchvalue has another kludge. 1677 */ 1678 int subexp; 1679 /* 1680 * If we're referring to the positional parameters, then 1681 * e.g ${*:1:1} refers to $1. 1682 * This is for compatibility. 1683 */ 1684 int horrible_offset_hack = 0; 1685 1686 *s++ = '\0'; 1687 /* 1688 * Nothing to do unless the character following the $ is 1689 * something we recognise. 1690 * 1691 * Shouldn't this be a table or something? We test for all 1692 * these later on, too. 1693 */ 1694 c = *s; 1695 if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound && 1696 c != '-' && c != '!' && c != '$' && c != String && c != Qstring && 1697 c != '?' && c != Quest && 1698 c != '*' && c != Star && c != '@' && c != '{' && 1699 c != Inbrace && c != '=' && c != Equals && c != Hat && 1700 c != '^' && c != '~' && c != Tilde && c != '+') { 1701 s[-1] = '$'; 1702 *str = s; 1703 return n; 1704 } 1705 DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()"); 1706 /* 1707 * Extra processing if there is an opening brace: mostly 1708 * flags in parentheses, but also one ksh hack. 1709 */ 1710 if (c == Inbrace) { 1711 inbrace = 1; 1712 s++; 1713 /* 1714 * In ksh emulation a leading `!' is a special flag working 1715 * sort of like our (k). 1716 * TODO: this is one of very few cases tied directly to 1717 * the emulation mode rather than an option. 
Since ksh 1718 * doesn't have parameter flags it might be neater to 1719 * handle this with the ^, =, ~ stuff, below. 1720 */ 1721 if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) { 1722 hkeys = SCANPM_WANTKEYS; 1723 s++; 1724 } else if (c == '(' || c == Inpar) { 1725 char *t, sav; 1726 int tt = 0; 1727 zlong num; 1728 /* 1729 * The (p) flag is only remembered within 1730 * this block. It says we do print-style handling 1731 * on the values for flags, but only on those. 1732 */ 1733 int escapes = 0; 1734 /* 1735 * '~' in parentheses caused tokenization of string arg: 1736 * similar to (p). 1737 */ 1738 int tok_arg = 0; 1739 1740 for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) { 1741 int arglen; /* length of modifier argument */ 1742 int dellen; /* length of matched delimiter, 0 if not */ 1743 char *del0; /* pointer to initial delimiter */ 1744 1745 switch (c) { 1746 case ')': 1747 case Outpar: 1748 /* how can this happen? */ 1749 break; 1750 case '~': 1751 case Tilde: 1752 tok_arg = !tok_arg; 1753 break; 1754 case 'A': 1755 ++arrasg; 1756 break; 1757 case '@': 1758 nojoin = 2; /* nojoin = 2 means force */ 1759 break; 1760 case 'M': 1761 flags |= SUB_MATCH; 1762 break; 1763 case 'R': 1764 flags |= SUB_REST; 1765 break; 1766 case 'B': 1767 flags |= SUB_BIND; 1768 break; 1769 case 'E': 1770 flags |= SUB_EIND; 1771 break; 1772 case 'N': 1773 flags |= SUB_LEN; 1774 break; 1775 case 'S': 1776 flags |= SUB_SUBSTR; 1777 break; 1778 case 'I': 1779 s++; 1780 flnum = get_intarg(&s, &dellen); 1781 if (flnum < 0) 1782 goto flagerr; 1783 s--; 1784 break; 1785 1786 case 'L': 1787 casmod = CASMOD_LOWER; 1788 break; 1789 case 'U': 1790 casmod = CASMOD_UPPER; 1791 break; 1792 case 'C': 1793 casmod = CASMOD_CAPS; 1794 break; 1795 1796 case 'o': 1797 if (!sortit) 1798 sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */ 1799 break; 1800 case 'O': 1801 sortit |= SORTIT_BACKWARDS; 1802 break; 1803 case 'i': 1804 sortit |= SORTIT_IGNORING_CASE; 1805 break; 1806 
case 'n': 1807 sortit |= SORTIT_NUMERICALLY; 1808 break; 1809 case 'a': 1810 sortit |= SORTIT_SOMEHOW; 1811 indord = 1; 1812 break; 1813 1814 case 'D': 1815 mods |= 1; 1816 break; 1817 case 'V': 1818 mods |= 2; 1819 break; 1820 1821 case 'q': 1822 if (quotetype == QT_DOLLARS) 1823 goto flagerr; 1824 if (s[1] == '-') { 1825 if (quotemod) 1826 goto flagerr; 1827 s++; 1828 quotemod = 1; 1829 quotetype = QT_SINGLE_OPTIONAL; 1830 } else { 1831 if (quotetype == QT_SINGLE_OPTIONAL) { 1832 /* extra q's after '-' not allowed */ 1833 goto flagerr; 1834 } 1835 quotemod++, quotetype++; 1836 } 1837 break; 1838 case 'Q': 1839 quotemod--; 1840 break; 1841 case 'X': 1842 quoteerr = 1; 1843 break; 1844 1845 case 'e': 1846 eval = 1; 1847 break; 1848 case 'P': 1849 aspar = 1; 1850 break; 1851 1852 case 'c': 1853 whichlen = 1; 1854 break; 1855 case 'w': 1856 whichlen = 2; 1857 break; 1858 case 'W': 1859 whichlen = 3; 1860 break; 1861 1862 case 'f': 1863 spsep = "\n"; 1864 break; 1865 case 'F': 1866 sep = "\n"; 1867 break; 1868 1869 case '0': 1870 spsep = zhalloc(3); 1871 spsep[0] = Meta; 1872 spsep[1] = '\0' ^ 32; 1873 spsep[2] = '\0'; 1874 break; 1875 1876 case 's': 1877 tt = 1; 1878 /* fall through */ 1879 case 'j': 1880 t = get_strarg(++s, &arglen); 1881 if (*t) { 1882 sav = *t; 1883 *t = '\0'; 1884 if (tt) 1885 spsep = untok_and_escape(s + arglen, 1886 escapes, tok_arg); 1887 else 1888 sep = untok_and_escape(s + arglen, 1889 escapes, tok_arg); 1890 *t = sav; 1891 s = t + arglen - 1; 1892 } else 1893 goto flagerr; 1894 break; 1895 1896 case 'l': 1897 tt = 1; 1898 /* fall through */ 1899 case 'r': 1900 s++; 1901 /* delimiter position */ 1902 del0 = s; 1903 num = get_intarg(&s, &dellen); 1904 if (num < 0) 1905 goto flagerr; 1906 if (tt) 1907 prenum = num; 1908 else 1909 postnum = num; 1910 /* must have same delimiter if more arguments */ 1911 if (!dellen || memcmp(del0, s, dellen)) { 1912 /* decrement since loop will increment */ 1913 s--; 1914 break; 1915 } 1916 t = get_strarg(s, 
&arglen); 1917 if (!*t) 1918 goto flagerr; 1919 sav = *t; 1920 *t = '\0'; 1921 if (tt) 1922 premul = untok_and_escape(s + arglen, escapes, 1923 tok_arg); 1924 else 1925 postmul = untok_and_escape(s + arglen, escapes, 1926 tok_arg); 1927 *t = sav; 1928 sav = *s; 1929 s = t + arglen; 1930 /* again, continue only if another start delimiter */ 1931 if (memcmp(del0, s, dellen)) { 1932 /* decrement since loop will increment */ 1933 s--; 1934 break; 1935 } 1936 t = get_strarg(s, &arglen); 1937 if (!*t) 1938 goto flagerr; 1939 sav = *t; 1940 *t = '\0'; 1941 if (tt) 1942 preone = untok_and_escape(s + arglen, 1943 escapes, tok_arg); 1944 else 1945 postone = untok_and_escape(s + arglen, 1946 escapes, tok_arg); 1947 *t = sav; 1948 /* -1 since loop will increment */ 1949 s = t + arglen - 1; 1950 break; 1951 1952 case 'm': 1953#ifdef MULTIBYTE_SUPPORT 1954 multi_width++; 1955#endif 1956 break; 1957 1958 case 'p': 1959 escapes = 1; 1960 break; 1961 1962 case 'k': 1963 hkeys = SCANPM_WANTKEYS; 1964 break; 1965 case 'v': 1966 hvals = SCANPM_WANTVALS; 1967 break; 1968 1969 case 't': 1970 wantt = 1; 1971 break; 1972 1973 case '%': 1974 presc++; 1975 break; 1976 1977 case 'g': 1978 t = get_strarg(++s, &arglen); 1979 if (getkeys < 0) 1980 getkeys = 0; 1981 if (*t) { 1982 sav = *t; 1983 *t = 0; 1984 while (*++s) { 1985 switch (*s) { 1986 case 'e': 1987 getkeys |= GETKEY_EMACS; 1988 break; 1989 case 'o': 1990 getkeys |= GETKEY_OCTAL_ESC; 1991 break; 1992 case 'c': 1993 getkeys |= GETKEY_CTRL; 1994 break; 1995 1996 default: 1997 *t = sav; 1998 goto flagerr; 1999 } 2000 } 2001 *t = sav; 2002 s = t + arglen - 1; 2003 } else 2004 goto flagerr; 2005 break; 2006 2007 case 'z': 2008 shsplit = LEXFLAGS_ACTIVE; 2009 break; 2010 2011 case 'Z': 2012 t = get_strarg(++s, &arglen); 2013 if (*t) { 2014 sav = *t; 2015 *t = 0; 2016 while (*++s) { 2017 switch (*s) { 2018 case 'c': 2019 /* Parse and keep comments */ 2020 shsplit |= LEXFLAGS_COMMENTS_KEEP; 2021 break; 2022 2023 case 'C': 2024 /* Parse and 
remove comments */ 2025 shsplit |= LEXFLAGS_COMMENTS_STRIP; 2026 break; 2027 2028 case 'n': 2029 /* Treat newlines as whitespace */ 2030 shsplit |= LEXFLAGS_NEWLINE; 2031 break; 2032 2033 default: 2034 *t = sav; 2035 goto flagerr; 2036 } 2037 } 2038 *t = sav; 2039 s = t + arglen - 1; 2040 } else 2041 goto flagerr; 2042 break; 2043 2044 case 'u': 2045 unique = 1; 2046 break; 2047 2048 case '#': 2049 case Pound: 2050 evalchar = 1; 2051 break; 2052 2053 case '_': 2054 t = get_strarg(++s, &arglen); 2055 if (*t) { 2056 sav = *t; 2057 *t = 0; 2058 while (*++s) { 2059 /* Reserved for future use */ 2060 switch (*s) { 2061 default: 2062 *t = sav; 2063 goto flagerr; 2064 } 2065 } 2066 *t = sav; 2067 s = t + arglen - 1; 2068 } else 2069 goto flagerr; 2070 break; 2071 2072 default: 2073 flagerr: 2074 zerr("error in flags"); 2075 return NULL; 2076 } 2077 } 2078 s++; 2079 } 2080 } 2081 2082 /* 2083 * premul, postmul specify the padding character to be used 2084 * multiple times with the (l) and (r) flags respectively. 2085 */ 2086 if (!premul) 2087 premul = " "; 2088 if (!postmul) 2089 postmul = " "; 2090 2091 /* 2092 * Look for special unparenthesised flags. 2093 * TODO: could make these able to appear inside parentheses, too, 2094 * i.e. ${(^)...} etc. 2095 */ 2096 for (;;) { 2097 if ((c = *s) == '^' || c == Hat) { 2098 /* RC_EXPAND_PARAM on or off (doubled )*/ 2099 if ((c = *++s) == '^' || c == Hat) { 2100 plan9 = 0; 2101 s++; 2102 } else 2103 plan9 = 1; 2104 } else if ((c = *s) == '=' || c == Equals) { 2105 /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */ 2106 if ((c = *++s) == '=' || c == Equals) { 2107 spbreak = 0; 2108 if (nojoin < 2) 2109 nojoin = 0; 2110 s++; 2111 } else { 2112 spbreak = 2; 2113 if (nojoin < 2) 2114 nojoin = !(ifs && *ifs); 2115 } 2116 } else if ((c == '#' || c == Pound) && 2117 (itype_end(s+1, IIDENT, 0) != s + 1 2118 || (cc = s[1]) == '*' || cc == Star || cc == '@' 2119 || cc == '?' 
|| cc == Quest 2120 || cc == '$' || cc == String || cc == Qstring 2121 /* 2122 * Me And My Squiggle: 2123 * ${##} is the length of $#, but ${##foo} 2124 * is $# with a "foo" removed from the start. 2125 * If someone had defined the *@!@! language 2126 * properly in the first place we wouldn't 2127 * have this nonsense. 2128 */ 2129 || ((cc == '#' || cc == Pound) && 2130 s[2] == Outbrace) 2131 || cc == '-' || (cc == ':' && s[2] == '-') 2132 || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) { 2133 getlen = 1 + whichlen, s++; 2134 /* 2135 * Return the length of the parameter. 2136 * getlen can be more than 1 to indicate characters (2), 2137 * words ignoring multiple delimiters (3), words taking 2138 * account of multiple delimiters. delimiter is in 2139 * spsep, NULL means $IFS. 2140 */ 2141 } else if (c == '~' || c == Tilde) { 2142 /* GLOB_SUBST (forced) on or off (doubled) */ 2143 if ((c = *++s) == '~' || c == Tilde) { 2144 globsubst = 0; 2145 s++; 2146 } else 2147 globsubst = 2; 2148 } else if (c == '+') { 2149 /* 2150 * Return whether indicated parameter is set. 2151 * Try to handle this when parameter is named 2152 * by (P) (second part of test). 2153 */ 2154 if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) && 2155 (s[2] == Inbrace || s[2] == Inpar))) 2156 chkset = 1, s++; 2157 else if (!inbrace) { 2158 /* Special case for `$+' on its own --- leave unmodified */ 2159 *aptr = '$'; 2160 *str = aptr + 1; 2161 return n; 2162 } else { 2163 zerr("bad substitution"); 2164 return NULL; 2165 } 2166 } else if (inbrace && inull(*s)) { 2167 /* 2168 * Handles things like ${(f)"$(<file)"} by skipping 2169 * the double quotes. We don't need to know what was 2170 * actually there; the presence of a String or Qstring 2171 * is good enough. 
2172 */ 2173 s++; 2174 } else 2175 break; 2176 } 2177 /* Don't activate special pattern characters if inside quotes */ 2178 if (qt) 2179 globsubst = 0; 2180 2181 /* 2182 * At this point, we usually expect a parameter name. 2183 * However, there may be a nested ${...} or $(...). 2184 * These say that the parameter itself is somewhere inside, 2185 * or that there isn't a parameter and we will get the values 2186 * from a command substitution itself. In either case, 2187 * the current instance of paramsubst() doesn't fetch a value, 2188 * it just operates on what gets passed up. 2189 * (The first ought to have been {...}, reserving ${...} 2190 * for substituting a value at that point, but it's too late now.) 2191 */ 2192 idbeg = s; 2193 if ((subexp = (inbrace && s[-1] && isstring(*s) && 2194 (s[1] == Inbrace || s[1] == Inpar)))) { 2195 int sav; 2196 int quoted = *s == Qstring; 2197 2198 val = s++; 2199 skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s); 2200 sav = *s; 2201 *s = 0; 2202 /* 2203 * This handles arrays. TODO: this is not the most obscure call to 2204 * multsub() (see below) but even so it would be nicer to pass down 2205 * and back the arrayness more rationally. In that case, we should 2206 * remove the aspar test and extract a value from an array, if 2207 * necessary, when we handle (P) lower down. 2208 */ 2209 if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) { 2210 /* Empty quoted string --- treat as null string, not elided */ 2211 isarr = -1; 2212 aval = (char **) hcalloc(sizeof(char *)); 2213 aspar = 0; 2214 } else if (aspar) 2215 idbeg = val; 2216 *s = sav; 2217 /* 2218 * This tests for the second double quote in an expression 2219 * like ${(f)"$(<file)"}, compare above. 2220 */ 2221 while (inull(*s)) 2222 s++; 2223 v = (Value) NULL; 2224 } else if (aspar) { 2225 /* 2226 * No subexpression, but in any case the value is going 2227 * to give us the name of a parameter on which we do 2228 * our remaining processing. 
In other words, this 2229 * makes ${(P)param} work like ${(P)${param}}. (Probably 2230 * better looked at, this is the basic code for ${(P)param} 2231 * and it's been kludged into the subexp code because no 2232 * opportunity for a kludge has been neglected.) 2233 */ 2234 if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) { 2235 val = idbeg = getstrvalue(v); 2236 subexp = 1; 2237 } else 2238 vunset = 1; 2239 } 2240 /* 2241 * We need to retrieve a value either if we haven't already 2242 * got it from a subexpression, or if the processing so 2243 * far has just yielded us a parameter name to be processed 2244 * with (P). 2245 */ 2246 if (!subexp || aspar) { 2247 char *ov = val; 2248 2249 /* 2250 * Second argument: decide whether to use the subexpression or 2251 * the string next on the line as the parameter name. 2252 * Third argument: decide how processing for brackets 2253 * 1 means full processing 2254 * -1 appears to mean something along the lines of 2255 * only handle single digits and don't handle brackets. 2256 * I *think* (but it's really only a guess) that this 2257 * is used by the test below the wantt handling, so 2258 * that in certain cases we handle brackets there. 2259 * 0 would apparently mean something like we know we 2260 * should have the name of a scalar and we get cross 2261 * if there's anything present which disagrees with that 2262 * but you will search fetchvalue() in vain for comments on this. 2263 * Fourth argument gives flags to do with keys, values, quoting, 2264 * assigning depending on context and parameter flags. 2265 * 2266 * This is the last mention of subexp, so presumably this 2267 * is what the code which makes sure subexp is set if aspar (the 2268 * (P) flag) is set. 
I *think* what's going on here is the 2269 * second argument is for both input and output: with 2270 * subexp, we only want the input effect, whereas normally 2271 * we let fetchvalue set the main string pointer s to 2272 * the end of the bit it's fetched. 2273 */ 2274 if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s), 2275 (wantt ? -1 : 2276 ((unset(KSHARRAYS) || inbrace) ? 1 : -1)), 2277 hkeys|hvals| 2278 (arrasg ? SCANPM_ASSIGNING : 0)| 2279 (qt ? SCANPM_DQUOTED : 0))) || 2280 (v->pm && (v->pm->node.flags & PM_UNSET)) || 2281 (v->flags & VALFLAG_EMPTY)) 2282 vunset = 1; 2283 2284 if (wantt) { 2285 /* 2286 * Handle the (t) flag: value now becomes the type 2287 * information for the parameter. 2288 */ 2289 if (v && v->pm && !(v->pm->node.flags & PM_UNSET)) { 2290 int f = v->pm->node.flags; 2291 2292 switch (PM_TYPE(f)) { 2293 case PM_SCALAR: val = "scalar"; break; 2294 case PM_ARRAY: val = "array"; break; 2295 case PM_INTEGER: val = "integer"; break; 2296 case PM_EFLOAT: 2297 case PM_FFLOAT: val = "float"; break; 2298 case PM_HASHED: val = "association"; break; 2299 } 2300 val = dupstring(val); 2301 if (v->pm->level) 2302 val = dyncat(val, "-local"); 2303 if (f & PM_LEFT) 2304 val = dyncat(val, "-left"); 2305 if (f & PM_RIGHT_B) 2306 val = dyncat(val, "-right_blanks"); 2307 if (f & PM_RIGHT_Z) 2308 val = dyncat(val, "-right_zeros"); 2309 if (f & PM_LOWER) 2310 val = dyncat(val, "-lower"); 2311 if (f & PM_UPPER) 2312 val = dyncat(val, "-upper"); 2313 if (f & PM_READONLY) 2314 val = dyncat(val, "-readonly"); 2315 if (f & PM_TAGGED) 2316 val = dyncat(val, "-tag"); 2317 if (f & PM_TAGGED_LOCAL) 2318 val = dyncat(val, "-tag_local"); 2319 if (f & PM_EXPORTED) 2320 val = dyncat(val, "-export"); 2321 if (f & PM_UNIQUE) 2322 val = dyncat(val, "-unique"); 2323 if (f & PM_HIDE) 2324 val = dyncat(val, "-hide"); 2325 if (f & PM_HIDE) 2326 val = dyncat(val, "-hideval"); 2327 if (f & PM_SPECIAL) 2328 val = dyncat(val, "-special"); 2329 vunset = 0; 2330 } else 2331 val = 
dupstring(""); 2332 2333 v = NULL; 2334 isarr = 0; 2335 } 2336 } 2337 /* 2338 * We get in here two ways; either we need to convert v into 2339 * the local value system, or we need to get rid of brackets 2340 * even if there isn't a v. 2341 */ 2342 while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) { 2343 if (!v) { 2344 /* 2345 * Index applied to non-existent parameter; we may or may 2346 * not have a value to index, however. Create a temporary 2347 * empty parameter as a trick, and index on that. This 2348 * usually happens the second time around the loop when 2349 * we've used up the original parameter value and want to 2350 * apply a subscript to what's left. However, it's also 2351 * possible it's got something to do with some of that murky 2352 * passing of -1's as the third argument to fetchvalue() to 2353 * inhibit bracket parsing at that stage. 2354 */ 2355 Param pm; 2356 char *os = s; 2357 2358 if (!isbrack(*s)) 2359 break; 2360 if (vunset) { 2361 val = dupstring(""); 2362 isarr = 0; 2363 } 2364 pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR); 2365 DPUTS(!pm, "BUG: parameter not created"); 2366 if (isarr) 2367 pm->u.arr = aval; 2368 else 2369 pm->u.str = val; 2370 v = (Value) hcalloc(sizeof *v); 2371 v->isarr = isarr; 2372 v->pm = pm; 2373 v->end = -1; 2374 if (getindex(&s, v, qt ? SCANPM_DQUOTED : 0) || s == os) 2375 break; 2376 } 2377 /* 2378 * This is where we extract a value (we know now we have 2379 * one) into the local parameters for a scalar (val) or 2380 * array (aval) value. TODO: move val and aval into 2381 * a structure with a discriminator. Hope we can make 2382 * more things array values at this point and dearrayify later. 2383 * v->isarr tells us whether the stuff from down below looks 2384 * like an array. 2385 * 2386 * I think we get to discard the existing value of isarr 2387 * here because it's already been taken account of, either 2388 * in the subexp stuff or immediately above. 
2389 */ 2390 if ((isarr = v->isarr)) { 2391 /* 2392 * No way to get here with v->flags & VALFLAG_INV, so 2393 * getvaluearr() is called by getarrvalue(); needn't test 2394 * PM_HASHED. 2395 */ 2396 if (v->isarr == SCANPM_WANTINDEX) { 2397 isarr = v->isarr = 0; 2398 val = dupstring(v->pm->node.nam); 2399 } else 2400 aval = getarrvalue(v); 2401 } else { 2402 /* Value retrieved from parameter/subexpression is scalar */ 2403 if (v->pm->node.flags & PM_ARRAY) { 2404 /* 2405 * Although the value is a scalar, the parameter 2406 * itself is an array. Presumably this is due to 2407 * being quoted, or doing single substitution or something, 2408 * TODO: we're about to do some definitely stringy 2409 * stuff, so something like this bit is probably 2410 * necessary. However, I'd like to leave any 2411 * necessary joining of arrays until this point 2412 * to avoid the multsub() horror. 2413 */ 2414 int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm)); 2415 2416 if (v->start < 0) 2417 v->start += tmplen + ((v->flags & VALFLAG_INV) ? 1 : 0); 2418 if (!(v->flags & VALFLAG_INV) && 2419 (v->start >= tmplen || v->start < 0)) 2420 vunset = 1; 2421 } 2422 if (!vunset) { 2423 /* 2424 * There really is a value. Padding and case 2425 * transformations used to be handled here, but 2426 * are now handled in getstrvalue() for greater 2427 * consistency. However, we get unexpected effects 2428 * if we allow them to applied on every call, so 2429 * set the flag that allows them to be substituted. 2430 */ 2431 v->flags |= VALFLAG_SUBST; 2432 val = getstrvalue(v); 2433 } 2434 } 2435 /* See if this is a reference to the positional parameters. */ 2436 if (v && v->pm && v->pm->gsu.a == &vararray_gsu && 2437 (char ***)v->pm->u.data == &pparams) 2438 horrible_offset_hack = 1; 2439 else 2440 horrible_offset_hack = 0; 2441 /* 2442 * Finished with the original parameter and its indices; 2443 * carry on looping to see if we need to do more indexing. 
2444 * This means we final get rid of v in favour of val and 2445 * aval. We could do with somehow encapsulating the bit 2446 * where we need v. 2447 */ 2448 v = NULL; 2449 if (!inbrace) 2450 break; 2451 } 2452 /* 2453 * We're now past the name or subexpression; the only things 2454 * which can happen now are a closing brace, one of the standard 2455 * parameter postmodifiers, or a history-style colon-modifier. 2456 * 2457 * Again, this duplicates tests for characters we're about to 2458 * examine properly later on. 2459 */ 2460 if (inbrace && 2461 (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' && 2462 c != '=' && c != Equals && 2463 c != '#' && c != Pound && 2464 c != '?' && c != Quest && 2465 c != '}' && c != Outbrace) { 2466 zerr("bad substitution"); 2467 return NULL; 2468 } 2469 /* 2470 * Join arrays up if we're in quotes and there isn't some 2471 * override such as (@). 2472 * TODO: hmm, if we're called as part of some recursive 2473 * substitution do we want to delay this until we get back to 2474 * the top level? Or is if there's a qt (i.e. this parameter 2475 * substitution is in quotes) always good enough? Potentially 2476 * we may be OK by now --- all potential `@'s and subexpressions 2477 * have been handled, including any [@] index which comes up 2478 * by virture of v->isarr being set to SCANPM_ISVAR_AT which 2479 * is now in isarr. 2480 * 2481 * However, if we are replacing multsub() with something that 2482 * doesn't mangle arrays, we may need to delay this step until after 2483 * the foo:- or foo:= or whatever that causes that. Note the value 2484 * (string or array) at this point is irrelevant if we are going to 2485 * be doing that. This would mean // and stuff get applied 2486 * arraywise even if quoted. That's probably wrong, so maybe 2487 * this just stays. 
2488 * 2489 * We do a separate stage of dearrayification in the YUK chunk, 2490 * I think mostly because of the way we make array or scalar 2491 * values appear to the caller. 2492 */ 2493 if (isarr) { 2494 if (nojoin) 2495 isarr = -1; 2496 if (qt && !getlen && isarr > 0) { 2497 val = sepjoin(aval, sep, 1); 2498 isarr = 0; 2499 } 2500 } 2501 2502 idend = s; 2503 if (inbrace) { 2504 /* 2505 * This is to match a closing double quote in case 2506 * we didn't have a subexpression, e.g. ${"foo"}. 2507 * This form is pointless, but logically it ought to work. 2508 */ 2509 while (inull(*s)) 2510 s++; 2511 } 2512 /* 2513 * We don't yet know whether a `:' introduces a history-style 2514 * colon modifier or qualifies something like ${...:=...}. 2515 * But if we remember the colon here it's easy to check later. 2516 */ 2517 if ((colf = *s == ':')) 2518 s++; 2519 2520 2521 /* fstr is to be the text following the substitution. If we have * 2522 * braces, we look for it here, else we infer it later on. */ 2523 fstr = s; 2524 if (inbrace) { 2525 int bct; 2526 for (bct = 1; (c = *fstr); fstr++) { 2527 if (c == Inbrace) 2528 bct++; 2529 else if (c == Outbrace && !--bct) 2530 break; 2531 } 2532 2533 if (bct) { 2534 noclosebrace: 2535 zerr("closing brace expected"); 2536 return NULL; 2537 } 2538 if (c) 2539 *fstr++ = '\0'; 2540 } 2541 2542 /* Check for ${..?..} or ${..=..} or one of those. * 2543 * Only works if the name is in braces. */ 2544 2545 if (inbrace && ((c = *s) == '-' || 2546 c == '+' || 2547 c == ':' || /* i.e. a doubled colon */ 2548 c == '=' || c == Equals || 2549 c == '%' || 2550 c == '#' || c == Pound || 2551 c == '?' || c == Quest || 2552 c == '/')) { 2553 2554 /* 2555 * Default index is 1 if no (I) or (I) gave zero. But 2556 * why don't we set the default explicitly at the start 2557 * and massage any passed index where we set flnum anyway? 
2558 */ 2559 if (!flnum) 2560 flnum++; 2561 if (c == '%') 2562 flags |= SUB_END; 2563 2564 /* Check for ${..%%..} or ${..##..} */ 2565 if ((c == '%' || c == '#' || c == Pound) && c == s[1]) { 2566 s++; 2567 /* we have %%, not %, or ##, not # */ 2568 flags |= SUB_LONG; 2569 } 2570 s++; 2571 if (s[-1] == '/') { 2572 char *ptr; 2573 /* 2574 * previous flags are irrelevant, except for (S) which 2575 * indicates shortest substring; else look for longest. 2576 */ 2577 flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG; 2578 if ((c = *s) == '/') { 2579 /* doubled, so replace all occurrences */ 2580 flags |= SUB_GLOBAL; 2581 c = *++s; 2582 } 2583 /* Check for anchored substitution */ 2584 if (c == '#' || c == Pound) { 2585 /* 2586 * anchor at head: this is the `normal' case in 2587 * getmatch and we only require the flag if SUB_END 2588 * is also present. 2589 */ 2590 flags |= SUB_START; 2591 s++; 2592 } 2593 if (*s == '%') { 2594 /* anchor at tail */ 2595 flags |= SUB_END; 2596 s++; 2597 } 2598 if (!(flags & (SUB_START|SUB_END))) { 2599 /* No anchor, so substring */ 2600 flags |= SUB_SUBSTR; 2601 } 2602 /* 2603 * Find the / marking the end of the search pattern. 2604 * If there isn't one, we're just going to delete that, 2605 * i.e. replace it with an empty string. 2606 * 2607 * We used to use double backslashes to quote slashes, 2608 * but actually that was buggy and using a single backslash 2609 * is easier and more obvious. 2610 */ 2611 for (ptr = s; (c = *ptr) && c != '/'; ptr++) 2612 { 2613 if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1]) 2614 { 2615 if (ptr[1] == '/') 2616 chuck(ptr); 2617 else 2618 ptr++; 2619 } 2620 } 2621 replstr = (*ptr && ptr[1]) ? ptr+1 : ""; 2622 *ptr = '\0'; 2623 } 2624 2625 /* See if this was ${...:-...}, ${...:=...}, etc. */ 2626 if (colf) 2627 flags |= SUB_ALL; 2628 /* 2629 * With no special flags, i.e. just a # or % or whatever, 2630 * the matched portion is removed and we keep the rest. 
2631 * We also want the rest when we're doing a substitution. 2632 */ 2633 if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN))) 2634 flags |= SUB_REST; 2635 2636 /* 2637 * With ":" treat a value as unset if the variable is set but 2638 * - (array) contains no elements 2639 * - (scalar) contains an empty string 2640 */ 2641 if (colf && !vunset) 2642 vunset = (isarr) ? !*aval : !*val || (*val == Nularg && !val[1]); 2643 2644 switch (s[-1]) { 2645 case '+': 2646 if (vunset) { 2647 val = dupstring(""); 2648 copied = 1; 2649 isarr = 0; 2650 break; 2651 } 2652 vunset = 1; 2653 /* Fall Through! */ 2654 case '-': 2655 if (vunset) { 2656 int split_flags; 2657 val = dupstring(s); 2658 /* If word-splitting is enabled, we ask multsub() to split 2659 * the substituted string at unquoted whitespace. Then, we 2660 * turn off spbreak so that no further splitting occurs. 2661 * This allows a construct such as ${1+"$@"} to correctly 2662 * keep its array splits, and weird constructs such as 2663 * ${str+"one two" "3 2 1" foo "$str"} to only be split 2664 * at the unquoted spaces. */ 2665 if (spbreak) { 2666 split_flags = PREFORK_SHWORDSPLIT; 2667 if (!aspar) 2668 split_flags |= PREFORK_SPLIT; 2669 } else { 2670 /* 2671 * It's not good enough not passing the flag to use 2672 * SHWORDSPLIT, because when we get to a nested 2673 * paramsubst we need to ignore isset(SHWORDSPLIT). 2674 */ 2675 split_flags = PREFORK_NOSHWORDSPLIT; 2676 } 2677 multsub(&val, split_flags, (aspar ? 
NULL : &aval), 2678 &isarr, NULL); 2679 copied = 1; 2680 spbreak = 0; 2681 /* Leave globsubst on if forced */ 2682 if (globsubst != 2) 2683 globsubst = 0; 2684 } 2685 break; 2686 case ':': 2687 /* this must be `::=', unconditional assignment */ 2688 if (*s != '=' && *s != Equals) 2689 goto noclosebrace; 2690 vunset = 1; 2691 s++; 2692 /* Fall through */ 2693 case '=': 2694 case Equals: 2695 if (vunset) { 2696 char sav = *idend; 2697 int l, split_flags; 2698 2699 *idend = '\0'; 2700 val = dupstring(s); 2701 if (spsep || !arrasg) { 2702 /* POSIX requires PREFORK_SINGLE semantics here, but 2703 * traditional zsh used PREFORK_NOSHWORDSPLIT. Base 2704 * behavior on caller choice of PREFORK_SHWORDSPLIT. */ 2705 multsub(&val, 2706 spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT, 2707 NULL, &isarr, NULL); 2708 } else { 2709 if (spbreak) 2710 split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT; 2711 else 2712 split_flags = PREFORK_NOSHWORDSPLIT; 2713 multsub(&val, split_flags, &aval, &isarr, NULL); 2714 spbreak = 0; 2715 } 2716 if (arrasg) { 2717 /* This is an array assignment. */ 2718 char *arr[2], **t, **a, **p; 2719 if (spsep || spbreak) { 2720 aval = sepsplit(val, spsep, 0, 1); 2721 isarr = nojoin ? 
1 : 2; 2722 l = arrlen(aval); 2723 if (l && !*(aval[l-1])) 2724 l--; 2725 if (l && !**aval) 2726 l--, t = aval + 1; 2727 else 2728 t = aval; 2729 } else if (!isarr) { 2730 if (!*val && arrasg > 1) { 2731 arr[0] = NULL; 2732 l = 0; 2733 } else { 2734 arr[0] = val; 2735 arr[1] = NULL; 2736 l = 1; 2737 } 2738 t = aval = arr; 2739 } else 2740 l = arrlen(aval), t = aval; 2741 p = a = zalloc(sizeof(char *) * (l + 1)); 2742 while (l--) { 2743 untokenize(*t); 2744 *p++ = ztrdup(*t++); 2745 } 2746 *p++ = NULL; 2747 if (arrasg > 1) { 2748 Param pm = sethparam(idbeg, a); 2749 if (pm) 2750 aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals); 2751 } else 2752 setaparam(idbeg, a); 2753 } else { 2754 untokenize(val); 2755 setsparam(idbeg, ztrdup(val)); 2756 } 2757 *idend = sav; 2758 copied = 1; 2759 if (isarr) { 2760 if (nojoin) 2761 isarr = -1; 2762 if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) { 2763 val = sepjoin(aval, sep, 1); 2764 isarr = 0; 2765 } 2766 sep = spsep = NULL; 2767 spbreak = 0; 2768 } 2769 } 2770 break; 2771 case '?': 2772 case Quest: 2773 if (vunset) { 2774 if (isset(EXECOPT)) { 2775 *idend = '\0'; 2776 zerr("%s: %s", idbeg, *s ? s : "parameter not set"); 2777 if (!interact) { 2778 if (mypid == getpid()) { 2779 /* 2780 * paranoia: don't check for jobs, but there 2781 * shouldn't be any if not interactive. 2782 */ 2783 stopmsg = 1; 2784 zexit(1, 0); 2785 } else 2786 _exit(1); 2787 } 2788 } 2789 return NULL; 2790 } 2791 break; 2792 case '%': 2793 case '#': 2794 case Pound: 2795 case '/': 2796 /* This once was executed only `if (qt) ...'. But with that 2797 * patterns in a expansion resulting from a ${(e)...} aren't 2798 * tokenized even though this function thinks they are (it thinks 2799 * they are because parse_subst_str() turns Qstring tokens 2800 * into String tokens and for unquoted parameter expansions the 2801 * lexer normally does tokenize patterns inside parameter 2802 * expansions). 
*/ 2803 { 2804 int one = noerrs, oef = errflag, haserr; 2805 2806 if (!quoteerr) 2807 noerrs = 1; 2808 haserr = parse_subst_string(s); 2809 noerrs = one; 2810 if (!quoteerr) { 2811 errflag = oef; 2812 if (haserr) 2813 shtokenize(s); 2814 } else if (haserr || errflag) { 2815 zerr("parse error in ${...%c...} substitution", s[-1]); 2816 return NULL; 2817 } 2818 } 2819 { 2820#if 0 2821 /* 2822 * This allows # and % to be at the start of 2823 * a parameter in the substitution, which is 2824 * a bit nasty, and can be done (although 2825 * less efficiently) with anchors. 2826 */ 2827 2828 char t = s[-1]; 2829 2830 singsub(&s); 2831 2832 if (t == '/' && (flags & SUB_SUBSTR)) { 2833 if ((c = *s) == '#' || c == '%') { 2834 flags &= ~SUB_SUBSTR; 2835 if (c == '%') 2836 flags |= SUB_END; 2837 s++; 2838 } else if (c == '\\') { 2839 s++; 2840 } 2841 } 2842#else 2843 singsub(&s); 2844#endif 2845 } 2846 2847 /* 2848 * Either loop over an array doing replacements or 2849 * do the replacment on a string. 2850 * 2851 * We need an untokenized value for matching. 
2852 */ 2853 if (!vunset && isarr) { 2854 char **ap; 2855 if (!copied) { 2856 aval = arrdup(aval); 2857 copied = 1; 2858 } 2859 for (ap = aval; *ap; ap++) { 2860 untokenize(*ap); 2861 } 2862 getmatcharr(&aval, s, flags, flnum, replstr); 2863 } else { 2864 if (vunset) { 2865 if (unset(UNSET)) { 2866 *idend = '\0'; 2867 zerr("%s: parameter not set", idbeg); 2868 return NULL; 2869 } 2870 val = dupstring(""); 2871 } 2872 if (!copied) { 2873 val = dupstring(val); 2874 copied = 1; 2875 untokenize(val); 2876 } 2877 getmatch(&val, s, flags, flnum, replstr); 2878 } 2879 break; 2880 } 2881 } else if (inbrace && (*s == '|' || *s == Bar || 2882 *s == '*' || *s == Star)) { 2883 int intersect = (*s == '*' || *s == Star); 2884 char **compare, **ap, **apsrc; 2885 ++s; 2886 if (*itype_end(s, IIDENT, 0)) { 2887 untokenize(s); 2888 zerr("not an identifier: %s", s); 2889 return NULL; 2890 } 2891 compare = getaparam(s); 2892 if (compare) { 2893 HashTable ht = newuniqtable(arrlen(compare)+1); 2894 int present; 2895 for (ap = compare; *ap; ap++) 2896 (void)addhashnode2(ht, *ap, (HashNode) 2897 zhalloc(sizeof(struct hashnode))); 2898 if (!vunset && isarr) { 2899 if (!copied) { 2900 aval = arrdup(aval); 2901 copied = 1; 2902 } 2903 for (ap = apsrc = aval; *apsrc; apsrc++) { 2904 untokenize(*apsrc); 2905 present = (gethashnode2(ht, *apsrc) != NULL); 2906 if (intersect ? present : !present) { 2907 if (ap != apsrc) { 2908 *ap = *apsrc; 2909 } 2910 ap++; 2911 } 2912 } 2913 *ap = NULL; 2914 } else { 2915 if (vunset) { 2916 if (unset(UNSET)) { 2917 *idend = '\0'; 2918 zerr("%s: parameter not set", idbeg); 2919 deletehashtable(ht); 2920 return NULL; 2921 } 2922 val = dupstring(""); 2923 } else { 2924 present = (gethashnode2(ht, val) != NULL); 2925 if (intersect ? !present : present) 2926 val = dupstring(""); 2927 } 2928 } 2929 deletehashtable(ht); 2930 } else if (intersect) { 2931 /* 2932 * The intersection with nothing is nothing... 
2933 * Seems a bit pointless complaining that the first 2934 * expression is unset here if the second is, too. 2935 */ 2936 if (!vunset) { 2937 if (isarr) { 2938 aval = mkarray(NULL); 2939 } else { 2940 val = dupstring(""); 2941 } 2942 } 2943 } 2944 if (vunset) { 2945 if (unset(UNSET)) { 2946 *idend = '\0'; 2947 zerr("%s: parameter not set", idbeg); 2948 return NULL; 2949 } 2950 val = dupstring(""); 2951 } 2952 } else { /* no ${...=...} or anything, but possible modifiers. */ 2953 /* 2954 * Handler ${+...}. TODO: strange, why do we handle this only 2955 * if there isn't a trailing modifier? Why don't we do this 2956 * e.g. when we handle the ${(t)...} flag? 2957 */ 2958 if (chkset) { 2959 val = dupstring(vunset ? "0" : "1"); 2960 isarr = 0; 2961 } else if (vunset) { 2962 if (unset(UNSET)) { 2963 *idend = '\0'; 2964 zerr("%s: parameter not set", idbeg); 2965 return NULL; 2966 } 2967 val = dupstring(""); 2968 } 2969 if (colf && inbrace) { 2970 /* 2971 * Look for ${PARAM:OFFSET} or ${PARAM:OFFSET:LENGTH}. 2972 * This must appear before modifiers. For compatibility 2973 * with bash we perform both standard string substitutions 2974 * and math eval. 
2975 */ 2976 char *check_offset2; 2977 char *check_offset = check_colon_subscript(s, &check_offset2); 2978 if (check_offset) { 2979 zlong offset = mathevali(check_offset); 2980 zlong length = 0; 2981 int length_set = 0; 2982 int offset_hack_argzero = 0; 2983 if (errflag) 2984 return NULL; 2985 if ((*check_offset2 && *check_offset2 != ':')) { 2986 zerr("invalid subscript: %s", check_offset); 2987 return NULL; 2988 } 2989 if (*check_offset2) { 2990 check_offset = check_colon_subscript(check_offset2 + 1, 2991 &check_offset2); 2992 if (*check_offset2 && *check_offset2 != ':') { 2993 zerr("invalid length: %s", check_offset); 2994 return NULL; 2995 } 2996 if (check_offset) { 2997 length = mathevali(check_offset); 2998 length_set = 1; 2999 if (errflag) 3000 return NULL; 3001 } 3002 } 3003 if (isarr) { 3004 int alen, count; 3005 char **srcptr, **dstptr, **newarr; 3006 3007 if (horrible_offset_hack) { 3008 /* 3009 * As part of the 'orrible hoffset 'ack, 3010 * (what hare you? Han 'orrible hoffset 'ack, 3011 * sergeant major), if we are given a ksh/bash/POSIX 3012 * style positional parameter array which includes 3013 * offset 0, we use $0. 
3014 */ 3015 if (offset == 0) { 3016 offset_hack_argzero = 1; 3017 } else if (offset > 0) { 3018 offset--; 3019 } 3020 } 3021 3022 alen = arrlen(aval); 3023 if (offset < 0) { 3024 offset += alen; 3025 if (offset < 0) 3026 offset = 0; 3027 } 3028 if (offset_hack_argzero) 3029 alen++; 3030 if (length_set) { 3031 if (length < 0) 3032 length += alen - offset; 3033 if (length < 0) { 3034 zerr("substring expression: %d < %d", 3035 (int)(length + offset), (int)offset); 3036 return NULL; 3037 } 3038 } else 3039 length = alen; 3040 if (offset > alen) 3041 offset = alen; 3042 if (offset + length > alen) 3043 length = alen - offset; 3044 count = length; 3045 srcptr = aval + offset; 3046 newarr = dstptr = (char **) 3047 zhalloc((length+1)*sizeof(char *)); 3048 if (count && offset_hack_argzero) { 3049 *dstptr++ = dupstring(argzero); 3050 count--; 3051 } 3052 while (count--) 3053 *dstptr++ = dupstring(*srcptr++); 3054 *dstptr = (char *)NULL; 3055 aval = newarr; 3056 } else { 3057 char *sptr, *eptr; 3058 int given_offset; 3059 if (offset < 0) { 3060 MB_METACHARINIT(); 3061 for (sptr = val; *sptr; ) { 3062 sptr += MB_METACHARLEN(sptr); 3063 offset++; 3064 } 3065 if (offset < 0) 3066 offset = 0; 3067 } 3068 given_offset = offset; 3069 MB_METACHARINIT(); 3070 if (length_set && length < 0) 3071 length -= offset; 3072 for (sptr = val; *sptr && offset; ) { 3073 sptr += MB_METACHARLEN(sptr); 3074 offset--; 3075 } 3076 if (length_set) { 3077 if (length < 0) { 3078 MB_METACHARINIT(); 3079 for (eptr = val; *eptr; ) { 3080 eptr += MB_METACHARLEN(eptr); 3081 length++; 3082 } 3083 if (length < 0) { 3084 zerr("substring expression: %d < %d", 3085 (int)(length + given_offset), 3086 (int)given_offset); 3087 return NULL; 3088 } 3089 } 3090 for (eptr = sptr; *eptr && length; ) { 3091 eptr += MB_METACHARLEN(eptr); 3092 length--; 3093 } 3094 val = dupstrpfx(sptr, eptr - sptr); 3095 } else { 3096 val = dupstring(sptr); 3097 } 3098 } 3099 if (!*check_offset2) { 3100 colf = 0; 3101 } else { 3102 s = 
check_offset2 + 1; 3103 } 3104 } 3105 } 3106 if (colf) { 3107 /* 3108 * History style colon modifiers. May need to apply 3109 * on multiple elements of an array. 3110 */ 3111 s--; 3112 if (unset(KSHARRAYS) || inbrace) { 3113 if (!isarr) 3114 modify(&val, &s); 3115 else { 3116 char *ss; 3117 char **ap = aval; 3118 char **pp = aval = (char **) hcalloc(sizeof(char *) * 3119 (arrlen(aval) + 1)); 3120 3121 while ((*pp = *ap++)) { 3122 ss = s; 3123 modify(pp++, &ss); 3124 } 3125 if (pp == aval) { 3126 char *t = ""; 3127 ss = s; 3128 modify(&t, &ss); 3129 } 3130 s = ss; 3131 } 3132 copied = 1; 3133 if (inbrace && *s) { 3134 if (*s == ':' && !imeta(s[1])) 3135 zerr("unrecognized modifier `%c'", s[1]); 3136 else 3137 zerr("unrecognized modifier"); 3138 return NULL; 3139 } 3140 } 3141 } 3142 if (!inbrace) 3143 fstr = s; 3144 } 3145 if (errflag) 3146 return NULL; 3147 if (evalchar) { 3148 int one = noerrs, oef = errflag, haserr = 0; 3149 3150 if (!quoteerr) 3151 noerrs = 1; 3152 /* 3153 * Evaluate the value numerically and output the result as 3154 * a character. 3155 */ 3156 if (isarr) { 3157 char **aval2, **avptr, **av2ptr; 3158 3159 aval2 = (char **)zhalloc((arrlen(aval)+1)*sizeof(char *)); 3160 3161 for (avptr = aval, av2ptr = aval2; *avptr; avptr++, av2ptr++) 3162 { 3163 /* When noerrs = 1, the only error is out-of-memory */ 3164 if (!(*av2ptr = substevalchar(*avptr))) { 3165 haserr = 1; 3166 break; 3167 } 3168 } 3169 *av2ptr = NULL; 3170 aval = aval2; 3171 } else { 3172 /* When noerrs = 1, the only error is out-of-memory */ 3173 if (!(val = substevalchar(val))) 3174 haserr = 1; 3175 } 3176 noerrs = one; 3177 if (!quoteerr) 3178 errflag = oef; 3179 if (haserr || errflag) 3180 return NULL; 3181 } 3182 /* 3183 * This handles taking a length with ${#foo} and variations. 3184 * TODO: again. 
one might naively have thought this had the 3185 * same sort of effect as the ${(t)...} flag and the ${+...} 3186 * test, although in this case we do need the value rather 3187 * the parameter, so maybe it's a bit different. 3188 */ 3189 if (getlen) { 3190 long len = 0; 3191 char buf[14]; 3192 3193 if (isarr) { 3194 char **ctr; 3195 int sl = sep ? MB_METASTRLEN(sep) : 1; 3196 3197 if (getlen == 1) 3198 for (ctr = aval; *ctr; ctr++, len++); 3199 else if (getlen == 2) { 3200 if (*aval) 3201 for (len = -sl, ctr = aval; 3202 len += sl + MB_METASTRLEN2(*ctr, multi_width), 3203 *++ctr;); 3204 } 3205 else 3206 for (ctr = aval; 3207 *ctr; 3208 len += wordcount(*ctr, spsep, getlen > 3), ctr++); 3209 } else { 3210 if (getlen < 3) 3211 len = MB_METASTRLEN2(val, multi_width); 3212 else 3213 len = wordcount(val, spsep, getlen > 3); 3214 } 3215 3216 sprintf(buf, "%ld", len); 3217 val = dupstring(buf); 3218 isarr = 0; 3219 } 3220 /* At this point we make sure that our arrayness has affected the 3221 * arrayness of the linked list. Then, we can turn our value into 3222 * a scalar for convenience sake without affecting the arrayness 3223 * of the resulting value. ## This is the YUK chunk. ## */ 3224 if (isarr) 3225 l->list.flags |= LF_ARRAY; 3226 else 3227 l->list.flags &= ~LF_ARRAY; 3228 if (isarr > 0 && !plan9 && (!aval || !aval[0])) { 3229 val = dupstring(""); 3230 isarr = 0; 3231 } else if (isarr && aval && aval[0] && !aval[1]) { 3232 /* treat a one-element array as a scalar for purposes of * 3233 * concatenation with surrounding text (some${param}thing) * 3234 * and rc_expand_param handling. Note: LF_ARRAY (above) * 3235 * propagates the true array type from nested expansions. */ 3236 val = aval[0]; 3237 isarr = 0; 3238 } 3239 /* This is where we may join arrays together, e.g. (j:,:) sets "sep", and 3240 * (afterward) may split the joined value (e.g. (s:-:) sets "spsep"). 
One 3241 * exception is that ${name:-word} and ${name:+word} will have already 3242 * done any requested splitting of the word value with quoting preserved. 3243 */ 3244 if (ssub || (spbreak && isarr >= 0) || spsep || sep) { 3245 if (isarr) { 3246 val = sepjoin(aval, sep, 1); 3247 isarr = 0; 3248 } 3249 if (!ssub && (spbreak || spsep)) { 3250 aval = sepsplit(val, spsep, 0, 1); 3251 if (!aval || !aval[0]) 3252 val = dupstring(""); 3253 else if (!aval[1]) 3254 val = aval[0]; 3255 else 3256 isarr = nojoin ? 1 : 2; 3257 } 3258 if (isarr) 3259 l->list.flags |= LF_ARRAY; 3260 else 3261 l->list.flags &= ~LF_ARRAY; 3262 } 3263 /* 3264 * Perform case modififications. 3265 */ 3266 if (casmod != CASMOD_NONE) { 3267 copied = 1; /* string is always modified by copy */ 3268 if (isarr) { 3269 char **ap, **ap2; 3270 3271 ap = aval; 3272 ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1)); 3273 3274 while (*ap) 3275 *ap2++ = casemodify(*ap++, casmod); 3276 *ap2++ = NULL; 3277 } else { 3278 val = casemodify(val, casmod); 3279 } 3280 } 3281 /* 3282 * Process echo- and print-style escape sequences. 3283 */ 3284 if (getkeys >= 0) { 3285 int len; 3286 3287 copied = 1; /* string is always copied */ 3288 if (isarr) { 3289 char **ap, **ap2; 3290 3291 ap = aval; 3292 aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1)); 3293 for (ap2 = aval; *ap; ap++, ap2++) { 3294 *ap2 = getkeystring(*ap, &len, getkeys, NULL); 3295 *ap2 = metafy(*ap2, len, META_USEHEAP); 3296 } 3297 *ap2++ = NULL; 3298 } else { 3299 val = getkeystring(val, &len, getkeys, NULL); 3300 val = metafy(val, len, META_USEHEAP); 3301 } 3302 } 3303 /* 3304 * Perform prompt-style modifications. 
3305 */ 3306 if (presc) { 3307 int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG]; 3308 int opp = opts[PROMPTPERCENT]; 3309 3310 if (presc < 2) { 3311 opts[PROMPTPERCENT] = 1; 3312 opts[PROMPTSUBST] = opts[PROMPTBANG] = 0; 3313 } 3314 /* 3315 * TODO: It would be really quite nice to abstract the 3316 * isarr and !issarr code into a function which gets 3317 * passed a pointer to a function with the effect of 3318 * the promptexpand bit. Then we could use this for 3319 * a lot of stuff and bury val/aval/isarr inside a structure 3320 * which gets passed to it. 3321 */ 3322 if (isarr) { 3323 char **ap; 3324 3325 if (!copied) 3326 aval = arrdup(aval), copied = 1; 3327 ap = aval; 3328 for (; *ap; ap++) { 3329 char *tmps; 3330 untokenize(*ap); 3331 tmps = promptexpand(*ap, 0, NULL, NULL, NULL); 3332 *ap = dupstring(tmps); 3333 free(tmps); 3334 } 3335 } else { 3336 char *tmps; 3337 if (!copied) 3338 val = dupstring(val), copied = 1; 3339 untokenize(val); 3340 tmps = promptexpand(val, 0, NULL, NULL, NULL); 3341 val = dupstring(tmps); 3342 free(tmps); 3343 } 3344 opts[PROMPTSUBST] = ops; 3345 opts[PROMPTBANG] = opb; 3346 opts[PROMPTPERCENT] = opp; 3347 } 3348 /* 3349 * One of the possible set of quotes to apply, depending on 3350 * the repetitions of the (q) flag. 
3351 */ 3352 if (quotemod) { 3353 int pre = 0, post = 0; 3354 3355 if (quotemod > 0 && quotetype > QT_BACKSLASH) { 3356 switch (quotetype) 3357 { 3358 case QT_DOLLARS: 3359 /* space for "$" */ 3360 pre = 2; 3361 post = 1; 3362 break; 3363 3364 case QT_SINGLE_OPTIONAL: 3365 /* quotes will be added for us */ 3366 break; 3367 3368 default: 3369 pre = post = 1; 3370 break; 3371 } 3372 } 3373 if (isarr) { 3374 char **ap; 3375 3376 if (!copied) 3377 aval = arrdup(aval), copied = 1; 3378 ap = aval; 3379 3380 if (quotemod > 0) { 3381 if (quotetype > QT_BACKSLASH) { 3382 int sl; 3383 char *tmp; 3384 3385 for (; *ap; ap++) { 3386 tmp = quotestring(*ap, NULL, quotetype); 3387 sl = strlen(tmp); 3388 *ap = (char *) zhalloc(pre + sl + post + 1); 3389 strcpy((*ap) + pre, tmp); 3390 if (pre) 3391 ap[0][pre - 1] = ap[0][pre + sl] = 3392 (quotetype != QT_DOUBLE ? '\'' : '"'); 3393 ap[0][pre + sl + 1] = '\0'; 3394 if (quotetype == QT_DOLLARS) 3395 ap[0][0] = '$'; 3396 } 3397 } else 3398 for (; *ap; ap++) 3399 *ap = quotestring(*ap, NULL, QT_BACKSLASH_SHOWNULL); 3400 } else { 3401 int one = noerrs, oef = errflag, haserr = 0; 3402 3403 if (!quoteerr) 3404 noerrs = 1; 3405 for (; *ap; ap++) { 3406 haserr |= parse_subst_string(*ap); 3407 remnulargs(*ap); 3408 untokenize(*ap); 3409 } 3410 noerrs = one; 3411 if (!quoteerr) 3412 errflag = oef; 3413 else if (haserr || errflag) { 3414 zerr("parse error in parameter value"); 3415 return NULL; 3416 } 3417 } 3418 } else { 3419 if (!copied) 3420 val = dupstring(val), copied = 1; 3421 if (quotemod > 0) { 3422 if (quotetype > QT_BACKSLASH) { 3423 int sl; 3424 char *tmp; 3425 tmp = quotestring(val, NULL, quotetype); 3426 sl = strlen(tmp); 3427 val = (char *) zhalloc(pre + sl + 2); 3428 strcpy(val + pre, tmp); 3429 if (pre) 3430 val[pre - 1] = val[pre + sl] = 3431 (quotetype != QT_DOUBLE ? 
'\'' : '"'); 3432 val[pre + sl + 1] = '\0'; 3433 if (quotetype == QT_DOLLARS) 3434 val[0] = '$'; 3435 } else 3436 val = quotestring(val, NULL, QT_BACKSLASH_SHOWNULL); 3437 } else { 3438 int one = noerrs, oef = errflag, haserr; 3439 3440 if (!quoteerr) 3441 noerrs = 1; 3442 haserr = parse_subst_string(val); 3443 noerrs = one; 3444 if (!quoteerr) 3445 errflag = oef; 3446 else if (haserr || errflag) { 3447 zerr("parse error in parameter value"); 3448 return NULL; 3449 } 3450 remnulargs(val); 3451 untokenize(val); 3452 } 3453 } 3454 } 3455 /* 3456 * Transform special characters in the string to make them 3457 * printable, or to show directories, or possibly even both. 3458 */ 3459 if (mods) { 3460 if (isarr) { 3461 char **ap; 3462 if (!copied) 3463 aval = arrdup(aval), copied = 1; 3464 for (ap = aval; *ap; ap++) { 3465 if (mods & 1) 3466 *ap = substnamedir(*ap); 3467 if (mods & 2) 3468 *ap = nicedupstring(*ap); 3469 } 3470 } else { 3471 if (!copied) 3472 val = dupstring(val), copied = 1; 3473 if (mods & 1) 3474 val = substnamedir(val); 3475 if (mods & 2) 3476 val = nicedupstring(val); 3477 } 3478 } 3479 /* 3480 * Nothing particularly to do with SH_WORD_SPLIT --- this 3481 * performs lexical splitting on a string as specified by 3482 * the (z) flag. 3483 */ 3484 if (shsplit) { 3485 LinkList list = NULL; 3486 3487 if (isarr) { 3488 char **ap; 3489 for (ap = aval; *ap; ap++) 3490 list = bufferwords(list, *ap, NULL, shsplit); 3491 isarr = 0; 3492 } else 3493 list = bufferwords(NULL, val, NULL, shsplit); 3494 3495 if (!list || !firstnode(list)) 3496 val = dupstring(""); 3497 else if (!nextnode(firstnode(list))) 3498 val = getdata(firstnode(list)); 3499 else { 3500 aval = hlinklist2array(list, 0); 3501 isarr = nojoin ? 1 : 2; 3502 l->list.flags |= LF_ARRAY; 3503 } 3504 copied = 1; 3505 } 3506 /* 3507 * TODO: hmm. At this point we have to be on our toes about 3508 * whether we're putting stuff into a line or not, i.e. 3509 * we don't want to do this from a recursive call. 
3510 * Rather than passing back flags in a non-trivial way, maybe 3511 * we could decide on the basis of flags passed down to us. 3512 * 3513 * This is the ideal place to do any last-minute conversion from 3514 * array to strings. However, given all the transformations we've 3515 * already done, probably if it's going to be done it will already 3516 * have been. (I'd really like to keep everying in aval or 3517 * equivalent and only locally decide if we need to treat it 3518 * as a scalar.) 3519 */ 3520 if (isarr) { 3521 char *x; 3522 char *y; 3523 int xlen; 3524 int i; 3525 LinkNode on = n; 3526 3527 /* Handle the (u) flag; we need this before the next test */ 3528 if (unique) { 3529 if(!copied) 3530 aval = arrdup(aval); 3531 3532 i = arrlen(aval); 3533 if (i > 1) 3534 zhuniqarray(aval); 3535 } 3536 if ((!aval[0] || !aval[1]) && !plan9) { 3537 /* 3538 * Empty array or single element. Currently you only 3539 * get a single element array at this point from the 3540 * unique expansion above. but we can potentially 3541 * have other reasons. 3542 * 3543 * The following test removes the markers 3544 * from surrounding double quotes, but I don't know why 3545 * that's necessary. 3546 */ 3547 int vallen; 3548 if (aptr > (char *) getdata(n) && 3549 aptr[-1] == Dnull && *fstr == Dnull) 3550 *--aptr = '\0', fstr++; 3551 vallen = aval[0] ? 
strlen(aval[0]) : 0; 3552 y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1); 3553 strcpy(y, ostr); 3554 *str = y + (aptr - ostr); 3555 if (vallen) 3556 { 3557 strcpy(*str, aval[0]); 3558 *str += vallen; 3559 } 3560 strcpy(*str, fstr); 3561 setdata(n, y); 3562 return n; 3563 } 3564 /* Handle (o) and (O) and their variants */ 3565 if (sortit != SORTIT_ANYOLDHOW) { 3566 if (!copied) 3567 aval = arrdup(aval); 3568 if (indord) { 3569 if (sortit & SORTIT_BACKWARDS) { 3570 char *copy; 3571 char **end = aval + arrlen(aval) - 1, **start = aval; 3572 3573 /* reverse the array */ 3574 while (start < end) { 3575 copy = *end; 3576 *end-- = *start; 3577 *start++ = copy; 3578 } 3579 } 3580 } else { 3581 /* 3582 * HERE: we tested if the last element of the array 3583 * was not a NULL string. Why the last element? 3584 * Why didn't we expect NULL strings to work? 3585 * Was it just a clumsy way of testing whether there 3586 * was enough in the array to sort? 3587 */ 3588 strmetasort(aval, sortit, NULL); 3589 } 3590 } 3591 if (plan9) { 3592 /* Handle RC_EXPAND_PARAM */ 3593 LinkNode tn; 3594 local_list1(tl); 3595 3596 *--fstr = Marker; 3597 init_list1(tl, fstr); 3598 if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0)) 3599 return NULL; 3600 *str = aptr; 3601 tn = firstnode(&tl); 3602 while ((x = *aval++)) { 3603 if (prenum || postnum) 3604 x = dopadding(x, prenum, postnum, preone, postone, 3605 premul, postmul 3606#ifdef MULTIBYTE_SUPPORT 3607 , multi_width 3608#endif 3609 ); 3610 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) 3611 return NULL; 3612 xlen = strlen(x); 3613 for (tn = firstnode(&tl); 3614 tn && *(y = (char *) getdata(tn)) == Marker; 3615 incnode(tn)) { 3616 strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst, 3617 copied); 3618 if (qt && !*y && isarr != 2) 3619 y = dupstring(nulstring); 3620 if (plan9) 3621 setdata(n, (void *) y), plan9 = 0; 3622 else 3623 insertlinknode(l, n, (void *) y), incnode(n); 3624 } 3625 } 3626 for (; tn; 
incnode(tn)) { 3627 y = (char *) getdata(tn); 3628 if (*y == Marker) 3629 continue; 3630 if (qt && !*y && isarr != 2) 3631 y = dupstring(nulstring); 3632 if (plan9) 3633 setdata(n, (void *) y), plan9 = 0; 3634 else 3635 insertlinknode(l, n, (void *) y), incnode(n); 3636 } 3637 if (plan9) { 3638 uremnode(l, n); 3639 return n; 3640 } 3641 } else { 3642 /* 3643 * Not RC_EXPAND_PARAM: simply join the first and 3644 * last values. 3645 * TODO: how about removing the restriction that 3646 * aval[1] is non-NULL to promote consistency?, or 3647 * simply changing the test so that we drop into 3648 * the scalar branch, instead of tricking isarr? 3649 */ 3650 x = aval[0]; 3651 if (prenum || postnum) 3652 x = dopadding(x, prenum, postnum, preone, postone, 3653 premul, postmul 3654#ifdef MULTIBYTE_SUPPORT 3655 , multi_width 3656#endif 3657 ); 3658 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) 3659 return NULL; 3660 xlen = strlen(x); 3661 strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied); 3662 if (qt && !*y && isarr != 2) 3663 y = dupstring(nulstring); 3664 setdata(n, (void *) y); 3665 3666 i = 1; 3667 /* aval[1] is non-null here */ 3668 while (aval[i + 1]) { 3669 x = aval[i++]; 3670 if (prenum || postnum) 3671 x = dopadding(x, prenum, postnum, preone, postone, 3672 premul, postmul 3673#ifdef MULTIBYTE_SUPPORT 3674 , multi_width 3675#endif 3676 ); 3677 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) 3678 return NULL; 3679 if (qt && !*x && isarr != 2) 3680 y = dupstring(nulstring); 3681 else { 3682 y = dupstring(x); 3683 if (globsubst) 3684 shtokenize(y); 3685 } 3686 insertlinknode(l, n, (void *) y), incnode(n); 3687 } 3688 3689 x = aval[i]; 3690 if (prenum || postnum) 3691 x = dopadding(x, prenum, postnum, preone, postone, 3692 premul, postmul 3693#ifdef MULTIBYTE_SUPPORT 3694 , multi_width 3695#endif 3696 ); 3697 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) 3698 return NULL; 3699 xlen = strlen(x); 3700 *str = strcatsub(&y, 
aptr, aptr, x, xlen, fstr, globsubst, copied); 3701 if (qt && !*y && isarr != 2) 3702 y = dupstring(nulstring); 3703 insertlinknode(l, n, (void *) y), incnode(n); 3704 } 3705 if (eval) 3706 n = on; 3707 } else { 3708 /* 3709 * Scalar value. Handle last minute transformations 3710 * such as left- or right-padding and the (e) flag to 3711 * revaluate the result. 3712 */ 3713 int xlen; 3714 char *x; 3715 char *y; 3716 3717 x = val; 3718 if (!x) { 3719 /* Shouldn't have got here with a NULL string. */ 3720 DPUTS(1, "value is NULL in paramsubst"); 3721 return NULL; 3722 } 3723 if (prenum || postnum) 3724 x = dopadding(x, prenum, postnum, preone, postone, 3725 premul, postmul 3726#ifdef MULTIBYTE_SUPPORT 3727 , multi_width 3728#endif 3729 ); 3730 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr)) 3731 return NULL; 3732 xlen = strlen(x); 3733 *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied); 3734 if (qt && !*y) 3735 y = dupstring(nulstring); 3736 setdata(n, (void *) y); 3737 } 3738 if (eval) 3739 *str = (char *) getdata(n); 3740 3741 return n; 3742} 3743 3744/* 3745 * Arithmetic substitution: `a' is the string to be evaluated, `bptr' 3746 * points to the beginning of the string containing it. The tail of 3747 * the string is given by `rest'. *bptr is modified with the substituted 3748 * string. The function returns a pointer to the tail in the substituted 3749 * string. 
3750 */ 3751 3752/**/ 3753static char * 3754arithsubst(char *a, char **bptr, char *rest) 3755{ 3756 char *s = *bptr, *t; 3757 char buf[BDIGBUFSIZE], *b = buf; 3758 mnumber v; 3759 3760 singsub(&a); 3761 v = matheval(a); 3762 if ((v.type & MN_FLOAT) && !outputradix) 3763 b = convfloat(v.u.d, 0, 0, NULL); 3764 else { 3765 if (v.type & MN_FLOAT) 3766 v.u.l = (zlong) v.u.d; 3767 convbase(buf, v.u.l, outputradix); 3768 } 3769 t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + 3770 strlen(rest) + 1); 3771 t--; 3772 while ((*++t = *s++)); 3773 t--; 3774 while ((*++t = *b++)); 3775 strcat(t, rest); 3776 return t; 3777} 3778 3779/**/ 3780void 3781modify(char **str, char **ptr) 3782{ 3783 char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e; 3784 char *copy, *all, *tmp, sav, sav1, *ptr1end; 3785 int gbal, wall, rec, al, nl, charlen, dellen; 3786 convchar_t del; 3787 3788 test = NULL; 3789 3790 if (**ptr == ':') 3791 *str = dupstring(*str); 3792 3793 while (**ptr == ':') { 3794 lptr = *ptr; 3795 (*ptr)++; 3796 wall = gbal = 0; 3797 rec = 1; 3798 c = '\0'; 3799 sep = NULL; 3800 3801 for (; !c && **ptr;) { 3802 switch (**ptr) { 3803 case 'a': 3804 case 'A': 3805 case 'c': 3806 case 'h': 3807 case 'r': 3808 case 'e': 3809 case 't': 3810 case 'l': 3811 case 'u': 3812 case 'q': 3813 case 'Q': 3814 c = **ptr; 3815 break; 3816 3817 case 's': 3818 c = **ptr; 3819 (*ptr)++; 3820 ptr1 = *ptr; 3821 MB_METACHARINIT(); 3822 charlen = MB_METACHARLENCONV(ptr1, &del); 3823#ifdef MULTIBYTE_SUPPORT 3824 if (del == WEOF) 3825 del = (wint_t)((*ptr1 == Meta) ? 
ptr1[1] ^ 32 : *ptr1); 3826#endif 3827 ptr1 += charlen; 3828 for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) { 3829 convchar_t del2; 3830 if ((*ptr2 == Bnull || *ptr2 == '\\') && ptr2[1]) { 3831 /* in double quotes, the backslash isn't tokenized */ 3832 if (*ptr2 == '\\') 3833 *ptr2 = Bnull; 3834 charlen = 2; 3835 continue; 3836 } 3837 charlen = MB_METACHARLENCONV(ptr2, &del2); 3838#ifdef MULTIBYTE_SUPPORT 3839 if (del2 == WEOF) 3840 del2 = (wint_t)((*ptr2 == Meta) ? 3841 ptr2[1] ^ 32 : *ptr2); 3842#endif 3843 if (del2 == del) 3844 break; 3845 } 3846 if (!*ptr2) { 3847 zerr("bad substitution"); 3848 return; 3849 } 3850 ptr1end = ptr2; 3851 ptr2 += charlen; 3852 sav1 = *ptr1end; 3853 *ptr1end = '\0'; 3854 for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) { 3855 convchar_t del3; 3856 if ((*ptr3 == Bnull || *ptr3 == '\\') && ptr3[1]) { 3857 /* in double quotes, the backslash isn't tokenized */ 3858 if (*ptr3 == '\\') 3859 *ptr3 = Bnull; 3860 charlen = 2; 3861 continue; 3862 } 3863 charlen = MB_METACHARLENCONV(ptr3, &del3); 3864#ifdef MULTIBYTE_SUPPORT 3865 if (del3 == WEOF) 3866 del3 = (wint_t)((*ptr3 == Meta) ? 3867 ptr3[1] ^ 32 : *ptr3); 3868#endif 3869 if (del3 == del) 3870 break; 3871 } 3872 sav = *ptr3; 3873 *ptr3 = '\0'; 3874 if (*ptr1) { 3875 zsfree(hsubl); 3876 hsubl = ztrdup(ptr1); 3877 } 3878 if (!hsubl) { 3879 zerr("no previous substitution"); 3880 return; 3881 } 3882 zsfree(hsubr); 3883 for (tt = hsubl; *tt; tt++) 3884 if (inull(*tt) && *tt != Bnullkeep) 3885 chuck(tt--); 3886 if (!isset(HISTSUBSTPATTERN)) 3887 untokenize(hsubl); 3888 for (tt = hsubr = ztrdup(ptr2); *tt; tt++) { 3889 if (inull(*tt) && *tt != Bnullkeep) { 3890 if (*tt == Bnull && (tt[1] == '&' || tt[1] == '\\')) { 3891 /* 3892 * The substitution will treat \& and \\ 3893 * specially. We need to leave real \'s 3894 * as the first character for this to work. 
3895 */ 3896 *tt = '\\'; 3897 } else { 3898 chuck(tt--); 3899 } 3900 } 3901 } 3902 *ptr1end = sav1; 3903 *ptr3 = sav; 3904 *ptr = ptr3 - 1; 3905 if (*ptr3) { 3906 /* Final terminator is optional. */ 3907 *ptr += charlen; 3908 } 3909 break; 3910 3911 case '&': 3912 c = 's'; 3913 break; 3914 3915 case 'g': 3916 (*ptr)++; 3917 gbal = 1; 3918 break; 3919 3920 case 'w': 3921 wall = 1; 3922 (*ptr)++; 3923 break; 3924 case 'W': 3925 wall = 1; 3926 (*ptr)++; 3927 ptr1 = get_strarg(ptr2 = *ptr, &charlen); 3928 if ((sav = *ptr1)) 3929 *ptr1 = '\0'; 3930 sep = dupstring(ptr2 + charlen); 3931 if (sav) 3932 *ptr1 = sav; 3933 *ptr = ptr1 + charlen; 3934 c = '\0'; 3935 break; 3936 3937 case 'f': 3938 rec = -1; 3939 (*ptr)++; 3940 break; 3941 case 'F': 3942 (*ptr)++; 3943 rec = get_intarg(ptr, &dellen); 3944 break; 3945 default: 3946 *ptr = lptr; 3947 return; 3948 } 3949 } 3950 (*ptr)++; 3951 if (!c) { 3952 *ptr = lptr; 3953 return; 3954 } 3955 if (rec < 0) 3956 test = dupstring(*str); 3957 3958 while (rec--) { 3959 if (wall) { 3960 al = 0; 3961 all = NULL; 3962 for (t = e = *str; (tt = findword(&e, sep));) { 3963 tc = *e; 3964 *e = '\0'; 3965 if (c != 'l' && c != 'u') 3966 copy = dupstring(tt); 3967 *e = tc; 3968 switch (c) { 3969 case 'a': 3970 chabspath(©); 3971 break; 3972 case 'A': 3973 chrealpath(©); 3974 break; 3975 case 'c': 3976 { 3977 char *copy2 = equalsubstr(copy, 0, 0); 3978 if (copy2) 3979 copy = copy2; 3980 break; 3981 } 3982 case 'h': 3983 remtpath(©); 3984 break; 3985 case 'r': 3986 remtext(©); 3987 break; 3988 case 'e': 3989 rembutext(©); 3990 break; 3991 case 't': 3992 remlpaths(©); 3993 break; 3994 case 'l': 3995 copy = casemodify(tt, CASMOD_LOWER); 3996 break; 3997 case 'u': 3998 copy = casemodify(tt, CASMOD_UPPER); 3999 break; 4000 case 's': 4001 if (hsubl && hsubr) 4002 subst(©, hsubl, hsubr, gbal); 4003 break; 4004 case 'q': 4005 copy = quotestring(copy, NULL, QT_BACKSLASH_SHOWNULL); 4006 break; 4007 case 'Q': 4008 { 4009 int one = noerrs, oef = errflag; 
4010 4011 noerrs = 1; 4012 parse_subst_string(copy); 4013 noerrs = one; 4014 errflag = oef; 4015 remnulargs(copy); 4016 untokenize(copy); 4017 } 4018 break; 4019 } 4020 tc = *tt; 4021 *tt = '\0'; 4022 nl = al + strlen(t) + strlen(copy); 4023 ptr1 = tmp = (char *)zhalloc(nl + 1); 4024 if (all) 4025 for (ptr2 = all; *ptr2;) 4026 *ptr1++ = *ptr2++; 4027 for (ptr2 = t; *ptr2;) 4028 *ptr1++ = *ptr2++; 4029 *tt = tc; 4030 for (ptr2 = copy; *ptr2;) 4031 *ptr1++ = *ptr2++; 4032 *ptr1 = '\0'; 4033 al = nl; 4034 all = tmp; 4035 t = e; 4036 } 4037 if (!all) 4038 *str = dupstring(""); 4039 else 4040 *str = all; 4041 4042 } else { 4043 switch (c) { 4044 case 'a': 4045 chabspath(str); 4046 break; 4047 case 'A': 4048 chrealpath(str); 4049 break; 4050 case 'c': 4051 { 4052 char *copy2 = equalsubstr(*str, 0, 0); 4053 if (copy2) 4054 *str = copy2; 4055 break; 4056 } 4057 case 'h': 4058 remtpath(str); 4059 break; 4060 case 'r': 4061 remtext(str); 4062 break; 4063 case 'e': 4064 rembutext(str); 4065 break; 4066 case 't': 4067 remlpaths(str); 4068 break; 4069 case 'l': 4070 *str = casemodify(*str, CASMOD_LOWER); 4071 break; 4072 case 'u': 4073 *str = casemodify(*str, CASMOD_UPPER); 4074 break; 4075 case 's': 4076 if (hsubl && hsubr) 4077 subst(str, hsubl, hsubr, gbal); 4078 break; 4079 case 'q': 4080 *str = quotestring(*str, NULL, QT_BACKSLASH); 4081 break; 4082 case 'Q': 4083 { 4084 int one = noerrs, oef = errflag; 4085 4086 noerrs = 1; 4087 parse_subst_string(*str); 4088 noerrs = one; 4089 errflag = oef; 4090 remnulargs(*str); 4091 untokenize(*str); 4092 } 4093 break; 4094 } 4095 } 4096 if (rec < 0) { 4097 if (!strcmp(test, *str)) 4098 rec = 0; 4099 else 4100 test = dupstring(*str); 4101 } 4102 } 4103 } 4104} 4105 4106/* get a directory stack entry */ 4107 4108/**/ 4109static char * 4110dstackent(char ch, int val) 4111{ 4112 int backwards; 4113 LinkNode end=(LinkNode)dirstack, n; 4114 4115 backwards = ch == (isset(PUSHDMINUS) ? 
'+' : '-'); 4116 if(!backwards && !val--) 4117 return pwd; 4118 if (backwards) 4119 for (n=lastnode(dirstack); n != end && val; val--, n=prevnode(n)); 4120 else 4121 for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n)); 4122 if (n == end) { 4123 if (backwards && !val) 4124 return pwd; 4125 if (isset(NOMATCH)) 4126 zerr("not enough directory stack entries."); 4127 return NULL; 4128 } 4129 return (char *)getdata(n); 4130} 4131