1/* 2 * lex.c - lexical analysis 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 1992-1997 Paul Falstad 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Paul Falstad or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Paul Falstad and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Paul Falstad and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Paul Falstad and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30#include "zsh.mdh" 31#include "lex.pro" 32 33/* tokens */ 34 35/**/ 36mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\"; 37 38/* parts of the current token */ 39 40/**/ 41char *zshlextext; 42/**/ 43mod_export char *tokstr; 44/**/ 45mod_export enum lextok tok; 46/**/ 47mod_export int tokfd; 48 49/* 50 * Line number at which the first character of a token was found. 51 * We always set this in gettok(), which is always called from 52 * zshlex() unless we have reached an error. So it is always 53 * valid when parsing. It is not useful during execution 54 * of the parsed structure. 
55 */ 56 57/**/ 58zlong toklineno; 59 60/* lexical analyzer error flag */ 61 62/**/ 63mod_export int lexstop; 64 65/* if != 0, this is the first line of the command */ 66 67/**/ 68mod_export int isfirstln; 69 70/* if != 0, this is the first char of the command (not including white space) */ 71 72/**/ 73int isfirstch; 74 75/* flag that an alias should be expanded after expansion ending in space */ 76 77/**/ 78int inalmore; 79 80/* 81 * Don't do spelling correction. 82 * Bit 1 is only valid for the current word. It's 83 * set when we detect a lookahead that stops the word from 84 * needing correction. 85 */ 86 87/**/ 88int nocorrect; 89 90/* 91 * Cursor position and line length in zle when the line is 92 * metafied for access from the main shell. 93 */ 94 95/**/ 96mod_export int zlemetacs, zlemetall; 97 98/* inwhat says what exactly we are in * 99 * (its value is one of the IN_* things). */ 100 101/**/ 102mod_export int inwhat; 103 104/* 1 if x added to complete in a blank between words */ 105 106/**/ 107mod_export int addedx; 108 109/* wb and we hold the beginning/end position of the word we are completing. */ 110 111/**/ 112mod_export int wb, we; 113 114/* 1 if aliases should not be expanded */ 115 116/**/ 117mod_export int noaliases; 118 119/* 120 * If non-zero, we are parsing a line sent to use by the editor, or some 121 * other string that's not part of standard command input (e.g. eval is 122 * part of normal command input). 123 * 124 * Set of bits from LEXFLAGS_*. 125 * 126 * Note that although it is passed into the lexer as an input, the 127 * lexer can set it to zero after finding the word it's searching for. 128 * This only happens if the line being parsed actually does come from 129 * ZLE, and hence the bit LEXFLAGS_ZLE is set. 
130 */ 131 132/**/ 133mod_export int lexflags; 134 135/**/ 136mod_export int wordbeg; 137 138/**/ 139mod_export int parbegin; 140 141/**/ 142mod_export int parend; 143 144/* don't recognize comments */ 145 146/**/ 147mod_export int nocomments; 148 149/* text of punctuation tokens */ 150 151/**/ 152mod_export char *tokstrings[WHILE + 1] = { 153 NULL, /* NULLTOK 0 */ 154 ";", /* SEPER */ 155 "\\n", /* NEWLIN */ 156 ";", /* SEMI */ 157 ";;", /* DSEMI */ 158 "&", /* AMPER 5 */ 159 "(", /* INPAR */ 160 ")", /* OUTPAR */ 161 "||", /* DBAR */ 162 "&&", /* DAMPER */ 163 ">", /* OUTANG 10 */ 164 ">|", /* OUTANGBANG */ 165 ">>", /* DOUTANG */ 166 ">>|", /* DOUTANGBANG */ 167 "<", /* INANG */ 168 "<>", /* INOUTANG 15 */ 169 "<<", /* DINANG */ 170 "<<-", /* DINANGDASH */ 171 "<&", /* INANGAMP */ 172 ">&", /* OUTANGAMP */ 173 "&>", /* AMPOUTANG 20 */ 174 "&>|", /* OUTANGAMPBANG */ 175 ">>&", /* DOUTANGAMP */ 176 ">>&|", /* DOUTANGAMPBANG */ 177 "<<<", /* TRINANG */ 178 "|", /* BAR 25 */ 179 "|&", /* BARAMP */ 180 "()", /* INOUTPAR */ 181 "((", /* DINPAR */ 182 "))", /* DOUTPAR */ 183 "&|", /* AMPERBANG 30 */ 184 ";&", /* SEMIAMP */ 185 ";|", /* SEMIBAR */ 186}; 187 188/* lexical state */ 189 190static int dbparens; 191static int len = 0, bsiz = 256; 192static char *bptr; 193 194struct lexstack { 195 struct lexstack *next; 196 197 int incmdpos; 198 int incond; 199 int incasepat; 200 int dbparens; 201 int isfirstln; 202 int isfirstch; 203 int histactive; 204 int histdone; 205 int lexflags; 206 int stophist; 207 int hlinesz; 208 char *hline; 209 char *hptr; 210 enum lextok tok; 211 int isnewlin; 212 char *tokstr; 213 char *zshlextext; 214 char *bptr; 215 int bsiz; 216 int len; 217 short *chwords; 218 int chwordlen; 219 int chwordpos; 220 int hwgetword; 221 int lexstop; 222 struct heredocs *hdocs; 223 int (*hgetc) _((void)); 224 void (*hungetc) _((int)); 225 void (*hwaddc) _((int)); 226 void (*hwbegin) _((int)); 227 void (*hwend) _((void)); 228 void (*addtoline) _((int)); 229 230 
int eclen, ecused, ecnpats; 231 Wordcode ecbuf; 232 Eccstr ecstrs; 233 int ecsoffs, ecssub, ecnfunc; 234 235 unsigned char *cstack; 236 int csp; 237 zlong toklineno; 238}; 239 240static struct lexstack *lstack = NULL; 241 242/* save the lexical state */ 243 244/* is this a hack or what? */ 245 246/**/ 247mod_export void 248lexsave(void) 249{ 250 struct lexstack *ls; 251 252 ls = (struct lexstack *)malloc(sizeof(struct lexstack)); 253 254 ls->incmdpos = incmdpos; 255 ls->incond = incond; 256 ls->incasepat = incasepat; 257 ls->dbparens = dbparens; 258 ls->isfirstln = isfirstln; 259 ls->isfirstch = isfirstch; 260 ls->histactive = histactive; 261 ls->histdone = histdone; 262 ls->lexflags = lexflags; 263 ls->stophist = stophist; 264 stophist = 0; 265 if (!lstack) { 266 /* top level, make this version visible to ZLE */ 267 zle_chline = chline; 268 /* ensure line stored is NULL-terminated */ 269 if (hptr) 270 *hptr = '\0'; 271 } 272 ls->hline = chline; 273 chline = NULL; 274 ls->hptr = hptr; 275 hptr = NULL; 276 ls->hlinesz = hlinesz; 277 ls->cstack = cmdstack; 278 ls->csp = cmdsp; 279 cmdstack = (unsigned char *)zalloc(CMDSTACKSZ); 280 ls->tok = tok; 281 ls->isnewlin = isnewlin; 282 ls->tokstr = tokstr; 283 ls->zshlextext = zshlextext; 284 ls->bptr = bptr; 285 tokstr = zshlextext = bptr = NULL; 286 ls->bsiz = bsiz; 287 bsiz = 256; 288 ls->len = len; 289 ls->chwords = chwords; 290 ls->chwordlen = chwordlen; 291 ls->chwordpos = chwordpos; 292 ls->hwgetword = hwgetword; 293 ls->lexstop = lexstop; 294 ls->hdocs = hdocs; 295 ls->hgetc = hgetc; 296 ls->hungetc = hungetc; 297 ls->hwaddc = hwaddc; 298 ls->hwbegin = hwbegin; 299 ls->hwend = hwend; 300 ls->addtoline = addtoline; 301 ls->eclen = eclen; 302 ls->ecused = ecused; 303 ls->ecnpats = ecnpats; 304 ls->ecbuf = ecbuf; 305 ls->ecstrs = ecstrs; 306 ls->ecsoffs = ecsoffs; 307 ls->ecssub = ecssub; 308 ls->ecnfunc = ecnfunc; 309 ls->toklineno = toklineno; 310 cmdsp = 0; 311 inredir = 0; 312 hdocs = NULL; 313 histactive = 0; 314 
ecbuf = NULL; 315 316 ls->next = lstack; 317 lstack = ls; 318} 319 320/* restore lexical state */ 321 322/**/ 323mod_export void 324lexrestore(void) 325{ 326 struct lexstack *ln; 327 328 DPUTS(!lstack, "BUG: lexrestore() without lexsave()"); 329 incmdpos = lstack->incmdpos; 330 incond = lstack->incond; 331 incasepat = lstack->incasepat; 332 dbparens = lstack->dbparens; 333 isfirstln = lstack->isfirstln; 334 isfirstch = lstack->isfirstch; 335 histactive = lstack->histactive; 336 histdone = lstack->histdone; 337 lexflags = lstack->lexflags; 338 stophist = lstack->stophist; 339 chline = lstack->hline; 340 hptr = lstack->hptr; 341 if (cmdstack) 342 free(cmdstack); 343 cmdstack = lstack->cstack; 344 cmdsp = lstack->csp; 345 tok = lstack->tok; 346 isnewlin = lstack->isnewlin; 347 tokstr = lstack->tokstr; 348 zshlextext = lstack->zshlextext; 349 bptr = lstack->bptr; 350 bsiz = lstack->bsiz; 351 len = lstack->len; 352 chwords = lstack->chwords; 353 chwordlen = lstack->chwordlen; 354 chwordpos = lstack->chwordpos; 355 hwgetword = lstack->hwgetword; 356 lexstop = lstack->lexstop; 357 hdocs = lstack->hdocs; 358 hgetc = lstack->hgetc; 359 hungetc = lstack->hungetc; 360 hwaddc = lstack->hwaddc; 361 hwbegin = lstack->hwbegin; 362 hwend = lstack->hwend; 363 addtoline = lstack->addtoline; 364 if (ecbuf) 365 zfree(ecbuf, eclen); 366 eclen = lstack->eclen; 367 ecused = lstack->ecused; 368 ecnpats = lstack->ecnpats; 369 ecbuf = lstack->ecbuf; 370 ecstrs = lstack->ecstrs; 371 ecsoffs = lstack->ecsoffs; 372 ecssub = lstack->ecssub; 373 ecnfunc = lstack->ecnfunc; 374 hlinesz = lstack->hlinesz; 375 toklineno = lstack->toklineno; 376 errflag = 0; 377 378 ln = lstack->next; 379 if (!ln) { 380 /* Back to top level: don't need special ZLE value */ 381 DPUTS(chline != zle_chline, "BUG: Ouch, wrong chline for ZLE"); 382 zle_chline = NULL; 383 } 384 free(lstack); 385 lstack = ln; 386} 387 388/**/ 389void 390zshlex(void) 391{ 392 if (tok == LEXERR) 393 return; 394 do 395 tok = gettok(); 396 
while (tok != ENDINPUT && exalias()); 397 nocorrect &= 1; 398 if (tok == NEWLIN || tok == ENDINPUT) { 399 while (hdocs) { 400 struct heredocs *next = hdocs->next; 401 char *doc, *munged_term; 402 403 hwbegin(0); 404 cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD); 405 munged_term = dupstring(hdocs->str); 406 STOPHIST 407 doc = gethere(&munged_term, hdocs->type); 408 ALLOWHIST 409 cmdpop(); 410 hwend(); 411 if (!doc) { 412 zerr("here document too large"); 413 while (hdocs) { 414 next = hdocs->next; 415 zfree(hdocs, sizeof(struct heredocs)); 416 hdocs = next; 417 } 418 tok = LEXERR; 419 break; 420 } 421 setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str, 422 munged_term); 423 zfree(hdocs, sizeof(struct heredocs)); 424 hdocs = next; 425 } 426 } 427 if (tok != NEWLIN) 428 isnewlin = 0; 429 else 430 isnewlin = (inbufct) ? -1 : 1; 431 if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE))) 432 tok = SEPER; 433} 434 435/**/ 436mod_export void 437ctxtlex(void) 438{ 439 static int oldpos; 440 441 zshlex(); 442 switch (tok) { 443 case SEPER: 444 case NEWLIN: 445 case SEMI: 446 case DSEMI: 447 case SEMIAMP: 448 case SEMIBAR: 449 case AMPER: 450 case AMPERBANG: 451 case INPAR: 452 case INBRACE: 453 case DBAR: 454 case DAMPER: 455 case BAR: 456 case BARAMP: 457 case INOUTPAR: 458 case DOLOOP: 459 case THEN: 460 case ELIF: 461 case ELSE: 462 case DOUTBRACK: 463 incmdpos = 1; 464 break; 465 case STRING: 466 /* case ENVSTRING: */ 467 case ENVARRAY: 468 case OUTPAR: 469 case CASE: 470 case DINBRACK: 471 incmdpos = 0; 472 break; 473 474 default: 475 /* nothing to do, keep compiler happy */ 476 break; 477 } 478 if (tok != DINPAR) 479 infor = tok == FOR ? 
2 : 0; 480 if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) { 481 inredir = 1; 482 oldpos = incmdpos; 483 incmdpos = 0; 484 } else if (inredir) { 485 incmdpos = oldpos; 486 inredir = 0; 487 } 488} 489 490#define LX1_BKSLASH 0 491#define LX1_COMMENT 1 492#define LX1_NEWLIN 2 493#define LX1_SEMI 3 494#define LX1_AMPER 5 495#define LX1_BAR 6 496#define LX1_INPAR 7 497#define LX1_OUTPAR 8 498#define LX1_INANG 13 499#define LX1_OUTANG 14 500#define LX1_OTHER 15 501 502#define LX2_BREAK 0 503#define LX2_OUTPAR 1 504#define LX2_BAR 2 505#define LX2_STRING 3 506#define LX2_INBRACK 4 507#define LX2_OUTBRACK 5 508#define LX2_TILDE 6 509#define LX2_INPAR 7 510#define LX2_INBRACE 8 511#define LX2_OUTBRACE 9 512#define LX2_OUTANG 10 513#define LX2_INANG 11 514#define LX2_EQUALS 12 515#define LX2_BKSLASH 13 516#define LX2_QUOTE 14 517#define LX2_DQUOTE 15 518#define LX2_BQUOTE 16 519#define LX2_COMMA 17 520#define LX2_OTHER 18 521#define LX2_META 19 522 523static unsigned char lexact1[256], lexact2[256], lextok2[256]; 524 525/**/ 526void 527initlextabs(void) 528{ 529 int t0; 530 static char *lx1 = "\\q\n;!&|(){}[]<>"; 531 static char *lx2 = ";)|$[]~({}><=\\\'\"`,"; 532 533 for (t0 = 0; t0 != 256; t0++) { 534 lexact1[t0] = LX1_OTHER; 535 lexact2[t0] = LX2_OTHER; 536 lextok2[t0] = t0; 537 } 538 for (t0 = 0; lx1[t0]; t0++) 539 lexact1[(int)lx1[t0]] = t0; 540 for (t0 = 0; lx2[t0]; t0++) 541 lexact2[(int)lx2[t0]] = t0; 542 lexact2['&'] = LX2_BREAK; 543 lexact2[STOUC(Meta)] = LX2_META; 544 lextok2['*'] = Star; 545 lextok2['?'] = Quest; 546 lextok2['{'] = Inbrace; 547 lextok2['['] = Inbrack; 548 lextok2['$'] = String; 549 lextok2['~'] = Tilde; 550 lextok2['#'] = Pound; 551 lextok2['^'] = Hat; 552} 553 554/* initialize lexical state */ 555 556/**/ 557void 558lexinit(void) 559{ 560 incond = incasepat = nocorrect = 561 infor = dbparens = lexstop = 0; 562 incmdpos = 1; 563 tok = ENDINPUT; 564} 565 566/* add a char to the string buffer */ 567 568/**/ 569void 570add(int 
c) 571{ 572 *bptr++ = c; 573 if (bsiz == ++len) { 574 int newbsiz = bsiz * 2; 575 576 if (newbsiz > inbufct && inbufct > bsiz) 577 newbsiz = inbufct; 578 579 bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz)); 580 /* len == bsiz, so bptr is at the start of newly allocated memory */ 581 memset(bptr, 0, newbsiz - bsiz); 582 bsiz = newbsiz; 583 } 584} 585 586#define SETPARBEGIN { \ 587 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ 588 zlemetacs >= zlemetall+1-inbufct) \ 589 parbegin = inbufct; \ 590 } 591#define SETPAREND { \ 592 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ 593 parbegin != -1 && parend == -1) { \ 594 if (zlemetacs >= zlemetall + 1 - inbufct) \ 595 parbegin = -1; \ 596 else \ 597 parend = inbufct; \ 598 } \ 599 } 600 601/* 602 * Return 1 for math, 0 for a command, 2 for an error. If it couldn't be 603 * parsed as math, but there was no gross error, it's a command. 604 */ 605 606static int 607cmd_or_math(int cs_type) 608{ 609 int oldlen = len; 610 int c; 611 612 cmdpush(cs_type); 613 c = dquote_parse(')', 0); 614 cmdpop(); 615 *bptr = '\0'; 616 if (!c) { 617 /* Successfully parsed, see if it was math */ 618 c = hgetc(); 619 if (c == ')') 620 return 1; /* yes */ 621 hungetc(c); 622 lexstop = 0; 623 c = ')'; 624 } else if (lexstop) { 625 /* we haven't got anything to unget */ 626 return 2; 627 } 628 /* else unsuccessful: unget the whole thing */ 629 hungetc(c); 630 lexstop = 0; 631 while (len > oldlen) { 632 len--; 633 hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr); 634 } 635 hungetc('('); 636 return 0; 637} 638 639 640/* 641 * Parse either a $(( ... )) or a $(...) 642 * Return 0 on success, 1 on failure. 
643 */ 644static int 645cmd_or_math_sub(void) 646{ 647 int c = hgetc(), ret; 648 649 if (c == '(') { 650 add(Inpar); 651 add('('); 652 if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) { 653 add(')'); 654 return 0; 655 } 656 if (ret == 2) 657 return 1; 658 bptr -= 2; 659 len -= 2; 660 } else { 661 hungetc(c); 662 lexstop = 0; 663 } 664 return skipcomm(); 665} 666 667/* Check whether we're looking at valid numeric globbing syntax * 668 * (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". * 669 * Leaves the input in the same place, returning 0 or 1. */ 670 671/**/ 672static int 673isnumglob(void) 674{ 675 int c, ec = '-', ret = 0; 676 int tbs = 256, n = 0; 677 char *tbuf = (char *)zalloc(tbs); 678 679 while(1) { 680 c = hgetc(); 681 if(lexstop) { 682 lexstop = 0; 683 break; 684 } 685 tbuf[n++] = c; 686 if(!idigit(c)) { 687 if(c != ec) 688 break; 689 if(ec == '>') { 690 ret = 1; 691 break; 692 } 693 ec = '>'; 694 } 695 if(n == tbs) 696 tbuf = (char *)realloc(tbuf, tbs *= 2); 697 } 698 while(n--) 699 hungetc(tbuf[n]); 700 zfree(tbuf, tbs); 701 return ret; 702} 703 704/**/ 705static enum lextok 706gettok(void) 707{ 708 int c, d; 709 int peekfd = -1; 710 enum lextok peek; 711 712 beginning: 713 tokstr = NULL; 714 while (iblank(c = hgetc()) && !lexstop); 715 toklineno = lineno; 716 if (lexstop) 717 return (errflag) ? LEXERR : ENDINPUT; 718 isfirstln = 0; 719 wordbeg = inbufct - (qbang && c == bangchar); 720 hwbegin(-1-(qbang && c == bangchar)); 721 /* word includes the last character read and possibly \ before ! */ 722 if (dbparens) { 723 len = 0; 724 bptr = tokstr = (char *) hcalloc(bsiz = 32); 725 hungetc(c); 726 cmdpush(CS_MATH); 727 c = dquote_parse(infor ? 
';' : ')', 0); 728 cmdpop(); 729 *bptr = '\0'; 730 if (!c && infor) { 731 infor--; 732 return DINPAR; 733 } 734 if (c || (c = hgetc()) != ')') { 735 hungetc(c); 736 return LEXERR; 737 } 738 dbparens = 0; 739 return DOUTPAR; 740 } else if (idigit(c)) { /* handle 1< foo */ 741 d = hgetc(); 742 if(d == '&') { 743 d = hgetc(); 744 if(d == '>') { 745 peekfd = c - '0'; 746 hungetc('>'); 747 c = '&'; 748 } else { 749 hungetc(d); 750 lexstop = 0; 751 hungetc('&'); 752 } 753 } else if (d == '>' || d == '<') { 754 peekfd = c - '0'; 755 c = d; 756 } else { 757 hungetc(d); 758 lexstop = 0; 759 } 760 } 761 762 /* chars in initial position in word */ 763 764 /* 765 * Handle comments. There are some special cases when this 766 * is not normal command input: lexflags implies we are examining 767 * a line lexically without it being used for normal command input. 768 */ 769 if (c == hashchar && !nocomments && 770 (isset(INTERACTIVECOMMENTS) || 771 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding && 772 (!interact || unset(SHINSTDIN) || strin)))) { 773 /* History is handled here to prevent extra * 774 * newlines being inserted into the history. */ 775 776 if (lexflags & LEXFLAGS_COMMENTS_KEEP) { 777 len = 0; 778 bptr = tokstr = (char *)hcalloc(bsiz = 32); 779 add(c); 780 } 781 hwend(); 782 while ((c = ingetc()) != '\n' && !lexstop) { 783 hwaddc(c); 784 addtoline(c); 785 if (lexflags & LEXFLAGS_COMMENTS_KEEP) 786 add(c); 787 } 788 789 if (errflag) 790 peek = LEXERR; 791 else { 792 if (lexflags & LEXFLAGS_COMMENTS_KEEP) { 793 *bptr = '\0'; 794 if (!lexstop) 795 hungetc(c); 796 peek = STRING; 797 } else { 798 hwend(); 799 hwbegin(0); 800 hwaddc('\n'); 801 addtoline('\n'); 802 /* 803 * If splitting a line and removing comments, 804 * we don't want a newline token since it's 805 * treated specially. 
806 */ 807 if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop) 808 peek = ENDINPUT; 809 else 810 peek = NEWLIN; 811 } 812 } 813 return peek; 814 } 815 switch (lexact1[STOUC(c)]) { 816 case LX1_BKSLASH: 817 d = hgetc(); 818 if (d == '\n') 819 goto beginning; 820 hungetc(d); 821 lexstop = 0; 822 break; 823 case LX1_NEWLIN: 824 return NEWLIN; 825 case LX1_SEMI: 826 d = hgetc(); 827 if(d == ';') 828 return DSEMI; 829 else if(d == '&') 830 return SEMIAMP; 831 else if (d == '|') 832 return SEMIBAR; 833 hungetc(d); 834 lexstop = 0; 835 return SEMI; 836 case LX1_AMPER: 837 d = hgetc(); 838 if (d == '&') 839 return DAMPER; 840 else if (d == '!' || d == '|') 841 return AMPERBANG; 842 else if (d == '>') { 843 tokfd = peekfd; 844 d = hgetc(); 845 if (d == '!' || d == '|') 846 return OUTANGAMPBANG; 847 else if (d == '>') { 848 d = hgetc(); 849 if (d == '!' || d == '|') 850 return DOUTANGAMPBANG; 851 hungetc(d); 852 lexstop = 0; 853 return DOUTANGAMP; 854 } 855 hungetc(d); 856 lexstop = 0; 857 return AMPOUTANG; 858 } 859 hungetc(d); 860 lexstop = 0; 861 return AMPER; 862 case LX1_BAR: 863 d = hgetc(); 864 if (d == '|') 865 return DBAR; 866 else if (d == '&') 867 return BARAMP; 868 hungetc(d); 869 lexstop = 0; 870 return BAR; 871 case LX1_INPAR: 872 d = hgetc(); 873 if (d == '(') { 874 if (infor) { 875 dbparens = 1; 876 return DINPAR; 877 } 878 if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) { 879 len = 0; 880 bptr = tokstr = (char *) hcalloc(bsiz = 32); 881 switch (cmd_or_math(CS_MATH)) { 882 case 1: 883 return DINPAR; 884 885 case 0: 886 /* 887 * Not math, so we don't return the contents 888 * as a string in this case. 
889 */ 890 tokstr = NULL; 891 return INPAR; 892 893 default: 894 return LEXERR; 895 } 896 } 897 } else if (d == ')') 898 return INOUTPAR; 899 hungetc(d); 900 lexstop = 0; 901 if (!(incond == 1 || incmdpos)) 902 break; 903 return INPAR; 904 case LX1_OUTPAR: 905 return OUTPAR; 906 case LX1_INANG: 907 d = hgetc(); 908 if (d == '(') { 909 hungetc(d); 910 lexstop = 0; 911 unpeekfd: 912 if(peekfd != -1) { 913 hungetc(c); 914 c = '0' + peekfd; 915 } 916 break; 917 } 918 if (d == '>') { 919 peek = INOUTANG; 920 } else if (d == '<') { 921 int e = hgetc(); 922 923 if (e == '(') { 924 hungetc(e); 925 hungetc(d); 926 peek = INANG; 927 } else if (e == '<') 928 peek = TRINANG; 929 else if (e == '-') 930 peek = DINANGDASH; 931 else { 932 hungetc(e); 933 lexstop = 0; 934 peek = DINANG; 935 } 936 } else if (d == '&') { 937 peek = INANGAMP; 938 } else { 939 hungetc(d); 940 if(isnumglob()) 941 goto unpeekfd; 942 peek = INANG; 943 } 944 tokfd = peekfd; 945 return peek; 946 case LX1_OUTANG: 947 d = hgetc(); 948 if (d == '(') { 949 hungetc(d); 950 goto unpeekfd; 951 } else if (d == '&') { 952 d = hgetc(); 953 if (d == '!' || d == '|') 954 peek = OUTANGAMPBANG; 955 else { 956 hungetc(d); 957 lexstop = 0; 958 peek = OUTANGAMP; 959 } 960 } else if (d == '!' || d == '|') 961 peek = OUTANGBANG; 962 else if (d == '>') { 963 d = hgetc(); 964 if (d == '&') { 965 d = hgetc(); 966 if (d == '!' || d == '|') 967 peek = DOUTANGAMPBANG; 968 else { 969 hungetc(d); 970 lexstop = 0; 971 peek = DOUTANGAMP; 972 } 973 } else if (d == '!' 
|| d == '|') 974 peek = DOUTANGBANG; 975 else if (d == '(') { 976 hungetc(d); 977 hungetc('>'); 978 peek = OUTANG; 979 } else { 980 hungetc(d); 981 lexstop = 0; 982 peek = DOUTANG; 983 if (isset(HISTALLOWCLOBBER)) 984 hwaddc('|'); 985 } 986 } else { 987 hungetc(d); 988 lexstop = 0; 989 peek = OUTANG; 990 if (!incond && isset(HISTALLOWCLOBBER)) 991 hwaddc('|'); 992 } 993 tokfd = peekfd; 994 return peek; 995 } 996 997 /* we've started a string, now get the * 998 * rest of it, performing tokenization */ 999 return gettokstr(c, 0); 1000} 1001 1002/* 1003 * Get the remains of a token string. This has two uses. 1004 * When called from gettok(), with sub = 0, we have already identified 1005 * any interesting initial character and want to get the rest of 1006 * what we now know is a string. However, the string may still include 1007 * metacharacters and potentially substitutions. 1008 * 1009 * When called from parse_subst_string() with sub = 1, we are not 1010 * fully parsing a command line, merely tokenizing a string. 1011 * In this case we always add characters to the parsed string 1012 * unless there is a parse error. 
1013 */ 1014 1015/**/ 1016static enum lextok 1017gettokstr(int c, int sub) 1018{ 1019 int bct = 0, pct = 0, brct = 0, fdpar = 0; 1020 int intpos = 1, in_brace_param = 0; 1021 int inquote, unmatched = 0; 1022 enum lextok peek; 1023#ifdef DEBUG 1024 int ocmdsp = cmdsp; 1025#endif 1026 1027 peek = STRING; 1028 if (!sub) { 1029 len = 0; 1030 bptr = tokstr = (char *) hcalloc(bsiz = 32); 1031 } 1032 for (;;) { 1033 int act; 1034 int e; 1035 int inbl = inblank(c); 1036 1037 if (fdpar && !inbl && c != ')') 1038 fdpar = 0; 1039 1040 if (inbl && !in_brace_param && !pct) 1041 act = LX2_BREAK; 1042 else { 1043 act = lexact2[STOUC(c)]; 1044 c = lextok2[STOUC(c)]; 1045 } 1046 switch (act) { 1047 case LX2_BREAK: 1048 if (!in_brace_param && !sub) 1049 goto brk; 1050 break; 1051 case LX2_META: 1052 c = hgetc(); 1053#ifdef DEBUG 1054 if (lexstop) { 1055 fputs("BUG: input terminated by Meta\n", stderr); 1056 fflush(stderr); 1057 goto brk; 1058 } 1059#endif 1060 add(Meta); 1061 break; 1062 case LX2_OUTPAR: 1063 if (fdpar) { 1064 /* this is a single word `( )', treat as INOUTPAR */ 1065 add(c); 1066 *bptr = '\0'; 1067 return INOUTPAR; 1068 } 1069 if ((sub || in_brace_param) && isset(SHGLOB)) 1070 break; 1071 if (!in_brace_param && !pct--) { 1072 if (sub) { 1073 pct = 0; 1074 break; 1075 } else 1076 goto brk; 1077 } 1078 c = Outpar; 1079 break; 1080 case LX2_BAR: 1081 if (!pct && !in_brace_param) { 1082 if (sub) 1083 break; 1084 else 1085 goto brk; 1086 } 1087 if (unset(SHGLOB) || (!sub && !in_brace_param)) 1088 c = Bar; 1089 break; 1090 case LX2_STRING: 1091 e = hgetc(); 1092 if (e == '[') { 1093 cmdpush(CS_MATHSUBST); 1094 add(String); 1095 add(Inbrack); 1096 c = dquote_parse(']', sub); 1097 cmdpop(); 1098 if (c) { 1099 peek = LEXERR; 1100 goto brk; 1101 } 1102 c = Outbrack; 1103 } else if (e == '(') { 1104 add(String); 1105 c = cmd_or_math_sub(); 1106 if (c) { 1107 peek = LEXERR; 1108 goto brk; 1109 } 1110 c = Outpar; 1111 } else { 1112 if (e == '{') { 1113 add(c); 1114 c = Inbrace; 
1115 ++bct; 1116 cmdpush(CS_BRACEPAR); 1117 if (!in_brace_param) 1118 in_brace_param = bct; 1119 } else { 1120 hungetc(e); 1121 lexstop = 0; 1122 } 1123 } 1124 break; 1125 case LX2_INBRACK: 1126 if (!in_brace_param) 1127 brct++; 1128 c = Inbrack; 1129 break; 1130 case LX2_OUTBRACK: 1131 if (!in_brace_param) 1132 brct--; 1133 if (brct < 0) 1134 brct = 0; 1135 c = Outbrack; 1136 break; 1137 case LX2_INPAR: 1138 if (isset(SHGLOB)) { 1139 if (sub || in_brace_param) 1140 break; 1141 if (incasepat && !len) 1142 return INPAR; 1143 if (!isset(KSHGLOB) && len) 1144 goto brk; 1145 } 1146 if (!in_brace_param) { 1147 if (!sub) { 1148 e = hgetc(); 1149 hungetc(e); 1150 lexstop = 0; 1151 /* For command words, parentheses are only 1152 * special at the start. But now we're tokenising 1153 * the remaining string. So I don't see what 1154 * the old incmdpos test here is for. 1155 * pws 1999/6/8 1156 * 1157 * Oh, no. 1158 * func1( ) 1159 * is a valid function definition in [k]sh. The best 1160 * thing we can do, without really nasty lookahead tricks, 1161 * is break if we find a blank after a parenthesis. At 1162 * least this can't happen inside braces or brackets. We 1163 * only allow this with SHGLOB (set for both sh and ksh). 1164 * 1165 * Things like `print @( |foo)' should still 1166 * work, because [k]sh don't allow multiple words 1167 * in a function definition, so we only do this 1168 * in command position. 1169 * pws 1999/6/14 1170 */ 1171 if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct && 1172 !brct && !intpos && incmdpos)) { 1173 /* 1174 * Either a () token, or a command word with 1175 * something suspiciously like a ksh function 1176 * definition. 1177 * The current word isn't spellcheckable. 1178 */ 1179 nocorrect |= 2; 1180 goto brk; 1181 } 1182 } 1183 /* 1184 * This also handles the [k]sh `foo( )' function definition. 1185 * Maintain a variable fdpar, set as long as a single set of 1186 * parentheses contains only space. 
Then if we get to the 1187 * closing parenthesis and it is still set, we can assume we 1188 * have a function definition. Only do this at the start of 1189 * the word, since the (...) must be a separate token. 1190 */ 1191 if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct) 1192 fdpar = 1; 1193 } 1194 c = Inpar; 1195 break; 1196 case LX2_INBRACE: 1197 if (isset(IGNOREBRACES) || sub) 1198 c = '{'; 1199 else { 1200 if (!len && incmdpos) { 1201 add('{'); 1202 *bptr = '\0'; 1203 return STRING; 1204 } 1205 if (in_brace_param) { 1206 cmdpush(CS_BRACE); 1207 } 1208 bct++; 1209 } 1210 break; 1211 case LX2_OUTBRACE: 1212 if ((isset(IGNOREBRACES) || sub) && !in_brace_param) 1213 break; 1214 if (!bct) 1215 break; 1216 if (in_brace_param) { 1217 cmdpop(); 1218 } 1219 if (bct-- == in_brace_param) 1220 in_brace_param = 0; 1221 c = Outbrace; 1222 break; 1223 case LX2_COMMA: 1224 if (unset(IGNOREBRACES) && !sub && bct > in_brace_param) 1225 c = Comma; 1226 break; 1227 case LX2_OUTANG: 1228 if (in_brace_param || sub) 1229 break; 1230 e = hgetc(); 1231 if (e != '(') { 1232 hungetc(e); 1233 lexstop = 0; 1234 goto brk; 1235 } 1236 add(OutangProc); 1237 if (skipcomm()) { 1238 peek = LEXERR; 1239 goto brk; 1240 } 1241 c = Outpar; 1242 break; 1243 case LX2_INANG: 1244 if (isset(SHGLOB) && sub) 1245 break; 1246 e = hgetc(); 1247 if (!(in_brace_param || sub) && e == '(') { 1248 add(Inang); 1249 if (skipcomm()) { 1250 peek = LEXERR; 1251 goto brk; 1252 } 1253 c = Outpar; 1254 break; 1255 } 1256 hungetc(e); 1257 if(isnumglob()) { 1258 add(Inang); 1259 while ((c = hgetc()) != '>') 1260 add(c); 1261 c = Outang; 1262 break; 1263 } 1264 lexstop = 0; 1265 if (in_brace_param || sub) 1266 break; 1267 goto brk; 1268 case LX2_EQUALS: 1269 if (!sub) { 1270 if (intpos) { 1271 e = hgetc(); 1272 if (e != '(') { 1273 hungetc(e); 1274 lexstop = 0; 1275 c = Equals; 1276 } else { 1277 add(Equals); 1278 if (skipcomm()) { 1279 peek = LEXERR; 1280 goto brk; 1281 } 1282 c = Outpar; 1283 } 1284 } else if 
(peek != ENVSTRING && 1285 incmdpos && !bct && !brct) { 1286 char *t = tokstr; 1287 if (idigit(*t)) 1288 while (++t < bptr && idigit(*t)); 1289 else { 1290 int sav = *bptr; 1291 *bptr = '\0'; 1292 t = itype_end(t, IIDENT, 0); 1293 if (t < bptr) { 1294 skipparens(Inbrack, Outbrack, &t); 1295 } else { 1296 *bptr = sav; 1297 } 1298 } 1299 if (*t == '+') 1300 t++; 1301 if (t == bptr) { 1302 e = hgetc(); 1303 if (e == '(' && incmdpos) { 1304 *bptr = '\0'; 1305 return ENVARRAY; 1306 } 1307 hungetc(e); 1308 lexstop = 0; 1309 peek = ENVSTRING; 1310 intpos = 2; 1311 } else 1312 c = Equals; 1313 } else 1314 c = Equals; 1315 } 1316 break; 1317 case LX2_BKSLASH: 1318 c = hgetc(); 1319 if (c == '\n') { 1320 c = hgetc(); 1321 if (!lexstop) 1322 continue; 1323 } else 1324 add(Bnull); 1325 if (lexstop) 1326 goto brk; 1327 break; 1328 case LX2_QUOTE: { 1329 int strquote = (len && bptr[-1] == String); 1330 1331 add(Snull); 1332 cmdpush(CS_QUOTE); 1333 for (;;) { 1334 STOPHIST 1335 while ((c = hgetc()) != '\'' && !lexstop) { 1336 if (strquote && c == '\\') { 1337 c = hgetc(); 1338 if (lexstop) 1339 break; 1340 /* 1341 * Mostly we don't need to do anything special 1342 * with escape backslashes or closing quotes 1343 * inside $'...'; however in completion we 1344 * need to be able to strip multiple backslashes 1345 * neatly. 
1346 */ 1347 if (c == '\\' || c == '\'') 1348 add(Bnull); 1349 else 1350 add('\\'); 1351 } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') { 1352 if (bptr[-1] == '\\') 1353 bptr--, len--; 1354 else 1355 break; 1356 } 1357 add(c); 1358 } 1359 ALLOWHIST 1360 if (c != '\'') { 1361 unmatched = '\''; 1362 peek = LEXERR; 1363 cmdpop(); 1364 goto brk; 1365 } 1366 e = hgetc(); 1367 if (e != '\'' || unset(RCQUOTES) || strquote) 1368 break; 1369 add(c); 1370 } 1371 cmdpop(); 1372 hungetc(e); 1373 lexstop = 0; 1374 c = Snull; 1375 break; 1376 } 1377 case LX2_DQUOTE: 1378 add(Dnull); 1379 cmdpush(CS_DQUOTE); 1380 c = dquote_parse('"', sub); 1381 cmdpop(); 1382 if (c) { 1383 unmatched = '"'; 1384 peek = LEXERR; 1385 goto brk; 1386 } 1387 c = Dnull; 1388 break; 1389 case LX2_BQUOTE: 1390 add(Tick); 1391 cmdpush(CS_BQUOTE); 1392 SETPARBEGIN 1393 inquote = 0; 1394 while ((c = hgetc()) != '`' && !lexstop) { 1395 if (c == '\\') { 1396 c = hgetc(); 1397 if (c != '\n') { 1398 add(c == '`' || c == '\\' || c == '$' ? 
Bnull : '\\'); 1399 add(c); 1400 } 1401 else if (!sub && isset(CSHJUNKIEQUOTES)) 1402 add(c); 1403 } else { 1404 if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') { 1405 break; 1406 } 1407 add(c); 1408 if (c == '\'') { 1409 if ((inquote = !inquote)) 1410 STOPHIST 1411 else 1412 ALLOWHIST 1413 } 1414 } 1415 } 1416 if (inquote) 1417 ALLOWHIST 1418 cmdpop(); 1419 if (c != '`') { 1420 unmatched = '`'; 1421 peek = LEXERR; 1422 goto brk; 1423 } 1424 c = Tick; 1425 SETPAREND 1426 break; 1427 } 1428 add(c); 1429 c = hgetc(); 1430 if (intpos) 1431 intpos--; 1432 if (lexstop) 1433 break; 1434 } 1435 brk: 1436 hungetc(c); 1437 if (unmatched) 1438 zerr("unmatched %c", unmatched); 1439 if (in_brace_param) { 1440 while(bct-- >= in_brace_param) 1441 cmdpop(); 1442 zerr("closing brace expected"); 1443 } else if (unset(IGNOREBRACES) && !sub && len > 1 && 1444 peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) { 1445 /* hack to get {foo} command syntax work */ 1446 bptr--; 1447 len--; 1448 lexstop = 0; 1449 hungetc('}'); 1450 } 1451 *bptr = '\0'; 1452 DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed."); 1453 return peek; 1454} 1455 1456 1457/* 1458 * Parse input as if in double quotes. 1459 * endchar is the end character to expect. 1460 * sub has got something to do with whether we are doing quoted substitution. 
1461 * Return non-zero for error (character to unget), else zero 1462 */ 1463 1464/**/ 1465static int 1466dquote_parse(char endchar, int sub) 1467{ 1468 int pct = 0, brct = 0, bct = 0, intick = 0, err = 0; 1469 int c; 1470 int math = endchar == ')' || endchar == ']'; 1471 int zlemath = math && zlemetacs > zlemetall + addedx - inbufct; 1472 1473 while (((c = hgetc()) != endchar || bct || 1474 (math && ((pct > 0) || (brct > 0))) || 1475 intick) && !lexstop) { 1476 cont: 1477 switch (c) { 1478 case '\\': 1479 c = hgetc(); 1480 if (c != '\n') { 1481 if (c == '$' || c == '\\' || (c == '}' && !intick && bct) || 1482 c == endchar || c == '`' || 1483 (endchar == ']' && (c == '[' || c == ']' || 1484 c == '(' || c == ')' || 1485 c == '{' || c == '}' || 1486 (c == '"' && sub)))) 1487 add(Bnull); 1488 else { 1489 /* lexstop is implicitly handled here */ 1490 add('\\'); 1491 goto cont; 1492 } 1493 } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"') 1494 continue; 1495 break; 1496 case '\n': 1497 err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"'; 1498 break; 1499 case '$': 1500 if (intick) 1501 break; 1502 c = hgetc(); 1503 if (c == '(') { 1504 add(Qstring); 1505 err = cmd_or_math_sub(); 1506 c = Outpar; 1507 } else if (c == '[') { 1508 add(String); 1509 add(Inbrack); 1510 cmdpush(CS_MATHSUBST); 1511 err = dquote_parse(']', sub); 1512 cmdpop(); 1513 c = Outbrack; 1514 } else if (c == '{') { 1515 add(Qstring); 1516 c = Inbrace; 1517 cmdpush(CS_BRACEPAR); 1518 bct++; 1519 } else if (c == '$') 1520 add(Qstring); 1521 else { 1522 hungetc(c); 1523 lexstop = 0; 1524 c = Qstring; 1525 } 1526 break; 1527 case '}': 1528 if (intick || !bct) 1529 break; 1530 c = Outbrace; 1531 bct--; 1532 cmdpop(); 1533 break; 1534 case '`': 1535 c = Qtick; 1536 if (intick == 2) 1537 ALLOWHIST 1538 if ((intick = !intick)) { 1539 SETPARBEGIN 1540 cmdpush(CS_BQUOTE); 1541 } else { 1542 SETPAREND 1543 cmdpop(); 1544 } 1545 break; 1546 case '\'': 1547 if (!intick) 1548 break; 1549 if (intick == 1) 
1550 intick = 2, STOPHIST 1551 else 1552 intick = 1, ALLOWHIST 1553 break; 1554 case '(': 1555 if (!math || !bct) 1556 pct++; 1557 break; 1558 case ')': 1559 if (!math || !bct) 1560 err = (!pct-- && math); 1561 break; 1562 case '[': 1563 if (!math || !bct) 1564 brct++; 1565 break; 1566 case ']': 1567 if (!math || !bct) 1568 err = (!brct-- && math); 1569 break; 1570 case '"': 1571 if (intick || (endchar != '"' && !bct)) 1572 break; 1573 if (bct) { 1574 add(Dnull); 1575 cmdpush(CS_DQUOTE); 1576 err = dquote_parse('"', sub); 1577 cmdpop(); 1578 c = Dnull; 1579 } else 1580 err = 1; 1581 break; 1582 } 1583 if (err || lexstop) 1584 break; 1585 add(c); 1586 } 1587 if (intick == 2) 1588 ALLOWHIST 1589 if (intick) { 1590 cmdpop(); 1591 } 1592 while (bct--) 1593 cmdpop(); 1594 if (lexstop) 1595 err = intick || endchar || err; 1596 else if (err == 1) { 1597 /* 1598 * TODO: as far as I can see, this hack is used in gettokstr() 1599 * to hungetc() a character on an error. However, I don't 1600 * understand what that actually gets us, and we can't guarantee 1601 * it's a character anyway, because of the previous test. 1602 * 1603 * We use the same feature in cmd_or_math where we actually do 1604 * need to unget if we decide it's really a command substitution. 1605 * We try to handle the other case by testing for lexstop. 1606 */ 1607 err = c; 1608 } 1609 if (zlemath && zlemetacs <= zlemetall + 1 - inbufct) 1610 inwhat = IN_MATH; 1611 return err; 1612} 1613 1614/* Tokenize a string given in s. Parsing is done as in double * 1615 * quotes. This is usually called before singsub(). 
*/ 1616 1617/**/ 1618mod_export int 1619parsestr(char *s) 1620{ 1621 int err; 1622 1623 if ((err = parsestrnoerr(s))) { 1624 untokenize(s); 1625 if (err > 32 && err < 127) 1626 zerr("parse error near `%c'", err); 1627 else 1628 zerr("parse error"); 1629 } 1630 return err; 1631} 1632 1633/**/ 1634mod_export int 1635parsestrnoerr(char *s) 1636{ 1637 int l = strlen(s), err; 1638 1639 lexsave(); 1640 untokenize(s); 1641 inpush(dupstring(s), 0, NULL); 1642 strinbeg(0); 1643 len = 0; 1644 bptr = tokstr = s; 1645 bsiz = l + 1; 1646 err = dquote_parse('\0', 1); 1647 *bptr = '\0'; 1648 strinend(); 1649 inpop(); 1650 DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty."); 1651 lexrestore(); 1652 return err; 1653} 1654 1655/* 1656 * Parse a subscript in string s. 1657 * sub is passed down to dquote_parse(). 1658 * endchar is the final character. 1659 * Return the next character, or NULL. 1660 */ 1661/**/ 1662mod_export char * 1663parse_subscript(char *s, int sub, int endchar) 1664{ 1665 int l = strlen(s), err; 1666 char *t; 1667 1668 if (!*s || *s == endchar) 1669 return 0; 1670 lexsave(); 1671 untokenize(t = dupstring(s)); 1672 inpush(t, 0, NULL); 1673 strinbeg(0); 1674 len = 0; 1675 bptr = tokstr = s; 1676 bsiz = l + 1; 1677 err = dquote_parse(endchar, sub); 1678 if (err) { 1679 err = *bptr; 1680 *bptr = '\0'; 1681 untokenize(s); 1682 *bptr = err; 1683 s = NULL; 1684 } else { 1685 s = bptr; 1686 } 1687 strinend(); 1688 inpop(); 1689 DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); 1690 lexrestore(); 1691 return s; 1692} 1693 1694/* Tokenize a string given in s. Parsing is done as if s were a normal * 1695 * command-line argument but it may contain separators. This is used * 1696 * to parse the right-hand side of ${...%...} substitutions. 
*/ 1697 1698/**/ 1699mod_export int 1700parse_subst_string(char *s) 1701{ 1702 int c, l = strlen(s), err; 1703 char *ptr; 1704 enum lextok ctok; 1705 1706 if (!*s || !strcmp(s, nulstring)) 1707 return 0; 1708 lexsave(); 1709 untokenize(s); 1710 inpush(dupstring(s), 0, NULL); 1711 strinbeg(0); 1712 len = 0; 1713 bptr = tokstr = s; 1714 bsiz = l + 1; 1715 c = hgetc(); 1716 ctok = gettokstr(c, 1); 1717 err = errflag; 1718 strinend(); 1719 inpop(); 1720 DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty."); 1721 lexrestore(); 1722 errflag = err; 1723 if (ctok == LEXERR) { 1724 untokenize(s); 1725 return 1; 1726 } 1727#ifdef DEBUG 1728 /* 1729 * Historical note: we used to check here for olen (the value of len 1730 * before lexrestore()) == l, but that's not necessarily the case if 1731 * we stripped an RCQUOTE. 1732 */ 1733 if (ctok != STRING || (errflag && !noerrs)) { 1734 fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n", 1735 errflag ? "errflag" : "ctok != STRING"); 1736 fflush(stderr); 1737 untokenize(s); 1738 return 1; 1739 } 1740#endif 1741 /* Check for $'...' quoting. This needs special handling. */ 1742 for (ptr = s; *ptr; ) 1743 { 1744 if (*ptr == String && ptr[1] == Snull) 1745 { 1746 char *t; 1747 int len, tlen, diff; 1748 t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL); 1749 len += 2; 1750 tlen = strlen(t); 1751 diff = len - tlen; 1752 /* 1753 * Yuk. 1754 * parse_subst_string() currently handles strings in-place. 1755 * That's not so easy to fix without knowing whether 1756 * additional memory should come off the heap or 1757 * otherwise. So we cheat by copying the unquoted string 1758 * into place, unless it's too long. That's not the 1759 * normal case, but I'm worried there are pathological 1760 * cases with converting metafied multibyte strings. 1761 * If someone can prove there aren't I will be very happy. 1762 */ 1763 if (diff < 0) { 1764 DPUTS(1, "$'...' 
subst too long: fix get_parse_string()"); 1765 return 1; 1766 } 1767 memcpy(ptr, t, tlen); 1768 ptr += tlen; 1769 if (diff > 0) { 1770 char *dptr = ptr; 1771 char *sptr = ptr + diff; 1772 while ((*dptr++ = *sptr++)) 1773 ; 1774 } 1775 } else 1776 ptr++; 1777 } 1778 return 0; 1779} 1780 1781/* Called below to report word positions. */ 1782 1783/**/ 1784mod_export void 1785gotword(void) 1786{ 1787 we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0); 1788 if (zlemetacs <= we) { 1789 wb = zlemetall - wordbeg + addedx; 1790 lexflags = 0; 1791 } 1792} 1793 1794/* expand aliases and reserved words */ 1795 1796/**/ 1797int 1798exalias(void) 1799{ 1800 Alias an; 1801 Reswd rw; 1802 1803 hwend(); 1804 if (interact && isset(SHINSTDIN) && !strin && !incasepat && 1805 tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) && 1806 (isset(CORRECTALL) || (isset(CORRECT) && incmdpos))) 1807 spckword(&tokstr, 1, incmdpos, 1); 1808 1809 if (!tokstr) { 1810 zshlextext = tokstrings[tok]; 1811 1812 return 0; 1813 } else { 1814 VARARR(char, copy, (strlen(tokstr) + 1)); 1815 1816 if (has_token(tokstr)) { 1817 char *p, *t; 1818 1819 zshlextext = p = copy; 1820 for (t = tokstr; 1821 (*p++ = itok(*t) ? 
ztokens[*t++ - Pound] : *t++);); 1822 } else 1823 zshlextext = tokstr; 1824 1825 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { 1826 int zp = lexflags; 1827 1828 gotword(); 1829 if ((zp & LEXFLAGS_ZLE) && !lexflags) { 1830 if (zshlextext == copy) 1831 zshlextext = tokstr; 1832 return 0; 1833 } 1834 } 1835 1836 if (tok == STRING) { 1837 /* Check for an alias */ 1838 if (!noaliases && isset(ALIASESOPT) && 1839 (!isset(POSIXALIASES) || 1840 !reswdtab->getnode(reswdtab, zshlextext))) { 1841 char *suf; 1842 1843 an = (Alias) aliastab->getnode(aliastab, zshlextext); 1844 if (an && !an->inuse && 1845 ((an->node.flags & ALIAS_GLOBAL) || incmdpos || inalmore)) { 1846 inpush(an->text, INP_ALIAS, an); 1847 if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL)) 1848 aliasspaceflag = 1; 1849 lexstop = 0; 1850 if (zshlextext == copy) 1851 zshlextext = tokstr; 1852 return 1; 1853 } 1854 if ((suf = strrchr(zshlextext, '.')) && suf[1] && 1855 suf > zshlextext && suf[-1] != Meta && 1856 (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) && 1857 !an->inuse && incmdpos) { 1858 inpush(dupstring(zshlextext), INP_ALIAS, NULL); 1859 inpush(" ", INP_ALIAS, NULL); 1860 inpush(an->text, INP_ALIAS, an); 1861 lexstop = 0; 1862 if (zshlextext == copy) 1863 zshlextext = tokstr; 1864 return 1; 1865 } 1866 } 1867 1868 /* Then check for a reserved word */ 1869 if ((incmdpos || 1870 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) && 1871 zshlextext[0] == '}' && !zshlextext[1])) && 1872 (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) { 1873 tok = rw->token; 1874 if (tok == DINBRACK) 1875 incond = 1; 1876 } else if (incond && !strcmp(zshlextext, "]]")) { 1877 tok = DOUTBRACK; 1878 incond = 0; 1879 } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1]) 1880 tok = BANG; 1881 } 1882 inalmore = 0; 1883 if (zshlextext == copy) 1884 zshlextext = tokstr; 1885 } 1886 return 0; 1887} 1888 1889/* skip (...) 
*/

/*
 * Copies input through add(), starting with an Inpar token, until the
 * parenthesis depth (initially 1) returns to zero, a token character
 * is read, or input runs out.  Backslash escapes, '...', "..." and
 * `...` are copied without counting the parentheses inside them; a
 * '#' at the very start or directly after a blank begins a comment
 * that is copied up to the newline.
 * Returns lexstop, i.e. non-zero if input ended first.
 */

/**/
static int
skipcomm(void)
{
    int depth = 1;
    int ch;
    int after_blank = 1;	/* true at the start and right after a blank */

    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    ch = Inpar;
    for (;;) {
        int blank;

        add(ch);
        ch = hgetc();
        if (itok(ch) || lexstop)
            break;
        blank = inblank(ch);
        switch (ch) {
        case '(':
            depth++;
            break;
        case ')':
            depth--;
            break;
        case '\\':
            add(ch);
            ch = hgetc();
            break;
        case '\'': {
            /* in $'...' a backslash also protects the next character */
            int dollar_quote = bptr[-1] == '$';

            add(ch);
            STOPHIST
            for (;;) {
                ch = hgetc();
                if (ch == '\'' || lexstop)
                    break;
                if (ch == '\\' && dollar_quote) {
                    add(ch);
                    ch = hgetc();
                }
                add(ch);
            }
            ALLOWHIST
            break;
        }
        case '\"':
        case '`': {
            /* both are copied up to the matching, unescaped delimiter */
            int closer = ch;

            add(ch);
            for (;;) {
                ch = hgetc();
                if (ch == closer || lexstop)
                    break;
                add(ch);
                if (ch == '\\')
                    add(hgetc());
            }
            break;
        }
        case '#':
            if (after_blank) {
                add(ch);
                for (;;) {
                    ch = hgetc();
                    if (ch == '\n' || lexstop)
                        break;
                    add(ch);
                }
                blank = 1;
            }
            break;
        }
        after_blank = blank;
        if (!depth)
            break;
    }
    if (!lexstop)
        SETPAREND
    cmdpop();
    return lexstop;
}