/*
 * lex.c - lexical analysis
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 1992-1997 Paul Falstad
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Paul Falstad or the Zsh Development Group be liable
 * to any party for direct, indirect, special, incidental, or consequential
 * damages arising out of the use of this software and its documentation,
 * even if Paul Falstad and the Zsh Development Group have been advised of
 * the possibility of such damage.
 *
 * Paul Falstad and the Zsh Development Group specifically disclaim any
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose.  The software
 * provided hereunder is on an "as is" basis, and Paul Falstad and the
 * Zsh Development Group have no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 */

#include "zsh.mdh"
#include "lex.pro"

/*
 * tokens
 *
 * Plain-character spelling of the tokenized characters.  cmd_or_math()
 * indexes this table with (token - Pound) to turn a token back into
 * the character it was read as.
 */

/**/
mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";

/* parts of the current token */

/**/
char *zshlextext;
/**/
mod_export char *tokstr;
/**/
mod_export enum lextok tok;
/*
 * File descriptor digit that preceded a redirection operator
 * (e.g. the 1 in "1< foo"); gettok() stores -1 when there was none.
 */
/**/
mod_export int tokfd;

/*
 * Line number at which the first character of a token was found.
 * We always set this in gettok(), which is always called from
 * zshlex() unless we have reached an error.  So it is always
 * valid when parsing.  It is not useful during execution
 * of the parsed structure.
 */

/**/
zlong toklineno;

/* lexical analyzer error flag */

/**/
mod_export int lexstop;

/* if != 0, this is the first line of the command */

/**/
mod_export int isfirstln;

/* if != 0, this is the first char of the command (not including white space) */

/**/
int isfirstch;

/* flag that an alias should be expanded after expansion ending in space */

/**/
int inalmore;

/*
 * Don't do spelling correction.
 * Bit 1 is only valid for the current word.  It's
 * set when we detect a lookahead that stops the word from
 * needing correction.
 */

/**/
int nocorrect;

/*
 * Cursor position and line length in zle when the line is
 * metafied for access from the main shell.
 */

/**/
mod_export int zlemetacs, zlemetall;

/* inwhat says what exactly we are in     *
 * (its value is one of the IN_* things). */

/**/
mod_export int inwhat;

/* 1 if x added to complete in a blank between words */

/**/
mod_export int addedx;

/* wb and we hold the beginning/end position of the word we are completing. */

/**/
mod_export int wb, we;

/* 1 if aliases should not be expanded */

/**/
mod_export int noaliases;

/*
 * If non-zero, we are parsing a line sent to us by the editor, or some
 * other string that's not part of standard command input (e.g. eval is
 * part of normal command input).
 *
 * Set of bits from LEXFLAGS_*.
 *
 * Note that although it is passed into the lexer as an input, the
 * lexer can set it to zero after finding the word it's searching for.
 * This only happens if the line being parsed actually does come from
 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
130 */ 131 132/**/ 133mod_export int lexflags; 134 135/**/ 136mod_export int wordbeg; 137 138/**/ 139mod_export int parbegin; 140 141/**/ 142mod_export int parend; 143 144/* don't recognize comments */ 145 146/**/ 147mod_export int nocomments; 148 149/* text of punctuation tokens */ 150 151/**/ 152mod_export char *tokstrings[WHILE + 1] = { 153 NULL, /* NULLTOK 0 */ 154 ";", /* SEPER */ 155 "\\n", /* NEWLIN */ 156 ";", /* SEMI */ 157 ";;", /* DSEMI */ 158 "&", /* AMPER 5 */ 159 "(", /* INPAR */ 160 ")", /* OUTPAR */ 161 "||", /* DBAR */ 162 "&&", /* DAMPER */ 163 ">", /* OUTANG 10 */ 164 ">|", /* OUTANGBANG */ 165 ">>", /* DOUTANG */ 166 ">>|", /* DOUTANGBANG */ 167 "<", /* INANG */ 168 "<>", /* INOUTANG 15 */ 169 "<<", /* DINANG */ 170 "<<-", /* DINANGDASH */ 171 "<&", /* INANGAMP */ 172 ">&", /* OUTANGAMP */ 173 "&>", /* AMPOUTANG 20 */ 174 "&>|", /* OUTANGAMPBANG */ 175 ">>&", /* DOUTANGAMP */ 176 ">>&|", /* DOUTANGAMPBANG */ 177 "<<<", /* TRINANG */ 178 "|", /* BAR 25 */ 179 "|&", /* BARAMP */ 180 "()", /* INOUTPAR */ 181 "((", /* DINPAR */ 182 "))", /* DOUTPAR */ 183 "&|", /* AMPERBANG 30 */ 184 ";&", /* SEMIAMP */ 185 ";|", /* SEMIBAR */ 186}; 187 188/* lexical state */ 189 190static int dbparens; 191static int len = 0, bsiz = 256; 192static char *bptr; 193 194struct lexstack { 195 struct lexstack *next; 196 197 int incmdpos; 198 int incond; 199 int incasepat; 200 int dbparens; 201 int isfirstln; 202 int isfirstch; 203 int histactive; 204 int histdone; 205 int lexflags; 206 int stophist; 207 int hlinesz; 208 char *hline; 209 char *hptr; 210 enum lextok tok; 211 int isnewlin; 212 char *tokstr; 213 char *zshlextext; 214 char *bptr; 215 int bsiz; 216 int len; 217 short *chwords; 218 int chwordlen; 219 int chwordpos; 220 int hwgetword; 221 int lexstop; 222 struct heredocs *hdocs; 223 int (*hgetc) _((void)); 224 void (*hungetc) _((int)); 225 void (*hwaddc) _((int)); 226 void (*hwbegin) _((int)); 227 void (*hwend) _((void)); 228 void (*addtoline) _((int)); 229 230 
int eclen, ecused, ecnpats; 231 Wordcode ecbuf; 232 Eccstr ecstrs; 233 int ecsoffs, ecssub, ecnfunc; 234 235 unsigned char *cstack; 236 int csp; 237 zlong toklineno; 238}; 239 240static struct lexstack *lstack = NULL; 241 242/* save the lexical state */ 243 244/* is this a hack or what? */ 245 246/**/ 247mod_export void 248lexsave(void) 249{ 250 struct lexstack *ls; 251 252 ls = (struct lexstack *)malloc(sizeof(struct lexstack)); 253 254 ls->incmdpos = incmdpos; 255 ls->incond = incond; 256 ls->incasepat = incasepat; 257 ls->dbparens = dbparens; 258 ls->isfirstln = isfirstln; 259 ls->isfirstch = isfirstch; 260 ls->histactive = histactive; 261 ls->histdone = histdone; 262 ls->lexflags = lexflags; 263 ls->stophist = stophist; 264 stophist = 0; 265 if (!lstack) { 266 /* top level, make this version visible to ZLE */ 267 zle_chline = chline; 268 /* ensure line stored is NULL-terminated */ 269 if (hptr) 270 *hptr = '\0'; 271 } 272 ls->hline = chline; 273 chline = NULL; 274 ls->hptr = hptr; 275 hptr = NULL; 276 ls->hlinesz = hlinesz; 277 ls->cstack = cmdstack; 278 ls->csp = cmdsp; 279 cmdstack = (unsigned char *)zalloc(CMDSTACKSZ); 280 ls->tok = tok; 281 ls->isnewlin = isnewlin; 282 ls->tokstr = tokstr; 283 ls->zshlextext = zshlextext; 284 ls->bptr = bptr; 285 tokstr = zshlextext = bptr = NULL; 286 ls->bsiz = bsiz; 287 bsiz = 256; 288 ls->len = len; 289 ls->chwords = chwords; 290 ls->chwordlen = chwordlen; 291 ls->chwordpos = chwordpos; 292 ls->hwgetword = hwgetword; 293 ls->lexstop = lexstop; 294 ls->hdocs = hdocs; 295 ls->hgetc = hgetc; 296 ls->hungetc = hungetc; 297 ls->hwaddc = hwaddc; 298 ls->hwbegin = hwbegin; 299 ls->hwend = hwend; 300 ls->addtoline = addtoline; 301 ls->eclen = eclen; 302 ls->ecused = ecused; 303 ls->ecnpats = ecnpats; 304 ls->ecbuf = ecbuf; 305 ls->ecstrs = ecstrs; 306 ls->ecsoffs = ecsoffs; 307 ls->ecssub = ecssub; 308 ls->ecnfunc = ecnfunc; 309 ls->toklineno = toklineno; 310 cmdsp = 0; 311 inredir = 0; 312 hdocs = NULL; 313 histactive = 0; 314 
ecbuf = NULL; 315 316 ls->next = lstack; 317 lstack = ls; 318} 319 320/* restore lexical state */ 321 322/**/ 323mod_export void 324lexrestore(void) 325{ 326 struct lexstack *ln; 327 328 DPUTS(!lstack, "BUG: lexrestore() without lexsave()"); 329 incmdpos = lstack->incmdpos; 330 incond = lstack->incond; 331 incasepat = lstack->incasepat; 332 dbparens = lstack->dbparens; 333 isfirstln = lstack->isfirstln; 334 isfirstch = lstack->isfirstch; 335 histactive = lstack->histactive; 336 histdone = lstack->histdone; 337 lexflags = lstack->lexflags; 338 stophist = lstack->stophist; 339 chline = lstack->hline; 340 hptr = lstack->hptr; 341 if (cmdstack) 342 free(cmdstack); 343 cmdstack = lstack->cstack; 344 cmdsp = lstack->csp; 345 tok = lstack->tok; 346 isnewlin = lstack->isnewlin; 347 tokstr = lstack->tokstr; 348 zshlextext = lstack->zshlextext; 349 bptr = lstack->bptr; 350 bsiz = lstack->bsiz; 351 len = lstack->len; 352 chwords = lstack->chwords; 353 chwordlen = lstack->chwordlen; 354 chwordpos = lstack->chwordpos; 355 hwgetword = lstack->hwgetword; 356 lexstop = lstack->lexstop; 357 hdocs = lstack->hdocs; 358 hgetc = lstack->hgetc; 359 hungetc = lstack->hungetc; 360 hwaddc = lstack->hwaddc; 361 hwbegin = lstack->hwbegin; 362 hwend = lstack->hwend; 363 addtoline = lstack->addtoline; 364 if (ecbuf) 365 zfree(ecbuf, eclen); 366 eclen = lstack->eclen; 367 ecused = lstack->ecused; 368 ecnpats = lstack->ecnpats; 369 ecbuf = lstack->ecbuf; 370 ecstrs = lstack->ecstrs; 371 ecsoffs = lstack->ecsoffs; 372 ecssub = lstack->ecssub; 373 ecnfunc = lstack->ecnfunc; 374 hlinesz = lstack->hlinesz; 375 toklineno = lstack->toklineno; 376 errflag = 0; 377 378 ln = lstack->next; 379 if (!ln) { 380 /* Back to top level: don't need special ZLE value */ 381 DPUTS(chline != zle_chline, "BUG: Ouch, wrong chline for ZLE"); 382 zle_chline = NULL; 383 } 384 free(lstack); 385 lstack = ln; 386} 387 388/**/ 389void 390zshlex(void) 391{ 392 if (tok == LEXERR) 393 return; 394 do 395 tok = gettok(); 396 
while (tok != ENDINPUT && exalias()); 397 nocorrect &= 1; 398 if (tok == NEWLIN || tok == ENDINPUT) { 399 while (hdocs) { 400 struct heredocs *next = hdocs->next; 401 char *doc, *munged_term; 402 403 hwbegin(0); 404 cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD); 405 munged_term = dupstring(hdocs->str); 406 STOPHIST 407 doc = gethere(&munged_term, hdocs->type); 408 ALLOWHIST 409 cmdpop(); 410 hwend(); 411 if (!doc) { 412 zerr("here document too large"); 413 while (hdocs) { 414 next = hdocs->next; 415 zfree(hdocs, sizeof(struct heredocs)); 416 hdocs = next; 417 } 418 tok = LEXERR; 419 break; 420 } 421 setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str, 422 munged_term); 423 zfree(hdocs, sizeof(struct heredocs)); 424 hdocs = next; 425 } 426 } 427 if (tok != NEWLIN) 428 isnewlin = 0; 429 else 430 isnewlin = (inbufct) ? -1 : 1; 431 if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE))) 432 tok = SEPER; 433} 434 435/**/ 436mod_export void 437ctxtlex(void) 438{ 439 static int oldpos; 440 441 zshlex(); 442 switch (tok) { 443 case SEPER: 444 case NEWLIN: 445 case SEMI: 446 case DSEMI: 447 case SEMIAMP: 448 case SEMIBAR: 449 case AMPER: 450 case AMPERBANG: 451 case INPAR: 452 case INBRACE: 453 case DBAR: 454 case DAMPER: 455 case BAR: 456 case BARAMP: 457 case INOUTPAR: 458 case DOLOOP: 459 case THEN: 460 case ELIF: 461 case ELSE: 462 case DOUTBRACK: 463 incmdpos = 1; 464 break; 465 case STRING: 466 /* case ENVSTRING: */ 467 case ENVARRAY: 468 case OUTPAR: 469 case CASE: 470 case DINBRACK: 471 incmdpos = 0; 472 break; 473 474 default: 475 /* nothing to do, keep compiler happy */ 476 break; 477 } 478 if (tok != DINPAR) 479 infor = tok == FOR ? 
2 : 0; 480 if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) { 481 inredir = 1; 482 oldpos = incmdpos; 483 incmdpos = 0; 484 } else if (inredir) { 485 incmdpos = oldpos; 486 inredir = 0; 487 } 488} 489 490#define LX1_BKSLASH 0 491#define LX1_COMMENT 1 492#define LX1_NEWLIN 2 493#define LX1_SEMI 3 494#define LX1_AMPER 5 495#define LX1_BAR 6 496#define LX1_INPAR 7 497#define LX1_OUTPAR 8 498#define LX1_INANG 13 499#define LX1_OUTANG 14 500#define LX1_OTHER 15 501 502#define LX2_BREAK 0 503#define LX2_OUTPAR 1 504#define LX2_BAR 2 505#define LX2_STRING 3 506#define LX2_INBRACK 4 507#define LX2_OUTBRACK 5 508#define LX2_TILDE 6 509#define LX2_INPAR 7 510#define LX2_INBRACE 8 511#define LX2_OUTBRACE 9 512#define LX2_OUTANG 10 513#define LX2_INANG 11 514#define LX2_EQUALS 12 515#define LX2_BKSLASH 13 516#define LX2_QUOTE 14 517#define LX2_DQUOTE 15 518#define LX2_BQUOTE 16 519#define LX2_COMMA 17 520#define LX2_OTHER 18 521#define LX2_META 19 522 523static unsigned char lexact1[256], lexact2[256], lextok2[256]; 524 525/**/ 526void 527initlextabs(void) 528{ 529 int t0; 530 static char *lx1 = "\\q\n;!&|(){}[]<>"; 531 static char *lx2 = ";)|$[]~({}><=\\\'\"`,"; 532 533 for (t0 = 0; t0 != 256; t0++) { 534 lexact1[t0] = LX1_OTHER; 535 lexact2[t0] = LX2_OTHER; 536 lextok2[t0] = t0; 537 } 538 for (t0 = 0; lx1[t0]; t0++) 539 lexact1[(int)lx1[t0]] = t0; 540 for (t0 = 0; lx2[t0]; t0++) 541 lexact2[(int)lx2[t0]] = t0; 542 lexact2['&'] = LX2_BREAK; 543 lexact2[STOUC(Meta)] = LX2_META; 544 lextok2['*'] = Star; 545 lextok2['?'] = Quest; 546 lextok2['{'] = Inbrace; 547 lextok2['['] = Inbrack; 548 lextok2['$'] = String; 549 lextok2['~'] = Tilde; 550 lextok2['#'] = Pound; 551 lextok2['^'] = Hat; 552} 553 554/* initialize lexical state */ 555 556/**/ 557void 558lexinit(void) 559{ 560 incond = incasepat = nocorrect = 561 infor = dbparens = lexstop = 0; 562 incmdpos = 1; 563 tok = ENDINPUT; 564} 565 566/* add a char to the string buffer */ 567 568/**/ 569void 570add(int 
c) 571{ 572 *bptr++ = c; 573 if (bsiz == ++len) { 574 int newbsiz = bsiz * 2; 575 576 if (newbsiz > inbufct && inbufct > bsiz) 577 newbsiz = inbufct; 578 579 bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz)); 580 /* len == bsiz, so bptr is at the start of newly allocated memory */ 581 memset(bptr, 0, newbsiz - bsiz); 582 bsiz = newbsiz; 583 } 584} 585 586#define SETPARBEGIN { \ 587 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ 588 zlemetacs >= zlemetall+1-inbufct) \ 589 parbegin = inbufct; \ 590 } 591#define SETPAREND { \ 592 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ 593 parbegin != -1 && parend == -1) { \ 594 if (zlemetacs >= zlemetall + 1 - inbufct) \ 595 parbegin = -1; \ 596 else \ 597 parend = inbufct; \ 598 } \ 599 } 600 601/* 602 * Return 1 for math, 0 for a command, 2 for an error. If it couldn't be 603 * parsed as math, but there was no gross error, it's a command. 604 */ 605 606static int 607cmd_or_math(int cs_type) 608{ 609 int oldlen = len; 610 int c; 611 612 cmdpush(cs_type); 613 c = dquote_parse(')', 0); 614 cmdpop(); 615 *bptr = '\0'; 616 if (!c) { 617 /* Successfully parsed, see if it was math */ 618 c = hgetc(); 619 if (c == ')') 620 return 1; /* yes */ 621 hungetc(c); 622 lexstop = 0; 623 c = ')'; 624 } else if (lexstop) { 625 /* we haven't got anything to unget */ 626 return 2; 627 } 628 /* else unsuccessful: unget the whole thing */ 629 hungetc(c); 630 lexstop = 0; 631 while (len > oldlen) { 632 len--; 633 hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr); 634 } 635 hungetc('('); 636 return 0; 637} 638 639 640/* 641 * Parse either a $(( ... )) or a $(...) 642 * Return 0 on success, 1 on failure. 
643 */ 644static int 645cmd_or_math_sub(void) 646{ 647 int c = hgetc(), ret; 648 649 if (c == '(') { 650 add(Inpar); 651 add('('); 652 if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) { 653 add(')'); 654 return 0; 655 } 656 if (ret == 2) 657 return 1; 658 bptr -= 2; 659 len -= 2; 660 } else { 661 hungetc(c); 662 lexstop = 0; 663 } 664 return skipcomm(); 665} 666 667/* Check whether we're looking at valid numeric globbing syntax * 668 * (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". * 669 * Leaves the input in the same place, returning 0 or 1. */ 670 671/**/ 672static int 673isnumglob(void) 674{ 675 int c, ec = '-', ret = 0; 676 int tbs = 256, n = 0; 677 char *tbuf = (char *)zalloc(tbs); 678 679 while(1) { 680 c = hgetc(); 681 if(lexstop) { 682 lexstop = 0; 683 break; 684 } 685 tbuf[n++] = c; 686 if(!idigit(c)) { 687 if(c != ec) 688 break; 689 if(ec == '>') { 690 ret = 1; 691 break; 692 } 693 ec = '>'; 694 } 695 if(n == tbs) 696 tbuf = (char *)realloc(tbuf, tbs *= 2); 697 } 698 while(n--) 699 hungetc(tbuf[n]); 700 zfree(tbuf, tbs); 701 return ret; 702} 703 704/**/ 705static enum lextok 706gettok(void) 707{ 708 int c, d; 709 int peekfd = -1; 710 enum lextok peek; 711 712 beginning: 713 tokstr = NULL; 714 while (iblank(c = hgetc()) && !lexstop); 715 toklineno = lineno; 716 if (lexstop) 717 return (errflag) ? LEXERR : ENDINPUT; 718 isfirstln = 0; 719 wordbeg = inbufct - (qbang && c == bangchar); 720 hwbegin(-1-(qbang && c == bangchar)); 721 /* word includes the last character read and possibly \ before ! */ 722 if (dbparens) { 723 len = 0; 724 bptr = tokstr = (char *) hcalloc(bsiz = 32); 725 hungetc(c); 726 cmdpush(CS_MATH); 727 c = dquote_parse(infor ? 
';' : ')', 0); 728 cmdpop(); 729 *bptr = '\0'; 730 if (!c && infor) { 731 infor--; 732 return DINPAR; 733 } 734 if (c || (c = hgetc()) != ')') { 735 hungetc(c); 736 return LEXERR; 737 } 738 dbparens = 0; 739 return DOUTPAR; 740 } else if (idigit(c)) { /* handle 1< foo */ 741 d = hgetc(); 742 if(d == '&') { 743 d = hgetc(); 744 if(d == '>') { 745 peekfd = c - '0'; 746 hungetc('>'); 747 c = '&'; 748 } else { 749 hungetc(d); 750 lexstop = 0; 751 hungetc('&'); 752 } 753 } else if (d == '>' || d == '<') { 754 peekfd = c - '0'; 755 c = d; 756 } else { 757 hungetc(d); 758 lexstop = 0; 759 } 760 } 761 762 /* chars in initial position in word */ 763 764 /* 765 * Handle comments. There are some special cases when this 766 * is not normal command input: lexflags implies we are examining 767 * a line lexically without it being used for normal command input. 768 */ 769 if (c == hashchar && !nocomments && 770 (isset(INTERACTIVECOMMENTS) || 771 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding && 772 (!interact || unset(SHINSTDIN) || strin)))) { 773 /* History is handled here to prevent extra * 774 * newlines being inserted into the history. */ 775 776 if (lexflags & LEXFLAGS_COMMENTS_KEEP) { 777 len = 0; 778 bptr = tokstr = (char *)hcalloc(bsiz = 32); 779 add(c); 780 } 781 while ((c = ingetc()) != '\n' && !lexstop) { 782 hwaddc(c); 783 addtoline(c); 784 if (lexflags & LEXFLAGS_COMMENTS_KEEP) 785 add(c); 786 } 787 788 if (errflag) 789 peek = LEXERR; 790 else { 791 if (lexflags & LEXFLAGS_COMMENTS_KEEP) { 792 *bptr = '\0'; 793 if (!lexstop) 794 hungetc(c); 795 peek = STRING; 796 } else { 797 hwend(); 798 hwbegin(0); 799 hwaddc('\n'); 800 addtoline('\n'); 801 /* 802 * If splitting a line and removing comments, 803 * we don't want a newline token since it's 804 * treated specially. 
805 */ 806 if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop) 807 peek = ENDINPUT; 808 else 809 peek = NEWLIN; 810 } 811 } 812 return peek; 813 } 814 switch (lexact1[STOUC(c)]) { 815 case LX1_BKSLASH: 816 d = hgetc(); 817 if (d == '\n') 818 goto beginning; 819 hungetc(d); 820 lexstop = 0; 821 break; 822 case LX1_NEWLIN: 823 return NEWLIN; 824 case LX1_SEMI: 825 d = hgetc(); 826 if(d == ';') 827 return DSEMI; 828 else if(d == '&') 829 return SEMIAMP; 830 else if (d == '|') 831 return SEMIBAR; 832 hungetc(d); 833 lexstop = 0; 834 return SEMI; 835 case LX1_AMPER: 836 d = hgetc(); 837 if (d == '&') 838 return DAMPER; 839 else if (d == '!' || d == '|') 840 return AMPERBANG; 841 else if (d == '>') { 842 tokfd = peekfd; 843 d = hgetc(); 844 if (d == '!' || d == '|') 845 return OUTANGAMPBANG; 846 else if (d == '>') { 847 d = hgetc(); 848 if (d == '!' || d == '|') 849 return DOUTANGAMPBANG; 850 hungetc(d); 851 lexstop = 0; 852 return DOUTANGAMP; 853 } 854 hungetc(d); 855 lexstop = 0; 856 return AMPOUTANG; 857 } 858 hungetc(d); 859 lexstop = 0; 860 return AMPER; 861 case LX1_BAR: 862 d = hgetc(); 863 if (d == '|') 864 return DBAR; 865 else if (d == '&') 866 return BARAMP; 867 hungetc(d); 868 lexstop = 0; 869 return BAR; 870 case LX1_INPAR: 871 d = hgetc(); 872 if (d == '(') { 873 if (infor) { 874 dbparens = 1; 875 return DINPAR; 876 } 877 if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) { 878 len = 0; 879 bptr = tokstr = (char *) hcalloc(bsiz = 32); 880 switch (cmd_or_math(CS_MATH)) { 881 case 1: 882 return DINPAR; 883 884 case 0: 885 /* 886 * Not math, so we don't return the contents 887 * as a string in this case. 
888 */ 889 tokstr = NULL; 890 return INPAR; 891 892 default: 893 return LEXERR; 894 } 895 } 896 } else if (d == ')') 897 return INOUTPAR; 898 hungetc(d); 899 lexstop = 0; 900 if (!(incond == 1 || incmdpos)) 901 break; 902 return INPAR; 903 case LX1_OUTPAR: 904 return OUTPAR; 905 case LX1_INANG: 906 d = hgetc(); 907 if (d == '(') { 908 hungetc(d); 909 lexstop = 0; 910 unpeekfd: 911 if(peekfd != -1) { 912 hungetc(c); 913 c = '0' + peekfd; 914 } 915 break; 916 } 917 if (d == '>') { 918 peek = INOUTANG; 919 } else if (d == '<') { 920 int e = hgetc(); 921 922 if (e == '(') { 923 hungetc(e); 924 hungetc(d); 925 peek = INANG; 926 } else if (e == '<') 927 peek = TRINANG; 928 else if (e == '-') 929 peek = DINANGDASH; 930 else { 931 hungetc(e); 932 lexstop = 0; 933 peek = DINANG; 934 } 935 } else if (d == '&') { 936 peek = INANGAMP; 937 } else { 938 hungetc(d); 939 if(isnumglob()) 940 goto unpeekfd; 941 peek = INANG; 942 } 943 tokfd = peekfd; 944 return peek; 945 case LX1_OUTANG: 946 d = hgetc(); 947 if (d == '(') { 948 hungetc(d); 949 goto unpeekfd; 950 } else if (d == '&') { 951 d = hgetc(); 952 if (d == '!' || d == '|') 953 peek = OUTANGAMPBANG; 954 else { 955 hungetc(d); 956 lexstop = 0; 957 peek = OUTANGAMP; 958 } 959 } else if (d == '!' || d == '|') 960 peek = OUTANGBANG; 961 else if (d == '>') { 962 d = hgetc(); 963 if (d == '&') { 964 d = hgetc(); 965 if (d == '!' || d == '|') 966 peek = DOUTANGAMPBANG; 967 else { 968 hungetc(d); 969 lexstop = 0; 970 peek = DOUTANGAMP; 971 } 972 } else if (d == '!' 
|| d == '|') 973 peek = DOUTANGBANG; 974 else if (d == '(') { 975 hungetc(d); 976 hungetc('>'); 977 peek = OUTANG; 978 } else { 979 hungetc(d); 980 lexstop = 0; 981 peek = DOUTANG; 982 if (isset(HISTALLOWCLOBBER)) 983 hwaddc('|'); 984 } 985 } else { 986 hungetc(d); 987 lexstop = 0; 988 peek = OUTANG; 989 if (!incond && isset(HISTALLOWCLOBBER)) 990 hwaddc('|'); 991 } 992 tokfd = peekfd; 993 return peek; 994 } 995 996 /* we've started a string, now get the * 997 * rest of it, performing tokenization */ 998 return gettokstr(c, 0); 999} 1000 1001/* 1002 * Get the remains of a token string. This has two uses. 1003 * When called from gettok(), with sub = 0, we have already identified 1004 * any interesting initial character and want to get the rest of 1005 * what we now know is a string. However, the string may still include 1006 * metacharacters and potentially substitutions. 1007 * 1008 * When called from parse_subst_string() with sub = 1, we are not 1009 * fully parsing a command line, merely tokenizing a string. 1010 * In this case we always add characters to the parsed string 1011 * unless there is a parse error. 
 */

/*
 * c is the first character of the string (already read by the caller).
 * Returns the token type: STRING unless one of the cases below changes
 * it (ENVSTRING, ENVARRAY, INPAR, INOUTPAR) or a parse error gives
 * LEXERR.  The text is accumulated with add() and null-terminated
 * at bptr before returning.
 */

/**/
static enum lextok
gettokstr(int c, int sub)
{
    /*
     * bct, pct, brct: nesting depth of braces, parentheses, brackets.
     * fdpar: set while "(" has so far been followed only by blanks,
     *        for the [k]sh `foo( )' function definition.
     * intpos: non-zero while we are at the initial position of the word
     *         (set to 2 again just after the `=' of an assignment).
     * in_brace_param: brace depth at which a ${...} was opened, else 0.
     * unmatched: the quote character to report as unmatched, else 0.
     */
    int bct = 0, pct = 0, brct = 0, fdpar = 0;
    int intpos = 1, in_brace_param = 0;
    int inquote, unmatched = 0;
    enum lextok peek;
#ifdef DEBUG
    int ocmdsp = cmdsp;
#endif

    peek = STRING;
    if (!sub) {
        /* start a new token buffer; with sub set the caller's is used */
        len = 0;
        bptr = tokstr = (char *) hcalloc(bsiz = 32);
    }
    for (;;) {
        int act;
        int e;
        int inbl = inblank(c);

        if (fdpar && !inbl && c != ')')
            fdpar = 0;

        if (inbl && !in_brace_param && !pct)
            act = LX2_BREAK;
        else {
            /* look up the action, and the token the character becomes */
            act = lexact2[STOUC(c)];
            c = lextok2[STOUC(c)];
        }
        switch (act) {
        case LX2_BREAK:
            if (!in_brace_param && !sub)
                goto brk;
            break;
        case LX2_META:
            /* Meta is added here, the character after it at the bottom */
            c = hgetc();
#ifdef DEBUG
            if (lexstop) {
                fputs("BUG: input terminated by Meta\n", stderr);
                fflush(stderr);
                goto brk;
            }
#endif
            add(Meta);
            break;
        case LX2_OUTPAR:
            if (fdpar) {
                /* this is a single word `( )', treat as INOUTPAR */
                add(c);
                *bptr = '\0';
                return INOUTPAR;
            }
            if ((sub || in_brace_param) && isset(SHGLOB))
                break;
            /* a closing parenthesis with none open ends the word */
            if (!in_brace_param && !pct--) {
                if (sub) {
                    pct = 0;
                    break;
                } else
                    goto brk;
            }
            c = Outpar;
            break;
        case LX2_BAR:
            if (!pct && !in_brace_param) {
                if (sub)
                    break;
                else
                    goto brk;
            }
            if (unset(SHGLOB) || (!sub && !in_brace_param))
                c = Bar;
            break;
        case LX2_STRING:
            /* `$': look at the next character for $[...], $(...), ${...} */
            e = hgetc();
            if (e == '[') {
                cmdpush(CS_MATHSUBST);
                add(String);
                add(Inbrack);
                c = dquote_parse(']', sub);
                cmdpop();
                if (c) {
                    peek = LEXERR;
                    goto brk;
                }
                c = Outbrack;
            } else if (e == '(') {
                add(String);
                c = cmd_or_math_sub();
                if (c) {
                    peek = LEXERR;
                    goto brk;
                }
                c = Outpar;
            } else {
                if (e == '{') {
                    add(c);
                    c = Inbrace;
                    ++bct;
                    cmdpush(CS_BRACEPAR);
                    /* remember the depth of the outermost ${ */
                    if (!in_brace_param)
                        in_brace_param = bct;
                } else {
                    hungetc(e);
                    lexstop = 0;
                }
            }
            break;
        case LX2_INBRACK:
            if (!in_brace_param)
                brct++;
            c = Inbrack;
            break;
        case LX2_OUTBRACK:
            if (!in_brace_param)
                brct--;
            if (brct < 0)
                brct = 0;
            c = Outbrack;
            break;
        case LX2_INPAR:
            if (isset(SHGLOB)) {
                if (sub || in_brace_param)
                    break;
                if (incasepat && !len)
                    return INPAR;
                if (!isset(KSHGLOB) && len)
                    goto brk;
            }
            if (!in_brace_param) {
                if (!sub) {
                    /* peek at the character after the parenthesis */
                    e = hgetc();
                    hungetc(e);
                    lexstop = 0;
                    /* For command words, parentheses are only
                     * special at the start.  But now we're tokenising
                     * the remaining string.  So I don't see what
                     * the old incmdpos test here is for.
                     *   pws 1999/6/8
                     *
                     * Oh, no.
                     *  func1(   )
                     * is a valid function definition in [k]sh.  The best
                     * thing we can do, without really nasty lookahead tricks,
                     * is break if we find a blank after a parenthesis.  At
                     * least this can't happen inside braces or brackets.  We
                     * only allow this with SHGLOB (set for both sh and ksh).
                     *
                     * Things like `print @( |foo)' should still
                     * work, because [k]sh don't allow multiple words
                     * in a function definition, so we only do this
                     * in command position.
                     *   pws 1999/6/14
                     */
                    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
                                     !brct && !intpos && incmdpos)) {
                        /*
                         * Either a () token, or a command word with
                         * something suspiciously like a ksh function
                         * definition.
                         * The current word isn't spellcheckable.
                         */
                        nocorrect |= 2;
                        goto brk;
                    }
                }
                /*
                 * This also handles the [k]sh `foo( )' function definition.
                 * Maintain a variable fdpar, set as long as a single set of
                 * parentheses contains only space.  Then if we get to the
                 * closing parenthesis and it is still set, we can assume we
                 * have a function definition.  Only do this at the start of
                 * the word, since the (...) must be a separate token.
                 */
                if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
                    fdpar = 1;
            }
            c = Inpar;
            break;
        case LX2_INBRACE:
            if (isset(IGNOREBRACES) || sub)
                c = '{';
            else {
                /* a `{' starting a word in command position stands alone */
                if (!len && incmdpos) {
                    add('{');
                    *bptr = '\0';
                    return STRING;
                }
                if (in_brace_param) {
                    cmdpush(CS_BRACE);
                }
                bct++;
            }
            break;
        case LX2_OUTBRACE:
            if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
                break;
            if (!bct)
                break;
            if (in_brace_param) {
                cmdpop();
            }
            /* this brace closes the outermost ${...} */
            if (bct-- == in_brace_param)
                in_brace_param = 0;
            c = Outbrace;
            break;
        case LX2_COMMA:
            if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
                c = Comma;
            break;
        case LX2_OUTANG:
            if (in_brace_param || sub)
                break;
            e = hgetc();
            if (e != '(') {
                /* not `>(': the `>' ends the word */
                hungetc(e);
                lexstop = 0;
                goto brk;
            }
            add(OutangProc);
            if (skipcomm()) {
                peek = LEXERR;
                goto brk;
            }
            c = Outpar;
            break;
        case LX2_INANG:
            if (isset(SHGLOB) && sub)
                break;
            e = hgetc();
            if (!(in_brace_param || sub) && e == '(') {
                add(Inang);
                if (skipcomm()) {
                    peek = LEXERR;
                    goto brk;
                }
                c = Outpar;
                break;
            }
            hungetc(e);
            if(isnumglob()) {
                /* numeric glob <m-n>: copy it up to the closing `>' */
                add(Inang);
                while ((c = hgetc()) != '>')
                    add(c);
                c = Outang;
                break;
            }
            lexstop = 0;
            if (in_brace_param || sub)
                break;
            goto brk;
        case LX2_EQUALS:
            if (!sub) {
                if (intpos) {
                    /* `=' at the start of the word */
                    e = hgetc();
                    if (e != '(') {
                        hungetc(e);
                        lexstop = 0;
                        c = Equals;
                    } else {
                        add(Equals);
                        if (skipcomm()) {
                            peek = LEXERR;
                            goto brk;
                        }
                        c = Outpar;
                    }
                } else if (peek != ENVSTRING &&
                           incmdpos && !bct && !brct) {
                    /*
                     * Check whether everything before the `=' is
                     * digits or an identifier, optionally followed by
                     * a bracketed part and a `+'.
                     */
                    char *t = tokstr;
                    if (idigit(*t))
                        while (++t < bptr && idigit(*t));
                    else {
                        int sav = *bptr;
                        *bptr = '\0';
                        t = itype_end(t, IIDENT, 0);
                        if (t < bptr) {
                            skipparens(Inbrack, Outbrack, &t);
                        } else {
                            *bptr = sav;
                        }
                    }
                    if (*t == '+')
                        t++;
                    if (t == bptr) {
                        /* it was: this word is an assignment */
                        e = hgetc();
                        if (e == '(' && incmdpos) {
                            *bptr = '\0';
                            return ENVARRAY;
                        }
                        hungetc(e);
                        lexstop = 0;
                        peek = ENVSTRING;
                        intpos = 2;
                    } else
                        c = Equals;
                } else
                    c = Equals;
            }
            break;
        case LX2_BKSLASH:
            c = hgetc();
            if (c == '\n') {
                /* backslash-newline is dropped from the string */
                c = hgetc();
                if (!lexstop)
                    continue;
            } else
                add(Bnull);
            if (lexstop)
                goto brk;
            break;
        case LX2_QUOTE: {
            /* non-zero for $'...', i.e. the quote directly follows a `$' */
            int strquote = (len && bptr[-1] == String);

            add(Snull);
            cmdpush(CS_QUOTE);
            for (;;) {
                STOPHIST
                while ((c = hgetc()) != '\'' && !lexstop) {
                    if (strquote && c == '\\') {
                        c = hgetc();
                        if (lexstop)
                            break;
                        /*
                         * Mostly we don't need to do anything special
                         * with escape backslashes or closing quotes
                         * inside $'...'; however in completion we
                         * need to be able to strip multiple backslashes
                         * neatly.
                         */
                        if (c == '\\' || c == '\'')
                            add(Bnull);
                        else
                            add('\\');
                    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
                        if (bptr[-1] == '\\')
                            bptr--, len--;
                        else
                            break;
                    }
                    add(c);
                }
                ALLOWHIST
                if (c != '\'') {
                    unmatched = '\'';
                    peek = LEXERR;
                    cmdpop();
                    goto brk;
                }
                e = hgetc();
                /* with RCQUOTES, '' inside '...' is a literal quote */
                if (e != '\'' || unset(RCQUOTES) || strquote)
                    break;
                add(c);
            }
            cmdpop();
            hungetc(e);
            lexstop = 0;
            c = Snull;
            break;
        }
        case LX2_DQUOTE:
            add(Dnull);
            cmdpush(CS_DQUOTE);
            c = dquote_parse('"', sub);
            cmdpop();
            if (c) {
                unmatched = '"';
                peek = LEXERR;
                goto brk;
            }
            c = Dnull;
            break;
        case LX2_BQUOTE:
            add(Tick);
            cmdpush(CS_BQUOTE);
            SETPARBEGIN
            /* inquote tracks single quotes inside the backquotes */
            inquote = 0;
            while ((c = hgetc()) != '`' && !lexstop) {
                if (c == '\\') {
                    c = hgetc();
                    if (c != '\n') {
                        add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
                        add(c);
                    }
                    else if (!sub && isset(CSHJUNKIEQUOTES))
                        add(c);
                } else {
                    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
                        break;
                    }
                    add(c);
                    if (c == '\'') {
                        if ((inquote = !inquote))
                            STOPHIST
                        else
                            ALLOWHIST
                    }
                }
            }
            if (inquote)
                ALLOWHIST
            cmdpop();
            if (c != '`') {
                unmatched = '`';
                peek = LEXERR;
                goto brk;
            }
            c = Tick;
            SETPAREND
            break;
        }
        /* add the (possibly tokenized) character and fetch the next one */
        add(c);
        c = hgetc();
        if (intpos)
            intpos--;
        if (lexstop)
            break;
    }
  brk:
    /* the character that ended the string is not part of it */
    hungetc(c);
    if (unmatched)
        zerr("unmatched %c", unmatched);
    if (in_brace_param) {
        /* balance the cmdpush() calls made for the unclosed braces */
        while(bct-- >= in_brace_param)
            cmdpop();
        zerr("closing brace expected");
    } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
               peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
        /* hack to get {foo} command syntax work */
        bptr--;
        len--;
        lexstop = 0;
        hungetc('}');
    }
    *bptr = '\0';
    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
    return peek;
}


/*
 * Parse input as if in double quotes.
 * endchar is the end character to expect.
 * sub has got something to do with whether we are doing quoted substitution.
* Return non-zero for error (character to unget), else zero.
 *
 * Characters are read with hgetc() and their tokenized form is appended
 * to the token buffer with add().  The loop only stops at endchar when
 * no ${...} is open (bct == 0), we are not inside backquotes (intick == 0)
 * and, for math (endchar ')' or ']'), no '(' or '[' is still open.
 */

/**/
static int
dquote_parse(char endchar, int sub)
{
    /*
     * pct, brct: count of '(' and '[' seen and not yet closed.
     * bct:       count of "${" opened here and not yet closed; one
     *            CS_BRACEPAR is on the command stack for each.
     * intick:    0 outside backquotes, 1 inside backquotes, 2 inside a
     *            single-quoted section within backquotes (STOPHIST is in
     *            effect while it is 2).
     * err:       error indicator / return value.
     */
    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
    int c;
    /* Math mode is selected purely by the terminator we were given. */
    int math = endchar == ')' || endchar == ']';
    /*
     * NOTE(review): compares the ZLE cursor with the current input
     * position; presumably "the cursor is at or after the start of this
     * math expression" -- confirm against the completion code.
     */
    int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;

    while (((c = hgetc()) != endchar || bct ||
            (math && ((pct > 0) || (brct > 0))) ||
            intick) && !lexstop) {
      cont:
        /* Also entered via "goto cont" to re-dispatch a character. */
        switch (c) {
        case '\\':
            c = hgetc();
            if (c != '\n') {
                /*
                 * Characters that are special in this context get a Bnull
                 * in front and are then added literally at the bottom of
                 * the loop.
                 */
                if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
                    c == endchar || c == '`' ||
                    (endchar == ']' && (c == '[' || c == ']' ||
                                        c == '(' || c == ')' ||
                                        c == '{' || c == '}' ||
                                        (c == '"' && sub))))
                    add(Bnull);
                else {
                    /* lexstop is implicitly handled here */
                    /*
                     * Keep the backslash itself, then treat the following
                     * character as if it had appeared unescaped.
                     */
                    add('\\');
                    goto cont;
                }
            } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
                /* Backslash-newline: drop both and read the next char. */
                continue;
            /*
             * Remaining backslash-newline case (CSHJUNKIEQUOTES set, not
             * sub, inside "..."): only the newline is added below.
             */
            break;
        case '\n':
            /* A bare newline is an error only in the same csh-quotes case. */
            err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
            break;
        case '$':
            /* Inside backquotes '$' is copied through unchanged. */
            if (intick)
                break;
            c = hgetc();
            if (c == '(') {
                /* "$(": hand over to cmd_or_math_sub(), then emit Outpar. */
                add(Qstring);
                err = cmd_or_math_sub();
                c = Outpar;
            } else if (c == '[') {
                /* "$[": recurse to parse up to the matching ']'. */
                add(String);
                add(Inbrack);
                cmdpush(CS_MATHSUBST);
                err = dquote_parse(']', sub);
                cmdpop();
                c = Outbrack;
            } else if (c == '{') {
                /* "${": remember the open brace; closed in case '}'. */
                add(Qstring);
                c = Inbrace;
                cmdpush(CS_BRACEPAR);
                bct++;
            } else if (c == '$')
                /* "$$": Qstring followed by a literal '$'. */
                add(Qstring);
            else {
                /*
                 * Any other character: push it back and emit Qstring for
                 * the '$' itself.  lexstop is cleared because the
                 * character has been returned to the input.
                 */
                hungetc(c);
                lexstop = 0;
                c = Qstring;
            }
            break;
        case '}':
            /* Only closes a "${" opened above, and never inside backquotes. */
            if (intick || !bct)
                break;
            c = Outbrace;
            bct--;
            cmdpop();
            break;
        case '`':
            c = Qtick;
            /* Leaving from the single-quoted state: undo its STOPHIST. */
            if (intick == 2)
                ALLOWHIST
            /* Toggle: any non-zero state becomes 0, 0 becomes 1. */
            if ((intick = !intick)) {
                SETPARBEGIN
                cmdpush(CS_BQUOTE);
            } else {
                SETPAREND
                cmdpop();
            }
            break;
        case '\'':
            /* Single quotes only matter inside backquotes. */
            if (!intick)
                break;
            if (intick == 1)
                intick = 2, STOPHIST
            else
                intick = 1, ALLOWHIST
            break;
        /*
         * Parentheses and brackets are counted except in math mode while
         * a ${...} is open.  In math mode a closer with nothing open is
         * an error; the counter is decremented regardless.
         */
        case '(':
            if (!math || !bct)
                pct++;
            break;
        case ')':
            if (!math || !bct)
                err = (!pct-- && math);
            break;
        case '[':
            if (!math || !bct)
                brct++;
            break;
        case ']':
            if (!math || !bct)
                err = (!brct-- && math);
            break;
        case '"':
            /* Literal inside backquotes, or when not in "..." or ${...}. */
            if (intick || (endchar != '"' && !bct))
                break;
            if (bct) {
                /* Nested "..." inside ${...}: parse it recursively. */
                add(Dnull);
                cmdpush(CS_DQUOTE);
                err = dquote_parse('"', sub);
                cmdpop();
                c = Dnull;
            } else
                err = 1;
            break;
        }
        /* On error or end of input the current character is not added. */
        if (err || lexstop)
            break;
        add(c);
    }
    /* Rebalance history state and the command stack for anything left open. */
    if (intick == 2)
        ALLOWHIST
    if (intick) {
        cmdpop();
    }
    while (bct--)
        cmdpop();
    if (lexstop)
        /*
         * Input ran out: an error if we were inside backquotes or were
         * waiting for a non-NUL terminator; otherwise keep err as it is.
         */
        err = intick || endchar || err;
    else if (err == 1) {
        /*
         * TODO: as far as I can see, this hack is used in gettokstr()
         * to hungetc() a character on an error.  However, I don't
         * understand what that actually gets us, and we can't guarantee
         * it's a character anyway, because of the previous test.
         *
         * We use the same feature in cmd_or_math where we actually do
         * need to unget if we decide it's really a command substitution.
         * We try to handle the other case by testing for lexstop.
         */
        err = c;
    }
    /* NOTE(review): cursor appears to lie within the math text just parsed. */
    if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
        inwhat = IN_MATH;
    return err;
}

/* Tokenize a string given in s. Parsing is done as in double *
 * quotes.  This is usually called before singsub().
*/ 1615 1616/**/ 1617mod_export int 1618parsestr(char *s) 1619{ 1620 int err; 1621 1622 if ((err = parsestrnoerr(s))) { 1623 untokenize(s); 1624 if (err > 32 && err < 127) 1625 zerr("parse error near `%c'", err); 1626 else 1627 zerr("parse error"); 1628 } 1629 return err; 1630} 1631 1632/**/ 1633mod_export int 1634parsestrnoerr(char *s) 1635{ 1636 int l = strlen(s), err; 1637 1638 lexsave(); 1639 untokenize(s); 1640 inpush(dupstring(s), 0, NULL); 1641 strinbeg(0); 1642 len = 0; 1643 bptr = tokstr = s; 1644 bsiz = l + 1; 1645 err = dquote_parse('\0', 1); 1646 *bptr = '\0'; 1647 strinend(); 1648 inpop(); 1649 DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty."); 1650 lexrestore(); 1651 return err; 1652} 1653 1654/* 1655 * Parse a subscript in string s. 1656 * sub is passed down to dquote_parse(). 1657 * endchar is the final character. 1658 * Return the next character, or NULL. 1659 */ 1660/**/ 1661mod_export char * 1662parse_subscript(char *s, int sub, int endchar) 1663{ 1664 int l = strlen(s), err; 1665 char *t; 1666 1667 if (!*s || *s == endchar) 1668 return 0; 1669 lexsave(); 1670 untokenize(t = dupstring(s)); 1671 inpush(t, 0, NULL); 1672 strinbeg(0); 1673 len = 0; 1674 bptr = tokstr = s; 1675 bsiz = l + 1; 1676 err = dquote_parse(endchar, sub); 1677 if (err) { 1678 err = *bptr; 1679 *bptr = '\0'; 1680 untokenize(s); 1681 *bptr = err; 1682 s = NULL; 1683 } else { 1684 s = bptr; 1685 } 1686 strinend(); 1687 inpop(); 1688 DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); 1689 lexrestore(); 1690 return s; 1691} 1692 1693/* Tokenize a string given in s. Parsing is done as if s were a normal * 1694 * command-line argument but it may contain separators. This is used * 1695 * to parse the right-hand side of ${...%...} substitutions. 
*/ 1696 1697/**/ 1698mod_export int 1699parse_subst_string(char *s) 1700{ 1701 int c, l = strlen(s), err; 1702 char *ptr; 1703 enum lextok ctok; 1704 1705 if (!*s || !strcmp(s, nulstring)) 1706 return 0; 1707 lexsave(); 1708 untokenize(s); 1709 inpush(dupstring(s), 0, NULL); 1710 strinbeg(0); 1711 len = 0; 1712 bptr = tokstr = s; 1713 bsiz = l + 1; 1714 c = hgetc(); 1715 ctok = gettokstr(c, 1); 1716 err = errflag; 1717 strinend(); 1718 inpop(); 1719 DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty."); 1720 lexrestore(); 1721 errflag = err; 1722 if (ctok == LEXERR) { 1723 untokenize(s); 1724 return 1; 1725 } 1726#ifdef DEBUG 1727 /* 1728 * Historical note: we used to check here for olen (the value of len 1729 * before lexrestore()) == l, but that's not necessarily the case if 1730 * we stripped an RCQUOTE. 1731 */ 1732 if (ctok != STRING || (errflag && !noerrs)) { 1733 fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n", 1734 errflag ? "errflag" : "ctok != STRING"); 1735 fflush(stderr); 1736 untokenize(s); 1737 return 1; 1738 } 1739#endif 1740 /* Check for $'...' quoting. This needs special handling. */ 1741 for (ptr = s; *ptr; ) 1742 { 1743 if (*ptr == String && ptr[1] == Snull) 1744 { 1745 char *t; 1746 int len, tlen, diff; 1747 t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL); 1748 len += 2; 1749 tlen = strlen(t); 1750 diff = len - tlen; 1751 /* 1752 * Yuk. 1753 * parse_subst_string() currently handles strings in-place. 1754 * That's not so easy to fix without knowing whether 1755 * additional memory should come off the heap or 1756 * otherwise. So we cheat by copying the unquoted string 1757 * into place, unless it's too long. That's not the 1758 * normal case, but I'm worried there are pathological 1759 * cases with converting metafied multibyte strings. 1760 * If someone can prove there aren't I will be very happy. 1761 */ 1762 if (diff < 0) { 1763 DPUTS(1, "$'...' 
subst too long: fix get_parse_string()"); 1764 return 1; 1765 } 1766 memcpy(ptr, t, tlen); 1767 ptr += tlen; 1768 if (diff > 0) { 1769 char *dptr = ptr; 1770 char *sptr = ptr + diff; 1771 while ((*dptr++ = *sptr++)) 1772 ; 1773 } 1774 } else 1775 ptr++; 1776 } 1777 return 0; 1778} 1779 1780/* Called below to report word positions. */ 1781 1782/**/ 1783mod_export void 1784gotword(void) 1785{ 1786 we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0); 1787 if (zlemetacs <= we) { 1788 wb = zlemetall - wordbeg + addedx; 1789 lexflags = 0; 1790 } 1791} 1792 1793/* expand aliases and reserved words */ 1794 1795/**/ 1796int 1797exalias(void) 1798{ 1799 Alias an; 1800 Reswd rw; 1801 1802 hwend(); 1803 if (interact && isset(SHINSTDIN) && !strin && !incasepat && 1804 tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) && 1805 (isset(CORRECTALL) || (isset(CORRECT) && incmdpos))) 1806 spckword(&tokstr, 1, incmdpos, 1); 1807 1808 if (!tokstr) { 1809 zshlextext = tokstrings[tok]; 1810 1811 return 0; 1812 } else { 1813 VARARR(char, copy, (strlen(tokstr) + 1)); 1814 1815 if (has_token(tokstr)) { 1816 char *p, *t; 1817 1818 zshlextext = p = copy; 1819 for (t = tokstr; 1820 (*p++ = itok(*t) ? 
ztokens[*t++ - Pound] : *t++);); 1821 } else 1822 zshlextext = tokstr; 1823 1824 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { 1825 int zp = lexflags; 1826 1827 gotword(); 1828 if ((zp & LEXFLAGS_ZLE) && !lexflags) { 1829 if (zshlextext == copy) 1830 zshlextext = tokstr; 1831 return 0; 1832 } 1833 } 1834 1835 if (tok == STRING) { 1836 /* Check for an alias */ 1837 if (!noaliases && isset(ALIASESOPT) && 1838 (!isset(POSIXALIASES) || 1839 !reswdtab->getnode(reswdtab, zshlextext))) { 1840 char *suf; 1841 1842 an = (Alias) aliastab->getnode(aliastab, zshlextext); 1843 if (an && !an->inuse && 1844 ((an->node.flags & ALIAS_GLOBAL) || incmdpos || inalmore)) { 1845 inpush(an->text, INP_ALIAS, an); 1846 if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL)) 1847 aliasspaceflag = 1; 1848 lexstop = 0; 1849 if (zshlextext == copy) 1850 zshlextext = tokstr; 1851 return 1; 1852 } 1853 if ((suf = strrchr(zshlextext, '.')) && suf[1] && 1854 suf > zshlextext && suf[-1] != Meta && 1855 (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) && 1856 !an->inuse && incmdpos) { 1857 inpush(dupstring(zshlextext), INP_ALIAS, NULL); 1858 inpush(" ", INP_ALIAS, NULL); 1859 inpush(an->text, INP_ALIAS, an); 1860 lexstop = 0; 1861 if (zshlextext == copy) 1862 zshlextext = tokstr; 1863 return 1; 1864 } 1865 } 1866 1867 /* Then check for a reserved word */ 1868 if ((incmdpos || 1869 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) && 1870 zshlextext[0] == '}' && !zshlextext[1])) && 1871 (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) { 1872 tok = rw->token; 1873 if (tok == DINBRACK) 1874 incond = 1; 1875 } else if (incond && !strcmp(zshlextext, "]]")) { 1876 tok = DOUTBRACK; 1877 incond = 0; 1878 } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1]) 1879 tok = BANG; 1880 } 1881 inalmore = 0; 1882 if (zshlextext == copy) 1883 zshlextext = tokstr; 1884 } 1885 return 0; 1886} 1887 1888/* skip (...) 
*/

/*
 * skipcomm(): add Inpar and then the raw text of a parenthesised
 * command to the token buffer, up to (but not including) the ')' that
 * balances the opening one.  Quoted sections, backslash escapes and
 * '#' comments at the start of a word are copied without counting the
 * parentheses inside them.  Stops early on a token character or when
 * lexstop is set.  Returns lexstop.
 */

/**/
static int
skipcomm(void)
{
    int depth = 1;      /* '(' still waiting for their ')' */
    int at_start = 1;   /* previous character was blank, or none yet */
    int c = Inpar;

    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    for (;;) {
        int blank;

        add(c);
        c = hgetc();
        if (itok(c) || lexstop)
            break;
        blank = inblank(c);
        switch (c) {
        case '(':
            depth++;
            break;
        case ')':
            depth--;
            break;
        case '\\':
            /* Keep the backslash; the next char is added on the next pass. */
            add(c);
            c = hgetc();
            break;
        case '\'': {
            /* $'...' also lets a backslash protect the next character. */
            int strquote = bptr[-1] == '$';

            add(c);
            STOPHIST
            while ((c = hgetc()) != '\'' && !lexstop) {
                if (c == '\\' && strquote) {
                    add(c);
                    c = hgetc();
                }
                add(c);
            }
            ALLOWHIST
            break;
        }
        case '\"':
        case '`': {
            /* Both are copied up to the matching closing delimiter. */
            int delim = c;

            add(c);
            while ((c = hgetc()) != delim && !lexstop) {
                add(c);
                if (c == '\\')
                    add(hgetc());
            }
            break;
        }
        case '#':
            /* A comment only starts at the beginning of a word. */
            if (at_start) {
                add(c);
                while ((c = hgetc()) != '\n' && !lexstop)
                    add(c);
                blank = 1;
            }
            break;
        }
        at_start = blank;
        if (!depth)
            break;
    }
    if (!lexstop)
        SETPAREND
    cmdpop();
    return lexstop;
}