1/*
2 * lex.c - lexical analysis
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "zsh.mdh"
31#include "lex.pro"
32
33/* tokens */
34
35/**/
36mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";
37
38/* parts of the current token */
39
40/**/
41char *zshlextext;
42/**/
43mod_export char *tokstr;
44/**/
45mod_export enum lextok tok;
46/**/
47mod_export int tokfd;
48
49/*
50 * Line number at which the first character of a token was found.
51 * We always set this in gettok(), which is always called from
52 * zshlex() unless we have reached an error.  So it is always
53 * valid when parsing.  It is not useful during execution
54 * of the parsed structure.
55 */
56
57/**/
58zlong toklineno;
59
60/* lexical analyzer error flag */
61
62/**/
63mod_export int lexstop;
64
65/* if != 0, this is the first line of the command */
66
67/**/
68mod_export int isfirstln;
69
70/* if != 0, this is the first char of the command (not including white space) */
71
72/**/
73int isfirstch;
74
75/* flag that an alias should be expanded after expansion ending in space */
76
77/**/
78int inalmore;
79
80/*
81 * Don't do spelling correction.
82 * Bit 1 is only valid for the current word.  It's
83 * set when we detect a lookahead that stops the word from
84 * needing correction.
85 */
86
87/**/
88int nocorrect;
89
90/*
91 * Cursor position and line length in zle when the line is
92 * metafied for access from the main shell.
93 */
94
95/**/
96mod_export int zlemetacs, zlemetall;
97
98/* inwhat says what exactly we are in     *
99 * (its value is one of the IN_* things). */
100
101/**/
102mod_export int inwhat;
103
104/* 1 if x added to complete in a blank between words */
105
106/**/
107mod_export int addedx;
108
109/* wb and we hold the beginning/end position of the word we are completing. */
110
111/**/
112mod_export int wb, we;
113
114/* 1 if aliases should not be expanded */
115
116/**/
117mod_export int noaliases;
118
119/*
120 * If non-zero, we are parsing a line sent to use by the editor, or some
121 * other string that's not part of standard command input (e.g. eval is
122 * part of normal command input).
123 *
124 * Set of bits from LEXFLAGS_*.
125 *
126 * Note that although it is passed into the lexer as an input, the
127 * lexer can set it to zero after finding the word it's searching for.
128 * This only happens if the line being parsed actually does come from
129 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
130 */
131
132/**/
133mod_export int lexflags;
134
135/**/
136mod_export int wordbeg;
137
138/**/
139mod_export int parbegin;
140
141/**/
142mod_export int parend;
143
144/* don't recognize comments */
145
146/**/
147mod_export int nocomments;
148
149/* text of punctuation tokens */
150
151/**/
152mod_export char *tokstrings[WHILE + 1] = {
153    NULL,	/* NULLTOK	  0  */
154    ";",	/* SEPER	     */
155    "\\n",	/* NEWLIN	     */
156    ";",	/* SEMI		     */
157    ";;",	/* DSEMI	     */
158    "&",	/* AMPER	  5  */
159    "(",	/* INPAR	     */
160    ")",	/* OUTPAR	     */
161    "||",	/* DBAR		     */
162    "&&",	/* DAMPER	     */
163    ">",	/* OUTANG	  10 */
164    ">|",	/* OUTANGBANG	     */
165    ">>",	/* DOUTANG	     */
166    ">>|",	/* DOUTANGBANG	     */
167    "<",	/* INANG	     */
168    "<>",	/* INOUTANG	  15 */
169    "<<",	/* DINANG	     */
170    "<<-",	/* DINANGDASH	     */
171    "<&",	/* INANGAMP	     */
172    ">&",	/* OUTANGAMP	     */
173    "&>",	/* AMPOUTANG	  20 */
174    "&>|",	/* OUTANGAMPBANG     */
175    ">>&",	/* DOUTANGAMP	     */
176    ">>&|",	/* DOUTANGAMPBANG    */
177    "<<<",	/* TRINANG	     */
178    "|",	/* BAR		  25 */
179    "|&",	/* BARAMP	     */
180    "()",	/* INOUTPAR	     */
181    "((",	/* DINPAR	     */
182    "))",	/* DOUTPAR	     */
183    "&|",	/* AMPERBANG	  30 */
184    ";&",	/* SEMIAMP	     */
185    ";|",	/* SEMIBAR	     */
186};
187
188/* lexical state */
189
190static int dbparens;
191static int len = 0, bsiz = 256;
192static char *bptr;
193
194struct lexstack {
195    struct lexstack *next;
196
197    int incmdpos;
198    int incond;
199    int incasepat;
200    int dbparens;
201    int isfirstln;
202    int isfirstch;
203    int histactive;
204    int histdone;
205    int lexflags;
206    int stophist;
207    int hlinesz;
208    char *hline;
209    char *hptr;
210    enum lextok tok;
211    int isnewlin;
212    char *tokstr;
213    char *zshlextext;
214    char *bptr;
215    int bsiz;
216    int len;
217    short *chwords;
218    int chwordlen;
219    int chwordpos;
220    int hwgetword;
221    int lexstop;
222    struct heredocs *hdocs;
223    int (*hgetc) _((void));
224    void (*hungetc) _((int));
225    void (*hwaddc) _((int));
226    void (*hwbegin) _((int));
227    void (*hwend) _((void));
228    void (*addtoline) _((int));
229
230    int eclen, ecused, ecnpats;
231    Wordcode ecbuf;
232    Eccstr ecstrs;
233    int ecsoffs, ecssub, ecnfunc;
234
235    unsigned char *cstack;
236    int csp;
237    zlong toklineno;
238};
239
240static struct lexstack *lstack = NULL;
241
242/* save the lexical state */
243
244/* is this a hack or what? */
245
246/**/
247mod_export void
248lexsave(void)
249{
250    struct lexstack *ls;
251
252    ls = (struct lexstack *)malloc(sizeof(struct lexstack));
253
254    ls->incmdpos = incmdpos;
255    ls->incond = incond;
256    ls->incasepat = incasepat;
257    ls->dbparens = dbparens;
258    ls->isfirstln = isfirstln;
259    ls->isfirstch = isfirstch;
260    ls->histactive = histactive;
261    ls->histdone = histdone;
262    ls->lexflags = lexflags;
263    ls->stophist = stophist;
264    stophist = 0;
265    if (!lstack) {
266	/* top level, make this version visible to ZLE */
267	zle_chline = chline;
268	/* ensure line stored is NULL-terminated */
269	if (hptr)
270	    *hptr = '\0';
271    }
272    ls->hline = chline;
273    chline = NULL;
274    ls->hptr = hptr;
275    hptr = NULL;
276    ls->hlinesz = hlinesz;
277    ls->cstack = cmdstack;
278    ls->csp = cmdsp;
279    cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
280    ls->tok = tok;
281    ls->isnewlin = isnewlin;
282    ls->tokstr = tokstr;
283    ls->zshlextext = zshlextext;
284    ls->bptr = bptr;
285    tokstr = zshlextext = bptr = NULL;
286    ls->bsiz = bsiz;
287    bsiz = 256;
288    ls->len = len;
289    ls->chwords = chwords;
290    ls->chwordlen = chwordlen;
291    ls->chwordpos = chwordpos;
292    ls->hwgetword = hwgetword;
293    ls->lexstop = lexstop;
294    ls->hdocs = hdocs;
295    ls->hgetc = hgetc;
296    ls->hungetc = hungetc;
297    ls->hwaddc = hwaddc;
298    ls->hwbegin = hwbegin;
299    ls->hwend = hwend;
300    ls->addtoline = addtoline;
301    ls->eclen = eclen;
302    ls->ecused = ecused;
303    ls->ecnpats = ecnpats;
304    ls->ecbuf = ecbuf;
305    ls->ecstrs = ecstrs;
306    ls->ecsoffs = ecsoffs;
307    ls->ecssub = ecssub;
308    ls->ecnfunc = ecnfunc;
309    ls->toklineno = toklineno;
310    cmdsp = 0;
311    inredir = 0;
312    hdocs = NULL;
313    histactive = 0;
314    ecbuf = NULL;
315
316    ls->next = lstack;
317    lstack = ls;
318}
319
320/* restore lexical state */
321
322/**/
323mod_export void
324lexrestore(void)
325{
326    struct lexstack *ln;
327
328    DPUTS(!lstack, "BUG: lexrestore() without lexsave()");
329    incmdpos = lstack->incmdpos;
330    incond = lstack->incond;
331    incasepat = lstack->incasepat;
332    dbparens = lstack->dbparens;
333    isfirstln = lstack->isfirstln;
334    isfirstch = lstack->isfirstch;
335    histactive = lstack->histactive;
336    histdone = lstack->histdone;
337    lexflags = lstack->lexflags;
338    stophist = lstack->stophist;
339    chline = lstack->hline;
340    hptr = lstack->hptr;
341    if (cmdstack)
342	free(cmdstack);
343    cmdstack = lstack->cstack;
344    cmdsp = lstack->csp;
345    tok = lstack->tok;
346    isnewlin = lstack->isnewlin;
347    tokstr = lstack->tokstr;
348    zshlextext = lstack->zshlextext;
349    bptr = lstack->bptr;
350    bsiz = lstack->bsiz;
351    len = lstack->len;
352    chwords = lstack->chwords;
353    chwordlen = lstack->chwordlen;
354    chwordpos = lstack->chwordpos;
355    hwgetword = lstack->hwgetword;
356    lexstop = lstack->lexstop;
357    hdocs = lstack->hdocs;
358    hgetc = lstack->hgetc;
359    hungetc = lstack->hungetc;
360    hwaddc = lstack->hwaddc;
361    hwbegin = lstack->hwbegin;
362    hwend = lstack->hwend;
363    addtoline = lstack->addtoline;
364    if (ecbuf)
365	zfree(ecbuf, eclen);
366    eclen = lstack->eclen;
367    ecused = lstack->ecused;
368    ecnpats = lstack->ecnpats;
369    ecbuf = lstack->ecbuf;
370    ecstrs = lstack->ecstrs;
371    ecsoffs = lstack->ecsoffs;
372    ecssub = lstack->ecssub;
373    ecnfunc = lstack->ecnfunc;
374    hlinesz = lstack->hlinesz;
375    toklineno = lstack->toklineno;
376    errflag = 0;
377
378    ln = lstack->next;
379    if (!ln) {
380	/* Back to top level: don't need special ZLE value */
381	DPUTS(chline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
382	zle_chline = NULL;
383    }
384    free(lstack);
385    lstack = ln;
386}
387
388/**/
389void
390zshlex(void)
391{
392    if (tok == LEXERR)
393	return;
394    do
395	tok = gettok();
396    while (tok != ENDINPUT && exalias());
397    nocorrect &= 1;
398    if (tok == NEWLIN || tok == ENDINPUT) {
399	while (hdocs) {
400	    struct heredocs *next = hdocs->next;
401	    char *doc, *munged_term;
402
403	    hwbegin(0);
404	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
405	    munged_term = dupstring(hdocs->str);
406	    STOPHIST
407	    doc = gethere(&munged_term, hdocs->type);
408	    ALLOWHIST
409	    cmdpop();
410	    hwend();
411	    if (!doc) {
412		zerr("here document too large");
413		while (hdocs) {
414		    next = hdocs->next;
415		    zfree(hdocs, sizeof(struct heredocs));
416		    hdocs = next;
417		}
418		tok = LEXERR;
419		break;
420	    }
421	    setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
422		       munged_term);
423	    zfree(hdocs, sizeof(struct heredocs));
424	    hdocs = next;
425	}
426    }
427    if (tok != NEWLIN)
428	isnewlin = 0;
429    else
430	isnewlin = (inbufct) ? -1 : 1;
431    if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
432	tok = SEPER;
433}
434
435/**/
436mod_export void
437ctxtlex(void)
438{
439    static int oldpos;
440
441    zshlex();
442    switch (tok) {
443    case SEPER:
444    case NEWLIN:
445    case SEMI:
446    case DSEMI:
447    case SEMIAMP:
448    case SEMIBAR:
449    case AMPER:
450    case AMPERBANG:
451    case INPAR:
452    case INBRACE:
453    case DBAR:
454    case DAMPER:
455    case BAR:
456    case BARAMP:
457    case INOUTPAR:
458    case DOLOOP:
459    case THEN:
460    case ELIF:
461    case ELSE:
462    case DOUTBRACK:
463	incmdpos = 1;
464	break;
465    case STRING:
466 /* case ENVSTRING: */
467    case ENVARRAY:
468    case OUTPAR:
469    case CASE:
470    case DINBRACK:
471	incmdpos = 0;
472	break;
473
474    default:
475	/* nothing to do, keep compiler happy */
476	break;
477    }
478    if (tok != DINPAR)
479	infor = tok == FOR ? 2 : 0;
480    if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
481	inredir = 1;
482	oldpos = incmdpos;
483	incmdpos = 0;
484    } else if (inredir) {
485	incmdpos = oldpos;
486	inredir = 0;
487    }
488}
489
/*
 * Action codes stored in lexact1[] for a character in first position
 * of a word.  Except for LX1_OTHER, which is the default, each value
 * is the position of the corresponding character in the string lx1 in
 * initlextabs().  gettok() has no case for LX1_COMMENT; it tests for
 * the comment character before looking at lexact1[].
 */
enum {
    LX1_BKSLASH = 0,
    LX1_COMMENT = 1,
    LX1_NEWLIN = 2,
    LX1_SEMI = 3,
    LX1_AMPER = 5,
    LX1_BAR = 6,
    LX1_INPAR = 7,
    LX1_OUTPAR = 8,
    LX1_INANG = 13,
    LX1_OUTANG = 14,
    LX1_OTHER = 15
};

/*
 * Action codes stored in lexact2[] for a character inside a word.
 * Values 0 to 17 are the positions of the corresponding characters in
 * the string lx2 in initlextabs(); LX2_OTHER is the default and
 * LX2_META is assigned to the Meta character.
 */
enum {
    LX2_BREAK = 0,
    LX2_OUTPAR = 1,
    LX2_BAR = 2,
    LX2_STRING = 3,
    LX2_INBRACK = 4,
    LX2_OUTBRACK = 5,
    LX2_TILDE = 6,
    LX2_INPAR = 7,
    LX2_INBRACE = 8,
    LX2_OUTBRACE = 9,
    LX2_OUTANG = 10,
    LX2_INANG = 11,
    LX2_EQUALS = 12,
    LX2_BKSLASH = 13,
    LX2_QUOTE = 14,
    LX2_DQUOTE = 15,
    LX2_BQUOTE = 16,
    LX2_COMMA = 17,
    LX2_OTHER = 18,
    LX2_META = 19
};

/*
 * Per-character lookup tables filled in by initlextabs():
 * lexact1 and lexact2 hold the action codes above, lextok2 holds the
 * character to store for an input character inside a word.
 */
static unsigned char lexact1[256];
static unsigned char lexact2[256];
static unsigned char lextok2[256];
524
525/**/
526void
527initlextabs(void)
528{
529    int t0;
530    static char *lx1 = "\\q\n;!&|(){}[]<>";
531    static char *lx2 = ";)|$[]~({}><=\\\'\"`,";
532
533    for (t0 = 0; t0 != 256; t0++) {
534	lexact1[t0] = LX1_OTHER;
535	lexact2[t0] = LX2_OTHER;
536	lextok2[t0] = t0;
537    }
538    for (t0 = 0; lx1[t0]; t0++)
539	lexact1[(int)lx1[t0]] = t0;
540    for (t0 = 0; lx2[t0]; t0++)
541	lexact2[(int)lx2[t0]] = t0;
542    lexact2['&'] = LX2_BREAK;
543    lexact2[STOUC(Meta)] = LX2_META;
544    lextok2['*'] = Star;
545    lextok2['?'] = Quest;
546    lextok2['{'] = Inbrace;
547    lextok2['['] = Inbrack;
548    lextok2['$'] = String;
549    lextok2['~'] = Tilde;
550    lextok2['#'] = Pound;
551    lextok2['^'] = Hat;
552}
553
554/* initialize lexical state */
555
556/**/
557void
558lexinit(void)
559{
560    incond = incasepat = nocorrect =
561    infor = dbparens = lexstop = 0;
562    incmdpos = 1;
563    tok = ENDINPUT;
564}
565
566/* add a char to the string buffer */
567
568/**/
569void
570add(int c)
571{
572    *bptr++ = c;
573    if (bsiz == ++len) {
574	int newbsiz = bsiz * 2;
575
576	if (newbsiz > inbufct && inbufct > bsiz)
577	    newbsiz = inbufct;
578
579	bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz));
580	/* len == bsiz, so bptr is at the start of newly allocated memory */
581	memset(bptr, 0, newbsiz - bsiz);
582	bsiz = newbsiz;
583    }
584}
585
/*
 * SETPARBEGIN and SETPAREND record the current value of inbufct in
 * parbegin and parend.  Both act only when a ZLE line is being lexed
 * (LEXFLAGS_ZLE set in lexflags) and INP_ALIAS is not set in
 * inbufflags.
 *
 * SETPARBEGIN sets parbegin if zlemetacs >= zlemetall + 1 - inbufct.
 *
 * SETPAREND acts only while parbegin is set and parend is still -1:
 * if zlemetacs >= zlemetall + 1 - inbufct it resets parbegin to -1,
 * otherwise it sets parend.
 *
 * Both expand to a brace block and are written without a trailing
 * semicolon where they are used.
 */
#define SETPARBEGIN {							\
	if (lexflags & LEXFLAGS_ZLE) {					\
	    if (!(inbufflags & INP_ALIAS) &&				\
		zlemetacs >= zlemetall + 1 - inbufct)			\
		parbegin = inbufct;					\
	}								\
    }
#define SETPAREND {							\
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {	\
	    if (parbegin != -1 && parend == -1) {			\
		if (zlemetacs < zlemetall + 1 - inbufct)		\
		    parend = inbufct;					\
		else							\
		    parbegin = -1;					\
	    }								\
	}								\
    }
600
601/*
602 * Return 1 for math, 0 for a command, 2 for an error.  If it couldn't be
603 * parsed as math, but there was no gross error, it's a command.
604 */
605
606static int
607cmd_or_math(int cs_type)
608{
609    int oldlen = len;
610    int c;
611
612    cmdpush(cs_type);
613    c = dquote_parse(')', 0);
614    cmdpop();
615    *bptr = '\0';
616    if (!c) {
617	/* Successfully parsed, see if it was math */
618	c = hgetc();
619	if (c == ')')
620	    return 1; /* yes */
621	hungetc(c);
622	lexstop = 0;
623	c = ')';
624    } else if (lexstop) {
625	/* we haven't got anything to unget */
626	return 2;
627    }
628    /* else unsuccessful: unget the whole thing */
629    hungetc(c);
630    lexstop = 0;
631    while (len > oldlen) {
632	len--;
633	hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr);
634    }
635    hungetc('(');
636    return 0;
637}
638
639
640/*
641 * Parse either a $(( ... )) or a $(...)
642 * Return 0 on success, 1 on failure.
643 */
644static int
645cmd_or_math_sub(void)
646{
647    int c = hgetc(), ret;
648
649    if (c == '(') {
650	add(Inpar);
651	add('(');
652	if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) {
653	    add(')');
654	    return 0;
655	}
656	if (ret == 2)
657	    return 1;
658	bptr -= 2;
659	len -= 2;
660    } else {
661	hungetc(c);
662	lexstop = 0;
663    }
664    return skipcomm();
665}
666
667/* Check whether we're looking at valid numeric globbing syntax      *
668 * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
669 * Leaves the input in the same place, returning 0 or 1.             */
670
671/**/
672static int
673isnumglob(void)
674{
675    int c, ec = '-', ret = 0;
676    int tbs = 256, n = 0;
677    char *tbuf = (char *)zalloc(tbs);
678
679    while(1) {
680	c = hgetc();
681	if(lexstop) {
682	    lexstop = 0;
683	    break;
684	}
685	tbuf[n++] = c;
686	if(!idigit(c)) {
687	    if(c != ec)
688		break;
689	    if(ec == '>') {
690		ret = 1;
691		break;
692	    }
693	    ec = '>';
694	}
695	if(n == tbs)
696	    tbuf = (char *)realloc(tbuf, tbs *= 2);
697    }
698    while(n--)
699	hungetc(tbuf[n]);
700    zfree(tbuf, tbs);
701    return ret;
702}
703
704/**/
705static enum lextok
706gettok(void)
707{
708    int c, d;
709    int peekfd = -1;
710    enum lextok peek;
711
712  beginning:
713    tokstr = NULL;
714    while (iblank(c = hgetc()) && !lexstop);
715    toklineno = lineno;
716    if (lexstop)
717	return (errflag) ? LEXERR : ENDINPUT;
718    isfirstln = 0;
719    wordbeg = inbufct - (qbang && c == bangchar);
720    hwbegin(-1-(qbang && c == bangchar));
721    /* word includes the last character read and possibly \ before ! */
722    if (dbparens) {
723	len = 0;
724	bptr = tokstr = (char *) hcalloc(bsiz = 32);
725	hungetc(c);
726	cmdpush(CS_MATH);
727	c = dquote_parse(infor ? ';' : ')', 0);
728	cmdpop();
729	*bptr = '\0';
730	if (!c && infor) {
731	    infor--;
732	    return DINPAR;
733	}
734	if (c || (c = hgetc()) != ')') {
735	    hungetc(c);
736	    return LEXERR;
737	}
738	dbparens = 0;
739	return DOUTPAR;
740    } else if (idigit(c)) {	/* handle 1< foo */
741	d = hgetc();
742	if(d == '&') {
743	    d = hgetc();
744	    if(d == '>') {
745		peekfd = c - '0';
746		hungetc('>');
747		c = '&';
748	    } else {
749		hungetc(d);
750		lexstop = 0;
751		hungetc('&');
752	    }
753	} else if (d == '>' || d == '<') {
754	    peekfd = c - '0';
755	    c = d;
756	} else {
757	    hungetc(d);
758	    lexstop = 0;
759	}
760    }
761
762    /* chars in initial position in word */
763
764    /*
765     * Handle comments.  There are some special cases when this
766     * is not normal command input: lexflags implies we are examining
767     * a line lexically without it being used for normal command input.
768     */
769    if (c == hashchar && !nocomments &&
770	(isset(INTERACTIVECOMMENTS) ||
771	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
772	  (!interact || unset(SHINSTDIN) || strin)))) {
773	/* History is handled here to prevent extra  *
774	 * newlines being inserted into the history. */
775
776	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
777	    len = 0;
778	    bptr = tokstr = (char *)hcalloc(bsiz = 32);
779	    add(c);
780	}
781	while ((c = ingetc()) != '\n' && !lexstop) {
782	    hwaddc(c);
783	    addtoline(c);
784	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
785		add(c);
786	}
787
788	if (errflag)
789	    peek = LEXERR;
790	else {
791	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
792		*bptr = '\0';
793		if (!lexstop)
794		    hungetc(c);
795		peek = STRING;
796	    } else {
797		hwend();
798		hwbegin(0);
799		hwaddc('\n');
800		addtoline('\n');
801		/*
802		 * If splitting a line and removing comments,
803		 * we don't want a newline token since it's
804		 * treated specially.
805		 */
806		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
807		    peek = ENDINPUT;
808		else
809		    peek = NEWLIN;
810	    }
811	}
812	return peek;
813    }
814    switch (lexact1[STOUC(c)]) {
815    case LX1_BKSLASH:
816	d = hgetc();
817	if (d == '\n')
818	    goto beginning;
819	hungetc(d);
820	lexstop = 0;
821	break;
822    case LX1_NEWLIN:
823	return NEWLIN;
824    case LX1_SEMI:
825	d = hgetc();
826	if(d == ';')
827	    return DSEMI;
828	else if(d == '&')
829	    return SEMIAMP;
830	else if (d == '|')
831	    return SEMIBAR;
832	hungetc(d);
833	lexstop = 0;
834	return SEMI;
835    case LX1_AMPER:
836	d = hgetc();
837	if (d == '&')
838	    return DAMPER;
839	else if (d == '!' || d == '|')
840	    return AMPERBANG;
841	else if (d == '>') {
842	    tokfd = peekfd;
843	    d = hgetc();
844	    if (d == '!' || d == '|')
845		return OUTANGAMPBANG;
846	    else if (d == '>') {
847		d = hgetc();
848		if (d == '!' || d == '|')
849		    return DOUTANGAMPBANG;
850		hungetc(d);
851		lexstop = 0;
852		return DOUTANGAMP;
853	    }
854	    hungetc(d);
855	    lexstop = 0;
856	    return AMPOUTANG;
857	}
858	hungetc(d);
859	lexstop = 0;
860	return AMPER;
861    case LX1_BAR:
862	d = hgetc();
863	if (d == '|')
864	    return DBAR;
865	else if (d == '&')
866	    return BARAMP;
867	hungetc(d);
868	lexstop = 0;
869	return BAR;
870    case LX1_INPAR:
871	d = hgetc();
872	if (d == '(') {
873	    if (infor) {
874		dbparens = 1;
875		return DINPAR;
876	    }
877	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
878		len = 0;
879		bptr = tokstr = (char *) hcalloc(bsiz = 32);
880		switch (cmd_or_math(CS_MATH)) {
881		case 1:
882		    return DINPAR;
883
884		case 0:
885		    /*
886		     * Not math, so we don't return the contents
887		     * as a string in this case.
888		     */
889		    tokstr = NULL;
890		    return INPAR;
891
892		default:
893		    return LEXERR;
894		}
895	    }
896	} else if (d == ')')
897	    return INOUTPAR;
898	hungetc(d);
899	lexstop = 0;
900	if (!(incond == 1 || incmdpos))
901	    break;
902	return INPAR;
903    case LX1_OUTPAR:
904	return OUTPAR;
905    case LX1_INANG:
906	d = hgetc();
907	if (d == '(') {
908	    hungetc(d);
909	    lexstop = 0;
910	    unpeekfd:
911	    if(peekfd != -1) {
912		hungetc(c);
913		c = '0' + peekfd;
914	    }
915	    break;
916	}
917	if (d == '>') {
918	    peek = INOUTANG;
919	} else if (d == '<') {
920	    int e = hgetc();
921
922	    if (e == '(') {
923		hungetc(e);
924		hungetc(d);
925		peek = INANG;
926	    } else if (e == '<')
927		peek = TRINANG;
928	    else if (e == '-')
929		peek = DINANGDASH;
930	    else {
931		hungetc(e);
932		lexstop = 0;
933		peek = DINANG;
934	    }
935	} else if (d == '&') {
936	    peek = INANGAMP;
937	} else {
938	    hungetc(d);
939	    if(isnumglob())
940		goto unpeekfd;
941	    peek = INANG;
942	}
943	tokfd = peekfd;
944	return peek;
945    case LX1_OUTANG:
946	d = hgetc();
947	if (d == '(') {
948	    hungetc(d);
949	    goto unpeekfd;
950	} else if (d == '&') {
951	    d = hgetc();
952	    if (d == '!' || d == '|')
953		peek = OUTANGAMPBANG;
954	    else {
955		hungetc(d);
956		lexstop = 0;
957		peek = OUTANGAMP;
958	    }
959	} else if (d == '!' || d == '|')
960	    peek = OUTANGBANG;
961	else if (d == '>') {
962	    d = hgetc();
963	    if (d == '&') {
964		d = hgetc();
965		if (d == '!' || d == '|')
966		    peek = DOUTANGAMPBANG;
967		else {
968		    hungetc(d);
969		    lexstop = 0;
970		    peek = DOUTANGAMP;
971		}
972	    } else if (d == '!' || d == '|')
973		peek = DOUTANGBANG;
974	    else if (d == '(') {
975		hungetc(d);
976		hungetc('>');
977		peek = OUTANG;
978	    } else {
979		hungetc(d);
980		lexstop = 0;
981		peek = DOUTANG;
982		if (isset(HISTALLOWCLOBBER))
983		    hwaddc('|');
984	    }
985	} else {
986	    hungetc(d);
987	    lexstop = 0;
988	    peek = OUTANG;
989	    if (!incond && isset(HISTALLOWCLOBBER))
990		hwaddc('|');
991	}
992	tokfd = peekfd;
993	return peek;
994    }
995
996    /* we've started a string, now get the *
997     * rest of it, performing tokenization */
998    return gettokstr(c, 0);
999}
1000
1001/*
1002 * Get the remains of a token string.  This has two uses.
1003 * When called from gettok(), with sub = 0, we have already identified
1004 * any interesting initial character and want to get the rest of
1005 * what we now know is a string.  However, the string may still include
1006 * metacharacters and potentially substitutions.
1007 *
1008 * When called from parse_subst_string() with sub = 1, we are not
1009 * fully parsing a command line, merely tokenizing a string.
1010 * In this case we always add characters to the parsed string
1011 * unless there is a parse error.
1012 */
1013
1014/**/
1015static enum lextok
1016gettokstr(int c, int sub)
1017{
1018    int bct = 0, pct = 0, brct = 0, fdpar = 0;
1019    int intpos = 1, in_brace_param = 0;
1020    int inquote, unmatched = 0;
1021    enum lextok peek;
1022#ifdef DEBUG
1023    int ocmdsp = cmdsp;
1024#endif
1025
1026    peek = STRING;
1027    if (!sub) {
1028	len = 0;
1029	bptr = tokstr = (char *) hcalloc(bsiz = 32);
1030    }
1031    for (;;) {
1032	int act;
1033	int e;
1034	int inbl = inblank(c);
1035
1036	if (fdpar && !inbl && c != ')')
1037	    fdpar = 0;
1038
1039	if (inbl && !in_brace_param && !pct)
1040	    act = LX2_BREAK;
1041	else {
1042	    act = lexact2[STOUC(c)];
1043	    c = lextok2[STOUC(c)];
1044	}
1045	switch (act) {
1046	case LX2_BREAK:
1047	    if (!in_brace_param && !sub)
1048		goto brk;
1049	    break;
1050	case LX2_META:
1051	    c = hgetc();
1052#ifdef DEBUG
1053	    if (lexstop) {
1054		fputs("BUG: input terminated by Meta\n", stderr);
1055		fflush(stderr);
1056		goto brk;
1057	    }
1058#endif
1059	    add(Meta);
1060	    break;
1061	case LX2_OUTPAR:
1062	    if (fdpar) {
1063		/* this is a single word `(   )', treat as INOUTPAR */
1064		add(c);
1065		*bptr = '\0';
1066		return INOUTPAR;
1067	    }
1068	    if ((sub || in_brace_param) && isset(SHGLOB))
1069		break;
1070	    if (!in_brace_param && !pct--) {
1071		if (sub) {
1072		    pct = 0;
1073		    break;
1074		} else
1075		    goto brk;
1076	    }
1077	    c = Outpar;
1078	    break;
1079	case LX2_BAR:
1080	    if (!pct && !in_brace_param) {
1081		if (sub)
1082		    break;
1083		else
1084		    goto brk;
1085	    }
1086	    if (unset(SHGLOB) || (!sub && !in_brace_param))
1087		c = Bar;
1088	    break;
1089	case LX2_STRING:
1090	    e = hgetc();
1091	    if (e == '[') {
1092		cmdpush(CS_MATHSUBST);
1093		add(String);
1094		add(Inbrack);
1095		c = dquote_parse(']', sub);
1096		cmdpop();
1097		if (c) {
1098		    peek = LEXERR;
1099		    goto brk;
1100		}
1101		c = Outbrack;
1102	    } else if (e == '(') {
1103		add(String);
1104		c = cmd_or_math_sub();
1105		if (c) {
1106		    peek = LEXERR;
1107		    goto brk;
1108		}
1109		c = Outpar;
1110	    } else {
1111		if (e == '{') {
1112		    add(c);
1113		    c = Inbrace;
1114		    ++bct;
1115		    cmdpush(CS_BRACEPAR);
1116		    if (!in_brace_param)
1117			in_brace_param = bct;
1118		} else {
1119		    hungetc(e);
1120		    lexstop = 0;
1121		}
1122	    }
1123	    break;
1124	case LX2_INBRACK:
1125	    if (!in_brace_param)
1126		brct++;
1127	    c = Inbrack;
1128	    break;
1129	case LX2_OUTBRACK:
1130	    if (!in_brace_param)
1131		brct--;
1132	    if (brct < 0)
1133		brct = 0;
1134	    c = Outbrack;
1135	    break;
1136	case LX2_INPAR:
1137	    if (isset(SHGLOB)) {
1138		if (sub || in_brace_param)
1139		    break;
1140		if (incasepat && !len)
1141		    return INPAR;
1142		if (!isset(KSHGLOB) && len)
1143		    goto brk;
1144	    }
1145	    if (!in_brace_param) {
1146		if (!sub) {
1147		    e = hgetc();
1148		    hungetc(e);
1149		    lexstop = 0;
1150		    /* For command words, parentheses are only
1151		     * special at the start.  But now we're tokenising
1152		     * the remaining string.  So I don't see what
1153		     * the old incmdpos test here is for.
1154		     *   pws 1999/6/8
1155		     *
1156		     * Oh, no.
1157		     *  func1(   )
1158		     * is a valid function definition in [k]sh.  The best
1159		     * thing we can do, without really nasty lookahead tricks,
1160		     * is break if we find a blank after a parenthesis.  At
1161		     * least this can't happen inside braces or brackets.  We
1162		     * only allow this with SHGLOB (set for both sh and ksh).
1163		     *
1164		     * Things like `print @( |foo)' should still
1165		     * work, because [k]sh don't allow multiple words
1166		     * in a function definition, so we only do this
1167		     * in command position.
1168		     *   pws 1999/6/14
1169		     */
1170		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
1171				     !brct && !intpos && incmdpos)) {
1172			/*
1173			 * Either a () token, or a command word with
1174			 * something suspiciously like a ksh function
1175			 * definition.
1176			 * The current word isn't spellcheckable.
1177			 */
1178			nocorrect |= 2;
1179			goto brk;
1180		    }
1181		}
1182		/*
1183		 * This also handles the [k]sh `foo( )' function definition.
1184		 * Maintain a variable fdpar, set as long as a single set of
1185		 * parentheses contains only space.  Then if we get to the
1186		 * closing parenthesis and it is still set, we can assume we
1187		 * have a function definition.  Only do this at the start of
1188		 * the word, since the (...) must be a separate token.
1189		 */
1190		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
1191		    fdpar = 1;
1192	    }
1193	    c = Inpar;
1194	    break;
1195	case LX2_INBRACE:
1196	    if (isset(IGNOREBRACES) || sub)
1197		c = '{';
1198	    else {
1199		if (!len && incmdpos) {
1200		    add('{');
1201		    *bptr = '\0';
1202		    return STRING;
1203		}
1204		if (in_brace_param) {
1205		    cmdpush(CS_BRACE);
1206		}
1207		bct++;
1208	    }
1209	    break;
1210	case LX2_OUTBRACE:
1211	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
1212		break;
1213	    if (!bct)
1214		break;
1215	    if (in_brace_param) {
1216		cmdpop();
1217	    }
1218	    if (bct-- == in_brace_param)
1219		in_brace_param = 0;
1220	    c = Outbrace;
1221	    break;
1222	case LX2_COMMA:
1223	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
1224		c = Comma;
1225	    break;
1226	case LX2_OUTANG:
1227	    if (in_brace_param || sub)
1228		break;
1229	    e = hgetc();
1230	    if (e != '(') {
1231		hungetc(e);
1232		lexstop = 0;
1233		goto brk;
1234	    }
1235	    add(OutangProc);
1236	    if (skipcomm()) {
1237		peek = LEXERR;
1238		goto brk;
1239	    }
1240	    c = Outpar;
1241	    break;
1242	case LX2_INANG:
1243	    if (isset(SHGLOB) && sub)
1244		break;
1245	    e = hgetc();
1246	    if (!(in_brace_param || sub) && e == '(') {
1247		add(Inang);
1248		if (skipcomm()) {
1249		    peek = LEXERR;
1250		    goto brk;
1251		}
1252		c = Outpar;
1253		break;
1254	    }
1255	    hungetc(e);
1256	    if(isnumglob()) {
1257		add(Inang);
1258		while ((c = hgetc()) != '>')
1259		    add(c);
1260		c = Outang;
1261		break;
1262	    }
1263	    lexstop = 0;
1264	    if (in_brace_param || sub)
1265		break;
1266	    goto brk;
1267	case LX2_EQUALS:
1268	    if (!sub) {
1269		if (intpos) {
1270		    e = hgetc();
1271		    if (e != '(') {
1272			hungetc(e);
1273			lexstop = 0;
1274			c = Equals;
1275		    } else {
1276			add(Equals);
1277			if (skipcomm()) {
1278			    peek = LEXERR;
1279			    goto brk;
1280			}
1281			c = Outpar;
1282		    }
1283		} else if (peek != ENVSTRING &&
1284			   incmdpos && !bct && !brct) {
1285		    char *t = tokstr;
1286		    if (idigit(*t))
1287			while (++t < bptr && idigit(*t));
1288		    else {
1289			int sav = *bptr;
1290			*bptr = '\0';
1291			t = itype_end(t, IIDENT, 0);
1292			if (t < bptr) {
1293			    skipparens(Inbrack, Outbrack, &t);
1294			} else {
1295			    *bptr = sav;
1296			}
1297		    }
1298		    if (*t == '+')
1299			t++;
1300		    if (t == bptr) {
1301			e = hgetc();
1302			if (e == '(' && incmdpos) {
1303			    *bptr = '\0';
1304			    return ENVARRAY;
1305			}
1306			hungetc(e);
1307			lexstop = 0;
1308			peek = ENVSTRING;
1309			intpos = 2;
1310		    } else
1311			c = Equals;
1312		} else
1313		    c = Equals;
1314	    }
1315	    break;
1316	case LX2_BKSLASH:
1317	    c = hgetc();
1318	    if (c == '\n') {
1319		c = hgetc();
1320		if (!lexstop)
1321		    continue;
1322	    } else
1323		add(Bnull);
1324	    if (lexstop)
1325		goto brk;
1326	    break;
1327	case LX2_QUOTE: {
1328	    int strquote = (len && bptr[-1] == String);
1329
1330	    add(Snull);
1331	    cmdpush(CS_QUOTE);
1332	    for (;;) {
1333		STOPHIST
1334		while ((c = hgetc()) != '\'' && !lexstop) {
1335		    if (strquote && c == '\\') {
1336			c = hgetc();
1337			if (lexstop)
1338			    break;
1339			/*
1340			 * Mostly we don't need to do anything special
1341			 * with escape backslashes or closing quotes
1342			 * inside $'...'; however in completion we
1343			 * need to be able to strip multiple backslashes
1344			 * neatly.
1345			 */
1346			if (c == '\\' || c == '\'')
1347			    add(Bnull);
1348			else
1349			    add('\\');
1350		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1351			if (bptr[-1] == '\\')
1352			    bptr--, len--;
1353			else
1354			    break;
1355		    }
1356		    add(c);
1357		}
1358		ALLOWHIST
1359		if (c != '\'') {
1360		    unmatched = '\'';
1361		    peek = LEXERR;
1362		    cmdpop();
1363		    goto brk;
1364		}
1365		e = hgetc();
1366		if (e != '\'' || unset(RCQUOTES) || strquote)
1367		    break;
1368		add(c);
1369	    }
1370	    cmdpop();
1371	    hungetc(e);
1372	    lexstop = 0;
1373	    c = Snull;
1374	    break;
1375	}
1376	case LX2_DQUOTE:
1377	    add(Dnull);
1378	    cmdpush(CS_DQUOTE);
1379	    c = dquote_parse('"', sub);
1380	    cmdpop();
1381	    if (c) {
1382		unmatched = '"';
1383		peek = LEXERR;
1384		goto brk;
1385	    }
1386	    c = Dnull;
1387	    break;
1388	case LX2_BQUOTE:
1389	    add(Tick);
1390	    cmdpush(CS_BQUOTE);
1391	    SETPARBEGIN
1392	    inquote = 0;
1393	    while ((c = hgetc()) != '`' && !lexstop) {
1394		if (c == '\\') {
1395		    c = hgetc();
1396		    if (c != '\n') {
1397			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
1398			add(c);
1399		    }
1400		    else if (!sub && isset(CSHJUNKIEQUOTES))
1401			add(c);
1402		} else {
1403		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1404			break;
1405		    }
1406		    add(c);
1407		    if (c == '\'') {
1408			if ((inquote = !inquote))
1409			    STOPHIST
1410			else
1411			    ALLOWHIST
1412		    }
1413		}
1414	    }
1415	    if (inquote)
1416		ALLOWHIST
1417	    cmdpop();
1418	    if (c != '`') {
1419		unmatched = '`';
1420		peek = LEXERR;
1421		goto brk;
1422	    }
1423	    c = Tick;
1424	    SETPAREND
1425	    break;
1426	}
1427	add(c);
1428	c = hgetc();
1429	if (intpos)
1430	    intpos--;
1431	if (lexstop)
1432	    break;
1433    }
1434  brk:
1435    hungetc(c);
1436    if (unmatched)
1437	zerr("unmatched %c", unmatched);
1438    if (in_brace_param) {
1439	while(bct-- >= in_brace_param)
1440	    cmdpop();
1441	zerr("closing brace expected");
1442    } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
1443	       peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
1444	/* hack to get {foo} command syntax work */
1445	bptr--;
1446	len--;
1447	lexstop = 0;
1448	hungetc('}');
1449    }
1450    *bptr = '\0';
1451    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
1452    return peek;
1453}
1454
1455
1456/*
1457 * Parse input as if in double quotes.
1458 * endchar is the end character to expect.
1459 * sub has got something to do with whether we are doing quoted substitution.
1460 * Return non-zero for error (character to unget), else zero
1461 */
1462
1463/**/
1464static int
1465dquote_parse(char endchar, int sub)
1466{
    /*
     * Reads input with hgetc(), rewrites the characters that are special
     * in this context as tokens (Bnull, Qstring, Qtick, Inbrace, ...) and
     * hands every resulting character to add().  The loop stops at an
     * unnested endchar, on an error, or when lexstop is set.
     *
     * pct, brct, bct: nesting depth of (...), [...] and ${...}.
     * intick: 0 outside `...`, 1 inside `...`, 2 inside '...' within `...`.
     * err: the result; it is finalised after the loop.
     */
1467    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
1468    int c;
    /* math is set when the terminator is ')' or ']'. */
1469    int math = endchar == ')' || endchar == ']';
1470    int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
1471
    /*
     * Keep going while the character is not endchar, or it is endchar but
     * we are still inside ${...}, inside backquotes, or (math only) inside
     * unbalanced parentheses or brackets.
     */
1472    while (((c = hgetc()) != endchar || bct ||
1473	    (math && ((pct > 0) || (brct > 0))) ||
1474	    intick) && !lexstop) {
    /* Re-entered from the backslash case to dispatch on the next character. */
1475      cont:
1476	switch (c) {
1477	case '\\':
1478	    c = hgetc();
1479	    if (c != '\n') {
		/*
		 * A backslash before one of the characters tested here
		 * becomes Bnull and the character itself is added at the
		 * bottom of the loop.  Otherwise the backslash is kept
		 * literally and the following character is examined again
		 * by the switch.
		 */
1480		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
1481		    c == endchar || c == '`' ||
1482		    (endchar == ']' && (c == '[' || c == ']' ||
1483					c == '(' || c == ')' ||
1484					c == '{' || c == '}' ||
1485					(c == '"' && sub))))
1486		    add(Bnull);
1487		else {
1488		    /* lexstop is implicitly handled here */
1489		    add('\\');
1490		    goto cont;
1491		}
	    /*
	     * Backslash-newline: both characters are dropped, except when
	     * !sub, CSHJUNKIEQUOTES is set and endchar is '"', in which case
	     * the newline is added without the backslash.
	     */
1492	    } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
1493		continue;
1494	    break;
1495	case '\n':
	    /* With CSHJUNKIEQUOTES a bare newline inside "..." is an error. */
1496	    err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
1497	    break;
1498	case '$':
	    /* Inside backquotes '$' is passed through unchanged. */
1499	    if (intick)
1500		break;
1501	    c = hgetc();
1502	    if (c == '(') {
		/* $( : handled by cmd_or_math_sub(). */
1503		add(Qstring);
1504		err = cmd_or_math_sub();
1505		c = Outpar;
1506	    } else if (c == '[') {
		/* $[ : parse recursively up to the matching ']'. */
1507		add(String);
1508		add(Inbrack);
1509		cmdpush(CS_MATHSUBST);
1510		err = dquote_parse(']', sub);
1511		cmdpop();
1512		c = Outbrack;
1513	    } else if (c == '{') {
		/* ${ : one more brace level, closed by the '}' case below. */
1514		add(Qstring);
1515		c = Inbrace;
1516		cmdpush(CS_BRACEPAR);
1517		bct++;
1518	    } else if (c == '$')
		/* $$ : Qstring followed by a literal '$'. */
1519		add(Qstring);
1520	    else {
		/* Anything else: push the character back, emit only Qstring. */
1521		hungetc(c);
1522		lexstop = 0;
1523		c = Qstring;
1524	    }
1525	    break;
1526	case '}':
	    /* Only closes a ${ opened above, and never inside backquotes. */
1527	    if (intick || !bct)
1528		break;
1529	    c = Outbrace;
1530	    bct--;
1531	    cmdpop();
1532	    break;
1533	case '`':
	    /*
	     * Toggles backquote mode.  If a '...' was still open inside the
	     * backquotes, undo its STOPHIST first.
	     */
1534	    c = Qtick;
1535	    if (intick == 2)
1536		ALLOWHIST
1537	    if ((intick = !intick)) {
1538		SETPARBEGIN
1539		cmdpush(CS_BQUOTE);
1540	    } else {
1541		SETPAREND
1542	        cmdpop();
1543	    }
1544	    break;
1545	case '\'':
	    /* Only tracked inside backquotes: switches intick between 1 and 2. */
1546	    if (!intick)
1547		break;
1548	    if (intick == 1)
1549		intick = 2, STOPHIST
1550	    else
1551		intick = 1, ALLOWHIST
1552	    break;
	/*
	 * Parentheses and brackets are counted unless we are in math mode
	 * inside ${...}.  In math mode a closer without a matching opener
	 * is an error.
	 */
1553	case '(':
1554	    if (!math || !bct)
1555		pct++;
1556	    break;
1557	case ')':
1558	    if (!math || !bct)
1559		err = (!pct-- && math);
1560	    break;
1561	case '[':
1562	    if (!math || !bct)
1563		brct++;
1564	    break;
1565	case ']':
1566	    if (!math || !bct)
1567		err = (!brct-- && math);
1568	    break;
1569	case '"':
	    /*
	     * Passed through unchanged inside backquotes, or when '"' is not
	     * the terminator and we are outside ${...}.  Inside ${...} a
	     * nested double-quoted string is parsed recursively; otherwise
	     * the quote is an error.
	     */
1570	    if (intick || (endchar != '"' && !bct))
1571		break;
1572	    if (bct) {
1573		add(Dnull);
1574		cmdpush(CS_DQUOTE);
1575		err = dquote_parse('"', sub);
1576		cmdpop();
1577		c = Dnull;
1578	    } else
1579		err = 1;
1580	    break;
1581	}
	/* On error or end of input the current character is not added. */
1582	if (err || lexstop)
1583	    break;
1584	add(c);
1585    }
    /* Balance any STOPHIST and cmdpush() still outstanding from the loop. */
1586    if (intick == 2)
1587	ALLOWHIST
1588    if (intick) {
1589	cmdpop();
1590    }
1591    while (bct--)
1592	cmdpop();
    /*
     * If input ran out, that is an error when we were inside backquotes,
     * when a non-null endchar was expected, or when err was already set.
     */
1593    if (lexstop)
1594	err = intick || endchar || err;
1595    else if (err == 1) {
1596	/*
1597	 * TODO: as far as I can see, this hack is used in gettokstr()
1598	 * to hungetc() a character on an error.  However, I don't
1599	 * understand what that actually gets us, and we can't guarantee
1600	 * it's a character anyway, because of the previous test.
1601	 *
1602	 * We use the same feature in cmd_or_math where we actually do
1603	 * need to unget if we decide it's really a command substitution.
1604	 * We try to handle the other case by testing for lexstop.
1605	 */
1606	err = c;
1607    }
    /*
     * zlemath was computed on entry from zlemetacs and the input position;
     * together with the test here it decides whether inwhat becomes
     * IN_MATH.
     */
1608    if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
1609	inwhat = IN_MATH;
1610    return err;
1611}
1612
1613/* Tokenize a string given in s. Parsing is done as in double *
1614 * quotes.  This is usually called before singsub().          */
1615
1616/**/
1617mod_export int
1618parsestr(char *s)
1619{
1620    int err;
1621
1622    if ((err = parsestrnoerr(s))) {
1623	untokenize(s);
1624	if (err > 32 && err < 127)
1625	    zerr("parse error near `%c'", err);
1626	else
1627	    zerr("parse error");
1628    }
1629    return err;
1630}
1631
1632/**/
1633mod_export int
1634parsestrnoerr(char *s)
1635{
1636    int l = strlen(s), err;
1637
1638    lexsave();
1639    untokenize(s);
1640    inpush(dupstring(s), 0, NULL);
1641    strinbeg(0);
1642    len = 0;
1643    bptr = tokstr = s;
1644    bsiz = l + 1;
1645    err = dquote_parse('\0', 1);
1646    *bptr = '\0';
1647    strinend();
1648    inpop();
1649    DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
1650    lexrestore();
1651    return err;
1652}
1653
1654/*
1655 * Parse a subscript in string s.
1656 * sub is passed down to dquote_parse().
1657 * endchar is the final character.
1658 * Return the next character, or NULL.
1659 */
1660/**/
1661mod_export char *
1662parse_subscript(char *s, int sub, int endchar)
1663{
1664    int l = strlen(s), err;
1665    char *t;
1666
1667    if (!*s || *s == endchar)
1668	return 0;
1669    lexsave();
1670    untokenize(t = dupstring(s));
1671    inpush(t, 0, NULL);
1672    strinbeg(0);
1673    len = 0;
1674    bptr = tokstr = s;
1675    bsiz = l + 1;
1676    err = dquote_parse(endchar, sub);
1677    if (err) {
1678	err = *bptr;
1679	*bptr = '\0';
1680	untokenize(s);
1681	*bptr = err;
1682	s = NULL;
1683    } else {
1684	s = bptr;
1685    }
1686    strinend();
1687    inpop();
1688    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
1689    lexrestore();
1690    return s;
1691}
1692
1693/* Tokenize a string given in s. Parsing is done as if s were a normal *
1694 * command-line argument but it may contain separators.  This is used  *
1695 * to parse the right-hand side of ${...%...} substitutions.           */
1696
1697/**/
1698mod_export int
1699parse_subst_string(char *s)
1700{
1701    int c, l = strlen(s), err;
1702    char *ptr;
1703    enum lextok ctok;
1704
1705    if (!*s || !strcmp(s, nulstring))
1706	return 0;
1707    lexsave();
1708    untokenize(s);
1709    inpush(dupstring(s), 0, NULL);
1710    strinbeg(0);
1711    len = 0;
1712    bptr = tokstr = s;
1713    bsiz = l + 1;
1714    c = hgetc();
1715    ctok = gettokstr(c, 1);
1716    err = errflag;
1717    strinend();
1718    inpop();
1719    DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
1720    lexrestore();
1721    errflag = err;
1722    if (ctok == LEXERR) {
1723	untokenize(s);
1724	return 1;
1725    }
1726#ifdef DEBUG
1727    /*
1728     * Historical note: we used to check here for olen (the value of len
1729     * before lexrestore()) == l, but that's not necessarily the case if
1730     * we stripped an RCQUOTE.
1731     */
1732    if (ctok != STRING || (errflag && !noerrs)) {
1733	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
1734		errflag ? "errflag" : "ctok != STRING");
1735	fflush(stderr);
1736	untokenize(s);
1737	return 1;
1738    }
1739#endif
1740    /* Check for $'...' quoting.  This needs special handling. */
1741    for (ptr = s; *ptr; )
1742    {
1743	if (*ptr == String && ptr[1] == Snull)
1744	{
1745	    char *t;
1746	    int len, tlen, diff;
1747	    t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
1748	    len += 2;
1749	    tlen = strlen(t);
1750	    diff = len - tlen;
1751	    /*
1752	     * Yuk.
1753	     * parse_subst_string() currently handles strings in-place.
1754	     * That's not so easy to fix without knowing whether
1755	     * additional memory should come off the heap or
1756	     * otherwise.  So we cheat by copying the unquoted string
1757	     * into place, unless it's too long.  That's not the
1758	     * normal case, but I'm worried there are pathological
1759	     * cases with converting metafied multibyte strings.
1760	     * If someone can prove there aren't I will be very happy.
1761	     */
1762	    if (diff < 0) {
1763		DPUTS(1, "$'...' subst too long: fix get_parse_string()");
1764		return 1;
1765	    }
1766	    memcpy(ptr, t, tlen);
1767	    ptr += tlen;
1768	    if (diff > 0) {
1769		char *dptr = ptr;
1770		char *sptr = ptr + diff;
1771		while ((*dptr++ = *sptr++))
1772		    ;
1773	    }
1774	} else
1775	    ptr++;
1776    }
1777    return 0;
1778}
1779
1780/* Called below to report word positions. */
1781
1782/**/
1783mod_export void
1784gotword(void)
1785{
1786    we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
1787    if (zlemetacs <= we) {
1788	wb = zlemetall - wordbeg + addedx;
1789	lexflags = 0;
1790    }
1791}
1792
1793/* expand aliases and reserved words */
1794
1795/**/
1796int
1797exalias(void)
1798{
1799    Alias an;
1800    Reswd rw;
1801
1802    hwend();
1803    if (interact && isset(SHINSTDIN) && !strin && !incasepat &&
1804	tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
1805	(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
1806	spckword(&tokstr, 1, incmdpos, 1);
1807
1808    if (!tokstr) {
1809	zshlextext = tokstrings[tok];
1810
1811	return 0;
1812    } else {
1813	VARARR(char, copy, (strlen(tokstr) + 1));
1814
1815	if (has_token(tokstr)) {
1816	    char *p, *t;
1817
1818	    zshlextext = p = copy;
1819	    for (t = tokstr;
1820		 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
1821	} else
1822	    zshlextext = tokstr;
1823
1824	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
1825	    int zp = lexflags;
1826
1827	    gotword();
1828	    if ((zp & LEXFLAGS_ZLE) && !lexflags) {
1829		if (zshlextext == copy)
1830		    zshlextext = tokstr;
1831		return 0;
1832	    }
1833	}
1834
1835	if (tok == STRING) {
1836	    /* Check for an alias */
1837	    if (!noaliases && isset(ALIASESOPT) &&
1838		(!isset(POSIXALIASES) ||
1839		 !reswdtab->getnode(reswdtab, zshlextext))) {
1840		char *suf;
1841
1842		an = (Alias) aliastab->getnode(aliastab, zshlextext);
1843		if (an && !an->inuse &&
1844		    ((an->node.flags & ALIAS_GLOBAL) || incmdpos || inalmore)) {
1845		    inpush(an->text, INP_ALIAS, an);
1846		    if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
1847			aliasspaceflag = 1;
1848		    lexstop = 0;
1849		    if (zshlextext == copy)
1850			zshlextext = tokstr;
1851		    return 1;
1852		}
1853		if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
1854		    suf > zshlextext && suf[-1] != Meta &&
1855		    (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
1856		    !an->inuse && incmdpos) {
1857		    inpush(dupstring(zshlextext), INP_ALIAS, NULL);
1858		    inpush(" ", INP_ALIAS, NULL);
1859		    inpush(an->text, INP_ALIAS, an);
1860		    lexstop = 0;
1861		    if (zshlextext == copy)
1862			zshlextext = tokstr;
1863		    return 1;
1864		}
1865	    }
1866
1867	    /* Then check for a reserved word */
1868	    if ((incmdpos ||
1869		 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
1870		  zshlextext[0] == '}' && !zshlextext[1])) &&
1871		(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
1872		tok = rw->token;
1873		if (tok == DINBRACK)
1874		    incond = 1;
1875	    } else if (incond && !strcmp(zshlextext, "]]")) {
1876		tok = DOUTBRACK;
1877		incond = 0;
1878	    } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
1879		tok = BANG;
1880	}
1881	inalmore = 0;
1882	if (zshlextext == copy)
1883	    zshlextext = tokstr;
1884    }
1885    return 0;
1886}
1887
1888/* skip (...) */
1889
1890/**/
1891static int
1892skipcomm(void)
1893{
1894    int pct = 1, c, start = 1;
1895
1896    cmdpush(CS_CMDSUBST);
1897    SETPARBEGIN
1898    c = Inpar;
1899    do {
1900	int iswhite;
1901	add(c);
1902	c = hgetc();
1903	if (itok(c) || lexstop)
1904	    break;
1905	iswhite = inblank(c);
1906	switch (c) {
1907	case '(':
1908	    pct++;
1909	    break;
1910	case ')':
1911	    pct--;
1912	    break;
1913	case '\\':
1914	    add(c);
1915	    c = hgetc();
1916	    break;
1917	case '\'': {
1918	    int strquote = bptr[-1] == '$';
1919	    add(c);
1920	    STOPHIST
1921	    while ((c = hgetc()) != '\'' && !lexstop) {
1922		if (c == '\\' && strquote) {
1923		    add(c);
1924		    c = hgetc();
1925		}
1926		add(c);
1927	    }
1928	    ALLOWHIST
1929	    break;
1930	}
1931	case '\"':
1932	    add(c);
1933	    while ((c = hgetc()) != '\"' && !lexstop)
1934		if (c == '\\') {
1935		    add(c);
1936		    add(hgetc());
1937		} else
1938		    add(c);
1939	    break;
1940	case '`':
1941	    add(c);
1942	    while ((c = hgetc()) != '`' && !lexstop)
1943		if (c == '\\')
1944		    add(c), add(hgetc());
1945		else
1946		    add(c);
1947	    break;
1948	case '#':
1949	    if (start) {
1950		add(c);
1951		while ((c = hgetc()) != '\n' && !lexstop)
1952		    add(c);
1953		iswhite = 1;
1954	    }
1955	    break;
1956	}
1957	start = iswhite;
1958    }
1959    while (pct);
1960    if (!lexstop)
1961	SETPAREND
1962    cmdpop();
1963    return lexstop;
1964}
1965