1/*
2 * lex.c - lexical analysis
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "zsh.mdh"
31#include "lex.pro"
32
33/* tokens */
34
/*
 * Literal characters for the tokenised character codes.  The entry for a
 * token code t is ztokens[t - Pound]; cmd_or_math() uses this to turn a
 * tokenised character back into the character to push back on the input.
 */

/**/
mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";

/*
 * parts of the current token:
 *   tokstr  text of the token being built (NULL for tokens without text);
 *   tok     the token most recently returned by gettok() via zshlex();
 *   tokfd   file descriptor prefix of a redirection token, -1 if none.
 * zshlextext is only saved and restored in this part of the file.
 */

/**/
char *zshlextext;
/**/
mod_export char *tokstr;
/**/
mod_export enum lextok tok;
/**/
mod_export int tokfd;
48
49/*
50 * Line number at which the first character of a token was found.
51 * We always set this in gettok(), which is always called from
52 * zshlex() unless we have reached an error.  So it is always
53 * valid when parsing.  It is not useful during execution
54 * of the parsed structure.
55 */
56
/**/
zlong toklineno;

/* lexical analyzer error flag */

/**/
mod_export int lexstop;

/* if != 0, this is the first line of the command */

/**/
mod_export int isfirstln;

/* if != 0, this is the first char of the command (not including white space) */

/**/
int isfirstch;

/* flag that an alias should be expanded after expansion ending in space */

/**/
int inalmore;

/*
 * Don't do spelling correction.
 * Bit 1 (i.e. the value 2) is only valid for the current word.  It's
 * set when we detect a lookahead that stops the word from
 * needing correction, and zshlex() clears it again after every token
 * by masking the variable with 1.
 */

/**/
int nocorrect;

/*
 * Cursor position and line length in zle when the line is
 * metafied for access from the main shell.
 */

/**/
mod_export int zlemetacs, zlemetall;

/* inwhat says what exactly we are in     *
 * (its value is one of the IN_* things). */

/**/
mod_export int inwhat;

/* 1 if x added to complete in a blank between words */

/**/
mod_export int addedx;

/* wb and we hold the beginning/end position of the word we are completing. */

/**/
mod_export int wb, we;

/* 1 if aliases should not be expanded */

/**/
mod_export int noaliases;
118
119/*
 * If non-zero, we are parsing a line sent to us by the editor, or some
121 * other string that's not part of standard command input (e.g. eval is
122 * part of normal command input).
123 *
124 * Set of bits from LEXFLAGS_*.
125 *
126 * Note that although it is passed into the lexer as an input, the
127 * lexer can set it to zero after finding the word it's searching for.
128 * This only happens if the line being parsed actually does come from
129 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
130 */
131
/**/
mod_export int lexflags;

/*
 * Value of inbufct at the start of the current word, as recorded by
 * gettok() (adjusted by one when the word starts with a quoted bangchar).
 */

/**/
mod_export int wordbeg;

/*
 * Values of inbufct recorded by the SETPARBEGIN and SETPAREND macros
 * while lexing a line for ZLE; -1 is used to mean "not set".
 */

/**/
mod_export int parbegin;

/**/
mod_export int parend;

/* don't recognize comments */

/**/
mod_export int nocomments;
148
/*
 * Text of punctuation tokens, one entry per token code in the order
 * given by the comments on the right.  The array has WHILE + 1 slots;
 * any slot without an initialiser below is NULL.
 * Note the NEWLIN entry is the two characters backslash and 'n', not a
 * real newline.
 */

/**/
mod_export char *tokstrings[WHILE + 1] = {
    NULL,	/* NULLTOK	  0  */
    ";",	/* SEPER	     */
    "\\n",	/* NEWLIN	     */
    ";",	/* SEMI		     */
    ";;",	/* DSEMI	     */
    "&",	/* AMPER	  5  */
    "(",	/* INPAR	     */
    ")",	/* OUTPAR	     */
    "||",	/* DBAR		     */
    "&&",	/* DAMPER	     */
    ">",	/* OUTANG	  10 */
    ">|",	/* OUTANGBANG	     */
    ">>",	/* DOUTANG	     */
    ">>|",	/* DOUTANGBANG	     */
    "<",	/* INANG	     */
    "<>",	/* INOUTANG	  15 */
    "<<",	/* DINANG	     */
    "<<-",	/* DINANGDASH	     */
    "<&",	/* INANGAMP	     */
    ">&",	/* OUTANGAMP	     */
    "&>",	/* AMPOUTANG	  20 */
    "&>|",	/* OUTANGAMPBANG     */
    ">>&",	/* DOUTANGAMP	     */
    ">>&|",	/* DOUTANGAMPBANG    */
    "<<<",	/* TRINANG	     */
    "|",	/* BAR		  25 */
    "|&",	/* BARAMP	     */
    "()",	/* INOUTPAR	     */
    "((",	/* DINPAR	     */
    "))",	/* DOUTPAR	     */
    "&|",	/* AMPERBANG	  30 */
    ";&",	/* SEMIAMP	     */
    ";|",	/* SEMIBAR	     */
};
187
/*
 * lexical state
 *
 * dbparens: non-zero while gettok() is reading the inside of (( ... ));
 *           set when "((" is seen after `for' and cleared when the
 *           closing "))" has been read.
 * len:      number of characters currently stored in the tokstr buffer.
 * bsiz:     allocated size of the tokstr buffer.
 * bptr:     where add() will store the next character in that buffer.
 */

static int dbparens;
static int len = 0, bsiz = 256;
static char *bptr;
193
/*
 * One saved copy of the lexer state together with the history and
 * wordcode state that goes with it.  lexsave() fills in a new entry and
 * pushes it on lstack; lexrestore() copies the values back into the
 * globals and pops the entry.  Most members simply hold the global of
 * the same name; the exceptions are noted below.
 */
struct lexstack {
    struct lexstack *next;	/* entry saved before this one, or NULL */

    int incmdpos;
    int incond;
    int incasepat;
    int dbparens;
    int isfirstln;
    int isfirstch;
    int histactive;
    int histdone;
    int lexflags;
    int stophist;
    int hlinesz;
    char *hline;		/* saved value of chline */
    char *hptr;
    enum lextok tok;
    int isnewlin;
    char *tokstr;
    char *zshlextext;
    char *bptr;
    int bsiz;
    int len;
    short *chwords;
    int chwordlen;
    int chwordpos;
    int hwgetword;
    int lexstop;
    struct heredocs *hdocs;
    /* the history/input function pointers in force when saved */
    int (*hgetc) _((void));
    void (*hungetc) _((int));
    void (*hwaddc) _((int));
    void (*hwbegin) _((int));
    void (*hwend) _((void));
    void (*addtoline) _((int));

    /* state of the wordcode buffer being built (the ec* globals) */
    int eclen, ecused, ecnpats;
    Wordcode ecbuf;
    Eccstr ecstrs;
    int ecsoffs, ecssub, ecnfunc;

    unsigned char *cstack;	/* saved value of cmdstack */
    int csp;			/* saved value of cmdsp */
    zlong toklineno;
};

/* top of the stack of saved states; NULL when nothing is saved */
static struct lexstack *lstack = NULL;
241
242/* save the lexical state */
243
244/* is this a hack or what? */
245
246/**/
247mod_export void
248lexsave(void)
249{
250    struct lexstack *ls;
251
252    ls = (struct lexstack *)malloc(sizeof(struct lexstack));
253
254    ls->incmdpos = incmdpos;
255    ls->incond = incond;
256    ls->incasepat = incasepat;
257    ls->dbparens = dbparens;
258    ls->isfirstln = isfirstln;
259    ls->isfirstch = isfirstch;
260    ls->histactive = histactive;
261    ls->histdone = histdone;
262    ls->lexflags = lexflags;
263    ls->stophist = stophist;
264    stophist = 0;
265    if (!lstack) {
266	/* top level, make this version visible to ZLE */
267	zle_chline = chline;
268	/* ensure line stored is NULL-terminated */
269	if (hptr)
270	    *hptr = '\0';
271    }
272    ls->hline = chline;
273    chline = NULL;
274    ls->hptr = hptr;
275    hptr = NULL;
276    ls->hlinesz = hlinesz;
277    ls->cstack = cmdstack;
278    ls->csp = cmdsp;
279    cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
280    ls->tok = tok;
281    ls->isnewlin = isnewlin;
282    ls->tokstr = tokstr;
283    ls->zshlextext = zshlextext;
284    ls->bptr = bptr;
285    tokstr = zshlextext = bptr = NULL;
286    ls->bsiz = bsiz;
287    bsiz = 256;
288    ls->len = len;
289    ls->chwords = chwords;
290    ls->chwordlen = chwordlen;
291    ls->chwordpos = chwordpos;
292    ls->hwgetword = hwgetword;
293    ls->lexstop = lexstop;
294    ls->hdocs = hdocs;
295    ls->hgetc = hgetc;
296    ls->hungetc = hungetc;
297    ls->hwaddc = hwaddc;
298    ls->hwbegin = hwbegin;
299    ls->hwend = hwend;
300    ls->addtoline = addtoline;
301    ls->eclen = eclen;
302    ls->ecused = ecused;
303    ls->ecnpats = ecnpats;
304    ls->ecbuf = ecbuf;
305    ls->ecstrs = ecstrs;
306    ls->ecsoffs = ecsoffs;
307    ls->ecssub = ecssub;
308    ls->ecnfunc = ecnfunc;
309    ls->toklineno = toklineno;
310    cmdsp = 0;
311    inredir = 0;
312    hdocs = NULL;
313    histactive = 0;
314    ecbuf = NULL;
315
316    ls->next = lstack;
317    lstack = ls;
318}
319
320/* restore lexical state */
321
322/**/
323mod_export void
324lexrestore(void)
325{
326    struct lexstack *ln;
327
328    DPUTS(!lstack, "BUG: lexrestore() without lexsave()");
329    incmdpos = lstack->incmdpos;
330    incond = lstack->incond;
331    incasepat = lstack->incasepat;
332    dbparens = lstack->dbparens;
333    isfirstln = lstack->isfirstln;
334    isfirstch = lstack->isfirstch;
335    histactive = lstack->histactive;
336    histdone = lstack->histdone;
337    lexflags = lstack->lexflags;
338    stophist = lstack->stophist;
339    chline = lstack->hline;
340    hptr = lstack->hptr;
341    if (cmdstack)
342	free(cmdstack);
343    cmdstack = lstack->cstack;
344    cmdsp = lstack->csp;
345    tok = lstack->tok;
346    isnewlin = lstack->isnewlin;
347    tokstr = lstack->tokstr;
348    zshlextext = lstack->zshlextext;
349    bptr = lstack->bptr;
350    bsiz = lstack->bsiz;
351    len = lstack->len;
352    chwords = lstack->chwords;
353    chwordlen = lstack->chwordlen;
354    chwordpos = lstack->chwordpos;
355    hwgetword = lstack->hwgetword;
356    lexstop = lstack->lexstop;
357    hdocs = lstack->hdocs;
358    hgetc = lstack->hgetc;
359    hungetc = lstack->hungetc;
360    hwaddc = lstack->hwaddc;
361    hwbegin = lstack->hwbegin;
362    hwend = lstack->hwend;
363    addtoline = lstack->addtoline;
364    if (ecbuf)
365	zfree(ecbuf, eclen);
366    eclen = lstack->eclen;
367    ecused = lstack->ecused;
368    ecnpats = lstack->ecnpats;
369    ecbuf = lstack->ecbuf;
370    ecstrs = lstack->ecstrs;
371    ecsoffs = lstack->ecsoffs;
372    ecssub = lstack->ecssub;
373    ecnfunc = lstack->ecnfunc;
374    hlinesz = lstack->hlinesz;
375    toklineno = lstack->toklineno;
376    errflag = 0;
377
378    ln = lstack->next;
379    if (!ln) {
380	/* Back to top level: don't need special ZLE value */
381	DPUTS(chline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
382	zle_chline = NULL;
383    }
384    free(lstack);
385    lstack = ln;
386}
387
388/**/
389void
390zshlex(void)
391{
392    if (tok == LEXERR)
393	return;
394    do
395	tok = gettok();
396    while (tok != ENDINPUT && exalias());
397    nocorrect &= 1;
398    if (tok == NEWLIN || tok == ENDINPUT) {
399	while (hdocs) {
400	    struct heredocs *next = hdocs->next;
401	    char *doc, *munged_term;
402
403	    hwbegin(0);
404	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
405	    munged_term = dupstring(hdocs->str);
406	    STOPHIST
407	    doc = gethere(&munged_term, hdocs->type);
408	    ALLOWHIST
409	    cmdpop();
410	    hwend();
411	    if (!doc) {
412		zerr("here document too large");
413		while (hdocs) {
414		    next = hdocs->next;
415		    zfree(hdocs, sizeof(struct heredocs));
416		    hdocs = next;
417		}
418		tok = LEXERR;
419		break;
420	    }
421	    setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
422		       munged_term);
423	    zfree(hdocs, sizeof(struct heredocs));
424	    hdocs = next;
425	}
426    }
427    if (tok != NEWLIN)
428	isnewlin = 0;
429    else
430	isnewlin = (inbufct) ? -1 : 1;
431    if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
432	tok = SEPER;
433}
434
435/**/
436mod_export void
437ctxtlex(void)
438{
439    static int oldpos;
440
441    zshlex();
442    switch (tok) {
443    case SEPER:
444    case NEWLIN:
445    case SEMI:
446    case DSEMI:
447    case SEMIAMP:
448    case SEMIBAR:
449    case AMPER:
450    case AMPERBANG:
451    case INPAR:
452    case INBRACE:
453    case DBAR:
454    case DAMPER:
455    case BAR:
456    case BARAMP:
457    case INOUTPAR:
458    case DOLOOP:
459    case THEN:
460    case ELIF:
461    case ELSE:
462    case DOUTBRACK:
463	incmdpos = 1;
464	break;
465    case STRING:
466 /* case ENVSTRING: */
467    case ENVARRAY:
468    case OUTPAR:
469    case CASE:
470    case DINBRACK:
471	incmdpos = 0;
472	break;
473
474    default:
475	/* nothing to do, keep compiler happy */
476	break;
477    }
478    if (tok != DINPAR)
479	infor = tok == FOR ? 2 : 0;
480    if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
481	inredir = 1;
482	oldpos = incmdpos;
483	incmdpos = 0;
484    } else if (inredir) {
485	incmdpos = oldpos;
486	inredir = 0;
487    }
488}
489
/*
 * Action codes for the first character of a token, looked up in
 * lexact1[] by gettok().  Apart from LX1_OTHER each value is the
 * position of the corresponding character in the string lx1 in
 * initlextabs(), so the two must be kept in step.
 */
#define LX1_BKSLASH 0
#define LX1_COMMENT 1
#define LX1_NEWLIN 2
#define LX1_SEMI 3
#define LX1_AMPER 5
#define LX1_BAR 6
#define LX1_INPAR 7
#define LX1_OUTPAR 8
#define LX1_INANG 13
#define LX1_OUTANG 14
#define LX1_OTHER 15

/*
 * Action codes for characters inside a string token, looked up in
 * lexact2[] by gettokstr().  Values up to LX2_COMMA are positions in
 * the string lx2 in initlextabs(); LX2_OTHER is the default and
 * LX2_META is assigned to the Meta character separately.
 */
#define LX2_BREAK 0
#define LX2_OUTPAR 1
#define LX2_BAR 2
#define LX2_STRING 3
#define LX2_INBRACK 4
#define LX2_OUTBRACK 5
#define LX2_TILDE 6
#define LX2_INPAR 7
#define LX2_INBRACE 8
#define LX2_OUTBRACE 9
#define LX2_OUTANG 10
#define LX2_INANG 11
#define LX2_EQUALS 12
#define LX2_BKSLASH 13
#define LX2_QUOTE 14
#define LX2_DQUOTE 15
#define LX2_BQUOTE 16
#define LX2_COMMA 17
#define LX2_OTHER 18
#define LX2_META 19

/*
 * Per-character tables filled in by initlextabs():
 *   lexact1  LX1_* action for a character at the start of a token;
 *   lexact2  LX2_* action for a character within a string;
 *   lextok2  the character to store in the string for that input
 *            character (its tokenised form where it has one).
 */
static unsigned char lexact1[256], lexact2[256], lextok2[256];
524
525/**/
526void
527initlextabs(void)
528{
529    int t0;
530    static char *lx1 = "\\q\n;!&|(){}[]<>";
531    static char *lx2 = ";)|$[]~({}><=\\\'\"`,";
532
533    for (t0 = 0; t0 != 256; t0++) {
534	lexact1[t0] = LX1_OTHER;
535	lexact2[t0] = LX2_OTHER;
536	lextok2[t0] = t0;
537    }
538    for (t0 = 0; lx1[t0]; t0++)
539	lexact1[(int)lx1[t0]] = t0;
540    for (t0 = 0; lx2[t0]; t0++)
541	lexact2[(int)lx2[t0]] = t0;
542    lexact2['&'] = LX2_BREAK;
543    lexact2[STOUC(Meta)] = LX2_META;
544    lextok2['*'] = Star;
545    lextok2['?'] = Quest;
546    lextok2['{'] = Inbrace;
547    lextok2['['] = Inbrack;
548    lextok2['$'] = String;
549    lextok2['~'] = Tilde;
550    lextok2['#'] = Pound;
551    lextok2['^'] = Hat;
552}
553
554/* initialize lexical state */
555
556/**/
557void
558lexinit(void)
559{
560    incond = incasepat = nocorrect =
561    infor = dbparens = lexstop = 0;
562    incmdpos = 1;
563    tok = ENDINPUT;
564}
565
566/* add a char to the string buffer */
567
568/**/
569void
570add(int c)
571{
572    *bptr++ = c;
573    if (bsiz == ++len) {
574	int newbsiz = bsiz * 2;
575
576	if (newbsiz > inbufct && inbufct > bsiz)
577	    newbsiz = inbufct;
578
579	bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz));
580	/* len == bsiz, so bptr is at the start of newly allocated memory */
581	memset(bptr, 0, newbsiz - bsiz);
582	bsiz = newbsiz;
583    }
584}
585
/*
 * Record in parbegin / parend the value of inbufct at the start and end
 * of a nested construct.  Both only act when lexing for ZLE
 * (LEXFLAGS_ZLE set in lexflags) and INP_ALIAS is not set in inbufflags.
 *
 * SETPARBEGIN sets parbegin to inbufct if
 * zlemetacs >= zlemetall + 1 - inbufct.
 *
 * SETPAREND acts only while parbegin is set and parend is not (-1 means
 * unset): if zlemetacs >= zlemetall + 1 - inbufct it resets parbegin
 * to -1, otherwise it sets parend to inbufct.
 *
 * Each macro expands to a complete brace-enclosed block and is written
 * at the point of use without a following semicolon.
 */
#define SETPARBEGIN {							\
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) &&	\
	    zlemetacs >= zlemetall+1-inbufct)				\
	    parbegin = inbufct;		      \
    }
#define SETPAREND {						      \
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
	    parbegin != -1 && parend == -1) {			      \
	    if (zlemetacs >= zlemetall + 1 - inbufct)		      \
		parbegin = -1;					      \
	    else						      \
		parend = inbufct;				      \
	}							      \
    }
600
601/*
602 * Return 1 for math, 0 for a command, 2 for an error.  If it couldn't be
603 * parsed as math, but there was no gross error, it's a command.
604 */
605
606static int
607cmd_or_math(int cs_type)
608{
609    int oldlen = len;
610    int c;
611
612    cmdpush(cs_type);
613    c = dquote_parse(')', 0);
614    cmdpop();
615    *bptr = '\0';
616    if (!c) {
617	/* Successfully parsed, see if it was math */
618	c = hgetc();
619	if (c == ')')
620	    return 1; /* yes */
621	hungetc(c);
622	lexstop = 0;
623	c = ')';
624    } else if (lexstop) {
625	/* we haven't got anything to unget */
626	return 2;
627    }
628    /* else unsuccessful: unget the whole thing */
629    hungetc(c);
630    lexstop = 0;
631    while (len > oldlen) {
632	len--;
633	hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr);
634    }
635    hungetc('(');
636    return 0;
637}
638
639
640/*
641 * Parse either a $(( ... )) or a $(...)
642 * Return 0 on success, 1 on failure.
643 */
644static int
645cmd_or_math_sub(void)
646{
647    int c = hgetc(), ret;
648
649    if (c == '(') {
650	add(Inpar);
651	add('(');
652	if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) {
653	    add(')');
654	    return 0;
655	}
656	if (ret == 2)
657	    return 1;
658	bptr -= 2;
659	len -= 2;
660    } else {
661	hungetc(c);
662	lexstop = 0;
663    }
664    return skipcomm();
665}
666
667/* Check whether we're looking at valid numeric globbing syntax      *
668 * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
669 * Leaves the input in the same place, returning 0 or 1.             */
670
671/**/
672static int
673isnumglob(void)
674{
675    int c, ec = '-', ret = 0;
676    int tbs = 256, n = 0;
677    char *tbuf = (char *)zalloc(tbs);
678
679    while(1) {
680	c = hgetc();
681	if(lexstop) {
682	    lexstop = 0;
683	    break;
684	}
685	tbuf[n++] = c;
686	if(!idigit(c)) {
687	    if(c != ec)
688		break;
689	    if(ec == '>') {
690		ret = 1;
691		break;
692	    }
693	    ec = '>';
694	}
695	if(n == tbs)
696	    tbuf = (char *)realloc(tbuf, tbs *= 2);
697    }
698    while(n--)
699	hungetc(tbuf[n]);
700    zfree(tbuf, tbs);
701    return ret;
702}
703
/*
 * Read one token from the input and return its type.
 *
 * Leading blanks are skipped and toklineno is set from lineno.  If the
 * input has stopped, LEXERR is returned when errflag is set and
 * ENDINPUT otherwise.  Then, in order:
 *  - while dbparens is set, the contents of (( ... )) are read as a
 *    string with dquote_parse();
 *  - a single digit directly before a redirection operator is taken as
 *    its file descriptor (peekfd, later copied to tokfd);
 *  - a comment, if comments are recognised in the current mode, is
 *    consumed up to the end of the line;
 *  - the first character is looked up in lexact1[] to recognise
 *    punctuation and redirection tokens, reading ahead as needed;
 *  - anything else starts a string, which gettokstr() completes.
 *
 * Whenever a lookahead character is pushed back, lexstop is cleared in
 * case the lookahead was what hit the end of input.
 */

/**/
static enum lextok
gettok(void)
{
    int c, d;
    int peekfd = -1;
    enum lextok peek;

  beginning:
    tokstr = NULL;
    /* skip blanks before the token */
    while (iblank(c = hgetc()) && !lexstop);
    toklineno = lineno;
    if (lexstop)
	return (errflag) ? LEXERR : ENDINPUT;
    isfirstln = 0;
    wordbeg = inbufct - (qbang && c == bangchar);
    hwbegin(-1-(qbang && c == bangchar));
    /* word includes the last character read and possibly \ before ! */
    if (dbparens) {
	/*
	 * Inside (( ... )): read up to the closing ')' as one string, or
	 * up to ';' while there are still parts of a for (( ; ; )) to come.
	 */
	len = 0;
	bptr = tokstr = (char *) hcalloc(bsiz = 32);
	hungetc(c);
	cmdpush(CS_MATH);
	c = dquote_parse(infor ? ';' : ')', 0);
	cmdpop();
	*bptr = '\0';
	if (!c && infor) {
	    infor--;
	    return DINPAR;
	}
	/* the terminating ')' must be followed by a second one */
	if (c || (c = hgetc()) != ')') {
	    hungetc(c);
	    return LEXERR;
	}
	dbparens = 0;
	return DOUTPAR;
    } else if (idigit(c)) {	/* handle 1< foo */
	d = hgetc();
	if(d == '&') {
	    d = hgetc();
	    if(d == '>') {
		/* digit before "&>": continue as if at the '&' */
		peekfd = c - '0';
		hungetc('>');
		c = '&';
	    } else {
		hungetc(d);
		lexstop = 0;
		hungetc('&');
	    }
	} else if (d == '>' || d == '<') {
	    /* digit before '<' or '>': continue as if at the operator */
	    peekfd = c - '0';
	    c = d;
	} else {
	    hungetc(d);
	    lexstop = 0;
	}
    }

    /* chars in initial position in word */

    /*
     * Handle comments.  There are some special cases when this
     * is not normal command input: lexflags implies we are examining
     * a line lexically without it being used for normal command input.
     */
    if (c == hashchar && !nocomments &&
	(isset(INTERACTIVECOMMENTS) ||
	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
	  (!interact || unset(SHINSTDIN) || strin)))) {
	/* History is handled here to prevent extra  *
	 * newlines being inserted into the history. */

	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
	    /* the comment text is to be returned as a string */
	    len = 0;
	    bptr = tokstr = (char *)hcalloc(bsiz = 32);
	    add(c);
	}
	hwend();
	/* read the rest of the line with ingetc(), not hgetc() */
	while ((c = ingetc()) != '\n' && !lexstop) {
	    hwaddc(c);
	    addtoline(c);
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
		add(c);
	}

	if (errflag)
	    peek = LEXERR;
	else {
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
		*bptr = '\0';
		/* leave the newline to be read as the next token */
		if (!lexstop)
		    hungetc(c);
		peek = STRING;
	    } else {
		hwend();
		hwbegin(0);
		hwaddc('\n');
		addtoline('\n');
		/*
		 * If splitting a line and removing comments,
		 * we don't want a newline token since it's
		 * treated specially.
		 */
		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
		    peek = ENDINPUT;
		else
		    peek = NEWLIN;
	    }
	}
	return peek;
    }
    /* punctuation and redirection operators, by first character */
    switch (lexact1[STOUC(c)]) {
    case LX1_BKSLASH:
	/* backslash-newline is a line continuation: start again */
	d = hgetc();
	if (d == '\n')
	    goto beginning;
	hungetc(d);
	lexstop = 0;
	break;
    case LX1_NEWLIN:
	return NEWLIN;
    case LX1_SEMI:
	d = hgetc();
	if(d == ';')
	    return DSEMI;
	else if(d == '&')
	    return SEMIAMP;
	else if (d == '|')
	    return SEMIBAR;
	hungetc(d);
	lexstop = 0;
	return SEMI;
    case LX1_AMPER:
	d = hgetc();
	if (d == '&')
	    return DAMPER;
	else if (d == '!' || d == '|')
	    return AMPERBANG;
	else if (d == '>') {
	    /* "&>" and its variants are redirections */
	    tokfd = peekfd;
	    d = hgetc();
	    if (d == '!' || d == '|')
		return OUTANGAMPBANG;
	    else if (d == '>') {
		d = hgetc();
		if (d == '!' || d == '|')
		    return DOUTANGAMPBANG;
		hungetc(d);
		lexstop = 0;
		return DOUTANGAMP;
	    }
	    hungetc(d);
	    lexstop = 0;
	    return AMPOUTANG;
	}
	hungetc(d);
	lexstop = 0;
	return AMPER;
    case LX1_BAR:
	d = hgetc();
	if (d == '|')
	    return DBAR;
	else if (d == '&')
	    return BARAMP;
	hungetc(d);
	lexstop = 0;
	return BAR;
    case LX1_INPAR:
	d = hgetc();
	if (d == '(') {
	    if (infor) {
		/* for (( ... )): the contents are read on later calls */
		dbparens = 1;
		return DINPAR;
	    }
	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
		len = 0;
		bptr = tokstr = (char *) hcalloc(bsiz = 32);
		switch (cmd_or_math(CS_MATH)) {
		case 1:
		    return DINPAR;

		case 0:
		    /*
		     * Not math, so we don't return the contents
		     * as a string in this case.
		     */
		    tokstr = NULL;
		    return INPAR;

		default:
		    return LEXERR;
		}
	    }
	} else if (d == ')')
	    return INOUTPAR;
	hungetc(d);
	lexstop = 0;
	/* outside command position and [[ ... ]], '(' is part of a word */
	if (!(incond == 1 || incmdpos))
	    break;
	return INPAR;
    case LX1_OUTPAR:
	return OUTPAR;
    case LX1_INANG:
	d = hgetc();
	if (d == '(') {
	    /* "<(" is handled as part of a string by gettokstr() */
	    hungetc(d);
	    lexstop = 0;
	    /*
	     * Not a redirection after all.  If a digit had been taken as
	     * a file descriptor, push the operator back and restart the
	     * word at the digit.  Also reached by goto from below.
	     */
	    unpeekfd:
	    if(peekfd != -1) {
		hungetc(c);
		c = '0' + peekfd;
	    }
	    break;
	}
	if (d == '>') {
	    peek = INOUTANG;
	} else if (d == '<') {
	    int e = hgetc();

	    if (e == '(') {
		/* "<<(" is '<' followed by "<(" */
		hungetc(e);
		hungetc(d);
		peek = INANG;
	    } else if (e == '<')
		peek = TRINANG;
	    else if (e == '-')
		peek = DINANGDASH;
	    else {
		hungetc(e);
		lexstop = 0;
		peek = DINANG;
	    }
	} else if (d == '&') {
	    peek = INANGAMP;
	} else {
	    hungetc(d);
	    /* <n-m> numeric glob: part of a word, not a redirection */
	    if(isnumglob())
		goto unpeekfd;
	    peek = INANG;
	}
	tokfd = peekfd;
	return peek;
    case LX1_OUTANG:
	d = hgetc();
	if (d == '(') {
	    /* ">(" is handled as part of a string by gettokstr() */
	    hungetc(d);
	    goto unpeekfd;
	} else if (d == '&') {
	    d = hgetc();
	    if (d == '!' || d == '|')
		peek = OUTANGAMPBANG;
	    else {
		hungetc(d);
		lexstop = 0;
		peek = OUTANGAMP;
	    }
	} else if (d == '!' || d == '|')
	    peek = OUTANGBANG;
	else if (d == '>') {
	    d = hgetc();
	    if (d == '&') {
		d = hgetc();
		if (d == '!' || d == '|')
		    peek = DOUTANGAMPBANG;
		else {
		    hungetc(d);
		    lexstop = 0;
		    peek = DOUTANGAMP;
		}
	    } else if (d == '!' || d == '|')
		peek = DOUTANGBANG;
	    else if (d == '(') {
		/* ">>(" is '>' followed by ">(" */
		hungetc(d);
		hungetc('>');
		peek = OUTANG;
	    } else {
		hungetc(d);
		lexstop = 0;
		peek = DOUTANG;
		/* with HISTALLOWCLOBBER the history gets the ">>|" form */
		if (isset(HISTALLOWCLOBBER))
		    hwaddc('|');
	    }
	} else {
	    hungetc(d);
	    lexstop = 0;
	    peek = OUTANG;
	    /* with HISTALLOWCLOBBER the history gets the ">|" form */
	    if (!incond && isset(HISTALLOWCLOBBER))
		hwaddc('|');
	}
	tokfd = peekfd;
	return peek;
    }

    /* we've started a string, now get the *
     * rest of it, performing tokenization */
    return gettokstr(c, 0);
}
1001
1002/*
1003 * Get the remains of a token string.  This has two uses.
1004 * When called from gettok(), with sub = 0, we have already identified
1005 * any interesting initial character and want to get the rest of
1006 * what we now know is a string.  However, the string may still include
1007 * metacharacters and potentially substitutions.
1008 *
1009 * When called from parse_subst_string() with sub = 1, we are not
1010 * fully parsing a command line, merely tokenizing a string.
1011 * In this case we always add characters to the parsed string
1012 * unless there is a parse error.
1013 */
1014
1015/**/
1016static enum lextok
1017gettokstr(int c, int sub)
1018{
1019    int bct = 0, pct = 0, brct = 0, fdpar = 0;
1020    int intpos = 1, in_brace_param = 0;
1021    int inquote, unmatched = 0;
1022    enum lextok peek;
1023#ifdef DEBUG
1024    int ocmdsp = cmdsp;
1025#endif
1026
1027    peek = STRING;
1028    if (!sub) {
1029	len = 0;
1030	bptr = tokstr = (char *) hcalloc(bsiz = 32);
1031    }
1032    for (;;) {
1033	int act;
1034	int e;
1035	int inbl = inblank(c);
1036
1037	if (fdpar && !inbl && c != ')')
1038	    fdpar = 0;
1039
1040	if (inbl && !in_brace_param && !pct)
1041	    act = LX2_BREAK;
1042	else {
1043	    act = lexact2[STOUC(c)];
1044	    c = lextok2[STOUC(c)];
1045	}
1046	switch (act) {
1047	case LX2_BREAK:
1048	    if (!in_brace_param && !sub)
1049		goto brk;
1050	    break;
1051	case LX2_META:
1052	    c = hgetc();
1053#ifdef DEBUG
1054	    if (lexstop) {
1055		fputs("BUG: input terminated by Meta\n", stderr);
1056		fflush(stderr);
1057		goto brk;
1058	    }
1059#endif
1060	    add(Meta);
1061	    break;
1062	case LX2_OUTPAR:
1063	    if (fdpar) {
1064		/* this is a single word `(   )', treat as INOUTPAR */
1065		add(c);
1066		*bptr = '\0';
1067		return INOUTPAR;
1068	    }
1069	    if ((sub || in_brace_param) && isset(SHGLOB))
1070		break;
1071	    if (!in_brace_param && !pct--) {
1072		if (sub) {
1073		    pct = 0;
1074		    break;
1075		} else
1076		    goto brk;
1077	    }
1078	    c = Outpar;
1079	    break;
1080	case LX2_BAR:
1081	    if (!pct && !in_brace_param) {
1082		if (sub)
1083		    break;
1084		else
1085		    goto brk;
1086	    }
1087	    if (unset(SHGLOB) || (!sub && !in_brace_param))
1088		c = Bar;
1089	    break;
1090	case LX2_STRING:
1091	    e = hgetc();
1092	    if (e == '[') {
1093		cmdpush(CS_MATHSUBST);
1094		add(String);
1095		add(Inbrack);
1096		c = dquote_parse(']', sub);
1097		cmdpop();
1098		if (c) {
1099		    peek = LEXERR;
1100		    goto brk;
1101		}
1102		c = Outbrack;
1103	    } else if (e == '(') {
1104		add(String);
1105		c = cmd_or_math_sub();
1106		if (c) {
1107		    peek = LEXERR;
1108		    goto brk;
1109		}
1110		c = Outpar;
1111	    } else {
1112		if (e == '{') {
1113		    add(c);
1114		    c = Inbrace;
1115		    ++bct;
1116		    cmdpush(CS_BRACEPAR);
1117		    if (!in_brace_param)
1118			in_brace_param = bct;
1119		} else {
1120		    hungetc(e);
1121		    lexstop = 0;
1122		}
1123	    }
1124	    break;
1125	case LX2_INBRACK:
1126	    if (!in_brace_param)
1127		brct++;
1128	    c = Inbrack;
1129	    break;
1130	case LX2_OUTBRACK:
1131	    if (!in_brace_param)
1132		brct--;
1133	    if (brct < 0)
1134		brct = 0;
1135	    c = Outbrack;
1136	    break;
1137	case LX2_INPAR:
1138	    if (isset(SHGLOB)) {
1139		if (sub || in_brace_param)
1140		    break;
1141		if (incasepat && !len)
1142		    return INPAR;
1143		if (!isset(KSHGLOB) && len)
1144		    goto brk;
1145	    }
1146	    if (!in_brace_param) {
1147		if (!sub) {
1148		    e = hgetc();
1149		    hungetc(e);
1150		    lexstop = 0;
1151		    /* For command words, parentheses are only
1152		     * special at the start.  But now we're tokenising
1153		     * the remaining string.  So I don't see what
1154		     * the old incmdpos test here is for.
1155		     *   pws 1999/6/8
1156		     *
1157		     * Oh, no.
1158		     *  func1(   )
1159		     * is a valid function definition in [k]sh.  The best
1160		     * thing we can do, without really nasty lookahead tricks,
1161		     * is break if we find a blank after a parenthesis.  At
1162		     * least this can't happen inside braces or brackets.  We
1163		     * only allow this with SHGLOB (set for both sh and ksh).
1164		     *
1165		     * Things like `print @( |foo)' should still
1166		     * work, because [k]sh don't allow multiple words
1167		     * in a function definition, so we only do this
1168		     * in command position.
1169		     *   pws 1999/6/14
1170		     */
1171		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
1172				     !brct && !intpos && incmdpos)) {
1173			/*
1174			 * Either a () token, or a command word with
1175			 * something suspiciously like a ksh function
1176			 * definition.
1177			 * The current word isn't spellcheckable.
1178			 */
1179			nocorrect |= 2;
1180			goto brk;
1181		    }
1182		}
1183		/*
1184		 * This also handles the [k]sh `foo( )' function definition.
1185		 * Maintain a variable fdpar, set as long as a single set of
1186		 * parentheses contains only space.  Then if we get to the
1187		 * closing parenthesis and it is still set, we can assume we
1188		 * have a function definition.  Only do this at the start of
1189		 * the word, since the (...) must be a separate token.
1190		 */
1191		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
1192		    fdpar = 1;
1193	    }
1194	    c = Inpar;
1195	    break;
1196	case LX2_INBRACE:
1197	    if (isset(IGNOREBRACES) || sub)
1198		c = '{';
1199	    else {
1200		if (!len && incmdpos) {
1201		    add('{');
1202		    *bptr = '\0';
1203		    return STRING;
1204		}
1205		if (in_brace_param) {
1206		    cmdpush(CS_BRACE);
1207		}
1208		bct++;
1209	    }
1210	    break;
1211	case LX2_OUTBRACE:
1212	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
1213		break;
1214	    if (!bct)
1215		break;
1216	    if (in_brace_param) {
1217		cmdpop();
1218	    }
1219	    if (bct-- == in_brace_param)
1220		in_brace_param = 0;
1221	    c = Outbrace;
1222	    break;
1223	case LX2_COMMA:
1224	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
1225		c = Comma;
1226	    break;
1227	case LX2_OUTANG:
1228	    if (in_brace_param || sub)
1229		break;
1230	    e = hgetc();
1231	    if (e != '(') {
1232		hungetc(e);
1233		lexstop = 0;
1234		goto brk;
1235	    }
1236	    add(OutangProc);
1237	    if (skipcomm()) {
1238		peek = LEXERR;
1239		goto brk;
1240	    }
1241	    c = Outpar;
1242	    break;
1243	case LX2_INANG:
1244	    if (isset(SHGLOB) && sub)
1245		break;
1246	    e = hgetc();
1247	    if (!(in_brace_param || sub) && e == '(') {
1248		add(Inang);
1249		if (skipcomm()) {
1250		    peek = LEXERR;
1251		    goto brk;
1252		}
1253		c = Outpar;
1254		break;
1255	    }
1256	    hungetc(e);
1257	    if(isnumglob()) {
1258		add(Inang);
1259		while ((c = hgetc()) != '>')
1260		    add(c);
1261		c = Outang;
1262		break;
1263	    }
1264	    lexstop = 0;
1265	    if (in_brace_param || sub)
1266		break;
1267	    goto brk;
1268	case LX2_EQUALS:
1269	    if (!sub) {
1270		if (intpos) {
1271		    e = hgetc();
1272		    if (e != '(') {
1273			hungetc(e);
1274			lexstop = 0;
1275			c = Equals;
1276		    } else {
1277			add(Equals);
1278			if (skipcomm()) {
1279			    peek = LEXERR;
1280			    goto brk;
1281			}
1282			c = Outpar;
1283		    }
1284		} else if (peek != ENVSTRING &&
1285			   incmdpos && !bct && !brct) {
1286		    char *t = tokstr;
1287		    if (idigit(*t))
1288			while (++t < bptr && idigit(*t));
1289		    else {
1290			int sav = *bptr;
1291			*bptr = '\0';
1292			t = itype_end(t, IIDENT, 0);
1293			if (t < bptr) {
1294			    skipparens(Inbrack, Outbrack, &t);
1295			} else {
1296			    *bptr = sav;
1297			}
1298		    }
1299		    if (*t == '+')
1300			t++;
1301		    if (t == bptr) {
1302			e = hgetc();
1303			if (e == '(' && incmdpos) {
1304			    *bptr = '\0';
1305			    return ENVARRAY;
1306			}
1307			hungetc(e);
1308			lexstop = 0;
1309			peek = ENVSTRING;
1310			intpos = 2;
1311		    } else
1312			c = Equals;
1313		} else
1314		    c = Equals;
1315	    }
1316	    break;
1317	case LX2_BKSLASH:
1318	    c = hgetc();
1319	    if (c == '\n') {
1320		c = hgetc();
1321		if (!lexstop)
1322		    continue;
1323	    } else
1324		add(Bnull);
1325	    if (lexstop)
1326		goto brk;
1327	    break;
1328	case LX2_QUOTE: {
1329	    int strquote = (len && bptr[-1] == String);
1330
1331	    add(Snull);
1332	    cmdpush(CS_QUOTE);
1333	    for (;;) {
1334		STOPHIST
1335		while ((c = hgetc()) != '\'' && !lexstop) {
1336		    if (strquote && c == '\\') {
1337			c = hgetc();
1338			if (lexstop)
1339			    break;
1340			/*
1341			 * Mostly we don't need to do anything special
1342			 * with escape backslashes or closing quotes
1343			 * inside $'...'; however in completion we
1344			 * need to be able to strip multiple backslashes
1345			 * neatly.
1346			 */
1347			if (c == '\\' || c == '\'')
1348			    add(Bnull);
1349			else
1350			    add('\\');
1351		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1352			if (bptr[-1] == '\\')
1353			    bptr--, len--;
1354			else
1355			    break;
1356		    }
1357		    add(c);
1358		}
1359		ALLOWHIST
1360		if (c != '\'') {
1361		    unmatched = '\'';
1362		    peek = LEXERR;
1363		    cmdpop();
1364		    goto brk;
1365		}
1366		e = hgetc();
1367		if (e != '\'' || unset(RCQUOTES) || strquote)
1368		    break;
1369		add(c);
1370	    }
1371	    cmdpop();
1372	    hungetc(e);
1373	    lexstop = 0;
1374	    c = Snull;
1375	    break;
1376	}
1377	case LX2_DQUOTE:
1378	    add(Dnull);
1379	    cmdpush(CS_DQUOTE);
1380	    c = dquote_parse('"', sub);
1381	    cmdpop();
1382	    if (c) {
1383		unmatched = '"';
1384		peek = LEXERR;
1385		goto brk;
1386	    }
1387	    c = Dnull;
1388	    break;
1389	case LX2_BQUOTE:
1390	    add(Tick);
1391	    cmdpush(CS_BQUOTE);
1392	    SETPARBEGIN
1393	    inquote = 0;
1394	    while ((c = hgetc()) != '`' && !lexstop) {
1395		if (c == '\\') {
1396		    c = hgetc();
1397		    if (c != '\n') {
1398			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
1399			add(c);
1400		    }
1401		    else if (!sub && isset(CSHJUNKIEQUOTES))
1402			add(c);
1403		} else {
1404		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1405			break;
1406		    }
1407		    add(c);
1408		    if (c == '\'') {
1409			if ((inquote = !inquote))
1410			    STOPHIST
1411			else
1412			    ALLOWHIST
1413		    }
1414		}
1415	    }
1416	    if (inquote)
1417		ALLOWHIST
1418	    cmdpop();
1419	    if (c != '`') {
1420		unmatched = '`';
1421		peek = LEXERR;
1422		goto brk;
1423	    }
1424	    c = Tick;
1425	    SETPAREND
1426	    break;
1427	}
1428	add(c);
1429	c = hgetc();
1430	if (intpos)
1431	    intpos--;
1432	if (lexstop)
1433	    break;
1434    }
1435  brk:
1436    hungetc(c);
1437    if (unmatched)
1438	zerr("unmatched %c", unmatched);
1439    if (in_brace_param) {
1440	while(bct-- >= in_brace_param)
1441	    cmdpop();
1442	zerr("closing brace expected");
1443    } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
1444	       peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
1445	/* hack to get {foo} command syntax work */
1446	bptr--;
1447	len--;
1448	lexstop = 0;
1449	hungetc('}');
1450    }
1451    *bptr = '\0';
1452    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
1453    return peek;
1454}
1455
1456
1457/*
1458 * Parse input as if in double quotes.
1459 * endchar is the end character to expect.
1460 * sub has got something to do with whether we are doing quoted substitution.
1461 * Return non-zero for error (character to unget), else zero
1462 */
1463
1464/**/
1465static int
1466dquote_parse(char endchar, int sub)
1467{
1468    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
1469    int c;
1470    int math = endchar == ')' || endchar == ']';
1471    int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
1472
1473    while (((c = hgetc()) != endchar || bct ||
1474	    (math && ((pct > 0) || (brct > 0))) ||
1475	    intick) && !lexstop) {
1476      cont:
1477	switch (c) {
1478	case '\\':
1479	    c = hgetc();
1480	    if (c != '\n') {
1481		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
1482		    c == endchar || c == '`' ||
1483		    (endchar == ']' && (c == '[' || c == ']' ||
1484					c == '(' || c == ')' ||
1485					c == '{' || c == '}' ||
1486					(c == '"' && sub))))
1487		    add(Bnull);
1488		else {
1489		    /* lexstop is implicitly handled here */
1490		    add('\\');
1491		    goto cont;
1492		}
1493	    } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
1494		continue;
1495	    break;
1496	case '\n':
1497	    err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
1498	    break;
1499	case '$':
1500	    if (intick)
1501		break;
1502	    c = hgetc();
1503	    if (c == '(') {
1504		add(Qstring);
1505		err = cmd_or_math_sub();
1506		c = Outpar;
1507	    } else if (c == '[') {
1508		add(String);
1509		add(Inbrack);
1510		cmdpush(CS_MATHSUBST);
1511		err = dquote_parse(']', sub);
1512		cmdpop();
1513		c = Outbrack;
1514	    } else if (c == '{') {
1515		add(Qstring);
1516		c = Inbrace;
1517		cmdpush(CS_BRACEPAR);
1518		bct++;
1519	    } else if (c == '$')
1520		add(Qstring);
1521	    else {
1522		hungetc(c);
1523		lexstop = 0;
1524		c = Qstring;
1525	    }
1526	    break;
1527	case '}':
1528	    if (intick || !bct)
1529		break;
1530	    c = Outbrace;
1531	    bct--;
1532	    cmdpop();
1533	    break;
1534	case '`':
1535	    c = Qtick;
1536	    if (intick == 2)
1537		ALLOWHIST
1538	    if ((intick = !intick)) {
1539		SETPARBEGIN
1540		cmdpush(CS_BQUOTE);
1541	    } else {
1542		SETPAREND
1543	        cmdpop();
1544	    }
1545	    break;
1546	case '\'':
1547	    if (!intick)
1548		break;
1549	    if (intick == 1)
1550		intick = 2, STOPHIST
1551	    else
1552		intick = 1, ALLOWHIST
1553	    break;
1554	case '(':
1555	    if (!math || !bct)
1556		pct++;
1557	    break;
1558	case ')':
1559	    if (!math || !bct)
1560		err = (!pct-- && math);
1561	    break;
1562	case '[':
1563	    if (!math || !bct)
1564		brct++;
1565	    break;
1566	case ']':
1567	    if (!math || !bct)
1568		err = (!brct-- && math);
1569	    break;
1570	case '"':
1571	    if (intick || (endchar != '"' && !bct))
1572		break;
1573	    if (bct) {
1574		add(Dnull);
1575		cmdpush(CS_DQUOTE);
1576		err = dquote_parse('"', sub);
1577		cmdpop();
1578		c = Dnull;
1579	    } else
1580		err = 1;
1581	    break;
1582	}
1583	if (err || lexstop)
1584	    break;
1585	add(c);
1586    }
1587    if (intick == 2)
1588	ALLOWHIST
1589    if (intick) {
1590	cmdpop();
1591    }
1592    while (bct--)
1593	cmdpop();
1594    if (lexstop)
1595	err = intick || endchar || err;
1596    else if (err == 1) {
1597	/*
1598	 * TODO: as far as I can see, this hack is used in gettokstr()
1599	 * to hungetc() a character on an error.  However, I don't
1600	 * understand what that actually gets us, and we can't guarantee
1601	 * it's a character anyway, because of the previous test.
1602	 *
1603	 * We use the same feature in cmd_or_math where we actually do
1604	 * need to unget if we decide it's really a command substitution.
1605	 * We try to handle the other case by testing for lexstop.
1606	 */
1607	err = c;
1608    }
1609    if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
1610	inwhat = IN_MATH;
1611    return err;
1612}
1613
1614/* Tokenize a string given in s. Parsing is done as in double *
1615 * quotes.  This is usually called before singsub().          */
1616
1617/**/
1618mod_export int
1619parsestr(char *s)
1620{
1621    int err;
1622
1623    if ((err = parsestrnoerr(s))) {
1624	untokenize(s);
1625	if (err > 32 && err < 127)
1626	    zerr("parse error near `%c'", err);
1627	else
1628	    zerr("parse error");
1629    }
1630    return err;
1631}
1632
1633/**/
1634mod_export int
1635parsestrnoerr(char *s)
1636{
1637    int l = strlen(s), err;
1638
1639    lexsave();
1640    untokenize(s);
1641    inpush(dupstring(s), 0, NULL);
1642    strinbeg(0);
1643    len = 0;
1644    bptr = tokstr = s;
1645    bsiz = l + 1;
1646    err = dquote_parse('\0', 1);
1647    *bptr = '\0';
1648    strinend();
1649    inpop();
1650    DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
1651    lexrestore();
1652    return err;
1653}
1654
1655/*
1656 * Parse a subscript in string s.
1657 * sub is passed down to dquote_parse().
1658 * endchar is the final character.
1659 * Return the next character, or NULL.
1660 */
1661/**/
1662mod_export char *
1663parse_subscript(char *s, int sub, int endchar)
1664{
1665    int l = strlen(s), err;
1666    char *t;
1667
1668    if (!*s || *s == endchar)
1669	return 0;
1670    lexsave();
1671    untokenize(t = dupstring(s));
1672    inpush(t, 0, NULL);
1673    strinbeg(0);
1674    len = 0;
1675    bptr = tokstr = s;
1676    bsiz = l + 1;
1677    err = dquote_parse(endchar, sub);
1678    if (err) {
1679	err = *bptr;
1680	*bptr = '\0';
1681	untokenize(s);
1682	*bptr = err;
1683	s = NULL;
1684    } else {
1685	s = bptr;
1686    }
1687    strinend();
1688    inpop();
1689    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
1690    lexrestore();
1691    return s;
1692}
1693
1694/* Tokenize a string given in s. Parsing is done as if s were a normal *
1695 * command-line argument but it may contain separators.  This is used  *
1696 * to parse the right-hand side of ${...%...} substitutions.           */
1697
1698/**/
1699mod_export int
1700parse_subst_string(char *s)
1701{
1702    int c, l = strlen(s), err;
1703    char *ptr;
1704    enum lextok ctok;
1705
1706    if (!*s || !strcmp(s, nulstring))
1707	return 0;
1708    lexsave();
1709    untokenize(s);
1710    inpush(dupstring(s), 0, NULL);
1711    strinbeg(0);
1712    len = 0;
1713    bptr = tokstr = s;
1714    bsiz = l + 1;
1715    c = hgetc();
1716    ctok = gettokstr(c, 1);
1717    err = errflag;
1718    strinend();
1719    inpop();
1720    DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
1721    lexrestore();
1722    errflag = err;
1723    if (ctok == LEXERR) {
1724	untokenize(s);
1725	return 1;
1726    }
1727#ifdef DEBUG
1728    /*
1729     * Historical note: we used to check here for olen (the value of len
1730     * before lexrestore()) == l, but that's not necessarily the case if
1731     * we stripped an RCQUOTE.
1732     */
1733    if (ctok != STRING || (errflag && !noerrs)) {
1734	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
1735		errflag ? "errflag" : "ctok != STRING");
1736	fflush(stderr);
1737	untokenize(s);
1738	return 1;
1739    }
1740#endif
1741    /* Check for $'...' quoting.  This needs special handling. */
1742    for (ptr = s; *ptr; )
1743    {
1744	if (*ptr == String && ptr[1] == Snull)
1745	{
1746	    char *t;
1747	    int len, tlen, diff;
1748	    t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
1749	    len += 2;
1750	    tlen = strlen(t);
1751	    diff = len - tlen;
1752	    /*
1753	     * Yuk.
1754	     * parse_subst_string() currently handles strings in-place.
1755	     * That's not so easy to fix without knowing whether
1756	     * additional memory should come off the heap or
1757	     * otherwise.  So we cheat by copying the unquoted string
1758	     * into place, unless it's too long.  That's not the
1759	     * normal case, but I'm worried there are pathological
1760	     * cases with converting metafied multibyte strings.
1761	     * If someone can prove there aren't I will be very happy.
1762	     */
1763	    if (diff < 0) {
1764		DPUTS(1, "$'...' subst too long: fix get_parse_string()");
1765		return 1;
1766	    }
1767	    memcpy(ptr, t, tlen);
1768	    ptr += tlen;
1769	    if (diff > 0) {
1770		char *dptr = ptr;
1771		char *sptr = ptr + diff;
1772		while ((*dptr++ = *sptr++))
1773		    ;
1774	    }
1775	} else
1776	    ptr++;
1777    }
1778    return 0;
1779}
1780
1781/* Called below to report word positions. */
1782
1783/**/
1784mod_export void
1785gotword(void)
1786{
1787    we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
1788    if (zlemetacs <= we) {
1789	wb = zlemetall - wordbeg + addedx;
1790	lexflags = 0;
1791    }
1792}
1793
1794/* expand aliases and reserved words */
1795
1796/**/
1797int
1798exalias(void)
1799{
1800    Alias an;
1801    Reswd rw;
1802
1803    hwend();
1804    if (interact && isset(SHINSTDIN) && !strin && !incasepat &&
1805	tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
1806	(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
1807	spckword(&tokstr, 1, incmdpos, 1);
1808
1809    if (!tokstr) {
1810	zshlextext = tokstrings[tok];
1811
1812	return 0;
1813    } else {
1814	VARARR(char, copy, (strlen(tokstr) + 1));
1815
1816	if (has_token(tokstr)) {
1817	    char *p, *t;
1818
1819	    zshlextext = p = copy;
1820	    for (t = tokstr;
1821		 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
1822	} else
1823	    zshlextext = tokstr;
1824
1825	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
1826	    int zp = lexflags;
1827
1828	    gotword();
1829	    if ((zp & LEXFLAGS_ZLE) && !lexflags) {
1830		if (zshlextext == copy)
1831		    zshlextext = tokstr;
1832		return 0;
1833	    }
1834	}
1835
1836	if (tok == STRING) {
1837	    /* Check for an alias */
1838	    if (!noaliases && isset(ALIASESOPT) &&
1839		(!isset(POSIXALIASES) ||
1840		 !reswdtab->getnode(reswdtab, zshlextext))) {
1841		char *suf;
1842
1843		an = (Alias) aliastab->getnode(aliastab, zshlextext);
1844		if (an && !an->inuse &&
1845		    ((an->node.flags & ALIAS_GLOBAL) || incmdpos || inalmore)) {
1846		    inpush(an->text, INP_ALIAS, an);
1847		    if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
1848			aliasspaceflag = 1;
1849		    lexstop = 0;
1850		    if (zshlextext == copy)
1851			zshlextext = tokstr;
1852		    return 1;
1853		}
1854		if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
1855		    suf > zshlextext && suf[-1] != Meta &&
1856		    (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
1857		    !an->inuse && incmdpos) {
1858		    inpush(dupstring(zshlextext), INP_ALIAS, NULL);
1859		    inpush(" ", INP_ALIAS, NULL);
1860		    inpush(an->text, INP_ALIAS, an);
1861		    lexstop = 0;
1862		    if (zshlextext == copy)
1863			zshlextext = tokstr;
1864		    return 1;
1865		}
1866	    }
1867
1868	    /* Then check for a reserved word */
1869	    if ((incmdpos ||
1870		 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
1871		  zshlextext[0] == '}' && !zshlextext[1])) &&
1872		(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
1873		tok = rw->token;
1874		if (tok == DINBRACK)
1875		    incond = 1;
1876	    } else if (incond && !strcmp(zshlextext, "]]")) {
1877		tok = DOUTBRACK;
1878		incond = 0;
1879	    } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
1880		tok = BANG;
1881	}
1882	inalmore = 0;
1883	if (zshlextext == copy)
1884	    zshlextext = tokstr;
1885    }
1886    return 0;
1887}
1888
1889/* skip (...) */
1890
1891/**/
1892static int
1893skipcomm(void)
1894{
1895    int pct = 1, c, start = 1;
1896
1897    cmdpush(CS_CMDSUBST);
1898    SETPARBEGIN
1899    c = Inpar;
1900    do {
1901	int iswhite;
1902	add(c);
1903	c = hgetc();
1904	if (itok(c) || lexstop)
1905	    break;
1906	iswhite = inblank(c);
1907	switch (c) {
1908	case '(':
1909	    pct++;
1910	    break;
1911	case ')':
1912	    pct--;
1913	    break;
1914	case '\\':
1915	    add(c);
1916	    c = hgetc();
1917	    break;
1918	case '\'': {
1919	    int strquote = bptr[-1] == '$';
1920	    add(c);
1921	    STOPHIST
1922	    while ((c = hgetc()) != '\'' && !lexstop) {
1923		if (c == '\\' && strquote) {
1924		    add(c);
1925		    c = hgetc();
1926		}
1927		add(c);
1928	    }
1929	    ALLOWHIST
1930	    break;
1931	}
1932	case '\"':
1933	    add(c);
1934	    while ((c = hgetc()) != '\"' && !lexstop)
1935		if (c == '\\') {
1936		    add(c);
1937		    add(hgetc());
1938		} else
1939		    add(c);
1940	    break;
1941	case '`':
1942	    add(c);
1943	    while ((c = hgetc()) != '`' && !lexstop)
1944		if (c == '\\')
1945		    add(c), add(hgetc());
1946		else
1947		    add(c);
1948	    break;
1949	case '#':
1950	    if (start) {
1951		add(c);
1952		while ((c = hgetc()) != '\n' && !lexstop)
1953		    add(c);
1954		iswhite = 1;
1955	    }
1956	    break;
1957	}
1958	start = iswhite;
1959    }
1960    while (pct);
1961    if (!lexstop)
1962	SETPAREND
1963    cmdpop();
1964    return lexstop;
1965}
1966