1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if HAVE_ALLOCA_H
36#include <alloca.h>
37#endif
38
39#include <stdlib.h>
40
41#include "shell.h"
42#include "parser.h"
43#include "nodes.h"
44#include "expand.h"	/* defines rmescapes() */
45#include "exec.h"	/* defines find_builtin() */
46#include "syntax.h"
47#include "options.h"
48#include "input.h"
49#include "output.h"
50#include "var.h"
51#include "error.h"
52#include "memalloc.h"
53#include "mystring.h"
54#include "alias.h"
55#include "show.h"
56#include "builtins.h"
57#include "system.h"
58
59/*
60 * Shell command parser.
61 */
62
63/* values returned by readtoken */
64#include "token_vars.h"
65
66
67
/*
 * Sentinel end-of-file mark used by expandstr to get here-doc like
 * behaviour.  It is never dereferenced; realeofmark() tells it apart from
 * a genuine delimiter string.  The expansion is parenthesized so the macro
 * behaves as a single operand wherever it is used.
 */
#define FAKEEOFMARK ((char *)1)
70
71
72
/*
 * A here-document whose body has not been read yet.  Entries are appended
 * to heredoclist by parsefname() and consumed by parseheredoc().
 */
struct heredoc {
	struct heredoc *next;	/* following entry on the pending list */
	union node *here;	/* redirection node that receives the body */
	char *eofmark;		/* delimiter word ending the document */
	int striptabs;		/* nonzero: strip leading tabs (set for "<<-") */
};
79
80
81
/* Here-document bookkeeping. */
struct heredoc *heredoclist;	/* here documents still waiting to be read */
struct heredoc *heredoc;	/* entry created for the latest "<<" operator */

/* Prompting. */
int doprompt;			/* nonzero: prompt the user */
int needprompt;			/* nonzero: interactive and at start of line */

/* State of the most recent token. */
int lasttoken;			/* last token read */
int tokpushback;		/* nonzero: hand lasttoken out again */
char *wordtext;			/* text of last word returned by readtoken */
int quoteflag;			/* set if (part of) last token was quoted */
struct nodelist *backquotelist;	/* command substitutions of the last word */
union node *redirnode;		/* node built for the last TREDIR token */

/* CHK* flags for the next readtoken() call; readtoken() resets them to 0. */
int checkkwd;
93
94
95STATIC union node *list(int);
96STATIC union node *andor(void);
97STATIC union node *pipeline(void);
98STATIC union node *command(void);
99STATIC union node *simplecmd(void);
100STATIC union node *makename(void);
101STATIC void parsefname(void);
102STATIC void parseheredoc(void);
103STATIC int peektoken(void);
104STATIC int readtoken(void);
105STATIC int xxreadtoken(void);
106STATIC int readtoken1(int, char const *, char *, int);
107STATIC void synexpect(int) __attribute__((__noreturn__));
108STATIC void synerror(const char *) __attribute__((__noreturn__));
109STATIC void setprompt(int);
110
111
/*
 * Return nonzero when p begins with a non-empty name (as delimited by
 * endofname()) that is immediately followed by '='.
 */
static inline int
isassignment(const char *p)
{
	const char *end = endofname(p);

	return end != p && *end == '=';
}
120
121static inline int realeofmark(const char *eofmark)
122{
123	return eofmark && eofmark != FAKEEOFMARK;
124}
125
126
127/*
128 * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
129 * valid parse tree indicating a blank line.)
130 */
131
132union node *
133parsecmd(int interact)
134{
135	tokpushback = 0;
136	checkkwd = 0;
137	heredoclist = 0;
138	doprompt = interact;
139	if (doprompt)
140		setprompt(doprompt);
141	needprompt = 0;
142	return list(1);
143}
144
145
146STATIC union node *
147list(int nlflag)
148{
149	union node *n1, *n2, *n3;
150	int tok;
151
152	n1 = NULL;
153	for (;;) {
154		switch (peektoken()) {
155		case TNL:
156			if (!(nlflag & 1))
157				break;
158			parseheredoc();
159			return n1;
160
161		case TEOF:
162			if (!n1 && (nlflag & 1))
163				n1 = NEOF;
164			parseheredoc();
165			return n1;
166		}
167
168		checkkwd = CHKNL | CHKKWD | CHKALIAS;
169		if (nlflag == 2 && tokendlist[peektoken()])
170			return n1;
171		nlflag |= 2;
172
173		n2 = andor();
174		tok = readtoken();
175		if (tok == TBACKGND) {
176			if (n2->type == NPIPE) {
177				n2->npipe.backgnd = 1;
178			} else {
179				if (n2->type != NREDIR) {
180					n3 = stalloc(sizeof(struct nredir));
181					n3->nredir.n = n2;
182					n3->nredir.redirect = NULL;
183					n2 = n3;
184				}
185				n2->type = NBACKGND;
186			}
187		}
188		if (n1 == NULL) {
189			n1 = n2;
190		}
191		else {
192			n3 = (union node *)stalloc(sizeof (struct nbinary));
193			n3->type = NSEMI;
194			n3->nbinary.ch1 = n1;
195			n3->nbinary.ch2 = n2;
196			n1 = n3;
197		}
198		switch (tok) {
199		case TNL:
200		case TEOF:
201			tokpushback++;
202			/* fall through */
203		case TBACKGND:
204		case TSEMI:
205			break;
206		default:
207			if ((nlflag & 1))
208				synexpect(-1);
209			tokpushback++;
210			return n1;
211		}
212	}
213}
214
215
216
217STATIC union node *
218andor(void)
219{
220	union node *n1, *n2, *n3;
221	int t;
222
223	n1 = pipeline();
224	for (;;) {
225		if ((t = readtoken()) == TAND) {
226			t = NAND;
227		} else if (t == TOR) {
228			t = NOR;
229		} else {
230			tokpushback++;
231			return n1;
232		}
233		checkkwd = CHKNL | CHKKWD | CHKALIAS;
234		n2 = pipeline();
235		n3 = (union node *)stalloc(sizeof (struct nbinary));
236		n3->type = t;
237		n3->nbinary.ch1 = n1;
238		n3->nbinary.ch2 = n2;
239		n1 = n3;
240	}
241}
242
243
244
245STATIC union node *
246pipeline(void)
247{
248	union node *n1, *n2, *pipenode;
249	struct nodelist *lp, *prev;
250	int negate;
251
252	negate = 0;
253	TRACE(("pipeline: entered\n"));
254	if (readtoken() == TNOT) {
255		negate = !negate;
256		checkkwd = CHKKWD | CHKALIAS;
257	} else
258		tokpushback++;
259	n1 = command();
260	if (readtoken() == TPIPE) {
261		pipenode = (union node *)stalloc(sizeof (struct npipe));
262		pipenode->type = NPIPE;
263		pipenode->npipe.backgnd = 0;
264		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
265		pipenode->npipe.cmdlist = lp;
266		lp->n = n1;
267		do {
268			prev = lp;
269			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270			checkkwd = CHKNL | CHKKWD | CHKALIAS;
271			lp->n = command();
272			prev->next = lp;
273		} while (readtoken() == TPIPE);
274		lp->next = NULL;
275		n1 = pipenode;
276	}
277	tokpushback++;
278	if (negate) {
279		n2 = (union node *)stalloc(sizeof (struct nnot));
280		n2->type = NNOT;
281		n2->nnot.com = n1;
282		return n2;
283	} else
284		return n1;
285}
286
287
288
289STATIC union node *
290command(void)
291{
292	union node *n1, *n2;
293	union node *ap, **app;
294	union node *cp, **cpp;
295	union node *redir, **rpp;
296	union node **rpp2;
297	int t;
298	int savelinno;
299
300	redir = NULL;
301	rpp2 = &redir;
302
303	savelinno = plinno;
304
305	switch (readtoken()) {
306	default:
307		synexpect(-1);
308		/* NOTREACHED */
309	case TIF:
310		n1 = (union node *)stalloc(sizeof (struct nif));
311		n1->type = NIF;
312		n1->nif.test = list(0);
313		if (readtoken() != TTHEN)
314			synexpect(TTHEN);
315		n1->nif.ifpart = list(0);
316		n2 = n1;
317		while (readtoken() == TELIF) {
318			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
319			n2 = n2->nif.elsepart;
320			n2->type = NIF;
321			n2->nif.test = list(0);
322			if (readtoken() != TTHEN)
323				synexpect(TTHEN);
324			n2->nif.ifpart = list(0);
325		}
326		if (lasttoken == TELSE)
327			n2->nif.elsepart = list(0);
328		else {
329			n2->nif.elsepart = NULL;
330			tokpushback++;
331		}
332		t = TFI;
333		break;
334	case TWHILE:
335	case TUNTIL: {
336		int got;
337		n1 = (union node *)stalloc(sizeof (struct nbinary));
338		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
339		n1->nbinary.ch1 = list(0);
340		if ((got=readtoken()) != TDO) {
341TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
342			synexpect(TDO);
343		}
344		n1->nbinary.ch2 = list(0);
345		t = TDONE;
346		break;
347	}
348	case TFOR:
349		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
350			synerror("Bad for loop variable");
351		n1 = (union node *)stalloc(sizeof (struct nfor));
352		n1->type = NFOR;
353		n1->nfor.linno = savelinno;
354		n1->nfor.var = wordtext;
355		checkkwd = CHKNL | CHKKWD | CHKALIAS;
356		if (readtoken() == TIN) {
357			app = &ap;
358			while (readtoken() == TWORD) {
359				n2 = (union node *)stalloc(sizeof (struct narg));
360				n2->type = NARG;
361				n2->narg.text = wordtext;
362				n2->narg.backquote = backquotelist;
363				*app = n2;
364				app = &n2->narg.next;
365			}
366			*app = NULL;
367			n1->nfor.args = ap;
368			if (lasttoken != TNL && lasttoken != TSEMI)
369				synexpect(-1);
370		} else {
371			n2 = (union node *)stalloc(sizeof (struct narg));
372			n2->type = NARG;
373			n2->narg.text = (char *)dolatstr;
374			n2->narg.backquote = NULL;
375			n2->narg.next = NULL;
376			n1->nfor.args = n2;
377			/*
378			 * Newline or semicolon here is optional (but note
379			 * that the original Bourne shell only allowed NL).
380			 */
381			if (lasttoken != TSEMI)
382				tokpushback++;
383		}
384		checkkwd = CHKNL | CHKKWD | CHKALIAS;
385		if (readtoken() != TDO)
386			synexpect(TDO);
387		n1->nfor.body = list(0);
388		t = TDONE;
389		break;
390	case TCASE:
391		n1 = (union node *)stalloc(sizeof (struct ncase));
392		n1->type = NCASE;
393		n1->ncase.linno = savelinno;
394		if (readtoken() != TWORD)
395			synexpect(TWORD);
396		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
397		n2->type = NARG;
398		n2->narg.text = wordtext;
399		n2->narg.backquote = backquotelist;
400		n2->narg.next = NULL;
401		checkkwd = CHKNL | CHKKWD | CHKALIAS;
402		if (readtoken() != TIN)
403			synexpect(TIN);
404		cpp = &n1->ncase.cases;
405next_case:
406		checkkwd = CHKNL | CHKKWD;
407		t = readtoken();
408		while(t != TESAC) {
409			if (lasttoken == TLP)
410				readtoken();
411			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
412			cp->type = NCLIST;
413			app = &cp->nclist.pattern;
414			for (;;) {
415				*app = ap = (union node *)stalloc(sizeof (struct narg));
416				ap->type = NARG;
417				ap->narg.text = wordtext;
418				ap->narg.backquote = backquotelist;
419				if (readtoken() != TPIPE)
420					break;
421				app = &ap->narg.next;
422				readtoken();
423			}
424			ap->narg.next = NULL;
425			if (lasttoken != TRP)
426				synexpect(TRP);
427			cp->nclist.body = list(2);
428
429			cpp = &cp->nclist.next;
430
431			checkkwd = CHKNL | CHKKWD;
432			if ((t = readtoken()) != TESAC) {
433				if (t != TENDCASE)
434					synexpect(TENDCASE);
435				else
436					goto next_case;
437			}
438		}
439		*cpp = NULL;
440		goto redir;
441	case TLP:
442		n1 = (union node *)stalloc(sizeof (struct nredir));
443		n1->type = NSUBSHELL;
444		n1->nredir.linno = savelinno;
445		n1->nredir.n = list(0);
446		n1->nredir.redirect = NULL;
447		t = TRP;
448		break;
449	case TBEGIN:
450		n1 = list(0);
451		t = TEND;
452		break;
453	case TWORD:
454	case TREDIR:
455		tokpushback++;
456		return simplecmd();
457	}
458
459	if (readtoken() != t)
460		synexpect(t);
461
462redir:
463	/* Now check for redirection which may follow command */
464	checkkwd = CHKKWD | CHKALIAS;
465	rpp = rpp2;
466	while (readtoken() == TREDIR) {
467		*rpp = n2 = redirnode;
468		rpp = &n2->nfile.next;
469		parsefname();
470	}
471	tokpushback++;
472	*rpp = NULL;
473	if (redir) {
474		if (n1->type != NSUBSHELL) {
475			n2 = (union node *)stalloc(sizeof (struct nredir));
476			n2->type = NREDIR;
477			n2->nredir.linno = savelinno;
478			n2->nredir.n = n1;
479			n1 = n2;
480		}
481		n1->nredir.redirect = redir;
482	}
483
484	return n1;
485}
486
487
488STATIC union node *
489simplecmd(void) {
490	union node *args, **app;
491	union node *n = NULL;
492	union node *vars, **vpp;
493	union node **rpp, *redir;
494	int savecheckkwd;
495	int savelinno;
496
497	args = NULL;
498	app = &args;
499	vars = NULL;
500	vpp = &vars;
501	redir = NULL;
502	rpp = &redir;
503
504	savecheckkwd = CHKALIAS;
505	savelinno = plinno;
506	for (;;) {
507		checkkwd = savecheckkwd;
508		switch (readtoken()) {
509		case TWORD:
510			n = (union node *)stalloc(sizeof (struct narg));
511			n->type = NARG;
512			n->narg.text = wordtext;
513			n->narg.backquote = backquotelist;
514			if (savecheckkwd && isassignment(wordtext)) {
515				*vpp = n;
516				vpp = &n->narg.next;
517			} else {
518				*app = n;
519				app = &n->narg.next;
520				savecheckkwd = 0;
521			}
522			break;
523		case TREDIR:
524			*rpp = n = redirnode;
525			rpp = &n->nfile.next;
526			parsefname();	/* read name of redirection file */
527			break;
528		case TLP:
529			if (
530				args && app == &args->narg.next &&
531				!vars && !redir
532			) {
533				struct builtincmd *bcmd;
534				const char *name;
535
536				/* We have a function */
537				if (readtoken() != TRP)
538					synexpect(TRP);
539				name = n->narg.text;
540				if (
541					!goodname(name) || (
542						(bcmd = find_builtin(name)) &&
543						bcmd->flags & BUILTIN_SPECIAL
544					)
545				)
546					synerror("Bad function name");
547				n->type = NDEFUN;
548				checkkwd = CHKNL | CHKKWD | CHKALIAS;
549				n->ndefun.text = n->narg.text;
550				n->ndefun.linno = plinno;
551				n->ndefun.body = command();
552				return n;
553			}
554			/* fall through */
555		default:
556			tokpushback++;
557			goto out;
558		}
559	}
560out:
561	*app = NULL;
562	*vpp = NULL;
563	*rpp = NULL;
564	n = (union node *)stalloc(sizeof (struct ncmd));
565	n->type = NCMD;
566	n->ncmd.linno = savelinno;
567	n->ncmd.args = args;
568	n->ncmd.assign = vars;
569	n->ncmd.redirect = redir;
570	return n;
571}
572
573STATIC union node *
574makename(void)
575{
576	union node *n;
577
578	n = (union node *)stalloc(sizeof (struct narg));
579	n->type = NARG;
580	n->narg.next = NULL;
581	n->narg.text = wordtext;
582	n->narg.backquote = backquotelist;
583	return n;
584}
585
586void fixredir(union node *n, const char *text, int err)
587	{
588	TRACE(("Fix redir %s %d\n", text, err));
589	if (!err)
590		n->ndup.vname = NULL;
591
592	if (is_digit(text[0]) && text[1] == '\0')
593		n->ndup.dupfd = digit_val(text[0]);
594	else if (text[0] == '-' && text[1] == '\0')
595		n->ndup.dupfd = -1;
596	else {
597
598		if (err)
599			synerror("Bad fd number");
600		else
601			n->ndup.vname = makename();
602	}
603}
604
605
606STATIC void
607parsefname(void)
608{
609	union node *n = redirnode;
610
611	if (n->type == NHERE)
612		checkkwd = CHKEOFMARK;
613	if (readtoken() != TWORD)
614		synexpect(-1);
615	if (n->type == NHERE) {
616		struct heredoc *here = heredoc;
617		struct heredoc *p;
618
619		if (quoteflag == 0)
620			n->type = NXHERE;
621		TRACE(("Here document %d\n", n->type));
622		rmescapes(wordtext);
623		here->eofmark = wordtext;
624		here->next = NULL;
625		if (heredoclist == NULL)
626			heredoclist = here;
627		else {
628			for (p = heredoclist ; p->next ; p = p->next);
629			p->next = here;
630		}
631	} else if (n->type == NTOFD || n->type == NFROMFD) {
632		fixredir(n, wordtext, 0);
633	} else {
634		n->nfile.fname = makename();
635	}
636}
637
638
639/*
640 * Input any here documents.
641 */
642
643STATIC void
644parseheredoc(void)
645{
646	struct heredoc *here;
647	union node *n;
648
649	here = heredoclist;
650	heredoclist = 0;
651
652	while (here) {
653		if (needprompt) {
654			setprompt(2);
655		}
656		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
657				here->eofmark, here->striptabs);
658		n = (union node *)stalloc(sizeof (struct narg));
659		n->narg.type = NARG;
660		n->narg.next = NULL;
661		n->narg.text = wordtext;
662		n->narg.backquote = backquotelist;
663		here->here->nhere.doc = n;
664		here = here->next;
665	}
666}
667
668STATIC int
669peektoken(void)
670{
671	int t;
672
673	t = readtoken();
674	tokpushback++;
675	return (t);
676}
677
678STATIC int
679readtoken(void)
680{
681	int t;
682	int kwd = checkkwd;
683#ifdef DEBUG
684	int alreadyseen = tokpushback;
685#endif
686
687top:
688	t = xxreadtoken();
689
690	/*
691	 * eat newlines
692	 */
693	if (kwd & CHKNL) {
694		while (t == TNL) {
695			parseheredoc();
696			t = xxreadtoken();
697		}
698	}
699
700	if (t != TWORD || quoteflag) {
701		goto out;
702	}
703
704	/*
705	 * check for keywords
706	 */
707	if (kwd & CHKKWD) {
708		const char *const *pp;
709
710		if ((pp = findkwd(wordtext))) {
711			lasttoken = t = pp - parsekwd + KWDOFFSET;
712			TRACE(("keyword %s recognized\n", tokname[t]));
713			goto out;
714		}
715	}
716
717	if (checkkwd & CHKALIAS) {
718		struct alias *ap;
719		if ((ap = lookupalias(wordtext, 1)) != NULL) {
720			if (*ap->val) {
721				pushstring(ap->val, ap);
722			}
723			goto top;
724		}
725	}
726out:
727	checkkwd = 0;
728#ifdef DEBUG
729	if (!alreadyseen)
730	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
731	else
732	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
733#endif
734	return (t);
735}
736
737static void nlprompt(void)
738{
739	plinno++;
740	if (doprompt)
741		setprompt(2);
742}
743
744static void nlnoprompt(void)
745{
746	plinno++;
747	needprompt = doprompt;
748}
749
750
751/*
752 * Read the next input token.
753 * If the token is a word, we set backquotelist to the list of cmds in
754 *	backquotes.  We set quoteflag to true if any part of the word was
755 *	quoted.
756 * If the token is TREDIR, then we set redirnode to a structure containing
757 *	the redirection.
758 *
759 * [Change comment:  here documents and internal procedures]
760 * [Readtoken shouldn't have any arguments.  Perhaps we should make the
761 *  word parsing code into a separate routine.  In this case, readtoken
762 *  doesn't need to have any internal procedures, but parseword does.
763 *  We could also make parseoperator in essence the main routine, and
764 *  have parseword (readtoken1?) handle both words and redirection.]
765 */
766
767#define RETURN(token)	return lasttoken = token
768
769STATIC int
770xxreadtoken(void)
771{
772	int c;
773
774	if (tokpushback) {
775		tokpushback = 0;
776		return lasttoken;
777	}
778	if (needprompt) {
779		setprompt(2);
780	}
781	for (;;) {	/* until token or start of word found */
782		c = pgetc();
783		switch (c) {
784		case ' ': case '\t':
785		case PEOA:
786			continue;
787		case '#':
788			while ((c = pgetc()) != '\n' && c != PEOF);
789			pungetc();
790			continue;
791		case '\\':
792			if (pgetc() == '\n') {
793				nlprompt();
794				continue;
795			}
796			pungetc();
797			goto breakloop;
798		case '\n':
799			nlnoprompt();
800			RETURN(TNL);
801		case PEOF:
802			RETURN(TEOF);
803		case '&':
804			if (pgetc() == '&')
805				RETURN(TAND);
806			pungetc();
807			RETURN(TBACKGND);
808		case '|':
809			if (pgetc() == '|')
810				RETURN(TOR);
811			pungetc();
812			RETURN(TPIPE);
813		case ';':
814			if (pgetc() == ';')
815				RETURN(TENDCASE);
816			pungetc();
817			RETURN(TSEMI);
818		case '(':
819			RETURN(TLP);
820		case ')':
821			RETURN(TRP);
822		default:
823			goto breakloop;
824		}
825	}
826breakloop:
827	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
828#undef RETURN
829}
830
/*
 * Read one character, skipping any backslash-newline pairs in front of
 * it.  A backslash that is not followed by a newline is returned as is,
 * with the character after it put back.
 */
static int pgetc_eatbnl(void)
{
	int c;

	for (;;) {
		c = pgetc();
		if (c != '\\')
			break;
		if (pgetc() != '\n') {
			pungetc();
			break;
		}
		nlprompt();
	}

	return c;
}
846
847
848
849/*
850 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
851 * is not NULL, read a here document.  In the latter case, eofmark is the
852 * word which marks the end of the document and striptabs is true if
853 * leading tabs should be stripped from the document.  The argument firstc
854 * is the first character of the input token or document.
855 *
856 * Because C does not have internal subroutines, I have simulated them
857 * using goto's to implement the subroutine linkage.  The following macros
858 * will run code that appears at the end of readtoken1.
859 */
860
/*
 * Each macro jumps to a labelled section of readtoken1 and plants the
 * label that section jumps back to.  Both backquote macros enter the same
 * section and differ only in the value they give oldstyle.
 */
#define CHECKEND() \
	{goto checkend; checkend_return:;}
#define PARSEREDIR() \
	{goto parseredir; parseredir_return:;}
#define PARSESUB() \
	{goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD() \
	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW() \
	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define PARSEARITH() \
	{goto parsearith; parsearith_return:;}
867
868STATIC int
869readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
870{
871	int c = firstc;
872	char *out;
873	size_t len;
874	struct nodelist *bqlist;
875	int quotef;
876	int dblquote;
877	int varnest;	/* levels of variables expansion */
878	int arinest;	/* levels of arithmetic expansion */
879	int parenlevel;	/* levels of parens in arithmetic */
880	int dqvarnest;	/* levels of variables expansion within double quotes */
881	int oldstyle;
882	/* syntax before arithmetic */
883	char const *uninitialized_var(prevsyntax);
884
885	dblquote = 0;
886	if (syntax == DQSYNTAX)
887		dblquote = 1;
888	quotef = 0;
889	bqlist = NULL;
890	varnest = 0;
891	arinest = 0;
892	parenlevel = 0;
893	dqvarnest = 0;
894
895	STARTSTACKSTR(out);
896	loop: {	/* for each line, until end of word */
897#if ATTY
898		if (c == '\034' && doprompt
899		 && attyset() && ! equal(termval(), "emacs")) {
900			attyline();
901			if (syntax == BASESYNTAX)
902				return readtoken();
903			c = pgetc();
904			goto loop;
905		}
906#endif
907		CHECKEND();	/* set c to PEOF if at end of here document */
908		for (;;) {	/* until end of line or end of word */
909			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
910			switch(syntax[c]) {
911			case CNL:	/* '\n' */
912				if (syntax == BASESYNTAX)
913					goto endword;	/* exit outer loop */
914				USTPUTC(c, out);
915				nlprompt();
916				c = pgetc();
917				goto loop;		/* continue outer loop */
918			case CWORD:
919				USTPUTC(c, out);
920				break;
921			case CCTL:
922				if (eofmark == NULL || dblquote)
923					USTPUTC(CTLESC, out);
924				USTPUTC(c, out);
925				break;
926			/* backslash */
927			case CBACK:
928				c = pgetc2();
929				if (c == PEOF) {
930					USTPUTC(CTLESC, out);
931					USTPUTC('\\', out);
932					pungetc();
933				} else if (c == '\n') {
934					nlprompt();
935				} else {
936					if (
937						dblquote &&
938						c != '\\' && c != '`' &&
939						c != '$' && (
940							c != '"' ||
941							eofmark != NULL
942						)
943					) {
944						USTPUTC('\\', out);
945					}
946					USTPUTC(CTLESC, out);
947					USTPUTC(c, out);
948					quotef++;
949				}
950				break;
951			case CSQUOTE:
952				syntax = SQSYNTAX;
953quotemark:
954				if (eofmark == NULL) {
955					USTPUTC(CTLQUOTEMARK, out);
956				}
957				break;
958			case CDQUOTE:
959				syntax = DQSYNTAX;
960				dblquote = 1;
961				goto quotemark;
962			case CENDQUOTE:
963				if (eofmark && !varnest)
964					USTPUTC(c, out);
965				else {
966					if (dqvarnest == 0) {
967						syntax = BASESYNTAX;
968						dblquote = 0;
969					}
970					quotef++;
971					goto quotemark;
972				}
973				break;
974			case CVAR:	/* '$' */
975				PARSESUB();		/* parse substitution */
976				break;
977			case CENDVAR:	/* '}' */
978				if (varnest > 0) {
979					varnest--;
980					if (dqvarnest > 0) {
981						dqvarnest--;
982					}
983					USTPUTC(CTLENDVAR, out);
984				} else {
985					USTPUTC(c, out);
986				}
987				break;
988			case CLP:	/* '(' in arithmetic */
989				parenlevel++;
990				USTPUTC(c, out);
991				break;
992			case CRP:	/* ')' in arithmetic */
993				if (parenlevel > 0) {
994					USTPUTC(c, out);
995					--parenlevel;
996				} else {
997					if (pgetc() == ')') {
998						USTPUTC(CTLENDARI, out);
999						if (!--arinest)
1000							syntax = prevsyntax;
1001					} else {
1002						/*
1003						 * unbalanced parens
1004						 *  (don't 2nd guess - no error)
1005						 */
1006						pungetc();
1007						USTPUTC(')', out);
1008					}
1009				}
1010				break;
1011			case CBQUOTE:	/* '`' */
1012				PARSEBACKQOLD();
1013				break;
1014			case CEOF:
1015				goto endword;		/* exit outer loop */
1016			case CIGN:
1017				break;
1018			default:
1019				if (varnest == 0)
1020					goto endword;	/* exit outer loop */
1021				if (c != PEOA) {
1022					USTPUTC(c, out);
1023				}
1024			}
1025			c = pgetc();
1026		}
1027	}
1028endword:
1029	if (syntax == ARISYNTAX)
1030		synerror("Missing '))'");
1031	if (syntax != BASESYNTAX && eofmark == NULL)
1032		synerror("Unterminated quoted string");
1033	if (varnest != 0) {
1034		/* { */
1035		synerror("Missing '}'");
1036	}
1037	USTPUTC('\0', out);
1038	len = out - (char *)stackblock();
1039	out = stackblock();
1040	if (eofmark == NULL) {
1041		if ((c == '>' || c == '<')
1042		 && quotef == 0
1043		 && len <= 2
1044		 && (*out == '\0' || is_digit(*out))) {
1045			PARSEREDIR();
1046			return lasttoken = TREDIR;
1047		} else {
1048			pungetc();
1049		}
1050	}
1051	quoteflag = quotef;
1052	backquotelist = bqlist;
1053	grabstackblock(len);
1054	wordtext = out;
1055	return lasttoken = TWORD;
1056/* end of readtoken routine */
1057
1058
1059
1060/*
1061 * Check to see whether we are at the end of the here document.  When this
1062 * is called, c is set to the first character of the next input line.  If
1063 * we are at the end of the here document, this routine sets the c to PEOF.
1064 */
1065
1066checkend: {
1067	if (realeofmark(eofmark)) {
1068		int markloc;
1069		char *p;
1070
1071		if (c == PEOA) {
1072			c = pgetc2();
1073		}
1074		if (striptabs) {
1075			while (c == '\t') {
1076				c = pgetc2();
1077			}
1078		}
1079
1080		markloc = out - (char *)stackblock();
1081		for (p = eofmark; STPUTC(c, out), *p; p++) {
1082			if (c != *p)
1083				goto more_heredoc;
1084
1085			c = pgetc2();
1086		}
1087
1088		if (c == '\n' || c == PEOF) {
1089			c = PEOF;
1090			nlnoprompt();
1091		} else {
1092			int len;
1093
1094more_heredoc:
1095			p = (char *)stackblock() + markloc + 1;
1096			len = out - p;
1097
1098			if (len) {
1099				len -= c < 0;
1100				c = p[-1];
1101
1102				if (len) {
1103					char *str;
1104
1105					str = alloca(len + 1);
1106					*(char *)mempcpy(str, p, len) = 0;
1107
1108					pushstring(str, NULL);
1109				}
1110			}
1111		}
1112
1113		STADJUST((char *)stackblock() + markloc - out, out);
1114	}
1115	goto checkend_return;
1116}
1117
1118
1119/*
1120 * Parse a redirection operator.  The variable "out" points to a string
1121 * specifying the fd to be redirected.  The variable "c" contains the
1122 * first character of the redirection operator.
1123 */
1124
1125parseredir: {
1126	char fd = *out;
1127	union node *np;
1128
1129	np = (union node *)stalloc(sizeof (struct nfile));
1130	if (c == '>') {
1131		np->nfile.fd = 1;
1132		c = pgetc();
1133		if (c == '>')
1134			np->type = NAPPEND;
1135		else if (c == '|')
1136			np->type = NCLOBBER;
1137		else if (c == '&')
1138			np->type = NTOFD;
1139		else {
1140			np->type = NTO;
1141			pungetc();
1142		}
1143	} else {	/* c == '<' */
1144		np->nfile.fd = 0;
1145		switch (c = pgetc()) {
1146		case '<':
1147			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1148				np = (union node *)stalloc(sizeof (struct nhere));
1149				np->nfile.fd = 0;
1150			}
1151			np->type = NHERE;
1152			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1153			heredoc->here = np;
1154			if ((c = pgetc()) == '-') {
1155				heredoc->striptabs = 1;
1156			} else {
1157				heredoc->striptabs = 0;
1158				pungetc();
1159			}
1160			break;
1161
1162		case '&':
1163			np->type = NFROMFD;
1164			break;
1165
1166		case '>':
1167			np->type = NFROMTO;
1168			break;
1169
1170		default:
1171			np->type = NFROM;
1172			pungetc();
1173			break;
1174		}
1175	}
1176	if (fd != '\0')
1177		np->nfile.fd = digit_val(fd);
1178	redirnode = np;
1179	goto parseredir_return;
1180}
1181
1182
1183/*
1184 * Parse a substitution.  At this point, we have read the dollar sign
1185 * and nothing else.
1186 */
1187
1188parsesub: {
1189	int subtype;
1190	int typeloc;
1191	char *p;
1192	static const char types[] = "}-+?=";
1193
1194	c = pgetc_eatbnl();
1195	if (
1196		(checkkwd & CHKEOFMARK) ||
1197		c <= PEOA  ||
1198		(c != '(' && c != '{' && !is_name(c) && !is_special(c))
1199	) {
1200		USTPUTC('$', out);
1201		pungetc();
1202	} else if (c == '(') {	/* $(command) or $((arith)) */
1203		if (pgetc_eatbnl() == '(') {
1204			PARSEARITH();
1205		} else {
1206			pungetc();
1207			PARSEBACKQNEW();
1208		}
1209	} else {
1210		USTPUTC(CTLVAR, out);
1211		typeloc = out - (char *)stackblock();
1212		STADJUST(1, out);
1213		subtype = VSNORMAL;
1214		if (likely(c == '{')) {
1215			c = pgetc_eatbnl();
1216			subtype = 0;
1217		}
1218varname:
1219		if (is_name(c)) {
1220			do {
1221				STPUTC(c, out);
1222				c = pgetc_eatbnl();
1223			} while (is_in_name(c));
1224		} else if (is_digit(c)) {
1225			do {
1226				STPUTC(c, out);
1227				c = pgetc_eatbnl();
1228			} while (is_digit(c));
1229		} else {
1230			int cc = c;
1231
1232			c = pgetc_eatbnl();
1233
1234			if (!subtype && cc == '#') {
1235				subtype = VSLENGTH;
1236
1237				if (c == '_' || isalnum(c))
1238					goto varname;
1239
1240				cc = c;
1241				c = pgetc_eatbnl();
1242				if (cc == '}' || c != '}') {
1243					pungetc();
1244					subtype = 0;
1245					c = cc;
1246					cc = '#';
1247				}
1248			}
1249
1250			if (!is_special(cc)) {
1251				if (subtype == VSLENGTH)
1252					subtype = 0;
1253				goto badsub;
1254			}
1255
1256			USTPUTC(cc, out);
1257		}
1258
1259		if (subtype == 0) {
1260			switch (c) {
1261			case ':':
1262				subtype = VSNUL;
1263				c = pgetc_eatbnl();
1264				/*FALLTHROUGH*/
1265			default:
1266				p = strchr(types, c);
1267				if (p == NULL)
1268					break;
1269				subtype |= p - types + VSNORMAL;
1270				break;
1271			case '%':
1272			case '#':
1273				{
1274					int cc = c;
1275					subtype = c == '#' ? VSTRIMLEFT :
1276							     VSTRIMRIGHT;
1277					c = pgetc_eatbnl();
1278					if (c == cc)
1279						subtype++;
1280					else
1281						pungetc();
1282					break;
1283				}
1284			}
1285		} else {
1286badsub:
1287			pungetc();
1288		}
1289		*((char *)stackblock() + typeloc) = subtype;
1290		if (subtype != VSNORMAL) {
1291			varnest++;
1292			if (dblquote)
1293				dqvarnest++;
1294		}
1295		STPUTC('=', out);
1296	}
1297	goto parsesub_return;
1298}
1299
1300
1301/*
1302 * Called to parse command substitutions.  Newstyle is set if the command
1303 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1304 * list of commands (passed by reference), and savelen is the number of
1305 * characters on the top of the stack which must be preserved.
1306 */
1307
1308parsebackq: {
1309	struct nodelist **nlpp;
1310	union node *n;
1311	char *str;
1312	size_t savelen;
1313	int uninitialized_var(saveprompt);
1314
1315	str = NULL;
1316	savelen = out - (char *)stackblock();
1317	if (savelen > 0) {
1318		str = alloca(savelen);
1319		memcpy(str, stackblock(), savelen);
1320	}
1321        if (oldstyle) {
1322                /* We must read until the closing backquote, giving special
1323                   treatment to some slashes, and then push the string and
1324                   reread it as input, interpreting it normally.  */
1325                char *pout;
1326                int pc;
1327                size_t psavelen;
1328                char *pstr;
1329
1330
1331                STARTSTACKSTR(pout);
1332		for (;;) {
1333			if (needprompt) {
1334				setprompt(2);
1335			}
1336			switch (pc = pgetc()) {
1337			case '`':
1338				goto done;
1339
1340			case '\\':
1341                                if ((pc = pgetc()) == '\n') {
1342					nlprompt();
1343					/*
1344					 * If eating a newline, avoid putting
1345					 * the newline into the new character
1346					 * stream (via the STPUTC after the
1347					 * switch).
1348					 */
1349					continue;
1350				}
1351                                if (pc != '\\' && pc != '`' && pc != '$'
1352                                    && (!dblquote || pc != '"'))
1353                                        STPUTC('\\', pout);
1354				if (pc > PEOA) {
1355					break;
1356				}
1357				/* fall through */
1358
1359			case PEOF:
1360			case PEOA:
1361				synerror("EOF in backquote substitution");
1362
1363			case '\n':
1364				nlnoprompt();
1365				break;
1366
1367			default:
1368				break;
1369			}
1370			STPUTC(pc, pout);
1371                }
1372done:
1373                STPUTC('\0', pout);
1374                psavelen = pout - (char *)stackblock();
1375                if (psavelen > 0) {
1376			pstr = grabstackstr(pout);
1377			setinputstring(pstr);
1378                }
1379        }
1380	nlpp = &bqlist;
1381	while (*nlpp)
1382		nlpp = &(*nlpp)->next;
1383	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1384	(*nlpp)->next = NULL;
1385
1386	if (oldstyle) {
1387		saveprompt = doprompt;
1388		doprompt = 0;
1389	}
1390
1391	n = list(2);
1392
1393	if (oldstyle)
1394		doprompt = saveprompt;
1395	else {
1396		if (readtoken() != TRP)
1397			synexpect(TRP);
1398	}
1399
1400	(*nlpp)->n = n;
1401        if (oldstyle) {
1402		/*
1403		 * Start reading from old file again, ignoring any pushed back
1404		 * tokens left from the backquote parsing
1405		 */
1406                popfile();
1407		tokpushback = 0;
1408	}
1409	while (stackblocksize() <= savelen)
1410		growstackblock();
1411	STARTSTACKSTR(out);
1412	if (str) {
1413		memcpy(out, str, savelen);
1414		STADJUST(savelen, out);
1415	}
1416	USTPUTC(CTLBACKQ, out);
1417	if (oldstyle)
1418		goto parsebackq_oldreturn;
1419	else
1420		goto parsebackq_newreturn;
1421}
1422
1423/*
1424 * Parse an arithmetic expansion (indicate start of one and set state)
1425 */
1426parsearith: {
1427
1428	if (++arinest == 1) {
1429		prevsyntax = syntax;
1430		syntax = ARISYNTAX;
1431	}
1432	USTPUTC(CTLARI, out);
1433	goto parsearith_return;
1434}
1435
1436} /* end of readtoken */
1437
1438
1439
1440#ifdef mkinit
1441INCLUDE "parser.h"
1442#endif
1443
1444
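/*
 * Return a pointer to the end of a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 * If the string does not start with a legal name, the pointer returned is
 * the start of the string.
 */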
1449
char *
endofname(const char *name)
{
	const char *end = name;

	/*
	 * Only a character accepted by is_name() may start a name; if the
	 * first character is not one, the name is empty and ends where it
	 * starts.
	 */
	if (is_name(*end)) {
		/* Step over every following character accepted by is_in_name(). */
		do {
			end++;
		} while (*end && is_in_name(*end));
	}

	/* The interface returns a non-const pointer into the caller's string. */
	return (char *) end;
}
1464
1465
1466/*
1467 * Called when an unexpected token is read during the parse.  The argument
1468 * is the token that is expected, or -1 if more than one type of token can
1469 * occur at this point.
1470 */
1471
1472STATIC void
1473synexpect(int token)
1474{
1475	char msg[64];
1476
1477	if (token >= 0) {
1478		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1479			tokname[lasttoken], tokname[token]);
1480	} else {
1481		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1482	}
1483	synerror(msg);
1484	/* NOTREACHED */
1485}
1486
1487
1488STATIC void
1489synerror(const char *msg)
1490{
1491	errlinno = plinno;
1492	sh_error("Syntax error: %s", msg);
1493	/* NOTREACHED */
1494}
1495
1496STATIC void
1497setprompt(int which)
1498{
1499	struct stackmark smark;
1500	int show;
1501
1502	needprompt = 0;
1503	whichprompt = which;
1504
1505#ifdef USE_LINENOISE
1506	show = 0;
1507#else
1508	show = 1;
1509#endif
1510	if (show) {
1511		pushstackmark(&smark, stackblocksize());
1512		out2str(getprompt(NULL));
1513		popstackmark(&smark);
1514	}
1515}
1516
1517const char *
1518expandstr(const char *ps)
1519{
1520	union node n;
1521	int saveprompt;
1522
1523	/* XXX Fix (char *) cast. */
1524	setinputstring((char *)ps);
1525
1526	saveprompt = doprompt;
1527	doprompt = 0;
1528
1529	readtoken1(pgetc(), DQSYNTAX, FAKEEOFMARK, 0);
1530
1531	doprompt = saveprompt;
1532
1533	popfile();
1534
1535	n.narg.type = NARG;
1536	n.narg.next = NULL;
1537	n.narg.text = wordtext;
1538	n.narg.backquote = backquotelist;
1539
1540	expandarg(&n, NULL, EXP_QUOTED);
1541	return stackblock();
1542}
1543
1544/*
1545 * called by editline -- any expansions to the prompt
1546 *    should be added here.
1547 */
1548const char *
1549getprompt(void *unused)
1550{
1551	const char *prompt;
1552
1553	switch (whichprompt) {
1554	default:
1555#ifdef DEBUG
1556		return "<internal prompt error>";
1557#endif
1558	case 0:
1559		return nullstr;
1560	case 1:
1561		prompt = ps1val();
1562		break;
1563	case 2:
1564		prompt = ps2val();
1565		break;
1566	}
1567
1568	return expandstr(prompt);
1569}
1570
1571const char *const *
1572findkwd(const char *s)
1573{
1574	return findstring(
1575		s, parsekwd, sizeof(parsekwd) / sizeof(const char *)
1576	);
1577}
1578