parser.c revision 127958
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Kenneth Almquist.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#ifndef lint
34#if 0
35static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
36#endif
37#endif /* not lint */
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/bin/sh/parser.c 127958 2004-04-06 20:06:54Z markm $");
40
41#include <stdlib.h>
42
43#include "shell.h"
44#include "parser.h"
45#include "nodes.h"
46#include "expand.h"	/* defines rmescapes() */
47#include "syntax.h"
48#include "options.h"
49#include "input.h"
50#include "output.h"
51#include "var.h"
52#include "error.h"
53#include "memalloc.h"
54#include "mystring.h"
55#include "alias.h"
56#include "show.h"
57#include "eval.h"
58#ifndef NO_HISTORY
59#include "myhistedit.h"
60#endif
61
62/*
63 * Shell command parser.
64 */
65
66#define EOFMARKLEN 79
67
68/* values returned by readtoken */
69#include "token.h"
70
71
72
/*
 * One pending here document.  An entry is allocated when a `<<' operator
 * is parsed, appended to heredoclist once its end marker has been read,
 * and removed from that list when the document body is read.
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
79
80
81
STATIC struct heredoc *heredoclist;	/* list of here documents to read */
STATIC int parsebackquote;	/* nonzero if we are inside backquotes */
STATIC int doprompt;		/* if set, prompt the user */
STATIC int needprompt;		/* true if interactive and at start of line */
STATIC int lasttoken;		/* last token read */
MKINIT int tokpushback;		/* nonzero if lasttoken has been pushed back */
STATIC char *wordtext;		/* text of last word returned by readtoken */
MKINIT int checkkwd;            /* 1 == check for kwds, 2 == also eat newlines */
STATIC struct nodelist *backquotelist;	/* backquote cmds of last word read */
STATIC union node *redirnode;	/* node built for the last TREDIR token */
STATIC struct heredoc *heredoc;	/* entry created by the last `<<' operator */
STATIC int quoteflag;		/* set if (part of) last token was quoted */
STATIC int startlinno;		/* line # where last token started */

/* XXX When 'noaliases' is set to one, no alias expansion takes place. */
static int noaliases = 0;

#define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
#ifdef GDB_HACK
/*
 * Encoded reference to the variable `@' (with VSQUOTE set); used as the
 * word list of a `for' loop that has no `in' clause.
 */
static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
/*
 * Operator characters accepted after the name in ${name...}; the index of
 * a character in this string is added to VSNORMAL to form the subtype.
 */
static const char types[] = "}-+?=";
#endif
104
105
/*
 * Forward declarations of the file-local parser routines: the recursive
 * descent functions (list, andor, pipeline, command, simplecmd), the
 * redirection and here-document helpers, the tokenizer layers, and the
 * syntax error and prompt helpers.
 */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makename(void);
STATIC void parsefname(void);
STATIC void parseheredoc(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, char *, int);
STATIC int noexpand(char *);
STATIC void synexpect(int);
STATIC void synerror(char *);
STATIC void setprompt(int);
122
123
124/*
125 * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
126 * valid parse tree indicating a blank line.)
127 */
128
129union node *
130parsecmd(int interact)
131{
132	int t;
133
134	tokpushback = 0;
135	doprompt = interact;
136	if (doprompt)
137		setprompt(1);
138	else
139		setprompt(0);
140	needprompt = 0;
141	t = readtoken();
142	if (t == TEOF)
143		return NEOF;
144	if (t == TNL)
145		return NULL;
146	tokpushback++;
147	return list(1);
148}
149
150
151STATIC union node *
152list(int nlflag)
153{
154	union node *n1, *n2, *n3;
155	int tok;
156
157	checkkwd = 2;
158	if (nlflag == 0 && tokendlist[peektoken()])
159		return NULL;
160	n1 = NULL;
161	for (;;) {
162		n2 = andor();
163		tok = readtoken();
164		if (tok == TBACKGND) {
165			if (n2->type == NCMD || n2->type == NPIPE) {
166				n2->ncmd.backgnd = 1;
167			} else if (n2->type == NREDIR) {
168				n2->type = NBACKGND;
169			} else {
170				n3 = (union node *)stalloc(sizeof (struct nredir));
171				n3->type = NBACKGND;
172				n3->nredir.n = n2;
173				n3->nredir.redirect = NULL;
174				n2 = n3;
175			}
176		}
177		if (n1 == NULL) {
178			n1 = n2;
179		}
180		else {
181			n3 = (union node *)stalloc(sizeof (struct nbinary));
182			n3->type = NSEMI;
183			n3->nbinary.ch1 = n1;
184			n3->nbinary.ch2 = n2;
185			n1 = n3;
186		}
187		switch (tok) {
188		case TBACKGND:
189		case TSEMI:
190			tok = readtoken();
191			/* FALLTHROUGH */
192		case TNL:
193			if (tok == TNL) {
194				parseheredoc();
195				if (nlflag)
196					return n1;
197			} else {
198				tokpushback++;
199			}
200			checkkwd = 2;
201			if (tokendlist[peektoken()])
202				return n1;
203			break;
204		case TEOF:
205			if (heredoclist)
206				parseheredoc();
207			else
208				pungetc();		/* push back EOF on input */
209			return n1;
210		default:
211			if (nlflag)
212				synexpect(-1);
213			tokpushback++;
214			return n1;
215		}
216	}
217}
218
219
220
221STATIC union node *
222andor(void)
223{
224	union node *n1, *n2, *n3;
225	int t;
226
227	n1 = pipeline();
228	for (;;) {
229		if ((t = readtoken()) == TAND) {
230			t = NAND;
231		} else if (t == TOR) {
232			t = NOR;
233		} else {
234			tokpushback++;
235			return n1;
236		}
237		n2 = pipeline();
238		n3 = (union node *)stalloc(sizeof (struct nbinary));
239		n3->type = t;
240		n3->nbinary.ch1 = n1;
241		n3->nbinary.ch2 = n2;
242		n1 = n3;
243	}
244}
245
246
247
248STATIC union node *
249pipeline(void)
250{
251	union node *n1, *n2, *pipenode;
252	struct nodelist *lp, *prev;
253	int negate;
254
255	negate = 0;
256	TRACE(("pipeline: entered\n"));
257	while (readtoken() == TNOT)
258		negate = !negate;
259	tokpushback++;
260	n1 = command();
261	if (readtoken() == TPIPE) {
262		pipenode = (union node *)stalloc(sizeof (struct npipe));
263		pipenode->type = NPIPE;
264		pipenode->npipe.backgnd = 0;
265		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
266		pipenode->npipe.cmdlist = lp;
267		lp->n = n1;
268		do {
269			prev = lp;
270			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
271			lp->n = command();
272			prev->next = lp;
273		} while (readtoken() == TPIPE);
274		lp->next = NULL;
275		n1 = pipenode;
276	}
277	tokpushback++;
278	if (negate) {
279		n2 = (union node *)stalloc(sizeof (struct nnot));
280		n2->type = NNOT;
281		n2->nnot.com = n1;
282		return n2;
283	} else
284		return n1;
285}
286
287
288
289STATIC union node *
290command(void)
291{
292	union node *n1, *n2;
293	union node *ap, **app;
294	union node *cp, **cpp;
295	union node *redir, **rpp;
296	int t, negate = 0;
297
298	checkkwd = 2;
299	redir = NULL;
300	n1 = NULL;
301	rpp = &redir;
302
303	/* Check for redirection which may precede command */
304	while (readtoken() == TREDIR) {
305		*rpp = n2 = redirnode;
306		rpp = &n2->nfile.next;
307		parsefname();
308	}
309	tokpushback++;
310
311	while (readtoken() == TNOT) {
312		TRACE(("command: TNOT recognized\n"));
313		negate = !negate;
314	}
315	tokpushback++;
316
317	switch (readtoken()) {
318	case TIF:
319		n1 = (union node *)stalloc(sizeof (struct nif));
320		n1->type = NIF;
321		if ((n1->nif.test = list(0)) == NULL)
322			synexpect(-1);
323		if (readtoken() != TTHEN)
324			synexpect(TTHEN);
325		n1->nif.ifpart = list(0);
326		n2 = n1;
327		while (readtoken() == TELIF) {
328			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
329			n2 = n2->nif.elsepart;
330			n2->type = NIF;
331			if ((n2->nif.test = list(0)) == NULL)
332				synexpect(-1);
333			if (readtoken() != TTHEN)
334				synexpect(TTHEN);
335			n2->nif.ifpart = list(0);
336		}
337		if (lasttoken == TELSE)
338			n2->nif.elsepart = list(0);
339		else {
340			n2->nif.elsepart = NULL;
341			tokpushback++;
342		}
343		if (readtoken() != TFI)
344			synexpect(TFI);
345		checkkwd = 1;
346		break;
347	case TWHILE:
348	case TUNTIL: {
349		int got;
350		n1 = (union node *)stalloc(sizeof (struct nbinary));
351		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
352		if ((n1->nbinary.ch1 = list(0)) == NULL)
353			synexpect(-1);
354		if ((got=readtoken()) != TDO) {
355TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
356			synexpect(TDO);
357		}
358		n1->nbinary.ch2 = list(0);
359		if (readtoken() != TDONE)
360			synexpect(TDONE);
361		checkkwd = 1;
362		break;
363	}
364	case TFOR:
365		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
366			synerror("Bad for loop variable");
367		n1 = (union node *)stalloc(sizeof (struct nfor));
368		n1->type = NFOR;
369		n1->nfor.var = wordtext;
370		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
371			app = &ap;
372			while (readtoken() == TWORD) {
373				n2 = (union node *)stalloc(sizeof (struct narg));
374				n2->type = NARG;
375				n2->narg.text = wordtext;
376				n2->narg.backquote = backquotelist;
377				*app = n2;
378				app = &n2->narg.next;
379			}
380			*app = NULL;
381			n1->nfor.args = ap;
382			if (lasttoken != TNL && lasttoken != TSEMI)
383				synexpect(-1);
384		} else {
385#ifndef GDB_HACK
386			static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
387								   '@', '=', '\0'};
388#endif
389			n2 = (union node *)stalloc(sizeof (struct narg));
390			n2->type = NARG;
391			n2->narg.text = (char *)argvars;
392			n2->narg.backquote = NULL;
393			n2->narg.next = NULL;
394			n1->nfor.args = n2;
395			/*
396			 * Newline or semicolon here is optional (but note
397			 * that the original Bourne shell only allowed NL).
398			 */
399			if (lasttoken != TNL && lasttoken != TSEMI)
400				tokpushback++;
401		}
402		checkkwd = 2;
403		if ((t = readtoken()) == TDO)
404			t = TDONE;
405		else if (t == TBEGIN)
406			t = TEND;
407		else
408			synexpect(-1);
409		n1->nfor.body = list(0);
410		if (readtoken() != t)
411			synexpect(t);
412		checkkwd = 1;
413		break;
414	case TCASE:
415		n1 = (union node *)stalloc(sizeof (struct ncase));
416		n1->type = NCASE;
417		if (readtoken() != TWORD)
418			synexpect(TWORD);
419		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
420		n2->type = NARG;
421		n2->narg.text = wordtext;
422		n2->narg.backquote = backquotelist;
423		n2->narg.next = NULL;
424		while (readtoken() == TNL);
425		if (lasttoken != TWORD || ! equal(wordtext, "in"))
426			synerror("expecting \"in\"");
427		cpp = &n1->ncase.cases;
428		noaliases = 1;	/* turn off alias expansion */
429		checkkwd = 2, readtoken();
430		while (lasttoken != TESAC) {
431			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
432			cp->type = NCLIST;
433			app = &cp->nclist.pattern;
434			if (lasttoken == TLP)
435				readtoken();
436			for (;;) {
437				*app = ap = (union node *)stalloc(sizeof (struct narg));
438				ap->type = NARG;
439				ap->narg.text = wordtext;
440				ap->narg.backquote = backquotelist;
441				if (checkkwd = 2, readtoken() != TPIPE)
442					break;
443				app = &ap->narg.next;
444				readtoken();
445			}
446			ap->narg.next = NULL;
447			if (lasttoken != TRP)
448				noaliases = 0, synexpect(TRP);
449			cp->nclist.body = list(0);
450
451			checkkwd = 2;
452			if ((t = readtoken()) != TESAC) {
453				if (t != TENDCASE)
454					noaliases = 0, synexpect(TENDCASE);
455				else
456					checkkwd = 2, readtoken();
457			}
458			cpp = &cp->nclist.next;
459		}
460		noaliases = 0;	/* reset alias expansion */
461		*cpp = NULL;
462		checkkwd = 1;
463		break;
464	case TLP:
465		n1 = (union node *)stalloc(sizeof (struct nredir));
466		n1->type = NSUBSHELL;
467		n1->nredir.n = list(0);
468		n1->nredir.redirect = NULL;
469		if (readtoken() != TRP)
470			synexpect(TRP);
471		checkkwd = 1;
472		break;
473	case TBEGIN:
474		n1 = list(0);
475		if (readtoken() != TEND)
476			synexpect(TEND);
477		checkkwd = 1;
478		break;
479	/* Handle an empty command like other simple commands.  */
480	case TSEMI:
481	case TAND:
482	case TOR:
483		/*
484		 * An empty command before a ; doesn't make much sense, and
485		 * should certainly be disallowed in the case of `if ;'.
486		 */
487		if (!redir)
488			synexpect(-1);
489	case TNL:
490	case TEOF:
491	case TWORD:
492	case TRP:
493		tokpushback++;
494		n1 = simplecmd(rpp, redir);
495		goto checkneg;
496	default:
497		synexpect(-1);
498	}
499
500	/* Now check for redirection which may follow command */
501	while (readtoken() == TREDIR) {
502		*rpp = n2 = redirnode;
503		rpp = &n2->nfile.next;
504		parsefname();
505	}
506	tokpushback++;
507	*rpp = NULL;
508	if (redir) {
509		if (n1->type != NSUBSHELL) {
510			n2 = (union node *)stalloc(sizeof (struct nredir));
511			n2->type = NREDIR;
512			n2->nredir.n = n1;
513			n1 = n2;
514		}
515		n1->nredir.redirect = redir;
516	}
517
518checkneg:
519	if (negate) {
520		n2 = (union node *)stalloc(sizeof (struct nnot));
521		n2->type = NNOT;
522		n2->nnot.com = n1;
523		return n2;
524	}
525	else
526		return n1;
527}
528
529
530STATIC union node *
531simplecmd(union node **rpp, union node *redir)
532{
533	union node *args, **app;
534	union node **orig_rpp = rpp;
535	union node *n = NULL, *n2;
536	int negate = 0;
537
538	/* If we don't have any redirections already, then we must reset */
539	/* rpp to be the address of the local redir variable.  */
540	if (redir == 0)
541		rpp = &redir;
542
543	args = NULL;
544	app = &args;
545	/*
546	 * We save the incoming value, because we need this for shell
547	 * functions.  There can not be a redirect or an argument between
548	 * the function name and the open parenthesis.
549	 */
550	orig_rpp = rpp;
551
552	while (readtoken() == TNOT) {
553		TRACE(("command: TNOT recognized\n"));
554		negate = !negate;
555	}
556	tokpushback++;
557
558	for (;;) {
559		if (readtoken() == TWORD) {
560			n = (union node *)stalloc(sizeof (struct narg));
561			n->type = NARG;
562			n->narg.text = wordtext;
563			n->narg.backquote = backquotelist;
564			*app = n;
565			app = &n->narg.next;
566		} else if (lasttoken == TREDIR) {
567			*rpp = n = redirnode;
568			rpp = &n->nfile.next;
569			parsefname();	/* read name of redirection file */
570		} else if (lasttoken == TLP && app == &args->narg.next
571					    && rpp == orig_rpp) {
572			/* We have a function */
573			if (readtoken() != TRP)
574				synexpect(TRP);
575#ifdef notdef
576			if (! goodname(n->narg.text))
577				synerror("Bad function name");
578#endif
579			n->type = NDEFUN;
580			n->narg.next = command();
581			goto checkneg;
582		} else {
583			tokpushback++;
584			break;
585		}
586	}
587	*app = NULL;
588	*rpp = NULL;
589	n = (union node *)stalloc(sizeof (struct ncmd));
590	n->type = NCMD;
591	n->ncmd.backgnd = 0;
592	n->ncmd.args = args;
593	n->ncmd.redirect = redir;
594
595checkneg:
596	if (negate) {
597		n2 = (union node *)stalloc(sizeof (struct nnot));
598		n2->type = NNOT;
599		n2->nnot.com = n;
600		return n2;
601	}
602	else
603		return n;
604}
605
606STATIC union node *
607makename(void)
608{
609	union node *n;
610
611	n = (union node *)stalloc(sizeof (struct narg));
612	n->type = NARG;
613	n->narg.next = NULL;
614	n->narg.text = wordtext;
615	n->narg.backquote = backquotelist;
616	return n;
617}
618
619void fixredir(union node *n, const char *text, int err)
620{
621	TRACE(("Fix redir %s %d\n", text, err));
622	if (!err)
623		n->ndup.vname = NULL;
624
625	if (is_digit(text[0]) && text[1] == '\0')
626		n->ndup.dupfd = digit_val(text[0]);
627	else if (text[0] == '-' && text[1] == '\0')
628		n->ndup.dupfd = -1;
629	else {
630
631		if (err)
632			synerror("Bad fd number");
633		else
634			n->ndup.vname = makename();
635	}
636}
637
638
639STATIC void
640parsefname(void)
641{
642	union node *n = redirnode;
643
644	if (readtoken() != TWORD)
645		synexpect(-1);
646	if (n->type == NHERE) {
647		struct heredoc *here = heredoc;
648		struct heredoc *p;
649		int i;
650
651		if (quoteflag == 0)
652			n->type = NXHERE;
653		TRACE(("Here document %d\n", n->type));
654		if (here->striptabs) {
655			while (*wordtext == '\t')
656				wordtext++;
657		}
658		if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
659			synerror("Illegal eof marker for << redirection");
660		rmescapes(wordtext);
661		here->eofmark = wordtext;
662		here->next = NULL;
663		if (heredoclist == NULL)
664			heredoclist = here;
665		else {
666			for (p = heredoclist ; p->next ; p = p->next);
667			p->next = here;
668		}
669	} else if (n->type == NTOFD || n->type == NFROMFD) {
670		fixredir(n, wordtext, 0);
671	} else {
672		n->nfile.fname = makename();
673	}
674}
675
676
677/*
678 * Input any here documents.
679 */
680
681STATIC void
682parseheredoc(void)
683{
684	struct heredoc *here;
685	union node *n;
686
687	while (heredoclist) {
688		here = heredoclist;
689		heredoclist = here->next;
690		if (needprompt) {
691			setprompt(2);
692			needprompt = 0;
693		}
694		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
695				here->eofmark, here->striptabs);
696		n = (union node *)stalloc(sizeof (struct narg));
697		n->narg.type = NARG;
698		n->narg.next = NULL;
699		n->narg.text = wordtext;
700		n->narg.backquote = backquotelist;
701		here->here->nhere.doc = n;
702	}
703}
704
705STATIC int
706peektoken(void)
707{
708	int t;
709
710	t = readtoken();
711	tokpushback++;
712	return (t);
713}
714
/*
 * Read the next token, applying keyword recognition and alias expansion
 * when checkkwd is set.
 *
 * checkkwd == 2 additionally skips newline tokens (reading any pending
 * here documents after each one).  An unquoted word that matches an entry
 * of parsekwd is returned as that keyword's token.  Otherwise, unless
 * noaliases is set, a word that names an alias has the alias value pushed
 * onto the input and the read is retried.
 *
 * checkkwd is cleared once it has been acted on, except that it keeps its
 * incoming value when the token returned is TNOT.
 */
STATIC int
readtoken(void)
{
	int t;
	int savecheckkwd = checkkwd;	/* restored for alias retry and TNOT */
	struct alias *ap;
#ifdef DEBUG
	int alreadyseen = tokpushback;
#endif

	top:
	t = xxreadtoken();

	if (checkkwd) {
		/*
		 * eat newlines
		 */
		if (checkkwd == 2) {
			checkkwd = 0;
			while (t == TNL) {
				parseheredoc();
				t = xxreadtoken();
			}
		} else
			checkkwd = 0;
		/*
		 * check for keywords and aliases
		 */
		if (t == TWORD && !quoteflag)
		{
			const char * const *pp;

			for (pp = parsekwd; *pp; pp++) {
				/* Compare first characters before calling equal(). */
				if (**pp == *wordtext && equal(*pp, wordtext))
				{
					/* Token value is the table index plus KWDOFFSET. */
					lasttoken = t = pp - parsekwd + KWDOFFSET;
					TRACE(("keyword %s recognized\n", tokname[t]));
					goto out;
				}
			}
			if (noaliases == 0 &&
			    (ap = lookupalias(wordtext, 1)) != NULL) {
				/* Re-read from the alias text with the same checkkwd. */
				pushstring(ap->val, strlen(ap->val), ap);
				checkkwd = savecheckkwd;
				goto top;
			}
		}
out:
		checkkwd = (t == TNOT) ? savecheckkwd : 0;
	}
#ifdef DEBUG
	if (!alreadyseen)
	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
	else
	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
#endif
	return (t);
}
773
774
775/*
776 * Read the next input token.
777 * If the token is a word, we set backquotelist to the list of cmds in
778 *	backquotes.  We set quoteflag to true if any part of the word was
779 *	quoted.
780 * If the token is TREDIR, then we set redirnode to a structure containing
781 *	the redirection.
782 * In all cases, the variable startlinno is set to the number of the line
783 *	on which the token starts.
784 *
785 * [Change comment:  here documents and internal procedures]
786 * [Readtoken shouldn't have any arguments.  Perhaps we should make the
787 *  word parsing code into a separate routine.  In this case, readtoken
788 *  doesn't need to have any internal procedures, but parseword does.
789 *  We could also make parseoperator in essence the main routine, and
790 *  have parseword (readtoken1?) handle both words and redirection.]
791 */
792
793#define RETURN(token)	return lasttoken = token
794
795STATIC int
796xxreadtoken(void)
797{
798	int c;
799
800	if (tokpushback) {
801		tokpushback = 0;
802		return lasttoken;
803	}
804	if (needprompt) {
805		setprompt(2);
806		needprompt = 0;
807	}
808	startlinno = plinno;
809	for (;;) {	/* until token or start of word found */
810		c = pgetc_macro();
811		if (c == ' ' || c == '\t')
812			continue;		/* quick check for white space first */
813		switch (c) {
814		case ' ': case '\t':
815			continue;
816		case '#':
817			while ((c = pgetc()) != '\n' && c != PEOF);
818			pungetc();
819			continue;
820		case '\\':
821			if (pgetc() == '\n') {
822				startlinno = ++plinno;
823				if (doprompt)
824					setprompt(2);
825				else
826					setprompt(0);
827				continue;
828			}
829			pungetc();
830			goto breakloop;
831		case '\n':
832			plinno++;
833			needprompt = doprompt;
834			RETURN(TNL);
835		case PEOF:
836			RETURN(TEOF);
837		case '&':
838			if (pgetc() == '&')
839				RETURN(TAND);
840			pungetc();
841			RETURN(TBACKGND);
842		case '|':
843			if (pgetc() == '|')
844				RETURN(TOR);
845			pungetc();
846			RETURN(TPIPE);
847		case ';':
848			if (pgetc() == ';')
849				RETURN(TENDCASE);
850			pungetc();
851			RETURN(TSEMI);
852		case '(':
853			RETURN(TLP);
854		case ')':
855			RETURN(TRP);
856		default:
857			goto breakloop;
858		}
859	}
860breakloop:
861	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
862#undef RETURN
863}
864
865
866
867/*
868 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
869 * is not NULL, read a here document.  In the latter case, eofmark is the
870 * word which marks the end of the document and striptabs is true if
871 * leading tabs should be stripped from the document.  The argument firstc
872 * is the first character of the input token or document.
873 *
874 * Because C does not have internal subroutines, I have simulated them
875 * using goto's to implement the subroutine linkage.  The following macros
876 * will run code that appears at the end of readtoken1.
877 */
878
/*
 * Each macro jumps to a labelled section of readtoken1 and defines the
 * label at which that section resumes the caller.  Because the resume
 * label is part of the expansion, each macro can be used only once within
 * the function.
 */
/* Run the here-document end check; resumes at checkend_return. */
#define CHECKEND()	{goto checkend; checkend_return:;}
/* Parse a redirection operator; resumes at parseredir_return. */
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
/* Parse a `$' substitution; resumes at parsesub_return. */
#define PARSESUB()	{goto parsesub; parsesub_return:;}
/* Enter parsebackq with oldstyle set to 1; defines parsebackq_oldreturn. */
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
/* Enter parsebackq with oldstyle set to 0; defines parsebackq_newreturn. */
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
/* Jump to parsearith; defines parsearith_return. */
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}
885
886STATIC int
887readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
888{
889	int c = firstc;
890	char *out;
891	int len;
892	char line[EOFMARKLEN + 1];
893	struct nodelist *bqlist;
894	int quotef;
895	int dblquote;
896	int varnest;	/* levels of variables expansion */
897	int arinest;	/* levels of arithmetic expansion */
898	int parenlevel;	/* levels of parens in arithmetic */
899	int oldstyle;
900	char const *prevsyntax;	/* syntax before arithmetic */
901	int synentry;
902#if __GNUC__
903	/* Avoid longjmp clobbering */
904	(void) &out;
905	(void) &quotef;
906	(void) &dblquote;
907	(void) &varnest;
908	(void) &arinest;
909	(void) &parenlevel;
910	(void) &oldstyle;
911	(void) &prevsyntax;
912	(void) &syntax;
913	(void) &synentry;
914#endif
915
916	startlinno = plinno;
917	dblquote = 0;
918	if (syntax == DQSYNTAX)
919		dblquote = 1;
920	quotef = 0;
921	bqlist = NULL;
922	varnest = 0;
923	arinest = 0;
924	parenlevel = 0;
925
926	STARTSTACKSTR(out);
927	loop: {	/* for each line, until end of word */
928		CHECKEND();	/* set c to PEOF if at end of here document */
929		for (;;) {	/* until end of line or end of word */
930			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
931
932			synentry = syntax[c];
933
934			switch(synentry) {
935			case CNL:	/* '\n' */
936				if (syntax == BASESYNTAX)
937					goto endword;	/* exit outer loop */
938				USTPUTC(c, out);
939				plinno++;
940				if (doprompt)
941					setprompt(2);
942				else
943					setprompt(0);
944				c = pgetc();
945				goto loop;		/* continue outer loop */
946			case CWORD:
947				USTPUTC(c, out);
948				break;
949			case CCTL:
950				if (eofmark == NULL || dblquote)
951					USTPUTC(CTLESC, out);
952				USTPUTC(c, out);
953				break;
954			case CBACK:	/* backslash */
955				c = pgetc();
956				if (c == PEOF) {
957					USTPUTC('\\', out);
958					pungetc();
959				} else if (c == '\n') {
960					if (doprompt)
961						setprompt(2);
962					else
963						setprompt(0);
964				} else {
965					if (dblquote && c != '\\' &&
966					    c != '`' && c != '$' &&
967					    (c != '"' || eofmark != NULL))
968						USTPUTC('\\', out);
969					if (SQSYNTAX[c] == CCTL)
970						USTPUTC(CTLESC, out);
971					else if (eofmark == NULL)
972						USTPUTC(CTLQUOTEMARK, out);
973					USTPUTC(c, out);
974					quotef++;
975				}
976				break;
977			case CSQUOTE:
978				if (eofmark == NULL)
979					USTPUTC(CTLQUOTEMARK, out);
980				syntax = SQSYNTAX;
981				break;
982			case CDQUOTE:
983				if (eofmark == NULL)
984					USTPUTC(CTLQUOTEMARK, out);
985				syntax = DQSYNTAX;
986				dblquote = 1;
987				break;
988			case CENDQUOTE:
989				if (eofmark != NULL && arinest == 0 &&
990				    varnest == 0) {
991					USTPUTC(c, out);
992				} else {
993					if (arinest) {
994						syntax = ARISYNTAX;
995						dblquote = 0;
996					} else if (eofmark == NULL) {
997						syntax = BASESYNTAX;
998						dblquote = 0;
999					}
1000					quotef++;
1001				}
1002				break;
1003			case CVAR:	/* '$' */
1004				PARSESUB();		/* parse substitution */
1005				break;
1006			case CENDVAR:	/* '}' */
1007				if (varnest > 0) {
1008					varnest--;
1009					USTPUTC(CTLENDVAR, out);
1010				} else {
1011					USTPUTC(c, out);
1012				}
1013				break;
1014			case CLP:	/* '(' in arithmetic */
1015				parenlevel++;
1016				USTPUTC(c, out);
1017				break;
1018			case CRP:	/* ')' in arithmetic */
1019				if (parenlevel > 0) {
1020					USTPUTC(c, out);
1021					--parenlevel;
1022				} else {
1023					if (pgetc() == ')') {
1024						if (--arinest == 0) {
1025							USTPUTC(CTLENDARI, out);
1026							syntax = prevsyntax;
1027							if (syntax == DQSYNTAX)
1028								dblquote = 1;
1029							else
1030								dblquote = 0;
1031						} else
1032							USTPUTC(')', out);
1033					} else {
1034						/*
1035						 * unbalanced parens
1036						 *  (don't 2nd guess - no error)
1037						 */
1038						pungetc();
1039						USTPUTC(')', out);
1040					}
1041				}
1042				break;
1043			case CBQUOTE:	/* '`' */
1044				PARSEBACKQOLD();
1045				break;
1046			case CEOF:
1047				goto endword;		/* exit outer loop */
1048			default:
1049				if (varnest == 0)
1050					goto endword;	/* exit outer loop */
1051				USTPUTC(c, out);
1052			}
1053			c = pgetc_macro();
1054		}
1055	}
1056endword:
1057	if (syntax == ARISYNTAX)
1058		synerror("Missing '))'");
1059	if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1060		synerror("Unterminated quoted string");
1061	if (varnest != 0) {
1062		startlinno = plinno;
1063		synerror("Missing '}'");
1064	}
1065	USTPUTC('\0', out);
1066	len = out - stackblock();
1067	out = stackblock();
1068	if (eofmark == NULL) {
1069		if ((c == '>' || c == '<')
1070		 && quotef == 0
1071		 && len <= 2
1072		 && (*out == '\0' || is_digit(*out))) {
1073			PARSEREDIR();
1074			return lasttoken = TREDIR;
1075		} else {
1076			pungetc();
1077		}
1078	}
1079	quoteflag = quotef;
1080	backquotelist = bqlist;
1081	grabstackblock(len);
1082	wordtext = out;
1083	return lasttoken = TWORD;
1084/* end of readtoken routine */
1085
1086
1087
1088/*
1089 * Check to see whether we are at the end of the here document.  When this
1090 * is called, c is set to the first character of the next input line.  If
1091 * we are at the end of the here document, this routine sets the c to PEOF.
1092 */
1093
1094checkend: {
1095	if (eofmark) {
1096		if (striptabs) {
1097			while (c == '\t')
1098				c = pgetc();
1099		}
1100		if (c == *eofmark) {
1101			if (pfgets(line, sizeof line) != NULL) {
1102				char *p, *q;
1103
1104				p = line;
1105				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1106				if (*p == '\n' && *q == '\0') {
1107					c = PEOF;
1108					plinno++;
1109					needprompt = doprompt;
1110				} else {
1111					pushstring(line, strlen(line), NULL);
1112				}
1113			}
1114		}
1115	}
1116	goto checkend_return;
1117}
1118
1119
1120/*
1121 * Parse a redirection operator.  The variable "out" points to a string
1122 * specifying the fd to be redirected.  The variable "c" contains the
1123 * first character of the redirection operator.
1124 */
1125
1126parseredir: {
1127	char fd = *out;
1128	union node *np;
1129
1130	np = (union node *)stalloc(sizeof (struct nfile));
1131	if (c == '>') {
1132		np->nfile.fd = 1;
1133		c = pgetc();
1134		if (c == '>')
1135			np->type = NAPPEND;
1136		else if (c == '&')
1137			np->type = NTOFD;
1138		else if (c == '|')
1139			np->type = NCLOBBER;
1140		else {
1141			np->type = NTO;
1142			pungetc();
1143		}
1144	} else {	/* c == '<' */
1145		np->nfile.fd = 0;
1146		c = pgetc();
1147		if (c == '<') {
1148			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1149				np = (union node *)stalloc(sizeof (struct nhere));
1150				np->nfile.fd = 0;
1151			}
1152			np->type = NHERE;
1153			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1154			heredoc->here = np;
1155			if ((c = pgetc()) == '-') {
1156				heredoc->striptabs = 1;
1157			} else {
1158				heredoc->striptabs = 0;
1159				pungetc();
1160			}
1161		} else if (c == '&')
1162			np->type = NFROMFD;
1163		else if (c == '>')
1164			np->type = NFROMTO;
1165		else {
1166			np->type = NFROM;
1167			pungetc();
1168		}
1169	}
1170	if (fd != '\0')
1171		np->nfile.fd = digit_val(fd);
1172	redirnode = np;
1173	goto parseredir_return;
1174}
1175
1176
1177/*
1178 * Parse a substitution.  At this point, we have read the dollar sign
1179 * and nothing else.
1180 */
1181
1182parsesub: {
	/*
	 * Labelled section of the enclosing token reader, entered after a
	 * '$' has been consumed.  Depending on the next character it either
	 * emits a literal '$', hands off to PARSEARITH()/PARSEBACKQNEW(),
	 * or encodes a variable reference on the string stack as:
	 *   CTLVAR, <type byte>, <name>, '='
	 * The type byte is written as a VSNORMAL placeholder first and
	 * patched through typeloc once the substitution operator is known.
	 * Control leaves through parsesub_return (or synerror()).
	 */
1183	int subtype;
1184	int typeloc;
1185	int flags;
1186	char *p;
1187#ifndef GDB_HACK
	/* Operator characters; the index in this string, added to VSNORMAL,
	 * gives the subtype (so '}' at index 0 maps to VSNORMAL itself). */
1188	static const char types[] = "}-+?=";
1189#endif
1190       int bracketed_name = 0; /* used to handle ${[0-9]*} variables */
1191
1192	c = pgetc();
1193	if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
		/* Not the start of any substitution: keep the '$' literally
		 * and give the character back to the input. */
1194		USTPUTC('$', out);
1195		pungetc();
1196	} else if (c == '(') {	/* $(command) or $((arith)) */
1197		if (pgetc() == '(') {
1198			PARSEARITH();
1199		} else {
1200			pungetc();
1201			PARSEBACKQNEW();
1202		}
1203	} else {
1204		USTPUTC(CTLVAR, out);
		/* Remember the offset (not a pointer) of the type byte so it
		 * can be rewritten below via stackblock() + typeloc. */
1205		typeloc = out - stackblock();
1206		USTPUTC(VSNORMAL, out);
1207		subtype = VSNORMAL;
1208		if (c == '{') {
1209			bracketed_name = 1;
1210			c = pgetc();
1211			if (c == '#') {
				/* "${#}" is the special parameter '#'; the
				 * '}' has been consumed and subtype stays
				 * VSNORMAL.  Any other character after '#'
				 * starts a name for a length expansion. */
1212				if ((c = pgetc()) == '}')
1213					c = '#';
1214				else
1215					subtype = VSLENGTH;
1216			}
1217			else
				/* 0 means "operator still to be read"; it is
				 * resolved in the switch further down. */
1218				subtype = 0;
1219		}
1220		if (is_name(c)) {
1221			do {
1222				STPUTC(c, out);
1223				c = pgetc();
1224			} while (is_in_name(c));
1225		} else if (is_digit(c)) {
			/* Inside braces every consecutive digit belongs to
			 * the parameter; without braces only one digit is
			 * taken. */
1226			if (bracketed_name) {
1227				do {
1228					STPUTC(c, out);
1229					c = pgetc();
1230				} while (is_digit(c));
1231			} else {
1232				STPUTC(c, out);
1233				c = pgetc();
1234			}
1235		} else {
			/* Must be a special parameter.  The badsub label is
			 * also the target of a goto from the operator switch
			 * below; synerror() is not expected to return
			 * (NOTE(review): confirm against its definition). */
1236			if (! is_special(c))
1237badsub:				synerror("Bad substitution");
1238			USTPUTC(c, out);
1239			c = pgetc();
1240		}
		/* '=' terminates the name in the encoded form. */
1241		STPUTC('=', out);
1242		flags = 0;
1243		if (subtype == 0) {
			/* Brace form without '#': c is the first character
			 * after the name and selects the operator. */
1244			switch (c) {
1245			case ':':
				/* ":" prefix sets VSNUL, then the operator
				 * character itself is looked up below. */
1246				flags = VSNUL;
1247				c = pgetc();
1248				/*FALLTHROUGH*/
1249			default:
1250				p = strchr(types, c);
1251				if (p == NULL)
1252					goto badsub;
1253				subtype = p - types + VSNORMAL;
1254				break;
1255			case '%':
1256			case '#':
1257				{
					/* A doubled operator ("%%" or "##")
					 * selects the next subtype value;
					 * otherwise the look-ahead character
					 * is pushed back. */
1258					int cc = c;
1259					subtype = c == '#' ? VSTRIMLEFT :
1260							     VSTRIMRIGHT;
1261					c = pgetc();
1262					if (c == cc)
1263						subtype++;
1264					else
1265						pungetc();
1266					break;
1267				}
1268			}
1269		} else {
			/* Subtype already decided (VSNORMAL or VSLENGTH): the
			 * character read after the name is not consumed
			 * here. */
1270			pungetc();
1271		}
1272		if (subtype != VSLENGTH && (dblquote || arinest))
1273			flags |= VSQUOTE;
		/* Patch the placeholder type byte written above. */
1274		*(stackblock() + typeloc) = subtype | flags;
		/* Every form except plain VSNORMAL bumps varnest; presumably
		 * the enclosing reader uses it to match the closing '}' --
		 * confirm against the code that decrements it. */
1275		if (subtype != VSNORMAL)
1276			varnest++;
1277	}
1278	goto parsesub_return;
1279}
1280
1281
1282/*
1283 * Called to parse command substitutions.  Newstyle is set if the command
1284 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1285 * list of commands (passed by reference), and savelen is the number of
1286 * characters on the top of the stack which must be preserved.
1287 */
1288
1289parsebackq: {
1290	struct nodelist **nlpp;
1291	int savepbq;
1292	union node *n;
1293	char *volatile str;
1294	struct jmploc jmploc;
1295	struct jmploc *volatile savehandler;
1296	int savelen;
1297	int saveprompt;
1298#if __GNUC__
1299	/* Avoid longjmp clobbering */
1300	(void) &saveprompt;
1301#endif
1302
1303	savepbq = parsebackquote;
1304	if (setjmp(jmploc.loc)) {
1305		if (str)
1306			ckfree(str);
1307		parsebackquote = 0;
1308		handler = savehandler;
1309		longjmp(handler->loc, 1);
1310	}
1311	INTOFF;
1312	str = NULL;
1313	savelen = out - stackblock();
1314	if (savelen > 0) {
1315		str = ckmalloc(savelen);
1316		memcpy(str, stackblock(), savelen);
1317	}
1318	savehandler = handler;
1319	handler = &jmploc;
1320	INTON;
1321        if (oldstyle) {
1322                /* We must read until the closing backquote, giving special
1323                   treatment to some slashes, and then push the string and
1324                   reread it as input, interpreting it normally.  */
1325                char *out;
1326                int c;
1327                int savelen;
1328                char *str;
1329
1330
1331                STARTSTACKSTR(out);
1332		for (;;) {
1333			if (needprompt) {
1334				setprompt(2);
1335				needprompt = 0;
1336			}
1337			switch (c = pgetc()) {
1338			case '`':
1339				goto done;
1340
1341			case '\\':
1342                                if ((c = pgetc()) == '\n') {
1343					plinno++;
1344					if (doprompt)
1345						setprompt(2);
1346					else
1347						setprompt(0);
1348					/*
1349					 * If eating a newline, avoid putting
1350					 * the newline into the new character
1351					 * stream (via the STPUTC after the
1352					 * switch).
1353					 */
1354					continue;
1355				}
1356                                if (c != '\\' && c != '`' && c != '$'
1357                                    && (!dblquote || c != '"'))
1358                                        STPUTC('\\', out);
1359				break;
1360
1361			case '\n':
1362				plinno++;
1363				needprompt = doprompt;
1364				break;
1365
1366			case PEOF:
1367			        startlinno = plinno;
1368				synerror("EOF in backquote substitution");
1369 				break;
1370
1371			default:
1372				break;
1373			}
1374			STPUTC(c, out);
1375                }
1376done:
1377                STPUTC('\0', out);
1378                savelen = out - stackblock();
1379                if (savelen > 0) {
1380                        str = ckmalloc(savelen);
1381                        memcpy(str, stackblock(), savelen);
1382			setinputstring(str, 1);
1383                }
1384        }
1385	nlpp = &bqlist;
1386	while (*nlpp)
1387		nlpp = &(*nlpp)->next;
1388	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1389	(*nlpp)->next = NULL;
1390	parsebackquote = oldstyle;
1391
1392	if (oldstyle) {
1393		saveprompt = doprompt;
1394		doprompt = 0;
1395	}
1396
1397	n = list(0);
1398
1399	if (oldstyle)
1400		doprompt = saveprompt;
1401	else {
1402		if (readtoken() != TRP)
1403			synexpect(TRP);
1404	}
1405
1406	(*nlpp)->n = n;
1407        if (oldstyle) {
1408		/*
1409		 * Start reading from old file again, ignoring any pushed back
1410		 * tokens left from the backquote parsing
1411		 */
1412                popfile();
1413		tokpushback = 0;
1414	}
1415	while (stackblocksize() <= savelen)
1416		growstackblock();
1417	STARTSTACKSTR(out);
1418	if (str) {
1419		memcpy(out, str, savelen);
1420		STADJUST(savelen, out);
1421		INTOFF;
1422		ckfree(str);
1423		str = NULL;
1424		INTON;
1425	}
1426	parsebackquote = savepbq;
1427	handler = savehandler;
1428	if (arinest || dblquote)
1429		USTPUTC(CTLBACKQ | CTLQUOTE, out);
1430	else
1431		USTPUTC(CTLBACKQ, out);
1432	if (oldstyle)
1433		goto parsebackq_oldreturn;
1434	else
1435		goto parsebackq_newreturn;
1436}
1437
1438/*
1439 * Parse an arithmetic expansion (indicate start of one and set state)
1440 */
1441parsearith: {
1442
1443	if (++arinest == 1) {
1444		prevsyntax = syntax;
1445		syntax = ARISYNTAX;
1446		USTPUTC(CTLARI, out);
1447		if (dblquote)
1448			USTPUTC('"',out);
1449		else
1450			USTPUTC(' ',out);
1451	} else {
1452		/*
1453		 * we collapse embedded arithmetic expansion to
1454		 * parenthesis, which should be equivalent
1455		 */
1456		USTPUTC('(', out);
1457	}
1458	goto parsearith_return;
1459}
1460
1461} /* end of readtoken */
1462
1463
1464
/*
 * Reset fragment: clears the pushed-back-token flag and the keyword-check
 * flag.  The block is only visible when `mkinit' is defined; presumably it
 * is collected by the mkinit tool into a generated reset routine rather
 * than compiled as part of this file -- confirm against the build.
 */
1465#ifdef mkinit
1466RESET {
1467	tokpushback = 0;
1468	checkkwd = 0;
1469}
1470#endif
1471
1472/*
1473 * Returns true if the text contains nothing to expand (no dollar signs
1474 * or backquotes).
1475 */
1476
1477STATIC int
1478noexpand(char *text)
1479{
1480	char *p;
1481	char c;
1482
1483	p = text;
1484	while ((c = *p++) != '\0') {
1485		if ( c == CTLQUOTEMARK)
1486			continue;
1487		if (c == CTLESC)
1488			p++;
1489		else if (BASESYNTAX[(int)c] == CCTL)
1490			return 0;
1491	}
1492	return 1;
1493}
1494
1495
/*
 * Check that the argument is a legal variable name: the first character
 * must satisfy is_name() and every following character, up to the
 * terminating NUL, must satisfy is_in_name().  Returns 1 for a legal
 * name and 0 otherwise.
 */

int
goodname(char *name)
{
	char *rest;

	if (!is_name(*name))
		return 0;
	for (rest = name + 1; *rest != '\0'; rest++) {
		if (!is_in_name(*rest))
			return 0;
	}
	return 1;
}
1515
1516
1517/*
1518 * Called when an unexpected token is read during the parse.  The argument
1519 * is the token that is expected, or -1 if more than one type of token can
1520 * occur at this point.
1521 */
1522
1523STATIC void
1524synexpect(int token)
1525{
1526	char msg[64];
1527
1528	if (token >= 0) {
1529		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1530			tokname[lasttoken], tokname[token]);
1531	} else {
1532		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1533	}
1534	synerror(msg);
1535}
1536
1537
1538STATIC void
1539synerror(char *msg)
1540{
1541	if (commandname)
1542		outfmt(&errout, "%s: %d: ", commandname, startlinno);
1543	outfmt(&errout, "Syntax error: %s\n", msg);
1544	error((char *)NULL);
1545}
1546
1547STATIC void
1548setprompt(int which)
1549{
1550	whichprompt = which;
1551
1552#ifndef NO_HISTORY
1553	if (!el)
1554#endif
1555		out2str(getprompt(NULL));
1556}
1557
1558/*
1559 * called by editline -- any expansions to the prompt
1560 *    should be added here.
1561 */
1562char *
1563getprompt(void *unused __unused)
1564{
1565	switch (whichprompt) {
1566	case 0:
1567		return "";
1568	case 1:
1569		return ps1val();
1570	case 2:
1571		return ps2val();
1572	default:
1573		return "<internal prompt error>";
1574	}
1575}
1576