expand.c revision 221602
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36#if 0
37static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
38#endif
39#endif /* not lint */
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/bin/sh/expand.c 221602 2011-05-07 14:32:16Z jilles $");
42
43#include <sys/types.h>
44#include <sys/time.h>
45#include <sys/stat.h>
46#include <dirent.h>
47#include <errno.h>
48#include <inttypes.h>
49#include <limits.h>
50#include <pwd.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55
56/*
57 * Routines to expand arguments to commands.  We have to deal with
58 * backquotes, shell variables, and file metacharacters.
59 */
60
61#include "shell.h"
62#include "main.h"
63#include "nodes.h"
64#include "eval.h"
65#include "expand.h"
66#include "syntax.h"
67#include "parser.h"
68#include "jobs.h"
69#include "options.h"
70#include "var.h"
71#include "input.h"
72#include "output.h"
73#include "memalloc.h"
74#include "error.h"
75#include "mystring.h"
76#include "arith.h"
77#include "show.h"
78
/*
 * One stretch of the expanded string that has to be scanned for field
 * separators.  Regions are chained into a singly linked list.
 */

struct ifsregion {
	struct ifsregion *next;	/* following region, NULL at the end */
	int begoff;		/* offset at which the region begins */
	int endoff;		/* offset just past the region's last byte */
	int inquotes;		/* nonzero: only nul bytes split fields */
};
90
91
92static char *expdest;			/* output of current string */
93static struct nodelist *argbackq;	/* list of back quote expressions */
94static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
95static struct ifsregion *ifslastp;	/* last struct in list */
96static struct arglist exparg;		/* holds expanded arg list */
97
98static void argstr(char *, int);
99static char *exptilde(char *, int);
100static void expbackq(union node *, int, int);
101static int subevalvar(char *, char *, int, int, int, int, int);
102static char *evalvar(char *, int);
103static int varisset(char *, int);
104static void varvalue(char *, int, int, int);
105static void recordregion(int, int, int);
106static void removerecordregions(int);
107static void ifsbreakup(char *, struct arglist *);
108static void expandmeta(struct strlist *, int);
109static void expmeta(char *, char *);
110static void addfname(char *);
111static struct strlist *expsort(struct strlist *);
112static struct strlist *msort(struct strlist *, int);
113static char *cvtnum(int, char *);
114static int collate_range_cmp(int, int);
115
/*
 * Compare two single characters using the collation order of the current
 * locale.  The result has the same sign convention as strcoll().
 */
static int
collate_range_cmp(int c1, int c2)
{
	char lhs[2] = { (char)c1, '\0' };
	char rhs[2] = { (char)c2, '\0' };

	return (strcoll(lhs, rhs));
}
125
126/*
127 * Expand shell variables and backquotes inside a here document.
128 *	union node *arg		the document
129 *	int fd;			where to write the expanded version
130 */
131
132void
133expandhere(union node *arg, int fd)
134{
135	expandarg(arg, (struct arglist *)NULL, 0);
136	xwrite(fd, stackblock(), expdest - stackblock());
137}
138
139static char *
140stputs_quotes(const char *data, const char *syntax, char *p)
141{
142	while (*data) {
143		CHECKSTRSPACE(2, p);
144		if (syntax[(int)*data] == CCTL)
145			USTPUTC(CTLESC, p);
146		USTPUTC(*data++, p);
147	}
148	return (p);
149}
150#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
151
152/*
153 * Perform expansions on an argument, placing the resulting list of arguments
154 * in arglist.  Parameter expansion, command substitution and arithmetic
155 * expansion are always performed; additional expansions can be requested
156 * via flag (EXP_*).
157 * The result is left in the stack string.
158 * When arglist is NULL, perform here document expansion.
159 *
160 * Caution: this function uses global state and is not reentrant.
161 * However, a new invocation after an interrupted invocation is safe
162 * and will reset the global state for the new call.
163 */
164void
165expandarg(union node *arg, struct arglist *arglist, int flag)
166{
167	struct strlist *sp;
168	char *p;
169
170	argbackq = arg->narg.backquote;
171	STARTSTACKSTR(expdest);
172	ifsfirst.next = NULL;
173	ifslastp = NULL;
174	argstr(arg->narg.text, flag);
175	if (arglist == NULL) {
176		return;			/* here document expanded */
177	}
178	STPUTC('\0', expdest);
179	p = grabstackstr(expdest);
180	exparg.lastp = &exparg.list;
181	/*
182	 * TODO - EXP_REDIR
183	 */
184	if (flag & EXP_FULL) {
185		ifsbreakup(p, &exparg);
186		*exparg.lastp = NULL;
187		exparg.lastp = &exparg.list;
188		expandmeta(exparg.list, flag);
189	} else {
190		if (flag & EXP_REDIR) /*XXX - for now, just remove escapes */
191			rmescapes(p);
192		sp = (struct strlist *)stalloc(sizeof (struct strlist));
193		sp->text = p;
194		*exparg.lastp = sp;
195		exparg.lastp = &sp->next;
196	}
197	while (ifsfirst.next != NULL) {
198		struct ifsregion *ifsp;
199		INTOFF;
200		ifsp = ifsfirst.next->next;
201		ckfree(ifsfirst.next);
202		ifsfirst.next = ifsp;
203		INTON;
204	}
205	*exparg.lastp = NULL;
206	if (exparg.list) {
207		*arglist->lastp = exparg.list;
208		arglist->lastp = exparg.lastp;
209	}
210}
211
212
213
214/*
215 * Perform parameter expansion, command substitution and arithmetic
216 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
217 * Processing ends at a CTLENDVAR character as well as '\0'.
218 * This is used to expand word in ${var+word} etc.
219 * If EXP_FULL, EXP_CASE or EXP_REDIR are set, keep and/or generate CTLESC
220 * characters to allow for further processing.
221 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
222 */
223static void
224argstr(char *p, int flag)
225{
226	char c;
227	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);	/* do CTLESC */
228	int firsteq = 1;
229	int split_lit;
230	int lit_quoted;
231
232	split_lit = flag & EXP_SPLIT_LIT;
233	lit_quoted = flag & EXP_LIT_QUOTED;
234	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
235	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
236		p = exptilde(p, flag);
237	for (;;) {
238		CHECKSTRSPACE(2, expdest);
239		switch (c = *p++) {
240		case '\0':
241		case CTLENDVAR:
242			goto breakloop;
243		case CTLQUOTEMARK:
244			lit_quoted = 1;
245			/* "$@" syntax adherence hack */
246			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
247				break;
248			if ((flag & EXP_FULL) != 0)
249				USTPUTC(c, expdest);
250			break;
251		case CTLQUOTEEND:
252			lit_quoted = 0;
253			break;
254		case CTLESC:
255			if (quotes)
256				USTPUTC(c, expdest);
257			c = *p++;
258			USTPUTC(c, expdest);
259			if (split_lit && !lit_quoted)
260				recordregion(expdest - stackblock() -
261				    (quotes ? 2 : 1),
262				    expdest - stackblock(), 0);
263			break;
264		case CTLVAR:
265			p = evalvar(p, flag);
266			break;
267		case CTLBACKQ:
268		case CTLBACKQ|CTLQUOTE:
269			expbackq(argbackq->n, c & CTLQUOTE, flag);
270			argbackq = argbackq->next;
271			break;
272		case CTLENDARI:
273			expari(flag);
274			break;
275		case ':':
276		case '=':
277			/*
278			 * sort of a hack - expand tildes in variable
279			 * assignments (after the first '=' and after ':'s).
280			 */
281			USTPUTC(c, expdest);
282			if (split_lit && !lit_quoted)
283				recordregion(expdest - stackblock() - 1,
284				    expdest - stackblock(), 0);
285			if (flag & EXP_VARTILDE && *p == '~' &&
286			    (c != '=' || firsteq)) {
287				if (c == '=')
288					firsteq = 0;
289				p = exptilde(p, flag);
290			}
291			break;
292		default:
293			USTPUTC(c, expdest);
294			if (split_lit && !lit_quoted)
295				recordregion(expdest - stackblock() - 1,
296				    expdest - stackblock(), 0);
297		}
298	}
299breakloop:;
300}
301
302/*
303 * Perform tilde expansion, placing the result in the stack string and
304 * returning the next position in the input string to process.
305 */
306static char *
307exptilde(char *p, int flag)
308{
309	char c, *startp = p;
310	struct passwd *pw;
311	char *home;
312	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
313
314	while ((c = *p) != '\0') {
315		switch(c) {
316		case CTLESC: /* This means CTL* are always considered quoted. */
317		case CTLVAR:
318		case CTLBACKQ:
319		case CTLBACKQ | CTLQUOTE:
320		case CTLARI:
321		case CTLENDARI:
322		case CTLQUOTEMARK:
323			return (startp);
324		case ':':
325			if (flag & EXP_VARTILDE)
326				goto done;
327			break;
328		case '/':
329		case CTLENDVAR:
330			goto done;
331		}
332		p++;
333	}
334done:
335	*p = '\0';
336	if (*(startp+1) == '\0') {
337		if ((home = lookupvar("HOME")) == NULL)
338			goto lose;
339	} else {
340		if ((pw = getpwnam(startp+1)) == NULL)
341			goto lose;
342		home = pw->pw_dir;
343	}
344	if (*home == '\0')
345		goto lose;
346	*p = c;
347	if (quotes)
348		STPUTS_QUOTES(home, SQSYNTAX, expdest);
349	else
350		STPUTS(home, expdest);
351	return (p);
352lose:
353	*p = c;
354	return (startp);
355}
356
357
358static void
359removerecordregions(int endoff)
360{
361	if (ifslastp == NULL)
362		return;
363
364	if (ifsfirst.endoff > endoff) {
365		while (ifsfirst.next != NULL) {
366			struct ifsregion *ifsp;
367			INTOFF;
368			ifsp = ifsfirst.next->next;
369			ckfree(ifsfirst.next);
370			ifsfirst.next = ifsp;
371			INTON;
372		}
373		if (ifsfirst.begoff > endoff)
374			ifslastp = NULL;
375		else {
376			ifslastp = &ifsfirst;
377			ifsfirst.endoff = endoff;
378		}
379		return;
380	}
381
382	ifslastp = &ifsfirst;
383	while (ifslastp->next && ifslastp->next->begoff < endoff)
384		ifslastp=ifslastp->next;
385	while (ifslastp->next != NULL) {
386		struct ifsregion *ifsp;
387		INTOFF;
388		ifsp = ifslastp->next->next;
389		ckfree(ifslastp->next);
390		ifslastp->next = ifsp;
391		INTON;
392	}
393	if (ifslastp->endoff > endoff)
394		ifslastp->endoff = endoff;
395}
396
397/*
398 * Expand arithmetic expression.  Backup to start of expression,
399 * evaluate, place result in (backed up) result, adjust string position.
400 */
401void
402expari(int flag)
403{
404	char *p, *q, *start;
405	arith_t result;
406	int begoff;
407	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
408	int quoted;
409
410	/*
411	 * This routine is slightly over-complicated for
412	 * efficiency.  First we make sure there is
413	 * enough space for the result, which may be bigger
414	 * than the expression.  Next we
415	 * scan backwards looking for the start of arithmetic.  If the
416	 * next previous character is a CTLESC character, then we
417	 * have to rescan starting from the beginning since CTLESC
418	 * characters have to be processed left to right.
419	 */
420	CHECKSTRSPACE(DIGITS(result) - 2, expdest);
421	USTPUTC('\0', expdest);
422	start = stackblock();
423	p = expdest - 2;
424	while (p >= start && *p != CTLARI)
425		--p;
426	if (p < start || *p != CTLARI)
427		error("missing CTLARI (shouldn't happen)");
428	if (p > start && *(p - 1) == CTLESC)
429		for (p = start; *p != CTLARI; p++)
430			if (*p == CTLESC)
431				p++;
432
433	if (p[1] == '"')
434		quoted=1;
435	else
436		quoted=0;
437	begoff = p - start;
438	removerecordregions(begoff);
439	if (quotes)
440		rmescapes(p+2);
441	q = grabstackstr(expdest);
442	result = arith(p+2);
443	ungrabstackstr(q, expdest);
444	fmtstr(p, DIGITS(result), ARITH_FORMAT_STR, result);
445	while (*p++)
446		;
447	if (quoted == 0)
448		recordregion(begoff, p - 1 - start, 0);
449	result = expdest - p + 1;
450	STADJUST(-result, expdest);
451}
452
453
454/*
455 * Perform command substitution.
456 */
457static void
458expbackq(union node *cmd, int quoted, int flag)
459{
460	struct backcmd in;
461	int i;
462	char buf[128];
463	char *p;
464	char *dest = expdest;
465	struct ifsregion saveifs, *savelastp;
466	struct nodelist *saveargbackq;
467	char lastc;
468	int startloc = dest - stackblock();
469	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
470	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
471	int nnl;
472
473	INTOFF;
474	saveifs = ifsfirst;
475	savelastp = ifslastp;
476	saveargbackq = argbackq;
477	p = grabstackstr(dest);
478	evalbackcmd(cmd, &in);
479	ungrabstackstr(p, dest);
480	ifsfirst = saveifs;
481	ifslastp = savelastp;
482	argbackq = saveargbackq;
483
484	p = in.buf;
485	lastc = '\0';
486	nnl = 0;
487	/* Don't copy trailing newlines */
488	for (;;) {
489		if (--in.nleft < 0) {
490			if (in.fd < 0)
491				break;
492			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
493			TRACE(("expbackq: read returns %d\n", i));
494			if (i <= 0)
495				break;
496			p = buf;
497			in.nleft = i - 1;
498		}
499		lastc = *p++;
500		if (lastc != '\0') {
501			if (lastc == '\n') {
502				nnl++;
503			} else {
504				CHECKSTRSPACE(nnl + 2, dest);
505				while (nnl > 0) {
506					nnl--;
507					USTPUTC('\n', dest);
508				}
509				if (quotes && syntax[(int)lastc] == CCTL)
510					USTPUTC(CTLESC, dest);
511				USTPUTC(lastc, dest);
512			}
513		}
514	}
515
516	if (in.fd >= 0)
517		close(in.fd);
518	if (in.buf)
519		ckfree(in.buf);
520	if (in.jp)
521		exitstatus = waitforjob(in.jp, (int *)NULL);
522	if (quoted == 0)
523		recordregion(startloc, dest - stackblock(), 0);
524	TRACE(("expbackq: size=%td: \"%.*s\"\n",
525		((dest - stackblock()) - startloc),
526		(int)((dest - stackblock()) - startloc),
527		stackblock() + startloc));
528	expdest = dest;
529	INTON;
530}
531
532
533
534static int
535subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
536  int varflags, int quotes)
537{
538	char *startp;
539	char *loc = NULL;
540	char *q;
541	int c = 0;
542	struct nodelist *saveargbackq = argbackq;
543	int amount;
544
545	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
546	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
547	    EXP_CASE : 0) | EXP_TILDE);
548	STACKSTRNUL(expdest);
549	argbackq = saveargbackq;
550	startp = stackblock() + startloc;
551	if (str == NULL)
552	    str = stackblock() + strloc;
553
554	switch (subtype) {
555	case VSASSIGN:
556		setvar(str, startp, 0);
557		amount = startp - expdest;
558		STADJUST(amount, expdest);
559		varflags &= ~VSNUL;
560		return 1;
561
562	case VSQUESTION:
563		if (*p != CTLENDVAR) {
564			outfmt(out2, "%s\n", startp);
565			error((char *)NULL);
566		}
567		error("%.*s: parameter %snot set", (int)(p - str - 1),
568		      str, (varflags & VSNUL) ? "null or "
569					      : nullstr);
570		return 0;
571
572	case VSTRIMLEFT:
573		for (loc = startp; loc < str; loc++) {
574			c = *loc;
575			*loc = '\0';
576			if (patmatch(str, startp, quotes)) {
577				*loc = c;
578				goto recordleft;
579			}
580			*loc = c;
581			if (quotes && *loc == CTLESC)
582				loc++;
583		}
584		return 0;
585
586	case VSTRIMLEFTMAX:
587		for (loc = str - 1; loc >= startp;) {
588			c = *loc;
589			*loc = '\0';
590			if (patmatch(str, startp, quotes)) {
591				*loc = c;
592				goto recordleft;
593			}
594			*loc = c;
595			loc--;
596			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
597				for (q = startp; q < loc; q++)
598					if (*q == CTLESC)
599						q++;
600				if (q > loc)
601					loc--;
602			}
603		}
604		return 0;
605
606	case VSTRIMRIGHT:
607		for (loc = str - 1; loc >= startp;) {
608			if (patmatch(str, loc, quotes)) {
609				amount = loc - expdest;
610				STADJUST(amount, expdest);
611				return 1;
612			}
613			loc--;
614			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
615				for (q = startp; q < loc; q++)
616					if (*q == CTLESC)
617						q++;
618				if (q > loc)
619					loc--;
620			}
621		}
622		return 0;
623
624	case VSTRIMRIGHTMAX:
625		for (loc = startp; loc < str - 1; loc++) {
626			if (patmatch(str, loc, quotes)) {
627				amount = loc - expdest;
628				STADJUST(amount, expdest);
629				return 1;
630			}
631			if (quotes && *loc == CTLESC)
632				loc++;
633		}
634		return 0;
635
636
637	default:
638		abort();
639	}
640
641recordleft:
642	amount = ((str - 1) - (loc - startp)) - expdest;
643	STADJUST(amount, expdest);
644	while (loc != str - 1)
645		*startp++ = *loc++;
646	return 1;
647}
648
649
650/*
651 * Expand a variable, and return a pointer to the next character in the
652 * input string.
653 */
654
655static char *
656evalvar(char *p, int flag)
657{
658	int subtype;
659	int varflags;
660	char *var;
661	char *val;
662	int patloc;
663	int c;
664	int set;
665	int special;
666	int startloc;
667	int varlen;
668	int varlenb;
669	int easy;
670	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
671
672	varflags = (unsigned char)*p++;
673	subtype = varflags & VSTYPE;
674	var = p;
675	special = 0;
676	if (! is_name(*p))
677		special = 1;
678	p = strchr(p, '=') + 1;
679again: /* jump here after setting a variable with ${var=text} */
680	if (varflags & VSLINENO) {
681		set = 1;
682		special = 0;
683		val = var;
684		p[-1] = '\0';	/* temporarily overwrite '=' to have \0
685				   terminated string */
686	} else if (special) {
687		set = varisset(var, varflags & VSNUL);
688		val = NULL;
689	} else {
690		val = bltinlookup(var, 1);
691		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
692			val = NULL;
693			set = 0;
694		} else
695			set = 1;
696	}
697	varlen = 0;
698	startloc = expdest - stackblock();
699	if (!set && uflag && *var != '@' && *var != '*') {
700		switch (subtype) {
701		case VSNORMAL:
702		case VSTRIMLEFT:
703		case VSTRIMLEFTMAX:
704		case VSTRIMRIGHT:
705		case VSTRIMRIGHTMAX:
706		case VSLENGTH:
707			error("%.*s: parameter not set", (int)(p - var - 1),
708			    var);
709		}
710	}
711	if (set && subtype != VSPLUS) {
712		/* insert the value of the variable */
713		if (special) {
714			varvalue(var, varflags & VSQUOTE, subtype, flag);
715			if (subtype == VSLENGTH) {
716				varlenb = expdest - stackblock() - startloc;
717				varlen = varlenb;
718				if (localeisutf8) {
719					val = stackblock() + startloc;
720					for (;val != expdest; val++)
721						if ((*val & 0xC0) == 0x80)
722							varlen--;
723				}
724				STADJUST(-varlenb, expdest);
725			}
726		} else {
727			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
728								  : BASESYNTAX;
729
730			if (subtype == VSLENGTH) {
731				for (;*val; val++)
732					if (!localeisutf8 ||
733					    (*val & 0xC0) != 0x80)
734						varlen++;
735			}
736			else {
737				if (quotes)
738					STPUTS_QUOTES(val, syntax, expdest);
739				else
740					STPUTS(val, expdest);
741
742			}
743		}
744	}
745
746	if (subtype == VSPLUS)
747		set = ! set;
748
749	easy = ((varflags & VSQUOTE) == 0 ||
750		(*var == '@' && shellparam.nparam != 1));
751
752
753	switch (subtype) {
754	case VSLENGTH:
755		expdest = cvtnum(varlen, expdest);
756		goto record;
757
758	case VSNORMAL:
759		if (!easy)
760			break;
761record:
762		recordregion(startloc, expdest - stackblock(),
763			     varflags & VSQUOTE);
764		break;
765
766	case VSPLUS:
767	case VSMINUS:
768		if (!set) {
769			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
770			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
771			break;
772		}
773		if (easy)
774			goto record;
775		break;
776
777	case VSTRIMLEFT:
778	case VSTRIMLEFTMAX:
779	case VSTRIMRIGHT:
780	case VSTRIMRIGHTMAX:
781		if (!set)
782			break;
783		/*
784		 * Terminate the string and start recording the pattern
785		 * right after it
786		 */
787		STPUTC('\0', expdest);
788		patloc = expdest - stackblock();
789		if (subevalvar(p, NULL, patloc, subtype,
790		    startloc, varflags, quotes) == 0) {
791			int amount = (expdest - stackblock() - patloc) + 1;
792			STADJUST(-amount, expdest);
793		}
794		/* Remove any recorded regions beyond start of variable */
795		removerecordregions(startloc);
796		goto record;
797
798	case VSASSIGN:
799	case VSQUESTION:
800		if (!set) {
801			if (subevalvar(p, var, 0, subtype, startloc, varflags,
802			    quotes)) {
803				varflags &= ~VSNUL;
804				/*
805				 * Remove any recorded regions beyond
806				 * start of variable
807				 */
808				removerecordregions(startloc);
809				goto again;
810			}
811			break;
812		}
813		if (easy)
814			goto record;
815		break;
816
817	case VSERROR:
818		c = p - var - 1;
819		error("${%.*s%s}: Bad substitution", c, var,
820		    (c > 0 && *p != CTLENDVAR) ? "..." : "");
821
822	default:
823		abort();
824	}
825	p[-1] = '=';	/* recover overwritten '=' */
826
827	if (subtype != VSNORMAL) {	/* skip to end of alternative */
828		int nesting = 1;
829		for (;;) {
830			if ((c = *p++) == CTLESC)
831				p++;
832			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
833				if (set)
834					argbackq = argbackq->next;
835			} else if (c == CTLVAR) {
836				if ((*p++ & VSTYPE) != VSNORMAL)
837					nesting++;
838			} else if (c == CTLENDVAR) {
839				if (--nesting == 0)
840					break;
841			}
842		}
843	}
844	return p;
845}
846
847
848
849/*
850 * Test whether a specialized variable is set.
851 */
852
853static int
854varisset(char *name, int nulok)
855{
856
857	if (*name == '!')
858		return backgndpidset();
859	else if (*name == '@' || *name == '*') {
860		if (*shellparam.p == NULL)
861			return 0;
862
863		if (nulok) {
864			char **av;
865
866			for (av = shellparam.p; *av; av++)
867				if (**av != '\0')
868					return 1;
869			return 0;
870		}
871	} else if (is_digit(*name)) {
872		char *ap;
873		int num = atoi(name);
874
875		if (num > shellparam.nparam)
876			return 0;
877
878		if (num == 0)
879			ap = arg0;
880		else
881			ap = shellparam.p[num - 1];
882
883		if (nulok && (ap == NULL || *ap == '\0'))
884			return 0;
885	}
886	return 1;
887}
888
889static void
890strtodest(const char *p, int flag, int subtype, int quoted)
891{
892	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
893		STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
894	else
895		STPUTS(p, expdest);
896}
897
898/*
899 * Add the value of a specialized variable to the stack string.
900 */
901
902static void
903varvalue(char *name, int quoted, int subtype, int flag)
904{
905	int num;
906	char *p;
907	int i;
908	char sep;
909	char **ap;
910
911	switch (*name) {
912	case '$':
913		num = rootpid;
914		goto numvar;
915	case '?':
916		num = oexitstatus;
917		goto numvar;
918	case '#':
919		num = shellparam.nparam;
920		goto numvar;
921	case '!':
922		num = backgndpidval();
923numvar:
924		expdest = cvtnum(num, expdest);
925		break;
926	case '-':
927		for (i = 0 ; i < NOPTS ; i++) {
928			if (optlist[i].val)
929				STPUTC(optlist[i].letter, expdest);
930		}
931		break;
932	case '@':
933		if (flag & EXP_FULL && quoted) {
934			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
935				strtodest(p, flag, subtype, quoted);
936				if (*ap)
937					STPUTC('\0', expdest);
938			}
939			break;
940		}
941		/* FALLTHROUGH */
942	case '*':
943		if (ifsset())
944			sep = ifsval()[0];
945		else
946			sep = ' ';
947		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
948			strtodest(p, flag, subtype, quoted);
949			if (*ap && sep)
950				STPUTC(sep, expdest);
951		}
952		break;
953	case '0':
954		p = arg0;
955		strtodest(p, flag, subtype, quoted);
956		break;
957	default:
958		if (is_digit(*name)) {
959			num = atoi(name);
960			if (num > 0 && num <= shellparam.nparam) {
961				p = shellparam.p[num - 1];
962				strtodest(p, flag, subtype, quoted);
963			}
964		}
965		break;
966	}
967}
968
969
970
971/*
972 * Record the fact that we have to scan this region of the
973 * string for IFS characters.
974 */
975
976static void
977recordregion(int start, int end, int inquotes)
978{
979	struct ifsregion *ifsp;
980
981	if (ifslastp == NULL) {
982		ifsp = &ifsfirst;
983	} else {
984		if (ifslastp->endoff == start
985		    && ifslastp->inquotes == inquotes) {
986			/* extend previous area */
987			ifslastp->endoff = end;
988			return;
989		}
990		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
991		ifslastp->next = ifsp;
992	}
993	ifslastp = ifsp;
994	ifslastp->next = NULL;
995	ifslastp->begoff = start;
996	ifslastp->endoff = end;
997	ifslastp->inquotes = inquotes;
998}
999
1000
1001
1002/*
1003 * Break the argument string into pieces based upon IFS and add the
1004 * strings to the argument list.  The regions of the string to be
1005 * searched for IFS characters have been stored by recordregion.
1006 * CTLESC characters are preserved but have little effect in this pass
1007 * other than escaping CTL* characters.  In particular, they do not escape
1008 * IFS characters: that should be done with the ifsregion mechanism.
1009 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
1010 * This pass treats them as a regular character, making the string non-empty.
1011 * Later, they are removed along with the other CTL* characters.
1012 */
1013static void
1014ifsbreakup(char *string, struct arglist *arglist)
1015{
1016	struct ifsregion *ifsp;
1017	struct strlist *sp;
1018	char *start;
1019	char *p;
1020	char *q;
1021	const char *ifs;
1022	const char *ifsspc;
1023	int had_param_ch = 0;
1024
1025	start = string;
1026
1027	if (ifslastp == NULL) {
1028		/* Return entire argument, IFS doesn't apply to any of it */
1029		sp = (struct strlist *)stalloc(sizeof *sp);
1030		sp->text = start;
1031		*arglist->lastp = sp;
1032		arglist->lastp = &sp->next;
1033		return;
1034	}
1035
1036	ifs = ifsset() ? ifsval() : " \t\n";
1037
1038	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
1039		p = string + ifsp->begoff;
1040		while (p < string + ifsp->endoff) {
1041			q = p;
1042			if (*p == CTLESC)
1043				p++;
1044			if (ifsp->inquotes) {
1045				/* Only NULs (should be from "$@") end args */
1046				had_param_ch = 1;
1047				if (*p != 0) {
1048					p++;
1049					continue;
1050				}
1051				ifsspc = NULL;
1052			} else {
1053				if (!strchr(ifs, *p)) {
1054					had_param_ch = 1;
1055					p++;
1056					continue;
1057				}
1058				ifsspc = strchr(" \t\n", *p);
1059
1060				/* Ignore IFS whitespace at start */
1061				if (q == start && ifsspc != NULL) {
1062					p++;
1063					start = p;
1064					continue;
1065				}
1066				had_param_ch = 0;
1067			}
1068
1069			/* Save this argument... */
1070			*q = '\0';
1071			sp = (struct strlist *)stalloc(sizeof *sp);
1072			sp->text = start;
1073			*arglist->lastp = sp;
1074			arglist->lastp = &sp->next;
1075			p++;
1076
1077			if (ifsspc != NULL) {
1078				/* Ignore further trailing IFS whitespace */
1079				for (; p < string + ifsp->endoff; p++) {
1080					q = p;
1081					if (*p == CTLESC)
1082						p++;
1083					if (strchr(ifs, *p) == NULL) {
1084						p = q;
1085						break;
1086					}
1087					if (strchr(" \t\n", *p) == NULL) {
1088						p++;
1089						break;
1090					}
1091				}
1092			}
1093			start = p;
1094		}
1095	}
1096
1097	/*
1098	 * Save anything left as an argument.
1099	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
1100	 * generating 2 arguments, the second of which is empty.
1101	 * Some recent clarification of the Posix spec say that it
1102	 * should only generate one....
1103	 */
1104	if (had_param_ch || *start != 0) {
1105		sp = (struct strlist *)stalloc(sizeof *sp);
1106		sp->text = start;
1107		*arglist->lastp = sp;
1108		arglist->lastp = &sp->next;
1109	}
1110}
1111
1112
1113static char expdir[PATH_MAX];
1114#define expdir_end (expdir + sizeof(expdir))
1115
1116/*
1117 * Perform pathname generation and remove control characters.
1118 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1119 * The results are stored in the list exparg.
1120 */
1121static void
1122expandmeta(struct strlist *str, int flag __unused)
1123{
1124	char *p;
1125	struct strlist **savelastp;
1126	struct strlist *sp;
1127	char c;
1128	/* TODO - EXP_REDIR */
1129
1130	while (str) {
1131		if (fflag)
1132			goto nometa;
1133		p = str->text;
1134		for (;;) {			/* fast check for meta chars */
1135			if ((c = *p++) == '\0')
1136				goto nometa;
1137			if (c == '*' || c == '?' || c == '[')
1138				break;
1139		}
1140		savelastp = exparg.lastp;
1141		INTOFF;
1142		expmeta(expdir, str->text);
1143		INTON;
1144		if (exparg.lastp == savelastp) {
1145			/*
1146			 * no matches
1147			 */
1148nometa:
1149			*exparg.lastp = str;
1150			rmescapes(str->text);
1151			exparg.lastp = &str->next;
1152		} else {
1153			*exparg.lastp = NULL;
1154			*savelastp = sp = expsort(*savelastp);
1155			while (sp->next != NULL)
1156				sp = sp->next;
1157			exparg.lastp = &sp->next;
1158		}
1159		str = str->next;
1160	}
1161}
1162
1163
1164/*
1165 * Do metacharacter (i.e. *, ?, [...]) expansion.
1166 */
1167
1168static void
1169expmeta(char *enddir, char *name)
1170{
1171	char *p;
1172	char *q;
1173	char *start;
1174	char *endname;
1175	int metaflag;
1176	struct stat statb;
1177	DIR *dirp;
1178	struct dirent *dp;
1179	int atend;
1180	int matchdot;
1181	int esc;
1182
1183	metaflag = 0;
1184	start = name;
1185	for (p = name; esc = 0, *p; p += esc + 1) {
1186		if (*p == '*' || *p == '?')
1187			metaflag = 1;
1188		else if (*p == '[') {
1189			q = p + 1;
1190			if (*q == '!' || *q == '^')
1191				q++;
1192			for (;;) {
1193				while (*q == CTLQUOTEMARK)
1194					q++;
1195				if (*q == CTLESC)
1196					q++;
1197				if (*q == '/' || *q == '\0')
1198					break;
1199				if (*++q == ']') {
1200					metaflag = 1;
1201					break;
1202				}
1203			}
1204		} else if (*p == '\0')
1205			break;
1206		else if (*p == CTLQUOTEMARK)
1207			continue;
1208		else {
1209			if (*p == CTLESC)
1210				esc++;
1211			if (p[esc] == '/') {
1212				if (metaflag)
1213					break;
1214				start = p + esc + 1;
1215			}
1216		}
1217	}
1218	if (metaflag == 0) {	/* we've reached the end of the file name */
1219		if (enddir != expdir)
1220			metaflag++;
1221		for (p = name ; ; p++) {
1222			if (*p == CTLQUOTEMARK)
1223				continue;
1224			if (*p == CTLESC)
1225				p++;
1226			*enddir++ = *p;
1227			if (*p == '\0')
1228				break;
1229			if (enddir == expdir_end)
1230				return;
1231		}
1232		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
1233			addfname(expdir);
1234		return;
1235	}
1236	endname = p;
1237	if (start != name) {
1238		p = name;
1239		while (p < start) {
1240			while (*p == CTLQUOTEMARK)
1241				p++;
1242			if (*p == CTLESC)
1243				p++;
1244			*enddir++ = *p++;
1245			if (enddir == expdir_end)
1246				return;
1247		}
1248	}
1249	if (enddir == expdir) {
1250		p = ".";
1251	} else if (enddir == expdir + 1 && *expdir == '/') {
1252		p = "/";
1253	} else {
1254		p = expdir;
1255		enddir[-1] = '\0';
1256	}
1257	if ((dirp = opendir(p)) == NULL)
1258		return;
1259	if (enddir != expdir)
1260		enddir[-1] = '/';
1261	if (*endname == 0) {
1262		atend = 1;
1263	} else {
1264		atend = 0;
1265		*endname = '\0';
1266		endname += esc + 1;
1267	}
1268	matchdot = 0;
1269	p = start;
1270	while (*p == CTLQUOTEMARK)
1271		p++;
1272	if (*p == CTLESC)
1273		p++;
1274	if (*p == '.')
1275		matchdot++;
1276	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
1277		if (dp->d_name[0] == '.' && ! matchdot)
1278			continue;
1279		if (patmatch(start, dp->d_name, 0)) {
1280			if (enddir + dp->d_namlen + 1 > expdir_end)
1281				continue;
1282			memcpy(enddir, dp->d_name, dp->d_namlen + 1);
1283			if (atend)
1284				addfname(expdir);
1285			else {
1286				if (enddir + dp->d_namlen + 2 > expdir_end)
1287					continue;
1288				enddir[dp->d_namlen] = '/';
1289				enddir[dp->d_namlen + 1] = '\0';
1290				expmeta(enddir + dp->d_namlen + 1, endname);
1291			}
1292		}
1293	}
1294	closedir(dirp);
1295	if (! atend)
1296		endname[-esc - 1] = esc ? CTLESC : '/';
1297}
1298
1299
1300/*
1301 * Add a file name to the list.
1302 */
1303
1304static void
1305addfname(char *name)
1306{
1307	char *p;
1308	struct strlist *sp;
1309
1310	p = stalloc(strlen(name) + 1);
1311	scopy(name, p);
1312	sp = (struct strlist *)stalloc(sizeof *sp);
1313	sp->text = p;
1314	*exparg.lastp = sp;
1315	exparg.lastp = &sp->next;
1316}
1317
1318
1319/*
1320 * Sort the results of file name expansion.  It calculates the number of
1321 * strings to sort and then calls msort (short for merge sort) to do the
1322 * work.
1323 */
1324
1325static struct strlist *
1326expsort(struct strlist *str)
1327{
1328	int len;
1329	struct strlist *sp;
1330
1331	len = 0;
1332	for (sp = str ; sp ; sp = sp->next)
1333		len++;
1334	return msort(str, len);
1335}
1336
1337
1338static struct strlist *
1339msort(struct strlist *list, int len)
1340{
1341	struct strlist *p, *q = NULL;
1342	struct strlist **lpp;
1343	int half;
1344	int n;
1345
1346	if (len <= 1)
1347		return list;
1348	half = len >> 1;
1349	p = list;
1350	for (n = half ; --n >= 0 ; ) {
1351		q = p;
1352		p = p->next;
1353	}
1354	q->next = NULL;			/* terminate first half of list */
1355	q = msort(list, half);		/* sort first half of list */
1356	p = msort(p, len - half);		/* sort second half */
1357	lpp = &list;
1358	for (;;) {
1359		if (strcmp(p->text, q->text) < 0) {
1360			*lpp = p;
1361			lpp = &p->next;
1362			if ((p = *lpp) == NULL) {
1363				*lpp = q;
1364				break;
1365			}
1366		} else {
1367			*lpp = q;
1368			lpp = &q->next;
1369			if ((q = *lpp) == NULL) {
1370				*lpp = p;
1371				break;
1372			}
1373		}
1374	}
1375	return list;
1376}
1377
1378
1379
1380/*
1381 * Returns true if the pattern matches the string.
1382 */
1383
1384int
1385patmatch(const char *pattern, const char *string, int squoted)
1386{
1387	const char *p, *q;
1388	char c;
1389
1390	p = pattern;
1391	q = string;
1392	for (;;) {
1393		switch (c = *p++) {
1394		case '\0':
1395			goto breakloop;
1396		case CTLESC:
1397			if (squoted && *q == CTLESC)
1398				q++;
1399			if (*q++ != *p++)
1400				return 0;
1401			break;
1402		case CTLQUOTEMARK:
1403			continue;
1404		case '?':
1405			if (squoted && *q == CTLESC)
1406				q++;
1407			if (*q++ == '\0')
1408				return 0;
1409			break;
1410		case '*':
1411			c = *p;
1412			while (c == CTLQUOTEMARK || c == '*')
1413				c = *++p;
1414			if (c != CTLESC &&  c != CTLQUOTEMARK &&
1415			    c != '?' && c != '*' && c != '[') {
1416				while (*q != c) {
1417					if (squoted && *q == CTLESC &&
1418					    q[1] == c)
1419						break;
1420					if (*q == '\0')
1421						return 0;
1422					if (squoted && *q == CTLESC)
1423						q++;
1424					q++;
1425				}
1426			}
1427			do {
1428				if (patmatch(p, q, squoted))
1429					return 1;
1430				if (squoted && *q == CTLESC)
1431					q++;
1432			} while (*q++ != '\0');
1433			return 0;
1434		case '[': {
1435			const char *endp;
1436			int invert, found;
1437			char chr;
1438
1439			endp = p;
1440			if (*endp == '!' || *endp == '^')
1441				endp++;
1442			for (;;) {
1443				while (*endp == CTLQUOTEMARK)
1444					endp++;
1445				if (*endp == '\0')
1446					goto dft;		/* no matching ] */
1447				if (*endp == CTLESC)
1448					endp++;
1449				if (*++endp == ']')
1450					break;
1451			}
1452			invert = 0;
1453			if (*p == '!' || *p == '^') {
1454				invert++;
1455				p++;
1456			}
1457			found = 0;
1458			chr = *q++;
1459			if (squoted && chr == CTLESC)
1460				chr = *q++;
1461			if (chr == '\0')
1462				return 0;
1463			c = *p++;
1464			do {
1465				if (c == CTLQUOTEMARK)
1466					continue;
1467				if (c == CTLESC)
1468					c = *p++;
1469				if (*p == '-' && p[1] != ']') {
1470					p++;
1471					while (*p == CTLQUOTEMARK)
1472						p++;
1473					if (*p == CTLESC)
1474						p++;
1475					if (   collate_range_cmp(chr, c) >= 0
1476					    && collate_range_cmp(chr, *p) <= 0
1477					   )
1478						found = 1;
1479					p++;
1480				} else {
1481					if (chr == c)
1482						found = 1;
1483				}
1484			} while ((c = *p++) != ']');
1485			if (found == invert)
1486				return 0;
1487			break;
1488		}
1489dft:	        default:
1490			if (squoted && *q == CTLESC)
1491				q++;
1492			if (*q++ != c)
1493				return 0;
1494			break;
1495		}
1496	}
1497breakloop:
1498	if (*q != '\0')
1499		return 0;
1500	return 1;
1501}
1502
1503
1504
1505/*
1506 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1507 */
1508
1509void
1510rmescapes(char *str)
1511{
1512	char *p, *q;
1513
1514	p = str;
1515	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1516		if (*p++ == '\0')
1517			return;
1518	}
1519	q = p;
1520	while (*p) {
1521		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1522			p++;
1523			continue;
1524		}
1525		if (*p == CTLESC)
1526			p++;
1527		*q++ = *p++;
1528	}
1529	*q = '\0';
1530}
1531
1532
1533
1534/*
1535 * See if a pattern matches in a case statement.
1536 */
1537
1538int
1539casematch(union node *pattern, const char *val)
1540{
1541	struct stackmark smark;
1542	int result;
1543	char *p;
1544
1545	setstackmark(&smark);
1546	argbackq = pattern->narg.backquote;
1547	STARTSTACKSTR(expdest);
1548	ifslastp = NULL;
1549	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1550	STPUTC('\0', expdest);
1551	p = grabstackstr(expdest);
1552	result = patmatch(p, val, 0);
1553	popstackmark(&smark);
1554	return result;
1555}
1556
/*
 * Our own itoa().
 *
 * Format num in decimal and append the digits to buf with STPUTS.
 * Returns buf as it stands after STPUTS.
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	unsigned int mag;
	char *p = temp + 31;

	temp[31] = '\0';

	/*
	 * Convert the magnitude using unsigned arithmetic.  With a signed
	 * operand, num % 10 is negative for negative num and would yield
	 * characters below '0'; negating as unsigned also covers INT_MIN.
	 */
	mag = num < 0 ? 0U - (unsigned int)num : (unsigned int)num;
	do {
		*--p = mag % 10 + '0';
	} while ((mag /= 10) != 0);

	if (num < 0)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1580
1581/*
1582 * Check statically if expanding a string may have side effects.
1583 */
1584int
1585expandhassideeffects(const char *p)
1586{
1587	int c;
1588	int arinest;
1589
1590	arinest = 0;
1591	while ((c = *p++) != '\0') {
1592		switch (c) {
1593		case CTLESC:
1594			p++;
1595			break;
1596		case CTLVAR:
1597			c = *p++;
1598			/* Expanding $! sets the job to remembered. */
1599			if (*p == '!')
1600				return 1;
1601			if ((c & VSTYPE) == VSASSIGN)
1602				return 1;
1603			/*
1604			 * If we are in arithmetic, the parameter may contain
1605			 * '=' which may cause side effects. Exceptions are
1606			 * the length of a parameter and $$, $# and $? which
1607			 * are always numeric.
1608			 */
1609			if ((c & VSTYPE) == VSLENGTH) {
1610				while (*p != '=')
1611					p++;
1612				p++;
1613				break;
1614			}
1615			if ((*p == '$' || *p == '#' || *p == '?') &&
1616			    p[1] == '=') {
1617				p += 2;
1618				break;
1619			}
1620			if (arinest > 0)
1621				return 1;
1622			break;
1623		case CTLBACKQ:
1624		case CTLBACKQ | CTLQUOTE:
1625			if (arinest > 0)
1626				return 1;
1627			break;
1628		case CTLARI:
1629			arinest++;
1630			break;
1631		case CTLENDARI:
1632			arinest--;
1633			break;
1634		case '=':
1635			if (*p == '=') {
1636				/* Allow '==' operator. */
1637				p++;
1638				continue;
1639			}
1640			if (arinest > 0)
1641				return 1;
1642			break;
1643		case '!': case '<': case '>':
1644			/* Allow '!=', '<=', '>=' operators. */
1645			if (*p == '=')
1646				p++;
1647			break;
1648		}
1649	}
1650	return 0;
1651}
1652
1653/*
1654 * Do most of the work for wordexp(3).
1655 */
1656
1657int
1658wordexpcmd(int argc, char **argv)
1659{
1660	size_t len;
1661	int i;
1662
1663	out1fmt("%08x", argc - 1);
1664	for (i = 1, len = 0; i < argc; i++)
1665		len += strlen(argv[i]);
1666	out1fmt("%08x", (int)len);
1667	for (i = 1; i < argc; i++)
1668		outbin(argv[i], strlen(argv[i]) + 1, out1);
1669        return (0);
1670}
1671