expand.c revision 223060
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36#if 0
37static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
38#endif
39#endif /* not lint */
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/bin/sh/expand.c 223060 2011-06-13 21:03:27Z jilles $");
42
43#include <sys/types.h>
44#include <sys/time.h>
45#include <sys/stat.h>
46#include <dirent.h>
47#include <errno.h>
48#include <inttypes.h>
49#include <limits.h>
50#include <pwd.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <wchar.h>
56
57/*
58 * Routines to expand arguments to commands.  We have to deal with
59 * backquotes, shell variables, and file metacharacters.
60 */
61
62#include "shell.h"
63#include "main.h"
64#include "nodes.h"
65#include "eval.h"
66#include "expand.h"
67#include "syntax.h"
68#include "parser.h"
69#include "jobs.h"
70#include "options.h"
71#include "var.h"
72#include "input.h"
73#include "output.h"
74#include "memalloc.h"
75#include "error.h"
76#include "mystring.h"
77#include "arith.h"
78#include "show.h"
79#include "builtins.h"
80
81/*
82 * Structure specifying which parts of the string should be searched
83 * for IFS characters.
 *
 * Regions are kept in a singly linked list.  The offsets are byte offsets
 * into the expanded string (callers compute them as expdest - stackblock());
 * ifsbreakup() scans from begoff up to, but not including, endoff.
84 */
85
86struct ifsregion {
87	struct ifsregion *next;	/* next region in list */
88	int begoff;		/* offset of start of region */
89	int endoff;		/* offset of end of region */
90	int inquotes;		/* search for nul bytes only */
91};
92
93
/*
 * File-scope expansion state shared by the routines below.  The first IFS
 * region (ifsfirst) is stored inline; further regions are allocated by
 * recordregion().  ifslastp is NULL while no region has been recorded.
 */
94static char *expdest;			/* output of current string */
95static struct nodelist *argbackq;	/* list of back quote expressions */
96static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
97static struct ifsregion *ifslastp;	/* last struct in list */
98static struct arglist exparg;		/* holds expanded arg list */
99
/* Forward declarations of the file-local helpers. */
100static void argstr(char *, int);
101static char *exptilde(char *, int);
102static void expbackq(union node *, int, int);
103static int subevalvar(char *, char *, int, int, int, int, int);
104static char *evalvar(char *, int);
105static int varisset(char *, int);
106static void varvalue(char *, int, int, int);
107static void recordregion(int, int, int);
108static void removerecordregions(int);
109static void ifsbreakup(char *, struct arglist *);
110static void expandmeta(struct strlist *, int);
111static void expmeta(char *, char *);
112static void addfname(char *);
113static struct strlist *expsort(struct strlist *);
114static struct strlist *msort(struct strlist *, int);
115static char *cvtnum(int, char *);
116static int collate_range_cmp(wchar_t, wchar_t);
117
/*
 * Compare two wide characters using the collation order of the current
 * locale.  Each character is turned into a one-character wide string and
 * the pair is handed to wcscoll().
 *
 * Returns a value less than, equal to, or greater than zero when c1
 * collates before, the same as, or after c2.
 */
static int
collate_range_cmp(wchar_t c1, wchar_t c2)
{
	/* The second element of each array is the terminating L'\0'. */
	wchar_t left[2] = { c1, L'\0' };
	wchar_t right[2] = { c2, L'\0' };

	return (wcscoll(left, right));
}
127
128/*
129 * Expand shell variables and backquotes inside a here document.
130 *	union node *arg		the document
131 *	int fd;			where to write the expanded version
132 */
133
134void
135expandhere(union node *arg, int fd)
136{
137	expandarg(arg, (struct arglist *)NULL, 0);
138	xwrite(fd, stackblock(), expdest - stackblock());
139}
140
141static char *
142stputs_quotes(const char *data, const char *syntax, char *p)
143{
144	while (*data) {
145		CHECKSTRSPACE(2, p);
146		if (syntax[(int)*data] == CCTL)
147			USTPUTC(CTLESC, p);
148		USTPUTC(*data++, p);
149	}
150	return (p);
151}
152#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
153
154/*
155 * Perform expansions on an argument, placing the resulting list of arguments
156 * in arglist.  Parameter expansion, command substitution and arithmetic
157 * expansion are always performed; additional expansions can be requested
158 * via flag (EXP_*).
159 * The result is left in the stack string.
160 * When arglist is NULL, perform here document expansion.
161 *
162 * Caution: this function uses global state and is not reentrant.
163 * However, a new invocation after an interrupted invocation is safe
164 * and will reset the global state for the new call.
165 */
166void
167expandarg(union node *arg, struct arglist *arglist, int flag)
168{
169	struct strlist *sp;
170	char *p;
171
172	argbackq = arg->narg.backquote;
173	STARTSTACKSTR(expdest);
174	ifsfirst.next = NULL;
175	ifslastp = NULL;
176	argstr(arg->narg.text, flag);
177	if (arglist == NULL) {
178		STACKSTRNUL(expdest);
179		return;			/* here document expanded */
180	}
181	STPUTC('\0', expdest);
182	p = grabstackstr(expdest);
183	exparg.lastp = &exparg.list;
184	/*
185	 * TODO - EXP_REDIR
186	 */
187	if (flag & EXP_FULL) {
188		ifsbreakup(p, &exparg);
189		*exparg.lastp = NULL;
190		exparg.lastp = &exparg.list;
191		expandmeta(exparg.list, flag);
192	} else {
193		if (flag & EXP_REDIR) /*XXX - for now, just remove escapes */
194			rmescapes(p);
195		sp = (struct strlist *)stalloc(sizeof (struct strlist));
196		sp->text = p;
197		*exparg.lastp = sp;
198		exparg.lastp = &sp->next;
199	}
200	while (ifsfirst.next != NULL) {
201		struct ifsregion *ifsp;
202		INTOFF;
203		ifsp = ifsfirst.next->next;
204		ckfree(ifsfirst.next);
205		ifsfirst.next = ifsp;
206		INTON;
207	}
208	*exparg.lastp = NULL;
209	if (exparg.list) {
210		*arglist->lastp = exparg.list;
211		arglist->lastp = exparg.lastp;
212	}
213}
214
215
216
217/*
218 * Perform parameter expansion, command substitution and arithmetic
219 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
220 * Processing ends at a CTLENDVAR character as well as '\0'.
221 * This is used to expand word in ${var+word} etc.
222 * If EXP_FULL, EXP_CASE or EXP_REDIR are set, keep and/or generate CTLESC
223 * characters to allow for further processing.
224 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
 *
 *	char *p		the encoded word text to expand
 *	int flag	EXP_* flags; EXP_SPLIT_LIT and EXP_LIT_QUOTED are
 *			consumed here and are not passed on to the helpers
 *
 * The result is appended to the stack string at expdest.  With
 * EXP_SPLIT_LIT, every literal character copied while not inside quotes
 * is recorded as an IFS region, making it subject to field splitting.
225 */
226static void
227argstr(char *p, int flag)
228{
229	char c;
230	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);	/* do CTLESC */
231	int firsteq = 1;
232	int split_lit;
233	int lit_quoted;
234
	/*
	 * Remember the two literal-handling flags locally and strip them
	 * from flag, so the expansion helpers called below never see them.
	 */
235	split_lit = flag & EXP_SPLIT_LIT;
236	lit_quoted = flag & EXP_LIT_QUOTED;
237	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
238	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
239		p = exptilde(p, flag);
240	for (;;) {
		/* Each iteration stores at most two bytes via USTPUTC. */
241		CHECKSTRSPACE(2, expdest);
242		switch (c = *p++) {
243		case '\0':
244		case CTLENDVAR:
245			goto breakloop;
246		case CTLQUOTEMARK:
247			lit_quoted = 1;
248			/* "$@" syntax adherence hack */
249			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
250				break;
251			if ((flag & EXP_FULL) != 0)
252				USTPUTC(c, expdest);
253			break;
254		case CTLQUOTEEND:
255			lit_quoted = 0;
256			break;
257		case CTLESC:
			/* Copy the escaped byte, keeping the CTLESC if wanted. */
258			if (quotes)
259				USTPUTC(c, expdest);
260			c = *p++;
261			USTPUTC(c, expdest);
			/* The region also covers the CTLESC when one was stored. */
262			if (split_lit && !lit_quoted)
263				recordregion(expdest - stackblock() -
264				    (quotes ? 2 : 1),
265				    expdest - stackblock(), 0);
266			break;
267		case CTLVAR:
268			p = evalvar(p, flag);
269			break;
270		case CTLBACKQ:
271		case CTLBACKQ|CTLQUOTE:
			/* Consume the next pending command substitution node. */
272			expbackq(argbackq->n, c & CTLQUOTE, flag);
273			argbackq = argbackq->next;
274			break;
275		case CTLENDARI:
			/* expari() finds the expression start in the output itself. */
276			expari(flag);
277			break;
278		case ':':
279		case '=':
280			/*
281			 * sort of a hack - expand tildes in variable
282			 * assignments (after the first '=' and after ':'s).
283			 */
284			USTPUTC(c, expdest);
285			if (split_lit && !lit_quoted)
286				recordregion(expdest - stackblock() - 1,
287				    expdest - stackblock(), 0);
288			if (flag & EXP_VARTILDE && *p == '~' &&
289			    (c != '=' || firsteq)) {
290				if (c == '=')
291					firsteq = 0;
292				p = exptilde(p, flag);
293			}
294			break;
295		default:
			/* Ordinary literal character. */
296			USTPUTC(c, expdest);
297			if (split_lit && !lit_quoted)
298				recordregion(expdest - stackblock() - 1,
299				    expdest - stackblock(), 0);
300		}
301	}
302breakloop:;
303}
304
305/*
306 * Perform tilde expansion, placing the result in the stack string and
307 * returning the next position in the input string to process.
308 */
309static char *
310exptilde(char *p, int flag)
311{
312	char c, *startp = p;
313	struct passwd *pw;
314	char *home;
315	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
316
317	while ((c = *p) != '\0') {
318		switch(c) {
319		case CTLESC: /* This means CTL* are always considered quoted. */
320		case CTLVAR:
321		case CTLBACKQ:
322		case CTLBACKQ | CTLQUOTE:
323		case CTLARI:
324		case CTLENDARI:
325		case CTLQUOTEMARK:
326			return (startp);
327		case ':':
328			if (flag & EXP_VARTILDE)
329				goto done;
330			break;
331		case '/':
332		case CTLENDVAR:
333			goto done;
334		}
335		p++;
336	}
337done:
338	*p = '\0';
339	if (*(startp+1) == '\0') {
340		if ((home = lookupvar("HOME")) == NULL)
341			goto lose;
342	} else {
343		if ((pw = getpwnam(startp+1)) == NULL)
344			goto lose;
345		home = pw->pw_dir;
346	}
347	if (*home == '\0')
348		goto lose;
349	*p = c;
350	if (quotes)
351		STPUTS_QUOTES(home, SQSYNTAX, expdest);
352	else
353		STPUTS(home, expdest);
354	return (p);
355lose:
356	*p = c;
357	return (startp);
358}
359
360
361static void
362removerecordregions(int endoff)
363{
364	if (ifslastp == NULL)
365		return;
366
367	if (ifsfirst.endoff > endoff) {
368		while (ifsfirst.next != NULL) {
369			struct ifsregion *ifsp;
370			INTOFF;
371			ifsp = ifsfirst.next->next;
372			ckfree(ifsfirst.next);
373			ifsfirst.next = ifsp;
374			INTON;
375		}
376		if (ifsfirst.begoff > endoff)
377			ifslastp = NULL;
378		else {
379			ifslastp = &ifsfirst;
380			ifsfirst.endoff = endoff;
381		}
382		return;
383	}
384
385	ifslastp = &ifsfirst;
386	while (ifslastp->next && ifslastp->next->begoff < endoff)
387		ifslastp=ifslastp->next;
388	while (ifslastp->next != NULL) {
389		struct ifsregion *ifsp;
390		INTOFF;
391		ifsp = ifslastp->next->next;
392		ckfree(ifslastp->next);
393		ifslastp->next = ifsp;
394		INTON;
395	}
396	if (ifslastp->endoff > endoff)
397		ifslastp->endoff = endoff;
398}
399
400/*
401 * Expand arithmetic expression.  Back up to start of expression,
402 * evaluate, place result in (backed up) result, adjust string position.
 *
 *	int flag	EXP_* flags; if any of EXP_FULL, EXP_CASE or
 *			EXP_REDIR is set, escapes are removed from the
 *			expression text before it is evaluated
 *
 * The expression text is expected in the stack string between the most
 * recent CTLARI marker and expdest; it is overwritten with the formatted
 * result and expdest is moved back to just after it.
403 */
404void
405expari(int flag)
406{
407	char *p, *q, *start;
408	arith_t result;
409	int begoff;
410	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
411	int quoted;
412
413	/*
414	 * This routine is slightly over-complicated for
415	 * efficiency.  First we make sure there is
416	 * enough space for the result, which may be bigger
417	 * than the expression.  Next we
418	 * scan backwards looking for the start of arithmetic.  If the
419	 * next previous character is a CTLESC character, then we
420	 * have to rescan starting from the beginning since CTLESC
421	 * characters have to be processed left to right.
422	 */
423	CHECKSTRSPACE(DIGITS(result) - 2, expdest);
424	USTPUTC('\0', expdest);
425	start = stackblock();
426	p = expdest - 2;
427	while (p >= start && *p != CTLARI)
428		--p;
429	if (p < start || *p != CTLARI)
430		error("missing CTLARI (shouldn't happen)");
431	if (p > start && *(p - 1) == CTLESC)
432		for (p = start; *p != CTLARI; p++)
433			if (*p == CTLESC)
434				p++;
435
	/*
	 * The byte after CTLARI decides whether the result counts as
	 * quoted; only unquoted results are recorded for IFS splitting.
	 */
436	if (p[1] == '"')
437		quoted=1;
438	else
439		quoted=0;
440	begoff = p - start;
	/* Regions recorded while expanding the expression are obsolete. */
441	removerecordregions(begoff);
	/* The expression text itself starts two bytes after CTLARI. */
442	if (quotes)
443		rmescapes(p+2);
	/*
	 * Keep the stack string allocated while arith() runs.
	 * NOTE(review): presumably because evaluation may use the stack
	 * itself -- confirm.
	 */
444	q = grabstackstr(expdest);
445	result = arith(p+2);
446	ungrabstackstr(q, expdest);
	/* Write the result over the expression, starting at the CTLARI. */
447	fmtstr(p, DIGITS(result), ARITH_FORMAT_STR, result);
	/* Advance p to just past the NUL that ends the formatted result. */
448	while (*p++)
449		;
450	if (quoted == 0)
451		recordregion(begoff, p - 1 - start, 0);
	/* result is reused as the number of bytes to give back. */
452	result = expdest - p + 1;
453	STADJUST(-result, expdest);
454}
455
456
457/*
458 * Perform command substitution.
 *
 *	union node *cmd	the command to evaluate
 *	int quoted	nonzero if the substitution is quoted: DQSYNTAX is
 *			used to decide which bytes need CTLESC and the
 *			output is not recorded for IFS splitting
 *	int flag	EXP_* flags; EXP_FULL, EXP_CASE or EXP_REDIR request
 *			CTLESC escapes in the output
 *
 * The command's output is appended to the stack string with NUL bytes
 * dropped and trailing newlines removed.  Interrupts are held off for the
 * whole function.
459 */
460static void
461expbackq(union node *cmd, int quoted, int flag)
462{
463	struct backcmd in;
464	int i;
465	char buf[128];
466	char *p;
467	char *dest = expdest;
468	struct ifsregion saveifs, *savelastp;
469	struct nodelist *saveargbackq;
470	char lastc;
471	int startloc = dest - stackblock();
472	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
473	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
474	int nnl;
475
476	INTOFF;
	/*
	 * Save the global expansion state and protect the stack string
	 * across the evaluation, then restore everything afterwards.
	 * NOTE(review): presumably because evaluating the command can run
	 * nested expansions that use the same globals -- confirm.
	 */
477	saveifs = ifsfirst;
478	savelastp = ifslastp;
479	saveargbackq = argbackq;
480	p = grabstackstr(dest);
481	evalbackcmd(cmd, &in);
482	ungrabstackstr(p, dest);
483	ifsfirst = saveifs;
484	ifslastp = savelastp;
485	argbackq = saveargbackq;
486
	/* Consume what is already buffered in in.buf, then read in.fd. */
487	p = in.buf;
488	lastc = '\0';
489	nnl = 0;
490	/* Don't copy trailing newlines */
491	for (;;) {
492		if (--in.nleft < 0) {
493			if (in.fd < 0)
494				break;
			/* Refill the local buffer, retrying on EINTR. */
495			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
496			TRACE(("expbackq: read returns %d\n", i));
497			if (i <= 0)
498				break;
499			p = buf;
500			in.nleft = i - 1;
501		}
502		lastc = *p++;
		/* NUL bytes in the output are silently discarded. */
503		if (lastc != '\0') {
504			if (lastc == '\n') {
				/* Hold newlines back until other data follows. */
505				nnl++;
506			} else {
507				CHECKSTRSPACE(nnl + 2, dest);
508				while (nnl > 0) {
509					nnl--;
510					USTPUTC('\n', dest);
511				}
512				if (quotes && syntax[(int)lastc] == CCTL)
513					USTPUTC(CTLESC, dest);
514				USTPUTC(lastc, dest);
515			}
516		}
517	}
518
519	if (in.fd >= 0)
520		close(in.fd);
521	if (in.buf)
522		ckfree(in.buf);
	/* The job's status becomes the shell's exit status. */
523	if (in.jp)
524		exitstatus = waitforjob(in.jp, (int *)NULL);
525	if (quoted == 0)
526		recordregion(startloc, dest - stackblock(), 0);
527	TRACE(("expbackq: size=%td: \"%.*s\"\n",
528		((dest - stackblock()) - startloc),
529		(int)((dest - stackblock()) - startloc),
530		stackblock() + startloc));
531	expdest = dest;
532	INTON;
533}
534
535
536
/*
 * Perform the word-dependent part of a ${var<op>word} expansion.  The word
 * at p is first expanded onto the stack string behind the variable's value.
 *
 *	char *p		the word (or pattern) in the input
 *	char *str	for VSASSIGN/VSQUESTION the variable name as it
 *			appears in the input; NULL for the trim operators,
 *			in which case the expanded pattern is taken from
 *			stack offset strloc
 *	int strloc	stack string offset of the expanded pattern
 *	int subtype	VSASSIGN, VSQUESTION, VSTRIMLEFT, VSTRIMLEFTMAX,
 *			VSTRIMRIGHT or VSTRIMRIGHTMAX; anything else aborts
 *	int startloc	stack string offset where the variable's value starts
 *	int varflags	flags of the expansion (only VSNUL is looked at)
 *	int quotes	nonzero if the value on the stack has CTLESC escapes
 *
 * Returns 1 if the variable was assigned or a pattern matched and the
 * value was trimmed, and 0 if no pattern matched.  VSQUESTION reports the
 * failure through error().
 */
537static int
538subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
539  int varflags, int quotes)
540{
541	char *startp;
542	char *loc = NULL;
543	char *q;
544	int c = 0;
545	struct nodelist *saveargbackq = argbackq;
546	int amount;
547
	/* Patterns are expanded with EXP_CASE so that escapes are kept. */
548	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
549	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
550	    EXP_CASE : 0) | EXP_TILDE);
551	STACKSTRNUL(expdest);
	/* Undo the consumption of command substitutions by argstr(). */
552	argbackq = saveargbackq;
553	startp = stackblock() + startloc;
554	if (str == NULL)
555	    str = stackblock() + strloc;
556
557	switch (subtype) {
558	case VSASSIGN:
		/* Assign the expanded word, then remove it from the stack. */
559		setvar(str, startp, 0);
560		amount = startp - expdest;
561		STADJUST(amount, expdest);
562		varflags &= ~VSNUL;
563		return 1;
564
565	case VSQUESTION:
		/* A non-empty word is used as the message text. */
566		if (*p != CTLENDVAR) {
567			outfmt(out2, "%s\n", startp);
568			error((char *)NULL);
569		}
570		error("%.*s: parameter %snot set", (int)(p - str - 1),
571		      str, (varflags & VSNUL) ? "null or "
572					      : nullstr);
573		return 0;
574
575	case VSTRIMLEFT:
		/*
		 * Try prefixes of increasing length: the value is cut at
		 * loc temporarily and the prefix matched against the pattern.
		 */
576		for (loc = startp; loc < str; loc++) {
577			c = *loc;
578			*loc = '\0';
579			if (patmatch(str, startp, quotes)) {
580				*loc = c;
581				goto recordleft;
582			}
583			*loc = c;
584			if (quotes && *loc == CTLESC)
585				loc++;
586		}
587		return 0;
588
589	case VSTRIMLEFTMAX:
		/* Try prefixes of decreasing length, longest first. */
590		for (loc = str - 1; loc >= startp;) {
591			c = *loc;
592			*loc = '\0';
593			if (patmatch(str, startp, quotes)) {
594				*loc = c;
595				goto recordleft;
596			}
597			*loc = c;
598			loc--;
			/*
			 * Rescan from the start to find out whether loc is
			 * now an escaped byte; if so step back once more so
			 * the CTLESC and its byte stay together.
			 */
599			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
600				for (q = startp; q < loc; q++)
601					if (*q == CTLESC)
602						q++;
603				if (q > loc)
604					loc--;
605			}
606		}
607		return 0;
608
609	case VSTRIMRIGHT:
		/* Try suffixes of increasing length; cut at the first match. */
610		for (loc = str - 1; loc >= startp;) {
611			if (patmatch(str, loc, quotes)) {
612				amount = loc - expdest;
613				STADJUST(amount, expdest);
614				return 1;
615			}
616			loc--;
617			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
618				for (q = startp; q < loc; q++)
619					if (*q == CTLESC)
620						q++;
621				if (q > loc)
622					loc--;
623			}
624		}
625		return 0;
626
627	case VSTRIMRIGHTMAX:
		/* Try suffixes of decreasing length, longest first. */
628		for (loc = startp; loc < str - 1; loc++) {
629			if (patmatch(str, loc, quotes)) {
630				amount = loc - expdest;
631				STADJUST(amount, expdest);
632				return 1;
633			}
634			if (quotes && *loc == CTLESC)
635				loc++;
636		}
637		return 0;
638
639
640	default:
641		abort();
642	}
643
	/*
	 * A prefix ending at loc matched: shrink the stack string to the
	 * length of the remaining value and move that remainder down to
	 * the start of the value.
	 */
644recordleft:
645	amount = ((str - 1) - (loc - startp)) - expdest;
646	STADJUST(amount, expdest);
647	while (loc != str - 1)
648		*startp++ = *loc++;
649	return 1;
650}
651
652
653/*
654 * Expand a variable, and return a pointer to the next character in the
655 * input string.
 *
 *	char *p		points just past a CTLVAR byte: a flags byte
 *			(subtype in the VSTYPE bits, plus VSNUL, VSQUOTE,
 *			VSLINENO), the name, an '=' and, for every subtype
 *			other than VSNORMAL, the word ended by CTLENDVAR
 *	int flag	EXP_* flags controlling escapes and splitting
 *
 * The value is appended to the stack string at expdest and, where field
 * splitting applies, recorded as an IFS region.
656 */
657
658static char *
659evalvar(char *p, int flag)
660{
661	int subtype;
662	int varflags;
663	char *var;
664	char *val;
665	int patloc;
666	int c;
667	int set;
668	int special;
669	int startloc;
670	int varlen;
671	int varlenb;
672	int easy;
673	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
674
675	varflags = (unsigned char)*p++;
676	subtype = varflags & VSTYPE;
677	var = p;
	/* Names not starting with a name character are special parameters. */
678	special = 0;
679	if (! is_name(*p))
680		special = 1;
	/* Step over the name; p now points just after the '='. */
681	p = strchr(p, '=') + 1;
682again: /* jump here after setting a variable with ${var=text} */
683	if (varflags & VSLINENO) {
		/* The text stored in place of the name is itself the value. */
684		set = 1;
685		special = 0;
686		val = var;
687		p[-1] = '\0';	/* temporarily overwrite '=' to have \0
688				   terminated string */
689	} else if (special) {
690		set = varisset(var, varflags & VSNUL);
691		val = NULL;
692	} else {
693		val = bltinlookup(var, 1);
		/* With VSNUL an empty value counts as unset. */
694		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
695			val = NULL;
696			set = 0;
697		} else
698			set = 1;
699	}
700	varlen = 0;
701	startloc = expdest - stackblock();
	/* With the u option, plain uses of an unset parameter are errors. */
702	if (!set && uflag && *var != '@' && *var != '*') {
703		switch (subtype) {
704		case VSNORMAL:
705		case VSTRIMLEFT:
706		case VSTRIMLEFTMAX:
707		case VSTRIMRIGHT:
708		case VSTRIMRIGHTMAX:
709		case VSLENGTH:
710			error("%.*s: parameter not set", (int)(p - var - 1),
711			    var);
712		}
713	}
714	if (set && subtype != VSPLUS) {
715		/* insert the value of the variable */
716		if (special) {
717			varvalue(var, varflags & VSQUOTE, subtype, flag);
718			if (subtype == VSLENGTH) {
				/*
				 * Measure the value just written, not counting
				 * UTF-8 continuation bytes in a UTF-8 locale,
				 * then take the value off the stack again.
				 */
719				varlenb = expdest - stackblock() - startloc;
720				varlen = varlenb;
721				if (localeisutf8) {
722					val = stackblock() + startloc;
723					for (;val != expdest; val++)
724						if ((*val & 0xC0) == 0x80)
725							varlen--;
726				}
727				STADJUST(-varlenb, expdest);
728			}
729		} else {
730			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
731								  : BASESYNTAX;
732
733			if (subtype == VSLENGTH) {
				/* Count characters; nothing is output yet. */
734				for (;*val; val++)
735					if (!localeisutf8 ||
736					    (*val & 0xC0) != 0x80)
737						varlen++;
738			}
739			else {
740				if (quotes)
741					STPUTS_QUOTES(val, syntax, expdest);
742				else
743					STPUTS(val, expdest);
744
745			}
746		}
747	}
748
	/* For ${var+word} the word is used exactly when var is set. */
749	if (subtype == VSPLUS)
750		set = ! set;
751
	/*
	 * "easy" means the value just output may be recorded as an IFS
	 * region: either the expansion is unquoted, or it is a quoted $@
	 * with a number of positional parameters other than one.
	 */
752	easy = ((varflags & VSQUOTE) == 0 ||
753		(*var == '@' && shellparam.nparam != 1));
754
755
756	switch (subtype) {
757	case VSLENGTH:
758		expdest = cvtnum(varlen, expdest);
759		goto record;
760
761	case VSNORMAL:
762		if (!easy)
763			break;
764record:
		/*
		 * The region is marked "inquotes" (split at NUL bytes only)
		 * for quoted expansions, and for $@ and $* when IFS is set
		 * to the empty string.
		 */
765		recordregion(startloc, expdest - stackblock(),
766		    varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
767		    (*var == '@' || *var == '*')));
768		break;
769
770	case VSPLUS:
771	case VSMINUS:
772		if (!set) {
			/* Expand the alternative word in place of the value. */
773			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
774			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
775			break;
776		}
777		if (easy)
778			goto record;
779		break;
780
781	case VSTRIMLEFT:
782	case VSTRIMLEFTMAX:
783	case VSTRIMRIGHT:
784	case VSTRIMRIGHTMAX:
785		if (!set)
786			break;
787		/*
788		 * Terminate the string and start recording the pattern
789		 * right after it
790		 */
791		STPUTC('\0', expdest);
792		patloc = expdest - stackblock();
793		if (subevalvar(p, NULL, patloc, subtype,
794		    startloc, varflags, quotes) == 0) {
			/* No match: drop the pattern and the added NUL. */
795			int amount = (expdest - stackblock() - patloc) + 1;
796			STADJUST(-amount, expdest);
797		}
798		/* Remove any recorded regions beyond start of variable */
799		removerecordregions(startloc);
800		goto record;
801
802	case VSASSIGN:
803	case VSQUESTION:
804		if (!set) {
805			if (subevalvar(p, var, 0, subtype, startloc, varflags,
806			    quotes)) {
				/*
				 * The variable has been assigned; clear VSNUL
				 * and start over to expand its new value.
				 */
807				varflags &= ~VSNUL;
808				/*
809				 * Remove any recorded regions beyond
810				 * start of variable
811				 */
812				removerecordregions(startloc);
813				goto again;
814			}
815			break;
816		}
817		if (easy)
818			goto record;
819		break;
820
821	case VSERROR:
822		c = p - var - 1;
823		error("${%.*s%s}: Bad substitution", c, var,
824		    (c > 0 && *p != CTLENDVAR) ? "..." : "");
825
826	default:
827		abort();
828	}
829	p[-1] = '=';	/* recover overwritten '=' */
830
831	if (subtype != VSNORMAL) {	/* skip to end of alternative */
		/*
		 * Walk over the (possibly nested) word up to the matching
		 * CTLENDVAR.  If the word was not expanded above (set is
		 * nonzero), its command substitutions are skipped in
		 * argbackq as well to keep that list in step with the input.
		 */
832		int nesting = 1;
833		for (;;) {
834			if ((c = *p++) == CTLESC)
835				p++;
836			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
837				if (set)
838					argbackq = argbackq->next;
839			} else if (c == CTLVAR) {
840				if ((*p++ & VSTYPE) != VSNORMAL)
841					nesting++;
842			} else if (c == CTLENDVAR) {
843				if (--nesting == 0)
844					break;
845			}
846		}
847	}
848	return p;
849}
850
851
852
853/*
854 * Test whether a specialized variable is set.
855 */
856
857static int
858varisset(char *name, int nulok)
859{
860
861	if (*name == '!')
862		return backgndpidset();
863	else if (*name == '@' || *name == '*') {
864		if (*shellparam.p == NULL)
865			return 0;
866
867		if (nulok) {
868			char **av;
869
870			for (av = shellparam.p; *av; av++)
871				if (**av != '\0')
872					return 1;
873			return 0;
874		}
875	} else if (is_digit(*name)) {
876		char *ap;
877		int num = atoi(name);
878
879		if (num > shellparam.nparam)
880			return 0;
881
882		if (num == 0)
883			ap = arg0;
884		else
885			ap = shellparam.p[num - 1];
886
887		if (nulok && (ap == NULL || *ap == '\0'))
888			return 0;
889	}
890	return 1;
891}
892
893static void
894strtodest(const char *p, int flag, int subtype, int quoted)
895{
896	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
897		STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
898	else
899		STPUTS(p, expdest);
900}
901
902/*
903 * Add the value of a specialized variable to the stack string.
904 */
905
906static void
907varvalue(char *name, int quoted, int subtype, int flag)
908{
909	int num;
910	char *p;
911	int i;
912	char sep;
913	char **ap;
914
915	switch (*name) {
916	case '$':
917		num = rootpid;
918		goto numvar;
919	case '?':
920		num = oexitstatus;
921		goto numvar;
922	case '#':
923		num = shellparam.nparam;
924		goto numvar;
925	case '!':
926		num = backgndpidval();
927numvar:
928		expdest = cvtnum(num, expdest);
929		break;
930	case '-':
931		for (i = 0 ; i < NOPTS ; i++) {
932			if (optlist[i].val)
933				STPUTC(optlist[i].letter, expdest);
934		}
935		break;
936	case '@':
937		if (flag & EXP_FULL && quoted) {
938			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
939				strtodest(p, flag, subtype, quoted);
940				if (*ap)
941					STPUTC('\0', expdest);
942			}
943			break;
944		}
945		/* FALLTHROUGH */
946	case '*':
947		if (ifsset())
948			sep = ifsval()[0];
949		else
950			sep = ' ';
951		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
952			strtodest(p, flag, subtype, quoted);
953			if (!*ap)
954				break;
955			if (sep || (flag & EXP_FULL && !quoted && **ap != '\0'))
956				STPUTC(sep, expdest);
957		}
958		break;
959	case '0':
960		p = arg0;
961		strtodest(p, flag, subtype, quoted);
962		break;
963	default:
964		if (is_digit(*name)) {
965			num = atoi(name);
966			if (num > 0 && num <= shellparam.nparam) {
967				p = shellparam.p[num - 1];
968				strtodest(p, flag, subtype, quoted);
969			}
970		}
971		break;
972	}
973}
974
975
976
977/*
978 * Record the fact that we have to scan this region of the
979 * string for IFS characters.
980 */
981
982static void
983recordregion(int start, int end, int inquotes)
984{
985	struct ifsregion *ifsp;
986
987	if (ifslastp == NULL) {
988		ifsp = &ifsfirst;
989	} else {
990		if (ifslastp->endoff == start
991		    && ifslastp->inquotes == inquotes) {
992			/* extend previous area */
993			ifslastp->endoff = end;
994			return;
995		}
996		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
997		ifslastp->next = ifsp;
998	}
999	ifslastp = ifsp;
1000	ifslastp->next = NULL;
1001	ifslastp->begoff = start;
1002	ifslastp->endoff = end;
1003	ifslastp->inquotes = inquotes;
1004}
1005
1006
1007
1008/*
1009 * Break the argument string into pieces based upon IFS and add the
1010 * strings to the argument list.  The regions of the string to be
1011 * searched for IFS characters have been stored by recordregion.
1012 * CTLESC characters are preserved but have little effect in this pass
1013 * other than escaping CTL* characters.  In particular, they do not escape
1014 * IFS characters: that should be done with the ifsregion mechanism.
1015 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
1016 * This pass treats them as a regular character, making the string non-empty.
1017 * Later, they are removed along with the other CTL* characters.
 *
 *	char *string		the expanded text; it is split in place by
 *				overwriting separators with NUL bytes
 *	struct arglist *arglist	list to which the resulting words are added
1018 */
1019static void
1020ifsbreakup(char *string, struct arglist *arglist)
1021{
1022	struct ifsregion *ifsp;
1023	struct strlist *sp;
1024	char *start;
1025	char *p;
1026	char *q;
1027	const char *ifs;
1028	const char *ifsspc;
1029	int had_param_ch = 0;
1030
1031	start = string;
1032
1033	if (ifslastp == NULL) {
1034		/* Return entire argument, IFS doesn't apply to any of it */
1035		sp = (struct strlist *)stalloc(sizeof *sp);
1036		sp->text = start;
1037		*arglist->lastp = sp;
1038		arglist->lastp = &sp->next;
1039		return;
1040	}
1041
	/* An unset IFS behaves like space, tab and newline. */
1042	ifs = ifsset() ? ifsval() : " \t\n";
1043
1044	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
1045		p = string + ifsp->begoff;
1046		while (p < string + ifsp->endoff) {
			/* q stays on a CTLESC if there is one; p moves past it. */
1047			q = p;
1048			if (*p == CTLESC)
1049				p++;
1050			if (ifsp->inquotes) {
1051				/* Only NULs (should be from "$@") end args */
1052				had_param_ch = 1;
1053				if (*p != 0) {
1054					p++;
1055					continue;
1056				}
1057				ifsspc = NULL;
1058			} else {
1059				if (!strchr(ifs, *p)) {
1060					had_param_ch = 1;
1061					p++;
1062					continue;
1063				}
				/* Non-NULL if the separator is IFS whitespace. */
1064				ifsspc = strchr(" \t\n", *p);
1065
1066				/* Ignore IFS whitespace at start */
1067				if (q == start && ifsspc != NULL) {
1068					p++;
1069					start = p;
1070					continue;
1071				}
1072				had_param_ch = 0;
1073			}
1074
1075			/* Save this argument... */
1076			*q = '\0';
1077			sp = (struct strlist *)stalloc(sizeof *sp);
1078			sp->text = start;
1079			*arglist->lastp = sp;
1080			arglist->lastp = &sp->next;
1081			p++;
1082
1083			if (ifsspc != NULL) {
1084				/* Ignore further trailing IFS whitespace */
				/*
				 * The scan also consumes at most one IFS
				 * character that is not whitespace.
				 */
1085				for (; p < string + ifsp->endoff; p++) {
1086					q = p;
1087					if (*p == CTLESC)
1088						p++;
1089					if (strchr(ifs, *p) == NULL) {
1090						p = q;
1091						break;
1092					}
1093					if (strchr(" \t\n", *p) == NULL) {
1094						p++;
1095						break;
1096					}
1097				}
1098			}
1099			start = p;
1100		}
1101	}
1102
1103	/*
1104	 * Save anything left as an argument.
1105	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
1106	 * generating 2 arguments, the second of which is empty.
1107	 * Some recent clarification of the Posix spec says that it
1108	 * should only generate one....
1109	 */
1110	if (had_param_ch || *start != 0) {
1111		sp = (struct strlist *)stalloc(sizeof *sp);
1112		sp->text = start;
1113		*arglist->lastp = sp;
1114		arglist->lastp = &sp->next;
1115	}
1116}
1117
1118
/*
 * Scratch buffer in which expmeta() assembles candidate pathnames;
 * expdir_end points one past its last byte.
 */
1119static char expdir[PATH_MAX];
1120#define expdir_end (expdir + sizeof(expdir))
1121
1122/*
1123 * Perform pathname generation and remove control characters.
1124 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1125 * The results are stored in the list exparg.
1126 */
1127static void
1128expandmeta(struct strlist *str, int flag __unused)
1129{
1130	char *p;
1131	struct strlist **savelastp;
1132	struct strlist *sp;
1133	char c;
1134	/* TODO - EXP_REDIR */
1135
1136	while (str) {
1137		if (fflag)
1138			goto nometa;
1139		p = str->text;
1140		for (;;) {			/* fast check for meta chars */
1141			if ((c = *p++) == '\0')
1142				goto nometa;
1143			if (c == '*' || c == '?' || c == '[')
1144				break;
1145		}
1146		savelastp = exparg.lastp;
1147		INTOFF;
1148		expmeta(expdir, str->text);
1149		INTON;
1150		if (exparg.lastp == savelastp) {
1151			/*
1152			 * no matches
1153			 */
1154nometa:
1155			*exparg.lastp = str;
1156			rmescapes(str->text);
1157			exparg.lastp = &str->next;
1158		} else {
1159			*exparg.lastp = NULL;
1160			*savelastp = sp = expsort(*savelastp);
1161			while (sp->next != NULL)
1162				sp = sp->next;
1163			exparg.lastp = &sp->next;
1164		}
1165		str = str->next;
1166	}
1167}
1168
1169
1170/*
1171 * Do metacharacter (i.e. *, ?, [...]) expansion.
 *
 *	char *enddir	position in expdir just past the directory prefix
 *			assembled so far (equal to expdir at the top level)
 *	char *name	the rest of the pattern, still containing CTLESC
 *			and CTLQUOTEMARK characters
 *
 * Every matching pathname is passed to addfname().  The function calls
 * itself for each pattern component that follows the one being matched.
 * The pattern is modified temporarily and restored before returning
 * normally.
1172 */
1173
1174static void
1175expmeta(char *enddir, char *name)
1176{
1177	char *p;
1178	char *q;
1179	char *start;
1180	char *endname;
1181	int metaflag;
1182	struct stat statb;
1183	DIR *dirp;
1184	struct dirent *dp;
1185	int atend;
1186	int matchdot;
1187	int esc;
1188
	/*
	 * Find the first pathname component containing a metacharacter.
	 * Afterwards start points at the beginning of that component and
	 * p at the (possibly escaped) '/' or the NUL that ends it.
	 */
1189	metaflag = 0;
1190	start = name;
1191	for (p = name; esc = 0, *p; p += esc + 1) {
1192		if (*p == '*' || *p == '?')
1193			metaflag = 1;
1194		else if (*p == '[') {
			/* A '[' only counts if a closing ']' follows. */
1195			q = p + 1;
1196			if (*q == '!' || *q == '^')
1197				q++;
1198			for (;;) {
1199				while (*q == CTLQUOTEMARK)
1200					q++;
1201				if (*q == CTLESC)
1202					q++;
1203				if (*q == '/' || *q == '\0')
1204					break;
1205				if (*++q == ']') {
1206					metaflag = 1;
1207					break;
1208				}
1209			}
1210		} else if (*p == '\0')
1211			break;
1212		else if (*p == CTLQUOTEMARK)
1213			continue;
1214		else {
1215			if (*p == CTLESC)
1216				esc++;
1217			if (p[esc] == '/') {
1218				if (metaflag)
1219					break;
1220				start = p + esc + 1;
1221			}
1222		}
1223	}
1224	if (metaflag == 0) {	/* we've reached the end of the file name */
		/*
		 * No metacharacters are left.  Below the top level the
		 * assembled name is only added if it exists.
		 */
1225		if (enddir != expdir)
1226			metaflag++;
		/* Copy the rest, dropping CTLQUOTEMARK and CTLESC. */
1227		for (p = name ; ; p++) {
1228			if (*p == CTLQUOTEMARK)
1229				continue;
1230			if (*p == CTLESC)
1231				p++;
1232			*enddir++ = *p;
1233			if (*p == '\0')
1234				break;
			/* Give up silently if the name does not fit. */
1235			if (enddir == expdir_end)
1236				return;
1237		}
1238		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
1239			addfname(expdir);
1240		return;
1241	}
1242	endname = p;
	/* Copy the literal leading directories of the pattern to expdir. */
1243	if (start != name) {
1244		p = name;
1245		while (p < start) {
1246			while (*p == CTLQUOTEMARK)
1247				p++;
1248			if (*p == CTLESC)
1249				p++;
1250			*enddir++ = *p++;
1251			if (enddir == expdir_end)
1252				return;
1253		}
1254	}
	/* Choose the directory to read. */
1255	if (enddir == expdir) {
1256		p = ".";
1257	} else if (enddir == expdir + 1 && *expdir == '/') {
1258		p = "/";
1259	} else {
		/* Replace the trailing '/' by NUL just for opendir(). */
1260		p = expdir;
1261		enddir[-1] = '\0';
1262	}
1263	if ((dirp = opendir(p)) == NULL)
1264		return;
1265	if (enddir != expdir)
1266		enddir[-1] = '/';
1267	if (*endname == 0) {
1268		atend = 1;
1269	} else {
		/*
		 * Terminate the current component in place; endname then
		 * points at the remainder of the pattern.
		 */
1270		atend = 0;
1271		*endname = '\0';
1272		endname += esc + 1;
1273	}
	/* Names starting with '.' need a literal leading '.' in the pattern. */
1274	matchdot = 0;
1275	p = start;
1276	while (*p == CTLQUOTEMARK)
1277		p++;
1278	if (*p == CTLESC)
1279		p++;
1280	if (*p == '.')
1281		matchdot++;
	/* Stop reading the directory early when an interrupt is pending. */
1282	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
1283		if (dp->d_name[0] == '.' && ! matchdot)
1284			continue;
1285		if (patmatch(start, dp->d_name, 0)) {
			/* Entries that do not fit into expdir are skipped. */
1286			if (enddir + dp->d_namlen + 1 > expdir_end)
1287				continue;
1288			memcpy(enddir, dp->d_name, dp->d_namlen + 1);
1289			if (atend)
1290				addfname(expdir);
1291			else {
1292				if (enddir + dp->d_namlen + 2 > expdir_end)
1293					continue;
				/* Append '/' and expand the rest below it. */
1294				enddir[dp->d_namlen] = '/';
1295				enddir[dp->d_namlen + 1] = '\0';
1296				expmeta(enddir + dp->d_namlen + 1, endname);
1297			}
1298		}
1299	}
1300	closedir(dirp);
	/* Restore the pattern byte that was overwritten with NUL above. */
1301	if (! atend)
1302		endname[-esc - 1] = esc ? CTLESC : '/';
1303}
1304
1305
1306/*
1307 * Add a file name to the list.
1308 */
1309
1310static void
1311addfname(char *name)
1312{
1313	char *p;
1314	struct strlist *sp;
1315
1316	p = stalloc(strlen(name) + 1);
1317	scopy(name, p);
1318	sp = (struct strlist *)stalloc(sizeof *sp);
1319	sp->text = p;
1320	*exparg.lastp = sp;
1321	exparg.lastp = &sp->next;
1322}
1323
1324
1325/*
1326 * Sort the results of file name expansion.  It calculates the number of
1327 * strings to sort and then calls msort (short for merge sort) to do the
1328 * work.
1329 */
1330
1331static struct strlist *
1332expsort(struct strlist *str)
1333{
1334	int len;
1335	struct strlist *sp;
1336
1337	len = 0;
1338	for (sp = str ; sp ; sp = sp->next)
1339		len++;
1340	return msort(str, len);
1341}
1342
1343
1344static struct strlist *
1345msort(struct strlist *list, int len)
1346{
1347	struct strlist *p, *q = NULL;
1348	struct strlist **lpp;
1349	int half;
1350	int n;
1351
1352	if (len <= 1)
1353		return list;
1354	half = len >> 1;
1355	p = list;
1356	for (n = half ; --n >= 0 ; ) {
1357		q = p;
1358		p = p->next;
1359	}
1360	q->next = NULL;			/* terminate first half of list */
1361	q = msort(list, half);		/* sort first half of list */
1362	p = msort(p, len - half);		/* sort second half */
1363	lpp = &list;
1364	for (;;) {
1365		if (strcmp(p->text, q->text) < 0) {
1366			*lpp = p;
1367			lpp = &p->next;
1368			if ((p = *lpp) == NULL) {
1369				*lpp = q;
1370				break;
1371			}
1372		} else {
1373			*lpp = q;
1374			lpp = &q->next;
1375			if ((q = *lpp) == NULL) {
1376				*lpp = p;
1377				break;
1378			}
1379		}
1380	}
1381	return list;
1382}
1383
1384
1385
/*
 * Decode one multibyte character from *p using mbtowc(), looking at no
 * more than 4 bytes.  On success *p is advanced past the character and
 * the character is returned.  If *p is at the terminating NUL, or the
 * bytes do not form a valid character, 0 is returned and *p is left
 * where it was.
 */
static wchar_t
get_wc(const char **p)
{
	wchar_t wc;
	int n;

	n = mbtowc(&wc, *p, 4);
	if (n == 0 || n == -1)
		return 0;
	*p += n;
	return wc;
}
1401
1402
1403/*
1404 * Returns true if the pattern matches the string.
1405 */
1406
1407int
1408patmatch(const char *pattern, const char *string, int squoted)
1409{
1410	const char *p, *q;
1411	char c;
1412	wchar_t wc, wc2;
1413
1414	p = pattern;
1415	q = string;
1416	for (;;) {
1417		switch (c = *p++) {
1418		case '\0':
1419			goto breakloop;
1420		case CTLESC:
1421			if (squoted && *q == CTLESC)
1422				q++;
1423			if (*q++ != *p++)
1424				return 0;
1425			break;
1426		case CTLQUOTEMARK:
1427			continue;
1428		case '?':
1429			if (squoted && *q == CTLESC)
1430				q++;
1431			if (localeisutf8)
1432				wc = get_wc(&q);
1433			else
1434				wc = (unsigned char)*q++;
1435			if (wc == '\0')
1436				return 0;
1437			break;
1438		case '*':
1439			c = *p;
1440			while (c == CTLQUOTEMARK || c == '*')
1441				c = *++p;
1442			if (c != CTLESC &&  c != CTLQUOTEMARK &&
1443			    c != '?' && c != '*' && c != '[') {
1444				while (*q != c) {
1445					if (squoted && *q == CTLESC &&
1446					    q[1] == c)
1447						break;
1448					if (*q == '\0')
1449						return 0;
1450					if (squoted && *q == CTLESC)
1451						q++;
1452					q++;
1453				}
1454			}
1455			do {
1456				if (patmatch(p, q, squoted))
1457					return 1;
1458				if (squoted && *q == CTLESC)
1459					q++;
1460			} while (*q++ != '\0');
1461			return 0;
1462		case '[': {
1463			const char *endp;
1464			int invert, found;
1465			wchar_t chr;
1466
1467			endp = p;
1468			if (*endp == '!' || *endp == '^')
1469				endp++;
1470			for (;;) {
1471				while (*endp == CTLQUOTEMARK)
1472					endp++;
1473				if (*endp == '\0')
1474					goto dft;		/* no matching ] */
1475				if (*endp == CTLESC)
1476					endp++;
1477				if (*++endp == ']')
1478					break;
1479			}
1480			invert = 0;
1481			if (*p == '!' || *p == '^') {
1482				invert++;
1483				p++;
1484			}
1485			found = 0;
1486			if (squoted && *q == CTLESC)
1487				q++;
1488			if (localeisutf8)
1489				chr = get_wc(&q);
1490			else
1491				chr = (unsigned char)*q++;
1492			if (chr == '\0')
1493				return 0;
1494			c = *p++;
1495			do {
1496				if (c == CTLQUOTEMARK)
1497					continue;
1498				if (c == CTLESC)
1499					c = *p++;
1500				if (localeisutf8 && c & 0x80) {
1501					p--;
1502					wc = get_wc(&p);
1503					if (wc == 0) /* bad utf-8 */
1504						return 0;
1505				} else
1506					wc = (unsigned char)c;
1507				if (*p == '-' && p[1] != ']') {
1508					p++;
1509					while (*p == CTLQUOTEMARK)
1510						p++;
1511					if (*p == CTLESC)
1512						p++;
1513					if (localeisutf8) {
1514						wc2 = get_wc(&p);
1515						if (wc2 == 0) /* bad utf-8 */
1516							return 0;
1517					} else
1518						wc2 = (unsigned char)*p++;
1519					if (   collate_range_cmp(chr, wc) >= 0
1520					    && collate_range_cmp(chr, wc2) <= 0
1521					   )
1522						found = 1;
1523				} else {
1524					if (chr == wc)
1525						found = 1;
1526				}
1527			} while ((c = *p++) != ']');
1528			if (found == invert)
1529				return 0;
1530			break;
1531		}
1532dft:	        default:
1533			if (squoted && *q == CTLESC)
1534				q++;
1535			if (*q++ != c)
1536				return 0;
1537			break;
1538		}
1539	}
1540breakloop:
1541	if (*q != '\0')
1542		return 0;
1543	return 1;
1544}
1545
1546
1547
1548/*
1549 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1550 */
1551
1552void
1553rmescapes(char *str)
1554{
1555	char *p, *q;
1556
1557	p = str;
1558	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1559		if (*p++ == '\0')
1560			return;
1561	}
1562	q = p;
1563	while (*p) {
1564		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1565			p++;
1566			continue;
1567		}
1568		if (*p == CTLESC)
1569			p++;
1570		*q++ = *p++;
1571	}
1572	*q = '\0';
1573}
1574
1575
1576
1577/*
1578 * See if a pattern matches in a case statement.
1579 */
1580
1581int
1582casematch(union node *pattern, const char *val)
1583{
1584	struct stackmark smark;
1585	int result;
1586	char *p;
1587
1588	setstackmark(&smark);
1589	argbackq = pattern->narg.backquote;
1590	STARTSTACKSTR(expdest);
1591	ifslastp = NULL;
1592	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1593	STPUTC('\0', expdest);
1594	p = grabstackstr(expdest);
1595	result = patmatch(p, val, 0);
1596	popstackmark(&smark);
1597	return result;
1598}
1599
/*
 * Our own itoa().
 *
 * Formats num as a decimal string, with a leading '-' for negative
 * values, passes it to STPUTS() together with buf, and returns buf as
 * left by that macro.
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	char *p = temp + sizeof(temp) - 1;
	unsigned int mag;

	/*
	 * Convert the magnitude as an unsigned value.  Applying % to a
	 * negative int gives a non-positive remainder, which would produce
	 * characters below '0'; negating in unsigned arithmetic also copes
	 * with INT_MIN, whose magnitude does not fit in an int.
	 */
	if (num < 0)
		mag = 0U - (unsigned int)num;
	else
		mag = (unsigned int)num;

	*p = '\0';

	/* Emit digits from least to most significant, filling backwards. */
	do {
		*--p = (char)('0' + mag % 10);
	} while ((mag /= 10) != 0);

	if (num < 0)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1623
1624/*
1625 * Do most of the work for wordexp(3).
1626 */
1627
1628int
1629wordexpcmd(int argc, char **argv)
1630{
1631	size_t len;
1632	int i;
1633
1634	out1fmt("%08x", argc - 1);
1635	for (i = 1, len = 0; i < argc; i++)
1636		len += strlen(argv[i]);
1637	out1fmt("%08x", (int)len);
1638	for (i = 1; i < argc; i++)
1639		outbin(argv[i], strlen(argv[i]) + 1, out1);
1640        return (0);
1641}
1642