envopts.c revision 147493
1/*-
2 * Copyright (c) 2005  - Garance Alistair Drosehn <gad@FreeBSD.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * The views and conclusions contained in the software and documentation
27 * are those of the authors and should not be interpreted as representing
28 * official policies, either expressed or implied, of the FreeBSD Project.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/usr.bin/env/envopts.c 147493 2005-06-20 03:43:25Z gad $");
33
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <err.h>
37#include <errno.h>
38#include <ctype.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43
44#include "envopts.h"
45
/* Forward declarations for the file-local helpers defined further down. */
static void	 expand_vars(char **thisarg_p, char **dest_p,
		    const char **src_p);
static int	 is_there(char *candidate);

/*
 * The <ctype.h> classification routines take an 'int' argument whose value
 * must be representable as an unsigned char.  A plain 'char' may be signed,
 * so these wrappers force the argument into the unsigned char range before
 * handing it to the underlying is*() routine.
 */
#define	isalnumch(Ch)	isalnum((unsigned char)(Ch))
#define	isalphach(Ch)	isalpha((unsigned char)(Ch))
#define	isspacech(Ch)	isspace((unsigned char)(Ch))
58
59/*
60 * Routine to determine if a given fully-qualified filename is executable.
61 * This is copied almost verbatim from FreeBSD's usr.bin/which/which.c.
62 */
63static int
64is_there(char *candidate)
65{
66        struct stat fin;
67
68        /* XXX work around access(2) false positives for superuser */
69        if (access(candidate, X_OK) == 0 &&
70            stat(candidate, &fin) == 0 &&
71            S_ISREG(fin.st_mode) &&
72            (getuid() != 0 ||
73            (fin.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)) {
74                if (env_verbosity > 1)
75			fprintf(stderr, "#env   matched:\t'%s'\n", candidate);
76                return (1);
77        }
78        return (0);
79}
80
81/**
82 * Routine to search through an alternate path-list, looking for a given
83 * filename to execute.  If the file is found, replace the original
84 * unqualified name with a fully-qualified path.  This allows `env' to
85 * execute programs from a specific strict list of possible paths, without
86 * changing the value of PATH seen by the program which will be executed.
87 * E.G.:
88 *	#!/usr/bin/env -S-P/usr/local/bin:/usr/bin perl
89 * will execute /usr/local/bin/perl or /usr/bin/perl (whichever is found
90 * first), no matter what the current value of PATH is, and without
91 * changing the value of PATH that the script will see when it runs.
92 *
93 * This is similar to the print_matches() routine in usr.bin/which/which.c.
94 */
95void
96search_paths(char *path, char **argv)
97{
98        char candidate[PATH_MAX];
99        const char *d;
100	char *filename, *fqname;
101
102	/* If the file has a `/' in it, then no search is done */
103	filename = *argv;
104	if (strchr(filename, '/') != NULL)
105		return;
106
107	if (env_verbosity > 1) {
108		fprintf(stderr, "#env Searching:\t'%s'\n", path);
109		fprintf(stderr, "#env  for file:\t'%s'\n", filename);
110	}
111
112	fqname = NULL;
113        while ((d = strsep(&path, ":")) != NULL) {
114                if (*d == '\0')
115                        d = ".";
116                if (snprintf(candidate, sizeof(candidate), "%s/%s", d,
117                    filename) >= (int)sizeof(candidate))
118                        continue;
119                if (is_there(candidate)) {
120                        fqname = candidate;
121			break;
122                }
123        }
124
125	if (fqname == NULL) {
126		errno = ENOENT;
127		err(127, "%s", filename);
128	}
129	*argv = strdup(candidate);
130}
131
132/**
133 * Routine to split a string into multiple parameters, while recognizing a
134 * few special characters.  It recognizes both single and double-quoted
135 * strings.  This processing is designed entirely for the benefit of the
136 * parsing of "#!"-lines (aka "shebang" lines == the first line of an
137 * executable script).  Different operating systems parse that line in very
138 * different ways, and this split-on-spaces processing is meant to provide
139 * ways to specify arbitrary arguments on that line, no matter how the OS
140 * parses it.
141 *
142 * Within a single-quoted string, the two characters "\'" are treated as
143 * a literal "'" character to add to the string, and "\\" are treated as
144 * a literal "\" character to add.  Other than that, all characters are
145 * copied until the processing gets to a terminating "'".
146 *
147 * Within a double-quoted string, many more "\"-style escape sequences
148 * are recognized, mostly copied from what is recognized in the `printf'
149 * command.  Some OS's will not allow a literal blank character to be
150 * included in the one argument that they recognize on a shebang-line,
151 * so a few additional escape-sequences are defined to provide ways to
152 * specify blanks.
153 *
154 * Within a double-quoted string "\_" is turned into a literal blank.
155 * (Inside of a single-quoted string, the two characters are just copied)
156 * Outside of a quoted string, "\_" is treated as both a blank, and the
157 * end of the current argument.  So with a shelbang-line of:
158 *		#!/usr/bin/env -SA=avalue\_perl
159 * the -S value would be broken up into arguments "A=avalue" and "perl".
160 */
161void
162split_spaces(const char *str, int *origind, int *origc, char ***origv)
163{
164	const char *bq_src, *src;
165	char *dest, **newargv, *newstr, **nextarg, **oldarg;
166	int addcount, bq_destlen, copychar, found_sep, in_arg, in_dq, in_sq;
167
168	/*
169	 * Ignore leading space on the string, and then malloc enough room
170	 * to build a copy of it.  The copy might end up shorter than the
171	 * original, due to quoted strings and '\'-processing.
172	 */
173	while (isspacech(*str))
174		str++;
175	if (*str == '\0')
176		return;
177	newstr = malloc(strlen(str) + 1);
178
179	/*
180	 * Allocate plenty of space for the new array of arg-pointers,
181	 * and start that array off with the first element of the old
182	 * array.
183	 */
184	newargv = malloc((*origc + (strlen(str) / 2) + 2) * sizeof(char *));
185	nextarg = newargv;
186	*nextarg++ = **origv;
187
188	/* Come up with the new args by splitting up the given string. */
189	addcount = 0;
190	bq_destlen = in_arg = in_dq = in_sq = 0;
191	bq_src = NULL;
192	for (src = str, dest = newstr; *src != '\0'; src++) {
193		copychar = found_sep = 0;
194		switch (*src) {
195		case '"':
196			if (in_sq)
197				copychar = *src;
198			else if (in_dq)
199				in_dq = 0;
200			else {
201				in_dq = 1;
202				bq_destlen = dest - *(nextarg - 1);
203				bq_src = src;
204			}
205			break;
206		case '$':
207			if (in_sq)
208				copychar = *src;
209			else {
210				expand_vars((nextarg - 1), &dest, &src);
211			}
212			break;
213		case '\'':
214			if (in_dq)
215				copychar = *src;
216			else if (in_sq)
217				in_sq = 0;
218			else {
219				in_sq = 1;
220				bq_destlen = dest - *(nextarg - 1);
221				bq_src = src;
222			}
223			break;
224		case '\\':
225			if (in_sq) {
226				/*
227				 * Inside single-quoted strings, only the
228				 * "\'" and "\\" are recognized as special
229				 * strings.
230				 */
231				copychar = *(src + 1);
232				if (copychar == '\'' || copychar == '\\')
233					src++;
234				else
235					copychar = *src;
236				break;
237			}
238			src++;
239			switch (*src) {
240			case '"':
241			case '#':
242			case '$':
243			case '\'':
244			case '\\':
245				copychar = *src;
246				break;
247			case '_':
248				/*
249				 * Alternate way to get a blank, which allows
250				 * that blank be used to separate arguments
251				 * when it is not inside a quoted string.
252				 */
253				if (in_dq)
254					copychar = ' ';
255				else {
256					found_sep = 1;
257					src++;
258				}
259				break;
260			case 'c':
261				/*
262				 * Ignore remaining characters in the -S string.
263				 * This would not make sense if found in the
264				 * middle of a quoted string.
265				 */
266				if (in_dq)
267					errx(1, "Sequence '\\%c' is not allowed"
268					    " in quoted strings", *src);
269				goto str_done;
270			case 'f':
271				copychar = '\f';
272				break;
273			case 'n':
274				copychar = '\n';
275				break;
276			case 'r':
277				copychar = '\r';
278				break;
279			case 't':
280				copychar = '\t';
281				break;
282			case 'v':
283				copychar = '\v';
284				break;
285			default:
286				if (isspacech(*src))
287					copychar = *src;
288				else
289					errx(1, "Invalid sequence '\\%c' in -S",
290					    *src);
291			}
292			break;
293		default:
294			if ((in_dq || in_sq) && in_arg)
295				copychar = *src;
296			else if (in_arg && isspacech(*src))
297				found_sep = 1;
298			else {
299				/*
300				 * If the first character of a new argument
301				 * is `#', then ignore the remaining chars.
302				 */
303				if (!in_arg && *src == '#')
304					goto str_done;
305				copychar = *src;
306			}
307		}
308		if (copychar) {
309			if (!in_arg) {
310				/* This is the first byte of a new argument */
311				*nextarg++ = dest;
312				addcount++;
313				in_arg = 1;
314			}
315			*dest++ = (char)copychar;
316		} else if (found_sep) {
317			*dest++ = '\0';
318			while (isspacech(*src))
319				src++;
320			--src;
321			in_arg = 0;
322		}
323	}
324str_done:
325	*dest = '\0';
326	*nextarg = NULL;
327	if (in_dq || in_sq) {
328		errx(1, "No terminating quote for string: %.*s%s",
329		    bq_destlen, *(nextarg - 1), bq_src);
330	}
331	if (env_verbosity > 1) {
332		fprintf(stderr, "#env  split -S:\t'%s'\n", str);
333		oldarg = newargv + 1;
334		fprintf(stderr, "#env      into:\t'%s'\n", *oldarg);
335		for (oldarg++; *oldarg; oldarg++)
336			fprintf(stderr, "#env          &\t'%s'\n", *oldarg);
337	}
338
339	/* Copy the unprocessed arg-pointers from the original array */
340	for (oldarg = *origv + *origind; *oldarg; oldarg++)
341		*nextarg++ = *oldarg;
342	*nextarg = NULL;
343
344	/* Update optind/argc/argv in the calling routine */
345	*origind = 1;
346	*origc += addcount;
347	*origv = newargv;
348}
349
350/**
351 * Routine to split expand any environment variables referenced in the string
352 * that -S is processing.  For now it only supports the form ${VARNAME}.  It
353 * explicitly does not support $VARNAME, and obviously can not handle special
354 * shell-variables such as $?, $*, $1, etc.  It is called with *src_p pointing
355 * at the initial '$', and if successful it will update *src_p, *dest_p, and
356 * possibly *thisarg_p in the calling routine.
357 */
358void
359expand_vars(char **thisarg_p, char **dest_p, const char **src_p)
360{
361	const char *vbegin, *vend, *vvalue;
362	char *edest, *newstr, *vname;
363	int bad_reference;
364	size_t namelen, newlen;
365
366	bad_reference = 1;
367	vbegin = vend = (*src_p) + 1;
368	if (*vbegin++ == '{')
369		if (*vbegin == '_' || isalphach(*vbegin)) {
370			vend = vbegin + 1;
371			while (*vend == '_' || isalnumch(*vend))
372				vend++;
373			if (*vend == '}')
374				bad_reference = 0;
375		}
376	if (bad_reference)
377		errx(1, "Only ${VARNAME} expansion is supported, error at: %s",
378		    *src_p);
379
380	/*
381	 * We now know we have a valid environment variable name, so update
382	 * the caller's source-pointer to the last character in that reference,
383	 * and then pick up the matching value.  If the variable is not found,
384	 * or if it has a null value, then our work here is done.
385	 */
386	*src_p = vend;
387	namelen = vend - vbegin + 1;
388	vname = malloc(namelen);
389	strlcpy(vname, vbegin, namelen);
390	vvalue = getenv(vname);
391	if (vvalue == NULL || *vvalue == '\0') {
392		if (env_verbosity > 2)
393			fprintf(stderr,
394			    "#env  replacing ${%s} with null string\n",
395			    vname);
396		return;
397	}
398
399	if (env_verbosity > 2)
400		fprintf(stderr, "#env  expanding ${%s} into '%s'\n", vname,
401		    vvalue);
402
403	/*
404	 * There is some value to copy to the destination.  If the value is
405	 * shorter than the ${VARNAME} reference that it replaces, then we
406	 * can just copy the value to the existing destination.
407	 */
408	edest = *dest_p;
409	if (strlen(vname) + 3 >= strlen(vvalue)) {
410		while (*vvalue != '\0')
411		    *edest++ = *vvalue++;
412		*dest_p = edest;
413		return;
414	}
415
416	/*
417	 * The value is longer than the string it replaces, which means the
418	 * present destination area is too small to hold it.  Create a new
419	 * destination area, copy the present 'thisarg' value and the value
420	 * of the referenced-variable to it, and then update the caller's
421	 * 'thisarg' and 'dest' variables to match.
422	 */
423	*edest = '\0';			/* Provide terminator for 'thisarg' */
424	newlen = strlen(*thisarg_p) + strlen(vvalue) + strlen(*src_p) + 1;
425	newstr = malloc(newlen);
426	strcpy(newstr, *thisarg_p);
427	strcat(newstr, vvalue);
428	*thisarg_p = newstr;
429	*dest_p = strchr(newstr, '\0');
430}
431