imgact_shell.c revision 146731
1/*-
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/imgact_shell.c 146731 2005-05-28 22:42:41Z gad $");
29
30#include <sys/param.h>
31#include <sys/vnode.h>
32#include <sys/proc.h>
33#include <sys/systm.h>
34#include <sys/sysproto.h>
35#include <sys/exec.h>
36#include <sys/imgact.h>
37#include <sys/kernel.h>
38
/*
 * Two-character script prefixes, written as the value seen when the first
 * two bytes of the file are loaded through a `short' pointer.  The numeric
 * constant therefore depends on the byte order of the machine.
 *
 * SHELLMAGIC is the regular "#!" prefix.  DBG_MAGIC ("#+") and OLD_MAGIC
 * ("#<") are temporary values that exist only for the KEEP_OLDCODE kludge.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321		/* #! in big-endian */
#define	DBG_MAGIC	0x232B		/* #+ in big-endian */
#define	OLD_MAGIC	0x233C		/* #< in big-endian */
#else
#define	SHELLMAGIC	0x2123		/* #! in little-endian */
#define	DBG_MAGIC	0x2B23		/* #+ in little-endian */
#define	OLD_MAGIC	0x3C23		/* #< in little-endian */
#endif

/* Non-zero keeps the older, non-standard option-parsing code compiled in. */
#define	KEEP_OLDCODE	1

/*
 * The caller maps in only a single page of the file being examined, so
 * a command line longer than one page could not be read here.  (When this
 * was written MAXSHELLCMDLEN was equal to PAGE_SIZE.)  Teaching this code
 * to read a second page is left for later; until then, refuse to build if
 * the limit ever grows past one page.
 */
#if PAGE_SIZE < MAXSHELLCMDLEN
#error "MAXSHELLCMDLEN is larger than a single page!"
#endif
63
/**
 * Shell interpreter image activator.  At a minimum, a successful return
 * leaves the name of the interpreter at imgp->args->begin_argv.
 *
 * If the given file is a shell-script, then the first line will start
 * with the two characters `#!' (aka SHELLMAGIC), followed by the name
 * of the shell-interpreter to run, followed by zero or more tokens.
 *
 * The interpreter is then started up such that it will see:
 *    arg[0] -> The name of interpreter as specified after `#!' in the
 *		first line of the script.  The interpreter name must
 *		be shorter than MAXSHELLCMDLEN bytes.
 *    arg[1] -> *If* there are any additional tokens on the first line,
 *		then we add a new arg[1], which is a copy of the rest of
 *		that line.  The copy starts at the first token after the
 *		interpreter name.  We leave it to the interpreter to
 *		parse the tokens in that value.
 *    arg[x] -> the full pathname of the script.  This will either be
 *		arg[2] or arg[1], depending on whether or not tokens
 *		were found after the interpreter name.
 *  arg[x+1] -> all the arguments that were specified on the original
 *		command line.
 *
 * This processing is described in the execve(2) man page.
 */
89
/*
 * HISTORICAL NOTE: From 1993 to mid-2005, FreeBSD parsed out the tokens as
 * found on the first line of the script, and set up each token as a separate
 * value in arg[].  This extra processing did not match the behavior of other
 * OS's, and caused a few subtle problems.  For one, it meant the kernel was
 * deciding how those values should be parsed (wrt characters for quoting or
 * comments, etc), while the interpreter might have other rules for parsing.
 * It also meant the interpreter had no way of knowing which arguments came
 * from the first line of the shell script, and which arguments were specified
 * by the user on the command line.
 *
 * Only a few things in the base system might depend on that non-standard
 * processing (mainly /bin/sh and /usr/bin/env).  And for programs which are
 * not in the base system, the "newer" behavior matches how NetBSD, OpenBSD,
 * Linux, Solaris, AIX, IRIX, and many other Unixes have set up the arg-list
 * for the interpreter.  So if a program can handle this behavior on those
 * other OS's, it should be able to handle it for FreeBSD too.
 */
108int
109exec_shell_imgact(imgp)
110	struct image_params *imgp;
111{
112	const char *image_header = imgp->image_header;
113	const char *ihp, *interpb, *interpe, *maxp, *optb, *opte;
114	int error, offset;
115	size_t length, clength;
116	struct vattr vattr;
117
118	/* a shell script? */
119	if (((const short *) image_header)[0] != SHELLMAGIC)
120		return(-1);
121
122	/*
123	 * Don't allow a shell script to be the shell for a shell
124	 *	script. :-)
125	 */
126	if (imgp->interpreted)
127		return(ENOEXEC);
128
129	imgp->interpreted = 1;
130
131	/*
132	 * At this point we have the first page of the file mapped.
133	 * However, we don't know how far into the page the contents are
134	 * valid -- the actual file might be much shorter than the page.
135	 * So find out the file size.
136 	 */
137	error = VOP_GETATTR(imgp->vp, &vattr, imgp->proc->p_ucred, curthread);
138	if (error)
139		return (error);
140
141	/*
142	 * Copy shell name and arguments from image_header into a string
143	 *	buffer.  Remember that the caller has mapped only the
144	 *	first page of the file into memory.
145	 */
146	clength = (vattr.va_size > PAGE_SIZE) ? PAGE_SIZE : vattr.va_size;
147
148	maxp = &image_header[clength];
149	ihp = &image_header[2];
150#if KEEP_OLDCODE
151	/*
152	 * XXX - Temporarily provide a quick-and-dirty way to get the
153	 * older, non-standard option-parsing behavior, just in case
154	 * someone finds themselves in an emergency where they need it.
155	 * This will not be documented.  It is only for initial testing.
156	 */
157	if (*(const short *)ihp == OLD_MAGIC)
158		ihp += 2;
159	else
160		goto new_code;
161	interpb = ihp;
162
163	/*
164	 * Figure out the number of bytes that need to be reserved in the
165	 * argument string to copy the contents of the interpreter's command
166	 * line into the argument string.
167	 */
168	ihp = interpb;
169	offset = 0;
170	while (ihp < &image_header[clength]) {
171		/* Skip any whitespace */
172		if ((*ihp == ' ') || (*ihp == '\t')) {
173			ihp++;
174			continue;
175		}
176
177		/* End of line? */
178		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
179			break;
180
181		/* Found a token */
182		do {
183			offset++;
184			ihp++;
185		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
186		    (*ihp != '#') && (*ihp != '\0') &&
187		    (ihp < &image_header[clength]));
188		/* Include terminating nulls in the offset */
189		offset++;
190	}
191
192	/* If the script gives a null line as the interpreter, we bail */
193	if (offset == 0)
194		return (ENOEXEC);
195
196	/* Check that we aren't too big */
197	if (ihp == &image_header[MAXSHELLCMDLEN])
198		return (ENAMETOOLONG);
199
200	/*
201	 * The full path name of the original script file must be tagged
202	 * onto the end, adjust the offset to deal with it.
203	 *
204	 * The original argv[0] is being replaced, set 'length' to the number
205	 * of bytes being removed.  So 'offset' is the number of bytes being
206	 * added and 'length' is the number of bytes being removed.
207	 */
208	offset += strlen(imgp->args->fname) + 1;	/* add fname */
209	length = (imgp->args->argc == 0) ? 0 :
210	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
211
212	if (offset - length > imgp->args->stringspace)
213		return (E2BIG);
214
215	bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset,
216	    imgp->args->endp - (imgp->args->begin_argv + length));
217
218	offset -= length;		/* calculate actual adjustment */
219	imgp->args->begin_envv += offset;
220	imgp->args->endp += offset;
221	imgp->args->stringspace -= offset;
222
223	/*
224	 * If there were no arguments then we've added one, otherwise
225	 * decr argc remove old argv[0], incr argc for fname add, net 0
226	 */
227	if (imgp->args->argc == 0)
228		imgp->args->argc = 1;
229
230	/*
231	 * Loop through the interpreter name yet again, copying as
232	 * we go.
233	 */
234	ihp = interpb;
235	offset = 0;
236	while (ihp < &image_header[clength]) {
237		/* Skip whitespace */
238		if ((*ihp == ' ') || (*ihp == '\t')) {
239			ihp++;
240			continue;
241		}
242
243		/* End of line? */
244		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
245			break;
246
247		/* Found a token, copy it */
248		do {
249			imgp->args->begin_argv[offset++] = *ihp++;
250		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
251		    (*ihp != '#') && (*ihp != '\0') &&
252		    (ihp < &image_header[MAXSHELLCMDLEN]));
253		imgp->args->begin_argv[offset++] = '\0';
254		imgp->args->argc++;
255	}
256	goto common_end;
257new_code:
258#endif
259	/*
260	 * Find the beginning and end of the interpreter_name.  If the
261	 * line does not include any interpreter, or if the name which
262	 * was found is too long, we bail out.
263	 */
264	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
265		ihp++;
266	interpb = ihp;
267	while (ihp < maxp && ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n')
268	    && (*ihp != '\0')))
269		ihp++;
270	interpe = ihp;
271	if (interpb == interpe)
272		return (ENOEXEC);
273	if ((interpe - interpb) >= MAXSHELLCMDLEN)
274		return (ENAMETOOLONG);
275
276	/*
277	 * Find the beginning of the options (if any), and the end-of-line.
278	 * Then trim the trailing blanks off the value.  Note that some
279	 * other operating systems do *not* trim the trailing whitespace...
280	 */
281	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
282		ihp++;
283	optb = ihp;
284	while (ihp < maxp && ((*ihp != '\n') && (*ihp != '\0')))
285		ihp++;
286	opte = ihp;
287	while (--ihp > interpe && ((*ihp == ' ') || (*ihp == '\t')))
288		opte = ihp;
289
290	/*
291	 * We need to "pop" (remove) the present value of arg[0], and "push"
292	 * either two or three new values in the arg[] list.  To do this,
293	 * we first shift all the other values in the `begin_argv' area to
294	 * provide the exact amount of room for the values added.  Set up
295	 * `offset' as the number of bytes to be added to the `begin_argv'
296	 * area, and 'length' as the number of bytes being removed.
297	 */
298	offset = interpe - interpb + 1;			/* interpreter */
299	if (opte != optb)				/* options (if any) */
300		offset += opte - optb + 1;
301	offset += strlen(imgp->args->fname) + 1;	/* fname of script */
302	length = (imgp->args->argc == 0) ? 0 :
303	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
304
305	if (offset - length > imgp->args->stringspace)
306		return (E2BIG);
307
308	bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset,
309	    imgp->args->endp - (imgp->args->begin_argv + length));
310
311	offset -= length;		/* calculate actual adjustment */
312	imgp->args->begin_envv += offset;
313	imgp->args->endp += offset;
314	imgp->args->stringspace -= offset;
315
316	/*
317	 * If there was no arg[0] when we started, then the interpreter_name
318	 * is adding an argument (instead of replacing the arg[0] we started
319	 * with).  And we're always adding an argument when we include the
320	 * full pathname of the original script.
321	 */
322	if (imgp->args->argc == 0)
323		imgp->args->argc = 1;
324	imgp->args->argc++;
325
326	/*
327	 * The original arg[] list has been shifted appropriately.  Copy in
328	 * the interpreter name and options-string.
329	 */
330	length = interpe - interpb;
331	bcopy(interpb, imgp->args->buf, length);
332	*(imgp->args->buf + length) = '\0';
333	offset = length + 1;
334	if (opte != optb) {
335		length = opte - optb;
336		bcopy(optb, imgp->args->buf + offset, length);
337		*(imgp->args->buf + offset + length) = '\0';
338		offset += length + 1;
339		imgp->args->argc++;
340	}
341
342#if KEEP_OLDCODE
343common_end:
344#endif
345	/*
346	 * Finally, add the filename onto the end for the interpreter to
347	 * use and copy the interpreter's name to imgp->interpreter_name
348	 * for exec to use.
349	 */
350	error = copystr(imgp->args->fname, imgp->args->buf + offset,
351	    imgp->args->stringspace, &length);
352
353	if (error == 0)
354		error = copystr(imgp->args->begin_argv, imgp->interpreter_name,
355		    MAXSHELLCMDLEN, &length);
356
357	return (error);
358}
359
360/*
361 * Tell kern_execve.c about it, with a little help from the linker.
362 */
363static struct execsw shell_execsw = { exec_shell_imgact, "#!" };
364EXEC_SET(shell, shell_execsw);
365