1/*
2 * Copyright (c) 2005, 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24#include <TargetConditionals.h>
25
26#if TARGET_OS_IPHONE
27/* <rdar://problem/13875458> */
28
29#include <wordexp.h>
30int wordexp(const char *restrict words __unused, wordexp_t *restrict pwordexp __unused, int flags __unused) {
31    return WRDE_NOSPACE;
32}
33
34void wordfree(wordexp_t *pwordexp __unused) {
35}
36
37#else
38
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <wordexp.h>
#include <pthread.h>
#include <regex.h>
#include <assert.h>
#include <unistd.h>
#include <paths.h>
#include <strings.h>
#include <spawn.h>
#include <sys/errno.h>
#include <sys/wait.h>
51
52// For _NSGetEnviron() -- which gives us a pointer to environ
53#include <crt_externs.h>
54
/* Declared by hand instead of through a header; the code below calls it
   to pick buffer sizes before (re)allocating. */
extern size_t malloc_good_size(size_t size);

/* Ensures re_init() runs exactly once, via pthread_once(). */
static pthread_once_t re_init_c = PTHREAD_ONCE_INIT;

/* Patterns compiled by re_init(); not valid before it has run. */
static regex_t re_cmd;				/* backtick or "$(" opener */
static regex_t re_goodchars;			/* whitelist for a whole input */
static regex_t re_subcmd_syntax_err_kludge;	/* shell's syntax-error text */
static regex_t re_quoted_string;		/* one '...' string */
59
60/* Similar to popen, but captures stderr for you.  Doesn't interoperate
61  with pclose.  Call wait4 on your own */
62static pid_t popen_oe(char *cmd, FILE **out, FILE **err) {
63    int out_pipe[2], err_pipe[2];
64    char *argv[4];
65    pid_t pid;
66    posix_spawn_file_actions_t file_actions;
67    int errrtn;
68
69    if ((errrtn = posix_spawn_file_actions_init(&file_actions)) != 0) {
70	errno = errrtn;
71	return 0;
72    }
73    if (pipe(out_pipe) < 0) {
74	posix_spawn_file_actions_destroy(&file_actions);
75	return 0;
76    }
77    if (pipe(err_pipe) < 0) {
78	posix_spawn_file_actions_destroy(&file_actions);
79	close(out_pipe[0]);
80	close(out_pipe[1]);
81	return 0;
82    }
83
84    if (out_pipe[1] != STDOUT_FILENO) {
85	posix_spawn_file_actions_adddup2(&file_actions, out_pipe[1], STDOUT_FILENO);
86	posix_spawn_file_actions_addclose(&file_actions, out_pipe[1]);
87    }
88    posix_spawn_file_actions_addclose(&file_actions, out_pipe[0]);
89    if (err_pipe[1] != STDERR_FILENO) {
90	posix_spawn_file_actions_adddup2(&file_actions, err_pipe[1], STDERR_FILENO);
91	posix_spawn_file_actions_addclose(&file_actions, err_pipe[1]);
92    }
93    posix_spawn_file_actions_addclose(&file_actions, err_pipe[0]);
94
95    argv[0] = "sh";
96    argv[1] = "-c";
97    argv[2] = cmd;
98    argv[3] = NULL;
99
100    errrtn = posix_spawn(&pid, _PATH_BSHELL, &file_actions, NULL, argv, *_NSGetEnviron());
101    posix_spawn_file_actions_destroy(&file_actions);
102
103    if (errrtn != 0) {
104	close(out_pipe[0]);
105	close(out_pipe[1]);
106	close(err_pipe[0]);
107	close(err_pipe[1]);
108	errno = errrtn;
109	return 0;
110    }
111
112    *out = fdopen(out_pipe[0], "r");
113    assert(*out);
114    close(out_pipe[1]);
115    *err = fdopen(err_pipe[0], "r");
116    assert(*err);
117    close(err_pipe[1]);
118
119    return pid;
120}
121
122static void re_init(void) {
123    int rc = regcomp(&re_cmd, "(^|[^\\])(`|\\$\\([^(])", REG_EXTENDED|REG_NOSUB);
124    /* XXX I'm not sure the { } stuff is correct,
125      it may be overly restrictave */
126    char *rx = "^([^\\\"'|&;<>(){}\n]"
127      "|\\\\."
128      "|'(\\\\\\\\|\\\\'|[^'])*'"
129      "|\"(\\\\\\\\|\\\\\"|[^\"])*\""
130      "|`(\\\\\\\\|\\\\`|[^`])*`"
131      "|\\$\\(\\(([^)]|\\\\)*\\)\\)"  /* can't do nesting in a regex */
132      "|\\$\\(([^)]|\\\\)*\\)"  /* can't do nesting in a regex */
133      "|\\$\\{[^}]*\\}"
134      /* XXX: { } ? */
135      ")*$";
136    rc = regcomp(&re_goodchars, rx,
137      REG_EXTENDED|REG_NOSUB);
138
139    rc = regcomp(&re_subcmd_syntax_err_kludge,
140      "command substitution.*syntax error", REG_EXTENDED|REG_NOSUB);
141
142    rc = regcomp(&re_quoted_string,
143      "(^|[^\\])'(\\\\\\\\|\\\\'|[^'])*'", REG_EXTENDED|REG_NOSUB);
144}
145
/*
 * Grow pwe->we_wordv when it cannot be shown to hold more than `want`
 * pointers.  *have is the capacity (in pointers) recorded by the previous
 * call and is updated whenever the vector is reallocated; room for twice
 * `want` is requested so repeated appends do not reallocate every time.
 *
 * Returns 1 on success.  Returns 0 if the vector cannot be grown; in that
 * case the old vector has been released and pwe->we_wordv is NULL (callers
 * test we_wordv for exactly this).
 */
static int word_alloc(size_t want, wordexp_t *__restrict__ pwe, size_t *have) {
    if (want < *have) {
	return 1;
    }
    /* `want` can derive from a caller-supplied we_offs.  Refuse sizes whose
       byte count would wrap around: a wrapped product would yield a tiny
       vector that the caller then indexes far out of bounds. */
    if (want > ((size_t)-1) / (2 * sizeof(char *))) {
	free(pwe->we_wordv);
	pwe->we_wordv = NULL;
	return 0;
    }
    size_t bytes = malloc_good_size(sizeof(char *) * want * 2);
    pwe->we_wordv = reallocf(pwe->we_wordv, bytes);
    if (pwe->we_wordv) {
	*have = bytes / sizeof(char *);
	return 1;
    }
    return 0;
}
159
/*
 * One left-to-right pass over `str` that follows shell quoting and reports
 * (1 = yes, 0 = no) whether a command substitution opener is live: a
 * backtick, or "$(" followed by a character other than "(".
 *
 *  - A backslash quotes the next character.
 *  - Text inside a balanced '...' is inert.  A ' without a partner is
 *    treated as an ordinary character, so nothing after it is hidden.
 *  - Inside "...", and inside "$((" up to the next ")", single quotes are
 *    ordinary characters, so openers there still count.
 *  - When `ansi_c` is non-zero, $'...' is read as a string in which a
 *    backslash quotes the next character (so \' does not end it);
 *    otherwise the "$" is ordinary and the ' starts a plain quoted string.
 */
static int
cmd_scan(const char *str, int ansi_c) {
    int in_double = 0;
    int in_arith = 0;
    const char *p = str;

    while (*p != '\0') {
	char c = *p++;

	if (c == '\\') {
	    if (*p != '\0') {
		p++;
	    }
	    continue;
	}
	if (c == '`') {
	    return 1;
	}
	if (c == '$' && *p == '(') {
	    if (p[1] == '(') {
		/* "$((" -- arithmetic, not a command */
		in_arith = 1;
		p += 2;
	    } else if (p[1] != '\0') {
		return 1;
	    }
	    continue;
	}
	if (in_arith) {
	    if (c == ')') {
		in_arith = 0;
	    }
	    continue;
	}
	if (c == '"') {
	    in_double = !in_double;
	    continue;
	}
	if (in_double) {
	    continue;
	}
	if (ansi_c && c == '$' && *p == '\'') {
	    const char *q = p + 1;
	    while (*q != '\0' && *q != '\'') {
		if (*q == '\\' && q[1] != '\0') {
		    q++;
		}
		q++;
	    }
	    if (*q == '\'') {
		p = q + 1;
	    }
	    /* if unterminated, the ' is looked at again as a plain quote */
	    continue;
	}
	if (c == '\'') {
	    const char *close = strchr(p, '\'');
	    if (close != NULL) {
		p = close + 1;
	    }
	    continue;
	}
    }
    return 0;
}

/*
 * Look for command substitution in `str`.
 *
 * Returns 0 if a backtick or "$(" opener is found outside single quotes,
 * REG_NOMATCH if there is none (an empty string has none).  Callers treat
 * every value other than REG_NOMATCH as "substitution present".
 *
 * The string is scanned under both readings of $'...' (see cmd_scan) and
 * an opener found under either one counts.  Where the two readings
 * disagree about what is quoted, this errs on the side of reporting a
 * substitution, because a miss lets a command run that the caller asked
 * to forbid.
 */
static int
cmd_search(const char *str) {
    if (cmd_scan(str, 0) || cmd_scan(str, 1)) {
	return 0; /* found a command substitution */
    }
    return REG_NOMATCH;
}
190
191/* XXX this is _not_ designed to be fast */
192/* wordexp is also rife with security "challenges", unless you pass it
193  WRDE_NOCMD it *must* support subshell expansion, and even if you
194  don't beause it has to support so much of the standard shell (all
195  the odd little variable expansion options for example) it is hard
196  to do without a subshell).  It is probbably just plan a Bad Idea
197  to call in anything setuid, or executing remotely. */
198
199int wordexp(const char *__restrict__ words,
200  wordexp_t *__restrict__ pwe, int flags) {
201    /* cbuf_l's inital value needs to be big enough for 'cmd' plus
202      about 20 chars */
203    size_t cbuf_l = 1024;
204    char *cbuf = NULL;
205    /* Put a NUL byte between each word, and at the end */
206    char *cmd = "/usr/bin/perl -e 'print join(chr(0), @ARGV), chr(0)' -- ";
207    size_t wordv_l = 0, wordv_i = 0;
208    int rc;
209    wordexp_t save;
210
211    /* Some errors require us to leave pwe unchanged, so we save it here */
212    save = *pwe;
213    pthread_once(&re_init_c, re_init);
214
215    if (flags & WRDE_NOCMD) {
216	/* This attempts to match any backticks or $(...)'s, but there may be
217	  other ways to do subshell expansion that the standard doesn't
218	  cover, but I don't know of any -- failures here are a potential
219	  security risk */
220	rc = cmd_search(words);
221	if (rc != REG_NOMATCH) {
222	    /* Technically ==0 is WRDE_CMDSUB, and != REG_NOMATCH is
223	      "some internal error", but failing to catch those here
224	      could allow a subshell */
225	    return WRDE_CMDSUB;
226	}
227    }
228    rc = regexec(&re_goodchars, words, 0, NULL, 0);
229    if (rc != 0) {
230	/* Technically ==REG_NOMATCH is WRDE_BADCHAR, and != is
231	  some internal error", but again failure to notice the
232	  internal error could allow unexpected shell commands
233	  (allowing an unexcaped ;), or file clobbering (unescaped
234	  >) */
235	return WRDE_BADCHAR;
236    }
237
238    if (flags & WRDE_APPEND) {
239	wordv_i = wordv_l = pwe->we_wordc;
240	if (flags & WRDE_DOOFFS) {
241	    wordv_l = wordv_i += pwe->we_offs;
242	}
243    } else {
244	if (flags & WRDE_REUSE) {
245	    wordfree(pwe);
246	}
247	pwe->we_wordc = 0;
248	pwe->we_wordv = NULL;
249
250	if (flags & WRDE_DOOFFS) {
251	    size_t wend = wordv_i + pwe->we_offs;
252	    word_alloc(wend, pwe, &wordv_l);
253	    if (!pwe->we_wordv) {
254		return WRDE_NOSPACE;
255	    }
256	    bzero(pwe->we_wordv + wordv_i, pwe->we_offs * sizeof(char *));
257	    wordv_i = wend;
258	} else {
259	    pwe->we_offs = 0;
260	}
261    }
262
263    size_t need = 0;
264    while(!cbuf || need > cbuf_l) {
265	if (need > cbuf_l) {
266	    cbuf_l = malloc_good_size(need +1);
267	}
268	cbuf = reallocf(cbuf, cbuf_l);
269	if (cbuf == NULL) {
270	    wordfree(pwe);
271	    return WRDE_NOSPACE;
272	}
273	cbuf[0] = '\0';
274	if (flags & WRDE_UNDEF) {
275	    strlcat(cbuf, "set -u; ", cbuf_l);
276	}
277	/* This kludge is needed because /bin/sh seems to set IFS to the
278	  defualt even if you have set it;  We also can't just ignore it
279	  because it is hard/unplesent to code around or even a potential
280	  security problem because the test suiete explicitly checks
281	  to make sure setting IFS "works" */
282	if (getenv("IFS")) {
283	    setenv("_IFS", getenv("IFS"), 1);
284	    strlcat(cbuf, "export IFS=${_IFS}; ", cbuf_l);
285	}
286	strlcat(cbuf, cmd, cbuf_l);
287	need = strlcat(cbuf, words, cbuf_l);
288    }
289
290    FILE *out, *err;
291    pid_t pid = popen_oe(cbuf, &out, &err);
292    if (pid == 0) {
293	wordfree(pwe);
294	return WRDE_NOSPACE;
295    }
296
297    char *word = NULL;
298    size_t word_l = 0;
299    size_t word_i = 0;
300    int ch;
301
302    while(EOF != (ch = fgetc(out))) {
303	if (word_l <= word_i) {
304	    word_l = malloc_good_size(word_l * 2 + 1);
305	    word = reallocf(word, word_l);
306	    if (!word) {
307		fclose(err);
308		fclose(out);
309		wordfree(pwe);
310		return WRDE_NOSPACE;
311	    }
312	}
313	word[word_i++] = ch;
314
315	if (ch == '\0') {
316	    word_alloc(wordv_i + 1, pwe, &wordv_l);
317	    char *tmp = strdup(word);
318	    if (pwe->we_wordv == NULL || tmp == NULL) {
319		fclose(err);
320		fclose(out);
321		wordfree(pwe);
322		free(word);
323		free(tmp);
324		int status;
325		wait4(pid, &status, 0, NULL);
326		return WRDE_NOSPACE;
327	    }
328	    pwe->we_wordv[wordv_i++] = tmp;
329	    pwe->we_wordc++;
330	    word_i = 0;
331	}
332    }
333
334    assert(word_i == 0);
335    free(word);
336
337    char err_buf[1024];
338    size_t err_sz = fread(err_buf, 1, sizeof(err_buf) -1, err);
339    err_buf[err_sz] = '\0';
340    if (flags & WRDE_SHOWERR) {
341	fputs(err_buf, stderr);
342    }
343
344    pid_t got_pid = 0;
345    int status;
346    do {
347	got_pid = wait4(pid, &status, 0, NULL);
348    } while(got_pid == -1 && errno == EINTR);
349
350    fclose(out);
351    fclose(err);
352
353    /* the exit status isn't set for some command syntax errors */
354    if (regexec(&re_subcmd_syntax_err_kludge, err_buf, 0, NULL, 0) == 0
355      || got_pid == -1 || (WIFEXITED(status) && WEXITSTATUS(status))) {
356	if (!(flags & (WRDE_APPEND|WRDE_REUSE))) {
357	    /* Restore pwe if possiable, can't really do it in the append
358	      case, and isn't easy in the reuse case */
359	    *pwe = save;
360	}
361	if (strstr(err_buf, " unbound variable")) {
362	    return WRDE_BADVAL;
363	}
364	return WRDE_SYNTAX;
365    }
366
367    if (!word_alloc(wordv_i + 1, pwe, &wordv_l)) {
368	return WRDE_NOSPACE;
369    }
370    pwe->we_wordv[wordv_i] = NULL;
371
372    return 0;
373}
374
/*
 * Release what wordexp() stored in *pwe: the we_wordc strings that follow
 * the first we_offs slots of we_wordv, then the vector itself.  we_wordv is
 * set to NULL afterwards, so a second call is a no-op; we_wordc and
 * we_offs are left as they were.  A NULL pwe or NULL we_wordv is ignored.
 */
void wordfree(wordexp_t *pwe) {
    if (pwe == NULL || pwe->we_wordv == NULL) {
	return;
    }

    /* size_t, like the fields themselves: an int would narrow counts
       that do not fit in one. */
    size_t end = pwe->we_offs + pwe->we_wordc;
    for (size_t i = pwe->we_offs; i < end; i++) {
	free(pwe->we_wordv[i]);
    }
    free(pwe->we_wordv);
    pwe->we_wordv = NULL;
}
387
388#endif /* TARGET_OS_IPHONE */
389