kern_jail.c revision 147185
1/*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 */
9
10#include <sys/cdefs.h>
11__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 147185 2005-06-09 18:49:19Z pjd $");
12
13#include "opt_mac.h"
14
15#include <sys/param.h>
16#include <sys/types.h>
17#include <sys/kernel.h>
18#include <sys/systm.h>
19#include <sys/errno.h>
20#include <sys/sysproto.h>
21#include <sys/mac.h>
22#include <sys/malloc.h>
23#include <sys/proc.h>
24#include <sys/taskqueue.h>
25#include <sys/jail.h>
26#include <sys/lock.h>
27#include <sys/mutex.h>
28#include <sys/namei.h>
29#include <sys/mount.h>
30#include <sys/queue.h>
31#include <sys/socket.h>
32#include <sys/syscallsubr.h>
33#include <sys/sysctl.h>
34#include <sys/vnode.h>
35#include <net/if.h>
36#include <netinet/in.h>
37
38MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
39
40SYSCTL_DECL(_security);
41SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
42    "Jail rules");
43
44int	jail_set_hostname_allowed = 1;
45SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
46    &jail_set_hostname_allowed, 0,
47    "Processes in jail can set their hostnames");
48
49int	jail_socket_unixiproute_only = 1;
50SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
51    &jail_socket_unixiproute_only, 0,
52    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
53
54int	jail_sysvipc_allowed = 0;
55SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
56    &jail_sysvipc_allowed, 0,
57    "Processes in jail can use System V IPC primitives");
58
59static int jail_enforce_statfs = 2;
60SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
61    &jail_enforce_statfs, 0,
62    "Processes in jail cannot see all mounted file systems");
63
64int	jail_allow_raw_sockets = 0;
65SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
66    &jail_allow_raw_sockets, 0,
67    "Prison root can create raw sockets");
68
69int	jail_chflags_allowed = 0;
70SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
71    &jail_chflags_allowed, 0,
72    "Processes in jail can alter system file flags");
73
74/* allprison, lastprid, and prisoncount are protected by allprison_mtx. */
75struct	prisonlist allprison;
76struct	mtx allprison_mtx;
77int	lastprid = 0;
78int	prisoncount = 0;
79
80static void		 init_prison(void *);
81static void		 prison_complete(void *context, int pending);
82static struct prison	*prison_find(int);
83static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
84
85static void
86init_prison(void *data __unused)
87{
88
89	mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF);
90	LIST_INIT(&allprison);
91}
92
93SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
94
95/*
96 * MPSAFE
97 *
98 * struct jail_args {
99 *	struct jail *jail;
100 * };
101 */
102int
103jail(struct thread *td, struct jail_args *uap)
104{
105	struct nameidata nd;
106	struct prison *pr, *tpr;
107	struct jail j;
108	struct jail_attach_args jaa;
109	int error, tryprid;
110
111	error = copyin(uap->jail, &j, sizeof(j));
112	if (error)
113		return (error);
114	if (j.version != 0)
115		return (EINVAL);
116
117	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
118	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
119	pr->pr_ref = 1;
120	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
121	if (error)
122		goto e_killmtx;
123	mtx_lock(&Giant);
124	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, pr->pr_path, td);
125	error = namei(&nd);
126	if (error) {
127		mtx_unlock(&Giant);
128		goto e_killmtx;
129	}
130	pr->pr_root = nd.ni_vp;
131	VOP_UNLOCK(nd.ni_vp, 0, td);
132	NDFREE(&nd, NDF_ONLY_PNBUF);
133	mtx_unlock(&Giant);
134	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
135	if (error)
136		goto e_dropvnref;
137	pr->pr_ip = j.ip_number;
138	pr->pr_linux = NULL;
139	pr->pr_securelevel = securelevel;
140
141	/* Determine next pr_id and add prison to allprison list. */
142	mtx_lock(&allprison_mtx);
143	tryprid = lastprid + 1;
144	if (tryprid == JAIL_MAX)
145		tryprid = 1;
146next:
147	LIST_FOREACH(tpr, &allprison, pr_list) {
148		if (tpr->pr_id == tryprid) {
149			tryprid++;
150			if (tryprid == JAIL_MAX) {
151				mtx_unlock(&allprison_mtx);
152				error = EAGAIN;
153				goto e_dropvnref;
154			}
155			goto next;
156		}
157	}
158	pr->pr_id = jaa.jid = lastprid = tryprid;
159	LIST_INSERT_HEAD(&allprison, pr, pr_list);
160	prisoncount++;
161	mtx_unlock(&allprison_mtx);
162
163	error = jail_attach(td, &jaa);
164	if (error)
165		goto e_dropprref;
166	mtx_lock(&pr->pr_mtx);
167	pr->pr_ref--;
168	mtx_unlock(&pr->pr_mtx);
169	td->td_retval[0] = jaa.jid;
170	return (0);
171e_dropprref:
172	mtx_lock(&allprison_mtx);
173	LIST_REMOVE(pr, pr_list);
174	prisoncount--;
175	mtx_unlock(&allprison_mtx);
176e_dropvnref:
177	mtx_lock(&Giant);
178	vrele(pr->pr_root);
179	mtx_unlock(&Giant);
180e_killmtx:
181	mtx_destroy(&pr->pr_mtx);
182	FREE(pr, M_PRISON);
183	return (error);
184}
185
186/*
187 * MPSAFE
188 *
189 * struct jail_attach_args {
190 *	int jid;
191 * };
192 */
193int
194jail_attach(struct thread *td, struct jail_attach_args *uap)
195{
196	struct proc *p;
197	struct ucred *newcred, *oldcred;
198	struct prison *pr;
199	int error;
200
201	/*
202	 * XXX: Note that there is a slight race here if two threads
203	 * in the same privileged process attempt to attach to two
204	 * different jails at the same time.  It is important for
205	 * user processes not to do this, or they might end up with
206	 * a process root from one prison, but attached to the jail
207	 * of another.
208	 */
209	error = suser(td);
210	if (error)
211		return (error);
212
213	p = td->td_proc;
214	mtx_lock(&allprison_mtx);
215	pr = prison_find(uap->jid);
216	if (pr == NULL) {
217		mtx_unlock(&allprison_mtx);
218		return (EINVAL);
219	}
220	pr->pr_ref++;
221	mtx_unlock(&pr->pr_mtx);
222	mtx_unlock(&allprison_mtx);
223
224	mtx_lock(&Giant);
225	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
226	if ((error = change_dir(pr->pr_root, td)) != 0)
227		goto e_unlock;
228#ifdef MAC
229	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
230		goto e_unlock;
231#endif
232	VOP_UNLOCK(pr->pr_root, 0, td);
233	change_root(pr->pr_root, td);
234	mtx_unlock(&Giant);
235
236	newcred = crget();
237	PROC_LOCK(p);
238	oldcred = p->p_ucred;
239	setsugid(p);
240	crcopy(newcred, oldcred);
241	newcred->cr_prison = pr;
242	p->p_ucred = newcred;
243	PROC_UNLOCK(p);
244	crfree(oldcred);
245	return (0);
246e_unlock:
247	VOP_UNLOCK(pr->pr_root, 0, td);
248	mtx_unlock(&Giant);
249	mtx_lock(&pr->pr_mtx);
250	pr->pr_ref--;
251	mtx_unlock(&pr->pr_mtx);
252	return (error);
253}
254
255/*
256 * Returns a locked prison instance, or NULL on failure.
257 */
258static struct prison *
259prison_find(int prid)
260{
261	struct prison *pr;
262
263	mtx_assert(&allprison_mtx, MA_OWNED);
264	LIST_FOREACH(pr, &allprison, pr_list) {
265		if (pr->pr_id == prid) {
266			mtx_lock(&pr->pr_mtx);
267			return (pr);
268		}
269	}
270	return (NULL);
271}
272
273void
274prison_free(struct prison *pr)
275{
276
277	mtx_lock(&allprison_mtx);
278	mtx_lock(&pr->pr_mtx);
279	pr->pr_ref--;
280	if (pr->pr_ref == 0) {
281		LIST_REMOVE(pr, pr_list);
282		mtx_unlock(&pr->pr_mtx);
283		prisoncount--;
284		mtx_unlock(&allprison_mtx);
285
286		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
287		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
288		return;
289	}
290	mtx_unlock(&pr->pr_mtx);
291	mtx_unlock(&allprison_mtx);
292}
293
294static void
295prison_complete(void *context, int pending)
296{
297	struct prison *pr;
298
299	pr = (struct prison *)context;
300
301	mtx_lock(&Giant);
302	vrele(pr->pr_root);
303	mtx_unlock(&Giant);
304
305	mtx_destroy(&pr->pr_mtx);
306	if (pr->pr_linux != NULL)
307		FREE(pr->pr_linux, M_PRISON);
308	FREE(pr, M_PRISON);
309}
310
311void
312prison_hold(struct prison *pr)
313{
314
315	mtx_lock(&pr->pr_mtx);
316	pr->pr_ref++;
317	mtx_unlock(&pr->pr_mtx);
318}
319
320u_int32_t
321prison_getip(struct ucred *cred)
322{
323
324	return (cred->cr_prison->pr_ip);
325}
326
327int
328prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
329{
330	u_int32_t tmp;
331
332	if (!jailed(cred))
333		return (0);
334	if (flag)
335		tmp = *ip;
336	else
337		tmp = ntohl(*ip);
338	if (tmp == INADDR_ANY) {
339		if (flag)
340			*ip = cred->cr_prison->pr_ip;
341		else
342			*ip = htonl(cred->cr_prison->pr_ip);
343		return (0);
344	}
345	if (tmp == INADDR_LOOPBACK) {
346		if (flag)
347			*ip = cred->cr_prison->pr_ip;
348		else
349			*ip = htonl(cred->cr_prison->pr_ip);
350		return (0);
351	}
352	if (cred->cr_prison->pr_ip != tmp)
353		return (1);
354	return (0);
355}
356
357void
358prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
359{
360	u_int32_t tmp;
361
362	if (!jailed(cred))
363		return;
364	if (flag)
365		tmp = *ip;
366	else
367		tmp = ntohl(*ip);
368	if (tmp == INADDR_LOOPBACK) {
369		if (flag)
370			*ip = cred->cr_prison->pr_ip;
371		else
372			*ip = htonl(cred->cr_prison->pr_ip);
373		return;
374	}
375	return;
376}
377
378int
379prison_if(struct ucred *cred, struct sockaddr *sa)
380{
381	struct sockaddr_in *sai;
382	int ok;
383
384	sai = (struct sockaddr_in *)sa;
385	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
386		ok = 1;
387	else if (sai->sin_family != AF_INET)
388		ok = 0;
389	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
390		ok = 1;
391	else
392		ok = 0;
393	return (ok);
394}
395
396/*
397 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
398 */
399int
400prison_check(struct ucred *cred1, struct ucred *cred2)
401{
402
403	if (jailed(cred1)) {
404		if (!jailed(cred2))
405			return (ESRCH);
406		if (cred2->cr_prison != cred1->cr_prison)
407			return (ESRCH);
408	}
409
410	return (0);
411}
412
413/*
414 * Return 1 if the passed credential is in a jail, otherwise 0.
415 */
416int
417jailed(struct ucred *cred)
418{
419
420	return (cred->cr_prison != NULL);
421}
422
423/*
424 * Return the correct hostname for the passed credential.
425 */
426void
427getcredhostname(struct ucred *cred, char *buf, size_t size)
428{
429
430	if (jailed(cred)) {
431		mtx_lock(&cred->cr_prison->pr_mtx);
432		strlcpy(buf, cred->cr_prison->pr_host, size);
433		mtx_unlock(&cred->cr_prison->pr_mtx);
434	} else
435		strlcpy(buf, hostname, size);
436}
437
438/*
439 * Determine whether the subject represented by cred can "see"
440 * status of a mount point.
441 * Returns: 0 for permitted, ENOENT otherwise.
442 * XXX: This function should be called cr_canseemount() and should be
443 *      placed in kern_prot.c.
444 */
445int
446prison_canseemount(struct ucred *cred, struct mount *mp)
447{
448	struct prison *pr;
449	struct statfs *sp;
450	size_t len;
451
452	if (!jailed(cred) || jail_enforce_statfs == 0)
453		return (0);
454	pr = cred->cr_prison;
455	if (pr->pr_root->v_mount == mp)
456		return (0);
457	if (jail_enforce_statfs == 2)
458		return (ENOENT);
459	/*
460	 * If jail's chroot directory is set to "/" we should be able to see
461	 * all mount-points from inside a jail.
462	 * This is ugly check, but this is the only situation when jail's
463	 * directory ends with '/'.
464	 */
465	if (strcmp(pr->pr_path, "/") == 0)
466		return (0);
467	len = strlen(pr->pr_path);
468	sp = &mp->mnt_stat;
469	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
470		return (ENOENT);
471	/*
472	 * Be sure that we don't have situation where jail's root directory
473	 * is "/some/path" and mount point is "/some/pathpath".
474	 */
475	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
476		return (ENOENT);
477	return (0);
478}
479
480void
481prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
482{
483	char jpath[MAXPATHLEN];
484	struct prison *pr;
485	size_t len;
486
487	if (!jailed(cred) || jail_enforce_statfs == 0)
488		return;
489	pr = cred->cr_prison;
490	if (prison_canseemount(cred, mp) != 0) {
491		/* Should never happen. */
492		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
493		strlcpy(sp->f_mntonname, "[restricted]",
494		    sizeof(sp->f_mntonname));
495		return;
496	}
497	if (pr->pr_root->v_mount == mp) {
498		/*
499		 * Clear current buffer data, so we are sure nothing from
500		 * the valid path left there.
501		 */
502		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
503		*sp->f_mntonname = '/';
504		return;
505	}
506	/*
507	 * If jail's chroot directory is set to "/" we should be able to see
508	 * all mount-points from inside a jail.
509	 */
510	if (strcmp(pr->pr_path, "/") == 0)
511		return;
512	len = strlen(pr->pr_path);
513	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
514	/*
515	 * Clear current buffer data, so we are sure nothing from
516	 * the valid path left there.
517	 */
518	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
519	if (*jpath == '\0') {
520		/* Should never happen. */
521		*sp->f_mntonname = '/';
522	} else {
523		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
524	}
525}
526
527static int
528sysctl_jail_list(SYSCTL_HANDLER_ARGS)
529{
530	struct xprison *xp, *sxp;
531	struct prison *pr;
532	int count, error;
533
534	mtx_assert(&Giant, MA_OWNED);
535	if (jailed(req->td->td_ucred))
536		return (0);
537retry:
538	mtx_lock(&allprison_mtx);
539	count = prisoncount;
540	mtx_unlock(&allprison_mtx);
541
542	if (count == 0)
543		return (0);
544
545	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
546	mtx_lock(&allprison_mtx);
547	if (count != prisoncount) {
548		mtx_unlock(&allprison_mtx);
549		free(sxp, M_TEMP);
550		goto retry;
551	}
552
553	LIST_FOREACH(pr, &allprison, pr_list) {
554		mtx_lock(&pr->pr_mtx);
555		xp->pr_version = XPRISON_VERSION;
556		xp->pr_id = pr->pr_id;
557		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
558		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
559		xp->pr_ip = pr->pr_ip;
560		mtx_unlock(&pr->pr_mtx);
561		xp++;
562	}
563	mtx_unlock(&allprison_mtx);
564
565	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
566	free(sxp, M_TEMP);
567	if (error)
568		return (error);
569	return (0);
570}
571
572SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
573    NULL, 0, sysctl_jail_list, "S", "List of active jails");
574
575static int
576sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
577{
578	int error, injail;
579
580	injail = jailed(req->td->td_ucred);
581	error = SYSCTL_OUT(req, &injail, sizeof(injail));
582
583	return (error);
584}
585SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
586    NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
587