kern_jail.c revision 186615
1/*-
2 * Copyright (c) 1999 Poul-Henning Kamp.
3 * Copyright (c) 2008 Bjoern A. Zeeb.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 186615 2008-12-30 20:39:47Z pho $");
30
31#include "opt_ddb.h"
32#include "opt_inet.h"
33#include "opt_inet6.h"
34#include "opt_mac.h"
35
36#include <sys/param.h>
37#include <sys/types.h>
38#include <sys/kernel.h>
39#include <sys/systm.h>
40#include <sys/errno.h>
41#include <sys/sysproto.h>
42#include <sys/malloc.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/taskqueue.h>
46#include <sys/fcntl.h>
47#include <sys/jail.h>
48#include <sys/lock.h>
49#include <sys/mutex.h>
50#include <sys/sx.h>
51#include <sys/namei.h>
52#include <sys/mount.h>
53#include <sys/queue.h>
54#include <sys/socket.h>
55#include <sys/syscallsubr.h>
56#include <sys/sysctl.h>
57#include <sys/vnode.h>
58#include <sys/vimage.h>
59#include <sys/osd.h>
60#include <net/if.h>
61#include <netinet/in.h>
62#ifdef DDB
63#include <ddb/ddb.h>
64#ifdef INET6
65#include <netinet6/in6_var.h>
66#endif /* INET6 */
67#endif /* DDB */
68
69#include <security/mac/mac_framework.h>
70
/* Malloc type used for the M_PRISON allocations made throughout this file. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Parent node "jail" below _security; every knob below is attached to it. */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jail rules");

/* Read-write knob, enabled (1) by default. */
int	jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

/*
 * Enabled (1) by default.  Consulted by prison_if() for address families
 * other than AF_INET/AF_INET6.
 */
int	jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IP/route sockets only");

/* Disabled (0) by default. */
int	jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

/*
 * Defaults to 2.  As used by prison_canseemount() and
 * prison_enforce_statfs(): 0 disables the checks, 2 hides every mount except
 * the one holding the jail's root, and any other value exposes only mounts
 * whose mount point lies below the jail's path.
 */
static int jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

/* Disabled (0) by default. */
int	jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

/* Disabled (0) by default. */
int	jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

/* Disabled (0) by default. */
int	jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");

/*
 * Defaults to 255.  jail_handle_ips() rejects version 2 requests whose
 * ip4s/ip6s count exceeds this value.
 */
int	jail_max_af_ips = 255;
SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
    &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family");
115
/*
 * Global jail bookkeeping.
 *
 * allprison, lastprid, and prisoncount are protected by allprison_lock.
 * allprison is the list of all prisons, lastprid is the ID most recently
 * handed out by kern_jail(), and prisoncount is the number of list entries.
 */
struct	prisonlist allprison;
struct	sx allprison_lock;
int	lastprid = 0;
int	prisoncount = 0;

/* Forward declarations of file-local helpers. */
static void		 init_prison(void *);
static void		 prison_complete(void *context, int pending);
static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
#ifdef INET
static int		_prison_check_ip4(struct prison *, struct in_addr *);
#endif
#ifdef INET6
static int		_prison_check_ip6(struct prison *, struct in6_addr *);
#endif
131
132static void
133init_prison(void *data __unused)
134{
135
136	sx_init(&allprison_lock, "allprison");
137	LIST_INIT(&allprison);
138}
139
140SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
141
142#ifdef INET
/*
 * qsort(3)-style comparator for two IPv4 addresses stored in network byte
 * order.
 *
 * The values are converted to host byte order before being compared so that
 * the resulting order is the numeric one; ordering the raw network byte
 * order values would give a different result on little-endian machines.
 *
 * Returns -1, 0 or 1.  The result is built from two comparisons rather than
 * a subtraction because the difference of two 32 bit unsigned values does
 * not fit into an int.
 */
static int
qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs = ip1;
	const struct in_addr *rhs = ip2;
	in_addr_t left, right;

	left = ntohl(lhs->s_addr);
	right = ntohl(rhs->s_addr);

	return ((left > right) - (left < right));
}
167#endif
168
169#ifdef INET6
/*
 * qsort(3)-style comparator for two IPv6 addresses.
 *
 * The addresses are compared byte by byte starting at s6_addr[0]; the first
 * differing byte decides.  Returns -1, 0 or 1.
 */
static int
qcmp_v6(const void *ip1, const void *ip2)
{
	const struct in6_addr *lhs = ip1;
	const struct in6_addr *rhs = ip2;
	size_t pos;

	for (pos = 0; pos < sizeof(struct in6_addr); pos++) {
		if (lhs->s6_addr[pos] == rhs->s6_addr[pos])
			continue;
		return (lhs->s6_addr[pos] > rhs->s6_addr[pos] ? 1 : -1);
	}
	return (0);
}
188#endif
189
190#if defined(INET) || defined(INET6)
191static int
192prison_check_conflicting_ips(struct prison *p)
193{
194	struct prison *pr;
195	int i;
196
197	sx_assert(&allprison_lock, SX_LOCKED);
198
199	if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
200		return (0);
201
202	LIST_FOREACH(pr, &allprison, pr_list) {
203		/*
204		 * Skip 'dying' prisons to avoid problems when
205		 * restarting multi-IP jails.
206		 */
207		if (pr->pr_state == PRISON_STATE_DYING)
208			continue;
209
210		/*
211		 * We permit conflicting IPs if there is no
212		 * more than 1 IP on eeach jail.
213		 * In case there is one duplicate on a jail with
214		 * more than one IP stop checking and return error.
215		 */
216#ifdef INET
217		if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
218		    (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
219			for (i = 0; i < p->pr_ip4s; i++) {
220				if (_prison_check_ip4(pr, &p->pr_ip4[i]))
221					return (EINVAL);
222			}
223		}
224#endif
225#ifdef INET6
226		if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
227		    (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
228			for (i = 0; i < p->pr_ip6s; i++) {
229				if (_prison_check_ip6(pr, &p->pr_ip6[i]))
230					return (EINVAL);
231			}
232		}
233#endif
234	}
235
236	return (0);
237}
238
/*
 * Copy the IPv4/IPv6 address arrays referenced by a struct jail in from
 * userland into newly allocated M_PRISON buffers and validate them.
 *
 * On success 0 is returned and j->ip4 / j->ip6 point to the kernel copies,
 * or are NULL when the corresponding count is zero.  On failure the buffers
 * allocated here are freed, j->ip4 / j->ip6 are set to NULL and either the
 * copyin() error or EINVAL is returned.
 */
static int
jail_copyin_ips(struct jail *j)
{
#ifdef INET
	struct in_addr  *ip4;
#endif
#ifdef INET6
	struct in6_addr *ip6;
#endif
	int error, i;

	/*
	 * Copy in addresses, check for duplicate addresses and do some
	 * simple 0 and broadcast checks. If users give other bogus addresses
	 * it is their problem.
	 *
	 * IP addresses are all sorted but ip[0] to preserve the primary IP
	 * address as given from userland.  This special IP is used for
	 * unbound outgoing connections as well for "loopback" traffic.
	 */
	/* Start out with NULL so the shared error path may free both. */
#ifdef INET
	ip4 = NULL;
#endif
#ifdef INET6
	ip6 = NULL;
#endif
#ifdef INET
	if (j->ip4s > 0) {
		ip4 = (struct in_addr *)malloc(j->ip4s * sizeof(struct in_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv4 address. */
		if (j->ip4s > 1)
			qsort((ip4 + 1), j->ip4s - 1,
			    sizeof(struct in_addr), qcmp_v4);

		/*
		 * We do not have to care about byte order for these checks
		 * so we will do them in NBO.
		 */
		for (i=0; i<j->ip4s; i++) {
			if (ip4[i].s_addr == htonl(INADDR_ANY) ||
			    ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
				error = EINVAL;
				goto e_free_ip;
			}
			/*
			 * Duplicates: the next entry is compared with the
			 * (unsorted) primary address and with its sorted
			 * predecessor.
			 */
			if ((i+1) < j->ip4s &&
			    (ip4[0].s_addr == ip4[i+1].s_addr ||
			    ip4[i].s_addr == ip4[i+1].s_addr)) {
				error = EINVAL;
				goto e_free_ip;
			}
		}

		/* Replace the userland pointer with the kernel copy. */
		j->ip4 = ip4;
	} else
		j->ip4 = NULL;
#endif
#ifdef INET6
	if (j->ip6s > 0) {
		ip6 = (struct in6_addr *)malloc(j->ip6s * sizeof(struct in6_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv6 address. */
		if (j->ip6s > 1)
			qsort((ip6 + 1), j->ip6s - 1,
			    sizeof(struct in6_addr), qcmp_v6);
		for (i=0; i<j->ip6s; i++) {
			/* The unspecified address is not acceptable. */
			if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
				error = EINVAL;
				goto e_free_ip;
			}
			/* Same duplicate check as for IPv4 above. */
			if ((i+1) < j->ip6s &&
			    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i+1]) ||
			    IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i+1]))) {
				error = EINVAL;
				goto e_free_ip;
			}
		}

		/* Replace the userland pointer with the kernel copy. */
		j->ip6 = ip6;
	} else
		j->ip6 = NULL;
#endif
	return (0);

e_free_ip:
	/*
	 * Shared error exit for both families: release whatever has been
	 * allocated so far and leave no pointers behind in j.
	 */
#ifdef INET6
	free(ip6, M_PRISON);
	j->ip6 = NULL;
#endif
#ifdef INET
	free(ip4, M_PRISON);
	j->ip4 = NULL;
#endif
	return (error);
}
340#endif /* INET || INET6 */
341
342static int
343jail_handle_ips(struct jail *j)
344{
345#if defined(INET) || defined(INET6)
346	int error;
347#endif
348
349	/*
350	 * Finish conversion for older versions, copyin and setup IPs.
351	 */
352	switch (j->version) {
353	case 0:
354	{
355#ifdef INET
356		/* FreeBSD single IPv4 jails. */
357		struct in_addr *ip4;
358
359		if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
360			return (EINVAL);
361		ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
362		    M_PRISON, M_WAITOK | M_ZERO);
363
364		/*
365		 * Jail version 0 still used HBO for the IPv4 address.
366		 */
367		ip4->s_addr = htonl(j->ip4s);
368		j->ip4s = 1;
369		j->ip4 = ip4;
370		break;
371#else
372		return (EINVAL);
373#endif
374	}
375
376	case 1:
377		/*
378		 * Version 1 was used by multi-IPv4 jail implementations
379		 * that never made it into the official kernel.
380		 * We should never hit this here; jail() should catch it.
381		 */
382		return (EINVAL);
383
384	case 2:	/* JAIL_API_VERSION */
385		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
386#if defined(INET) || defined(INET6)
387#ifdef INET
388		if (j->ip4s > jail_max_af_ips)
389			return (EINVAL);
390#else
391		if (j->ip4s != 0)
392			return (EINVAL);
393#endif
394#ifdef INET6
395		if (j->ip6s > jail_max_af_ips)
396			return (EINVAL);
397#else
398		if (j->ip6s != 0)
399			return (EINVAL);
400#endif
401		error = jail_copyin_ips(j);
402		if (error)
403			return (error);
404#endif
405		break;
406
407	default:
408		/* Sci-Fi jails are not supported, sorry. */
409		return (EINVAL);
410	}
411
412	return (0);
413}
414
415
416/*
417 * struct jail_args {
418 *	struct jail *jail;
419 * };
420 */
421int
422jail(struct thread *td, struct jail_args *uap)
423{
424	uint32_t version;
425	int error;
426	struct jail j;
427
428	error = copyin(uap->jail, &version, sizeof(uint32_t));
429	if (error)
430		return (error);
431
432	switch (version) {
433	case 0:
434		/* FreeBSD single IPv4 jails. */
435	{
436		struct jail_v0 j0;
437
438		bzero(&j, sizeof(struct jail));
439		error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
440		if (error)
441			return (error);
442		j.version = j0.version;
443		j.path = j0.path;
444		j.hostname = j0.hostname;
445		j.ip4s = j0.ip_number;
446		break;
447	}
448
449	case 1:
450		/*
451		 * Version 1 was used by multi-IPv4 jail implementations
452		 * that never made it into the official kernel.
453		 */
454		return (EINVAL);
455
456	case 2:	/* JAIL_API_VERSION */
457		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
458		error = copyin(uap->jail, &j, sizeof(struct jail));
459		if (error)
460			return (error);
461		break;
462
463	default:
464		/* Sci-Fi jails are not supported, sorry. */
465		return (EINVAL);
466	}
467	return (kern_jail(td, &j));
468}
469
/*
 * Create a new prison from an in-kernel struct jail and attach the calling
 * process to it.
 *
 * j->path, j->hostname and j->jailname are still userland pointers and are
 * copied in here; the address information is prepared by jail_handle_ips().
 * On success the new jail ID is stored in td->td_retval[0] and 0 is
 * returned.  On failure everything that has been set up so far is undone
 * through the e_* labels at the end (in reverse order of the setup) and an
 * errno value is returned.
 */
int
kern_jail(struct thread *td, struct jail *j)
{
	struct nameidata nd;
	struct prison *pr, *tpr;
	struct jail_attach_args jaa;
	int vfslocked, error, tryprid;

	KASSERT(j != NULL, ("%s: j is NULL", __func__));

	/* Handle addresses - convert old structs, copyin, check IPs. */
	error = jail_handle_ips(j);
	if (error)
		return (error);

	/* Allocate struct prison and fill it with life. */
	pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
	/* Creation reference; given up again once jail_attach() succeeded. */
	pr->pr_ref = 1;
	error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
	if (error)
		goto e_killmtx;
	/* Look up the jail's root directory; the path is in kernel space now. */
	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    pr->pr_path, td);
	error = namei(&nd);
	if (error)
		goto e_killmtx;
	vfslocked = NDHASGIANT(&nd);
	/* Remember the vnode as the jail's root, but drop its lock. */
	pr->pr_root = nd.ni_vp;
	VOP_UNLOCK(nd.ni_vp, 0);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
	if (error)
		goto e_dropvnref;
	/* The jail name is optional. */
	if (j->jailname != NULL) {
		error = copyinstr(j->jailname, &pr->pr_name,
		    sizeof(pr->pr_name), NULL);
		if (error)
			goto e_dropvnref;
	}
	/*
	 * The prison shares the address arrays with j from here on; on
	 * failure they are freed through j at e_killmtx.
	 */
	if (j->ip4s > 0) {
		pr->pr_ip4 = j->ip4;
		pr->pr_ip4s = j->ip4s;
	}
#ifdef INET6
	if (j->ip6s > 0) {
		pr->pr_ip6 = j->ip6;
		pr->pr_ip6s = j->ip6s;
	}
#endif
	pr->pr_linux = NULL;
	pr->pr_securelevel = securelevel;
	bzero(&pr->pr_osd, sizeof(pr->pr_osd));

	/*
	 * Pre-set prison state to ALIVE upon creation.  This is needed so we
	 * can later attach the process to it, etc (avoiding another extra
	 * state for the process of creation, complicating things).
	 */
	pr->pr_state = PRISON_STATE_ALIVE;

	/* Allocate a dedicated cpuset for each jail. */
	error = cpuset_create_root(td, &pr->pr_cpuset);
	if (error)
		goto e_dropvnref;

	sx_xlock(&allprison_lock);
	/* Make sure we cannot run into problems with ambiguous bind()ings. */
#if defined(INET) || defined(INET6)
	error = prison_check_conflicting_ips(pr);
	if (error) {
		sx_xunlock(&allprison_lock);
		goto e_dropcpuset;
	}
#endif

	/* Determine next pr_id and add prison to allprison list. */
	tryprid = lastprid + 1;
	if (tryprid == JAIL_MAX)
		tryprid = 1;
next:
	/*
	 * Whenever the candidate ID is already taken, try the next higher
	 * one and rescan the list from its head.  Reaching JAIL_MAX on the
	 * way fails the request with EAGAIN.
	 */
	LIST_FOREACH(tpr, &allprison, pr_list) {
		if (tpr->pr_id == tryprid) {
			tryprid++;
			if (tryprid == JAIL_MAX) {
				sx_xunlock(&allprison_lock);
				error = EAGAIN;
				goto e_dropcpuset;
			}
			goto next;
		}
	}
	pr->pr_id = jaa.jid = lastprid = tryprid;
	LIST_INSERT_HEAD(&allprison, pr, pr_list);
	prisoncount++;
	sx_xunlock(&allprison_lock);

	/* Move the calling process into the jail that was just created. */
	error = jail_attach(td, &jaa);
	if (error)
		goto e_dropprref;
	/* jail_attach() took a reference of its own; drop the creation one. */
	mtx_lock(&pr->pr_mtx);
	pr->pr_ref--;
	mtx_unlock(&pr->pr_mtx);
	td->td_retval[0] = jaa.jid;
	return (0);
	/* Error unwinding; each label falls through into the next one. */
e_dropprref:
	sx_xlock(&allprison_lock);
	LIST_REMOVE(pr, pr_list);
	prisoncount--;
	sx_xunlock(&allprison_lock);
e_dropcpuset:
	cpuset_rel(pr->pr_cpuset);
e_dropvnref:
	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
	vrele(pr->pr_root);
	VFS_UNLOCK_GIANT(vfslocked);
e_killmtx:
	mtx_destroy(&pr->pr_mtx);
	free(pr, M_PRISON);
	/* The address arrays set up by jail_handle_ips() go away as well. */
#ifdef INET6
	free(j->ip6, M_PRISON);
#endif
#ifdef INET
	free(j->ip4, M_PRISON);
#endif
	return (error);
}
598
/*
 * Attach the calling process to the existing jail with ID uap->jid.
 *
 * struct jail_attach_args {
 *	int jid;
 * };
 *
 * Requires PRIV_JAIL_ATTACH.  The process is moved into the jail's cpuset,
 * its current and root directory are changed to the jail's root vnode and
 * it receives a new credential that points to the prison.  Returns 0 on
 * success, EINVAL when the jail does not exist or is not alive, or the
 * error of the failing step.
 */
int
jail_attach(struct thread *td, struct jail_attach_args *uap)
{
	struct proc *p;
	struct ucred *newcred, *oldcred;
	struct prison *pr;
	int vfslocked, error;

	/*
	 * XXX: Note that there is a slight race here if two threads
	 * in the same privileged process attempt to attach to two
	 * different jails at the same time.  It is important for
	 * user processes not to do this, or they might end up with
	 * a process root from one prison, but attached to the jail
	 * of another.
	 */
	error = priv_check(td, PRIV_JAIL_ATTACH);
	if (error)
		return (error);

	p = td->td_proc;
	sx_slock(&allprison_lock);
	/* On success prison_find() returns with pr->pr_mtx held. */
	pr = prison_find(uap->jid);
	if (pr == NULL) {
		sx_sunlock(&allprison_lock);
		return (EINVAL);
	}

	/*
	 * Do not allow a process to attach to a prison that is not
	 * considered to be "ALIVE".
	 */
	if (pr->pr_state != PRISON_STATE_ALIVE) {
		mtx_unlock(&pr->pr_mtx);
		sx_sunlock(&allprison_lock);
		return (EINVAL);
	}
	/* Take a reference before both locks are given up. */
	pr->pr_ref++;
	mtx_unlock(&pr->pr_mtx);
	sx_sunlock(&allprison_lock);

	/*
	 * Reparent the newly attached process to this jail.
	 */
	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
	if (error)
		goto e_unref;

	/* The root vnode stays locked across the directory and MAC checks. */
	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
	if ((error = change_dir(pr->pr_root, td)) != 0)
		goto e_unlock;
#ifdef MAC
	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
		goto e_unlock;
#endif
	VOP_UNLOCK(pr->pr_root, 0);
	change_root(pr->pr_root, td);
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * Give the process a copy of its credential that points to the
	 * prison.  The reference taken above is not dropped on this path.
	 * NOTE(review): presumably it is released together with the
	 * credential -- confirm in the ucred code.
	 */
	newcred = crget();
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	setsugid(p);
	crcopy(newcred, oldcred);
	newcred->cr_prison = pr;
	p->p_ucred = newcred;
	/* Account for the new process in pr_nprocs. */
	prison_proc_hold(pr);
	PROC_UNLOCK(p);
	crfree(oldcred);
	return (0);
e_unlock:
	VOP_UNLOCK(pr->pr_root, 0);
	VFS_UNLOCK_GIANT(vfslocked);
e_unref:
	/* Give the reference taken above back. */
	mtx_lock(&pr->pr_mtx);
	pr->pr_ref--;
	mtx_unlock(&pr->pr_mtx);
	return (error);
}
684
685/*
686 * Returns a locked prison instance, or NULL on failure.
687 */
688struct prison *
689prison_find(int prid)
690{
691	struct prison *pr;
692
693	sx_assert(&allprison_lock, SX_LOCKED);
694	LIST_FOREACH(pr, &allprison, pr_list) {
695		if (pr->pr_id == prid) {
696			mtx_lock(&pr->pr_mtx);
697			if (pr->pr_ref == 0) {
698				mtx_unlock(&pr->pr_mtx);
699				break;
700			}
701			return (pr);
702		}
703	}
704	return (NULL);
705}
706
707void
708prison_free_locked(struct prison *pr)
709{
710
711	mtx_assert(&pr->pr_mtx, MA_OWNED);
712	pr->pr_ref--;
713	if (pr->pr_ref == 0) {
714		mtx_unlock(&pr->pr_mtx);
715		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
716		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
717		return;
718	}
719	mtx_unlock(&pr->pr_mtx);
720}
721
722void
723prison_free(struct prison *pr)
724{
725
726	mtx_lock(&pr->pr_mtx);
727	prison_free_locked(pr);
728}
729
730static void
731prison_complete(void *context, int pending)
732{
733	struct prison *pr;
734	int vfslocked;
735
736	pr = (struct prison *)context;
737
738	sx_xlock(&allprison_lock);
739	LIST_REMOVE(pr, pr_list);
740	prisoncount--;
741	sx_xunlock(&allprison_lock);
742
743	cpuset_rel(pr->pr_cpuset);
744
745	/* Free all OSD associated to this jail. */
746	osd_jail_exit(pr);
747
748	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
749	vrele(pr->pr_root);
750	VFS_UNLOCK_GIANT(vfslocked);
751
752	mtx_destroy(&pr->pr_mtx);
753	free(pr->pr_linux, M_PRISON);
754#ifdef INET6
755	free(pr->pr_ip6, M_PRISON);
756#endif
757#ifdef INET
758	free(pr->pr_ip4, M_PRISON);
759#endif
760	free(pr, M_PRISON);
761}
762
763void
764prison_hold_locked(struct prison *pr)
765{
766
767	mtx_assert(&pr->pr_mtx, MA_OWNED);
768	KASSERT(pr->pr_ref > 0,
769	    ("Trying to hold dead prison (id=%d).", pr->pr_id));
770	pr->pr_ref++;
771}
772
773void
774prison_hold(struct prison *pr)
775{
776
777	mtx_lock(&pr->pr_mtx);
778	prison_hold_locked(pr);
779	mtx_unlock(&pr->pr_mtx);
780}
781
782void
783prison_proc_hold(struct prison *pr)
784{
785
786	mtx_lock(&pr->pr_mtx);
787	KASSERT(pr->pr_state == PRISON_STATE_ALIVE,
788	    ("Cannot add a process to a non-alive prison (id=%d).", pr->pr_id));
789	pr->pr_nprocs++;
790	mtx_unlock(&pr->pr_mtx);
791}
792
793void
794prison_proc_free(struct prison *pr)
795{
796
797	mtx_lock(&pr->pr_mtx);
798	KASSERT(pr->pr_state == PRISON_STATE_ALIVE && pr->pr_nprocs > 0,
799	    ("Trying to kill a process in a dead prison (id=%d).", pr->pr_id));
800	pr->pr_nprocs--;
801	if (pr->pr_nprocs == 0)
802		pr->pr_state = PRISON_STATE_DYING;
803	mtx_unlock(&pr->pr_mtx);
804}
805
806
807#ifdef INET
808/*
809 * Pass back primary IPv4 address of this jail.
810 *
811 * If not jailed return success but do not alter the address.  Caller has to
812 * make sure to intialize it correctly (INADDR_ANY).
813 *
814 * Returns 0 on success, 1 on error.  Address returned in NBO.
815 */
816int
817prison_getip4(struct ucred *cred, struct in_addr *ia)
818{
819
820	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
821	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
822
823	if (!jailed(cred))
824		/* Do not change address passed in. */
825		return (0);
826
827	if (cred->cr_prison->pr_ip4 == NULL)
828		return (1);
829
830	ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
831	return (0);
832}
833
834/*
835 * Make sure our (source) address is set to something meaningful to this
836 * jail.
837 *
838 * Returns 0 on success, 1 on error.  Address passed in in NBO and returned
839 * in NBO.
840 */
841int
842prison_local_ip4(struct ucred *cred, struct in_addr *ia)
843{
844	struct in_addr ia0;
845
846	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
847	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
848
849	if (!jailed(cred))
850		return (0);
851	if (cred->cr_prison->pr_ip4 == NULL)
852		return (1);
853
854	ia0.s_addr = ntohl(ia->s_addr);
855	if (ia0.s_addr == INADDR_LOOPBACK) {
856		ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
857		return (0);
858	}
859
860	/*
861	 * In case there is only 1 IPv4 address, bind directly.
862	 */
863	if (ia0.s_addr == INADDR_ANY && cred->cr_prison->pr_ip4s == 1) {
864		ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
865		return (0);
866	}
867
868	if (ia0.s_addr == INADDR_ANY || prison_check_ip4(cred, ia))
869		return (0);
870
871	return (1);
872}
873
874/*
875 * Rewrite destination address in case we will connect to loopback address.
876 *
877 * Returns 0 on success, 1 on error.  Address passed in in NBO and returned
878 * in NBO.
879 */
880int
881prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
882{
883
884	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
885	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
886
887	if (!jailed(cred))
888		return (0);
889	if (cred->cr_prison->pr_ip4 == NULL)
890		return (1);
891	if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
892		ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
893		return (0);
894	}
895
896	/*
897	 * Return success because nothing had to be changed.
898	 */
899	return (0);
900}
901
902/*
903 * Check if given address belongs to the jail referenced by cred.
904 *
905 * Returns 1 if address belongs to jail, 0 if not.  Address passed in in NBO.
906 */
907static int
908_prison_check_ip4(struct prison *pr, struct in_addr *ia)
909{
910	int i, a, z, d;
911
912	if (pr->pr_ip4 == NULL)
913		return (0);
914
915	/*
916	 * Check the primary IP.
917	 */
918	if (pr->pr_ip4[0].s_addr == ia->s_addr)
919		return (1);
920
921	/*
922	 * All the other IPs are sorted so we can do a binary search.
923	 */
924	a = 0;
925	z = pr->pr_ip4s - 2;
926	while (a <= z) {
927		i = (a + z) / 2;
928		d = qcmp_v4(&pr->pr_ip4[i+1], ia);
929		if (d > 0)
930			z = i - 1;
931		else if (d < 0)
932			a = i + 1;
933		else
934			return (1);
935	}
936	return (0);
937}
938
939int
940prison_check_ip4(struct ucred *cred, struct in_addr *ia)
941{
942
943	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
944	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
945
946	if (!jailed(cred))
947		return (1);
948
949	return (_prison_check_ip4(cred->cr_prison, ia));
950}
951#endif
952
953#ifdef INET6
954/*
955 * Pass back primary IPv6 address for this jail.
956 *
957 * If not jailed return success but do not alter the address.  Caller has to
958 * make sure to intialize it correctly (IN6ADDR_ANY_INIT).
959 *
960 * Returns 0 on success, 1 on error.
961 */
962int
963prison_getip6(struct ucred *cred, struct in6_addr *ia6)
964{
965
966	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
967	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
968
969	if (!jailed(cred))
970		return (0);
971	if (cred->cr_prison->pr_ip6 == NULL)
972		return (1);
973	bcopy(&cred->cr_prison->pr_ip6[0], ia6, sizeof(struct in6_addr));
974	return (0);
975}
976
977/*
978 * Make sure our (source) address is set to something meaningful to this jail.
979 *
980 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
981 * when needed while binding.
982 *
983 * Returns 0 on success, 1 on error.
984 */
985int
986prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
987{
988
989	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
990	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
991
992	if (!jailed(cred))
993		return (0);
994	if (cred->cr_prison->pr_ip6 == NULL)
995		return (1);
996	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
997		bcopy(&cred->cr_prison->pr_ip6[0], ia6,
998		    sizeof(struct in6_addr));
999		return (0);
1000	}
1001
1002	/*
1003	 * In case there is only 1 IPv6 address, and v6only is true, then
1004	 * bind directly.
1005	 */
1006	if (v6only != 0 && IN6_IS_ADDR_UNSPECIFIED(ia6) &&
1007	    cred->cr_prison->pr_ip6s == 1) {
1008		bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1009		    sizeof(struct in6_addr));
1010		return (0);
1011	}
1012	if (IN6_IS_ADDR_UNSPECIFIED(ia6) || prison_check_ip6(cred, ia6))
1013		return (0);
1014	return (1);
1015}
1016
1017/*
1018 * Rewrite destination address in case we will connect to loopback address.
1019 *
1020 * Returns 0 on success, 1 on error.
1021 */
1022int
1023prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
1024{
1025
1026	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1027	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1028
1029	if (!jailed(cred))
1030		return (0);
1031	if (cred->cr_prison->pr_ip6 == NULL)
1032		return (1);
1033	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1034		bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1035		    sizeof(struct in6_addr));
1036		return (0);
1037	}
1038
1039	/*
1040	 * Return success because nothing had to be changed.
1041	 */
1042	return (0);
1043}
1044
1045/*
1046 * Check if given address belongs to the jail referenced by cred.
1047 *
1048 * Returns 1 if address belongs to jail, 0 if not.
1049 */
1050static int
1051_prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
1052{
1053	int i, a, z, d;
1054
1055	if (pr->pr_ip6 == NULL)
1056		return (0);
1057
1058	/*
1059	 * Check the primary IP.
1060	 */
1061	if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
1062		return (1);
1063
1064	/*
1065	 * All the other IPs are sorted so we can do a binary search.
1066	 */
1067	a = 0;
1068	z = pr->pr_ip6s - 2;
1069	while (a <= z) {
1070		i = (a + z) / 2;
1071		d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
1072		if (d > 0)
1073			z = i - 1;
1074		else if (d < 0)
1075			a = i + 1;
1076		else
1077			return (1);
1078	}
1079	return (0);
1080}
1081
1082int
1083prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
1084{
1085
1086	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1087	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1088
1089	if (!jailed(cred))
1090		return (1);
1091
1092	return (_prison_check_ip6(cred->cr_prison, ia6));
1093}
1094#endif
1095
1096/*
1097 * Check if given address belongs to the jail referenced by cred (wrapper to
1098 * prison_check_ip[46]).
1099 *
1100 * Returns 1 if address belongs to jail, 0 if not.  IPv4 Address passed in in
1101 * NBO.
1102 */
1103int
1104prison_if(struct ucred *cred, struct sockaddr *sa)
1105{
1106#ifdef INET
1107	struct sockaddr_in *sai;
1108#endif
1109#ifdef INET6
1110	struct sockaddr_in6 *sai6;
1111#endif
1112	int ok;
1113
1114	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1115	KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
1116
1117	ok = 0;
1118	switch(sa->sa_family)
1119	{
1120#ifdef INET
1121	case AF_INET:
1122		sai = (struct sockaddr_in *)sa;
1123		if (prison_check_ip4(cred, &sai->sin_addr))
1124			ok = 1;
1125		break;
1126
1127#endif
1128#ifdef INET6
1129	case AF_INET6:
1130		sai6 = (struct sockaddr_in6 *)sa;
1131		if (prison_check_ip6(cred, (struct in6_addr *)&sai6->sin6_addr))
1132			ok = 1;
1133		break;
1134
1135#endif
1136	default:
1137		if (!jail_socket_unixiproute_only)
1138			ok = 1;
1139	}
1140	return (ok);
1141}
1142
1143/*
1144 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
1145 */
1146int
1147prison_check(struct ucred *cred1, struct ucred *cred2)
1148{
1149
1150	if (jailed(cred1)) {
1151		if (!jailed(cred2))
1152			return (ESRCH);
1153		if (cred2->cr_prison != cred1->cr_prison)
1154			return (ESRCH);
1155	}
1156
1157	return (0);
1158}
1159
1160/*
1161 * Return 1 if the passed credential is in a jail, otherwise 0.
1162 */
1163int
1164jailed(struct ucred *cred)
1165{
1166
1167	return (cred->cr_prison != NULL);
1168}
1169
1170/*
1171 * Return the correct hostname for the passed credential.
1172 */
1173void
1174getcredhostname(struct ucred *cred, char *buf, size_t size)
1175{
1176	INIT_VPROCG(cred->cr_vimage->v_procg);
1177
1178	if (jailed(cred)) {
1179		mtx_lock(&cred->cr_prison->pr_mtx);
1180		strlcpy(buf, cred->cr_prison->pr_host, size);
1181		mtx_unlock(&cred->cr_prison->pr_mtx);
1182	} else {
1183		mtx_lock(&hostname_mtx);
1184		strlcpy(buf, V_hostname, size);
1185		mtx_unlock(&hostname_mtx);
1186	}
1187}
1188
1189/*
1190 * Determine whether the subject represented by cred can "see"
1191 * status of a mount point.
1192 * Returns: 0 for permitted, ENOENT otherwise.
1193 * XXX: This function should be called cr_canseemount() and should be
1194 *      placed in kern_prot.c.
1195 */
1196int
1197prison_canseemount(struct ucred *cred, struct mount *mp)
1198{
1199	struct prison *pr;
1200	struct statfs *sp;
1201	size_t len;
1202
1203	if (!jailed(cred) || jail_enforce_statfs == 0)
1204		return (0);
1205	pr = cred->cr_prison;
1206	if (pr->pr_root->v_mount == mp)
1207		return (0);
1208	if (jail_enforce_statfs == 2)
1209		return (ENOENT);
1210	/*
1211	 * If jail's chroot directory is set to "/" we should be able to see
1212	 * all mount-points from inside a jail.
1213	 * This is ugly check, but this is the only situation when jail's
1214	 * directory ends with '/'.
1215	 */
1216	if (strcmp(pr->pr_path, "/") == 0)
1217		return (0);
1218	len = strlen(pr->pr_path);
1219	sp = &mp->mnt_stat;
1220	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
1221		return (ENOENT);
1222	/*
1223	 * Be sure that we don't have situation where jail's root directory
1224	 * is "/some/path" and mount point is "/some/pathpath".
1225	 */
1226	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
1227		return (ENOENT);
1228	return (0);
1229}
1230
1231void
1232prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
1233{
1234	char jpath[MAXPATHLEN];
1235	struct prison *pr;
1236	size_t len;
1237
1238	if (!jailed(cred) || jail_enforce_statfs == 0)
1239		return;
1240	pr = cred->cr_prison;
1241	if (prison_canseemount(cred, mp) != 0) {
1242		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1243		strlcpy(sp->f_mntonname, "[restricted]",
1244		    sizeof(sp->f_mntonname));
1245		return;
1246	}
1247	if (pr->pr_root->v_mount == mp) {
1248		/*
1249		 * Clear current buffer data, so we are sure nothing from
1250		 * the valid path left there.
1251		 */
1252		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1253		*sp->f_mntonname = '/';
1254		return;
1255	}
1256	/*
1257	 * If jail's chroot directory is set to "/" we should be able to see
1258	 * all mount-points from inside a jail.
1259	 */
1260	if (strcmp(pr->pr_path, "/") == 0)
1261		return;
1262	len = strlen(pr->pr_path);
1263	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
1264	/*
1265	 * Clear current buffer data, so we are sure nothing from
1266	 * the valid path left there.
1267	 */
1268	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1269	if (*jpath == '\0') {
1270		/* Should never happen. */
1271		*sp->f_mntonname = '/';
1272	} else {
1273		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
1274	}
1275}
1276
1277/*
1278 * Check with permission for a specific privilege is granted within jail.  We
1279 * have a specific list of accepted privileges; the rest are denied.
1280 */
1281int
1282prison_priv_check(struct ucred *cred, int priv)
1283{
1284
1285	if (!jailed(cred))
1286		return (0);
1287
1288	switch (priv) {
1289
1290		/*
1291		 * Allow ktrace privileges for root in jail.
1292		 */
1293	case PRIV_KTRACE:
1294
1295#if 0
1296		/*
1297		 * Allow jailed processes to configure audit identity and
1298		 * submit audit records (login, etc).  In the future we may
1299		 * want to further refine the relationship between audit and
1300		 * jail.
1301		 */
1302	case PRIV_AUDIT_GETAUDIT:
1303	case PRIV_AUDIT_SETAUDIT:
1304	case PRIV_AUDIT_SUBMIT:
1305#endif
1306
1307		/*
1308		 * Allow jailed processes to manipulate process UNIX
1309		 * credentials in any way they see fit.
1310		 */
1311	case PRIV_CRED_SETUID:
1312	case PRIV_CRED_SETEUID:
1313	case PRIV_CRED_SETGID:
1314	case PRIV_CRED_SETEGID:
1315	case PRIV_CRED_SETGROUPS:
1316	case PRIV_CRED_SETREUID:
1317	case PRIV_CRED_SETREGID:
1318	case PRIV_CRED_SETRESUID:
1319	case PRIV_CRED_SETRESGID:
1320
1321		/*
1322		 * Jail implements visibility constraints already, so allow
1323		 * jailed root to override uid/gid-based constraints.
1324		 */
1325	case PRIV_SEEOTHERGIDS:
1326	case PRIV_SEEOTHERUIDS:
1327
1328		/*
1329		 * Jail implements inter-process debugging limits already, so
1330		 * allow jailed root various debugging privileges.
1331		 */
1332	case PRIV_DEBUG_DIFFCRED:
1333	case PRIV_DEBUG_SUGID:
1334	case PRIV_DEBUG_UNPRIV:
1335
1336		/*
1337		 * Allow jail to set various resource limits and login
1338		 * properties, and for now, exceed process resource limits.
1339		 */
1340	case PRIV_PROC_LIMIT:
1341	case PRIV_PROC_SETLOGIN:
1342	case PRIV_PROC_SETRLIMIT:
1343
1344		/*
1345		 * System V and POSIX IPC privileges are granted in jail.
1346		 */
1347	case PRIV_IPC_READ:
1348	case PRIV_IPC_WRITE:
1349	case PRIV_IPC_ADMIN:
1350	case PRIV_IPC_MSGSIZE:
1351	case PRIV_MQ_ADMIN:
1352
1353		/*
1354		 * Jail implements its own inter-process limits, so allow
1355		 * root processes in jail to change scheduling on other
1356		 * processes in the same jail.  Likewise for signalling.
1357		 */
1358	case PRIV_SCHED_DIFFCRED:
1359	case PRIV_SCHED_CPUSET:
1360	case PRIV_SIGNAL_DIFFCRED:
1361	case PRIV_SIGNAL_SUGID:
1362
1363		/*
1364		 * Allow jailed processes to write to sysctls marked as jail
1365		 * writable.
1366		 */
1367	case PRIV_SYSCTL_WRITEJAIL:
1368
1369		/*
1370		 * Allow root in jail to manage a variety of quota
1371		 * properties.  These should likely be conditional on a
1372		 * configuration option.
1373		 */
1374	case PRIV_VFS_GETQUOTA:
1375	case PRIV_VFS_SETQUOTA:
1376
1377		/*
1378		 * Since Jail relies on chroot() to implement file system
1379		 * protections, grant many VFS privileges to root in jail.
1380		 * Be careful to exclude mount-related and NFS-related
1381		 * privileges.
1382		 */
1383	case PRIV_VFS_READ:
1384	case PRIV_VFS_WRITE:
1385	case PRIV_VFS_ADMIN:
1386	case PRIV_VFS_EXEC:
1387	case PRIV_VFS_LOOKUP:
1388	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
1389	case PRIV_VFS_CHFLAGS_DEV:
1390	case PRIV_VFS_CHOWN:
1391	case PRIV_VFS_CHROOT:
1392	case PRIV_VFS_RETAINSUGID:
1393	case PRIV_VFS_FCHROOT:
1394	case PRIV_VFS_LINK:
1395	case PRIV_VFS_SETGID:
1396	case PRIV_VFS_STAT:
1397	case PRIV_VFS_STICKYFILE:
1398		return (0);
1399
1400		/*
1401		 * Depending on the global setting, allow privilege of
1402		 * setting system flags.
1403		 */
1404	case PRIV_VFS_SYSFLAGS:
1405		if (jail_chflags_allowed)
1406			return (0);
1407		else
1408			return (EPERM);
1409
1410		/*
1411		 * Depending on the global setting, allow privilege of
1412		 * mounting/unmounting file systems.
1413		 */
1414	case PRIV_VFS_MOUNT:
1415	case PRIV_VFS_UNMOUNT:
1416	case PRIV_VFS_MOUNT_NONUSER:
1417	case PRIV_VFS_MOUNT_OWNER:
1418		if (jail_mount_allowed)
1419			return (0);
1420		else
1421			return (EPERM);
1422
1423		/*
1424		 * Allow jailed root to bind reserved ports and reuse in-use
1425		 * ports.
1426		 */
1427	case PRIV_NETINET_RESERVEDPORT:
1428	case PRIV_NETINET_REUSEPORT:
1429		return (0);
1430
1431		/*
1432		 * Allow jailed root to set certian IPv4/6 (option) headers.
1433		 */
1434	case PRIV_NETINET_SETHDROPTS:
1435		return (0);
1436
1437		/*
1438		 * Conditionally allow creating raw sockets in jail.
1439		 */
1440	case PRIV_NETINET_RAW:
1441		if (jail_allow_raw_sockets)
1442			return (0);
1443		else
1444			return (EPERM);
1445
1446		/*
1447		 * Since jail implements its own visibility limits on netstat
1448		 * sysctls, allow getcred.  This allows identd to work in
1449		 * jail.
1450		 */
1451	case PRIV_NETINET_GETCRED:
1452		return (0);
1453
1454	default:
1455		/*
1456		 * In all remaining cases, deny the privilege request.  This
1457		 * includes almost all network privileges, many system
1458		 * configuration privileges.
1459		 */
1460		return (EPERM);
1461	}
1462}
1463
1464static int
1465sysctl_jail_list(SYSCTL_HANDLER_ARGS)
1466{
1467	struct xprison *xp, *sxp;
1468	struct prison *pr;
1469	char *p;
1470	size_t len;
1471	int count, error;
1472
1473	if (jailed(req->td->td_ucred))
1474		return (0);
1475
1476	sx_slock(&allprison_lock);
1477	if ((count = prisoncount) == 0) {
1478		sx_sunlock(&allprison_lock);
1479		return (0);
1480	}
1481
1482	len = sizeof(*xp) * count;
1483	LIST_FOREACH(pr, &allprison, pr_list) {
1484#ifdef INET
1485		len += pr->pr_ip4s * sizeof(struct in_addr);
1486#endif
1487#ifdef INET6
1488		len += pr->pr_ip6s * sizeof(struct in6_addr);
1489#endif
1490	}
1491
1492	sxp = xp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
1493
1494	LIST_FOREACH(pr, &allprison, pr_list) {
1495		xp->pr_version = XPRISON_VERSION;
1496		xp->pr_id = pr->pr_id;
1497		xp->pr_state = pr->pr_state;
1498		xp->pr_cpusetid = pr->pr_cpuset->cs_id;
1499		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
1500		mtx_lock(&pr->pr_mtx);
1501		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
1502		strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name));
1503		mtx_unlock(&pr->pr_mtx);
1504#ifdef INET
1505		xp->pr_ip4s = pr->pr_ip4s;
1506#endif
1507#ifdef INET6
1508		xp->pr_ip6s = pr->pr_ip6s;
1509#endif
1510		p = (char *)(xp + 1);
1511#ifdef INET
1512		if (pr->pr_ip4s > 0) {
1513			bcopy(pr->pr_ip4, (struct in_addr *)p,
1514			    pr->pr_ip4s * sizeof(struct in_addr));
1515			p += (pr->pr_ip4s * sizeof(struct in_addr));
1516		}
1517#endif
1518#ifdef INET6
1519		if (pr->pr_ip6s > 0) {
1520			bcopy(pr->pr_ip6, (struct in6_addr *)p,
1521			    pr->pr_ip6s * sizeof(struct in6_addr));
1522			p += (pr->pr_ip6s * sizeof(struct in6_addr));
1523		}
1524#endif
1525		xp = (struct xprison *)p;
1526	}
1527	sx_sunlock(&allprison_lock);
1528
1529	error = SYSCTL_OUT(req, sxp, len);
1530	free(sxp, M_TEMP);
1531	return (error);
1532}
1533
1534SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
1535    NULL, 0, sysctl_jail_list, "S", "List of active jails");
1536
1537static int
1538sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
1539{
1540	int error, injail;
1541
1542	injail = jailed(req->td->td_ucred);
1543	error = SYSCTL_OUT(req, &injail, sizeof(injail));
1544
1545	return (error);
1546}
1547SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
1548    NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
1549
#ifdef DDB
/*
 * DDB "show jails" command: print a column legend and then, for every
 * prison on the allprison list, its id, reference and process counts,
 * address counts, host name, path, name, state, cpuset id and addresses.
 * The listing stops after the current prison once db_pager_quit is set.
 */
DB_SHOW_COMMAND(jails, db_show_jails)
{
	struct prison *pr;
#ifdef INET6
	char ip6buf[INET6_ADDRSTRLEN];
#endif
	const char *state;
	int nstates;
#if defined(INET) || defined(INET6)
	int idx;
#endif

	/* Number of entries in the state name table. */
	nstates = (int)((sizeof(prison_states) / sizeof(struct prison_state)));

	/* Legend: one line per output line printed for each prison. */
	db_printf(
	    "   JID  pr_ref  pr_nprocs  pr_ip4s  pr_ip6s\n");
	db_printf(
	    "        Hostname                      Path\n");
	db_printf(
	    "        Name                          State\n");
	db_printf(
	    "        Cpusetid\n");
	db_printf(
	    "        IP Address(es)\n");

	LIST_FOREACH(pr, &allprison, pr_list) {
		db_printf("%6d  %6d  %9d  %7d  %7d\n",
		    pr->pr_id, pr->pr_ref, pr->pr_nprocs,
		    pr->pr_ip4s, pr->pr_ip6s);
		db_printf("%6s  %-29.29s %.74s\n",
		    "", pr->pr_host, pr->pr_path);

		/* Only index the state table with an in-range state. */
		if (pr->pr_state >= 0 && pr->pr_state < nstates)
			state = prison_states[pr->pr_state].state_name;
		else
			state = "(bogus)";
		db_printf("%6s  %-29.29s %.74s\n",
		    "", (pr->pr_name[0] == '\0') ? "" : pr->pr_name, state);
		db_printf("%6s  %-6d\n",
		    "", pr->pr_cpuset->cs_id);
#ifdef INET
		for (idx = 0; idx < pr->pr_ip4s; idx++) {
			db_printf("%6s  %s\n", "",
			    inet_ntoa(pr->pr_ip4[idx]));
		}
#endif
#ifdef INET6
		for (idx = 0; idx < pr->pr_ip6s; idx++) {
			db_printf("%6s  %s\n",
			    "", ip6_sprintf(ip6buf, &pr->pr_ip6[idx]));
		}
#endif /* INET6 */
		if (db_pager_quit)
			break;
	}
}
#endif /* DDB */
1606