kern_jail.c revision 196019
1139804Simp/*-
2185435Sbz * Copyright (c) 1999 Poul-Henning Kamp.
3185435Sbz * Copyright (c) 2008 Bjoern A. Zeeb.
4191673Sjamie * Copyright (c) 2009 James Gritton.
5185435Sbz * All rights reserved.
6190466Sjamie *
7185404Sbz * Redistribution and use in source and binary forms, with or without
8185404Sbz * modification, are permitted provided that the following conditions
9185404Sbz * are met:
10185404Sbz * 1. Redistributions of source code must retain the above copyright
11185404Sbz *    notice, this list of conditions and the following disclaimer.
12185404Sbz * 2. Redistributions in binary form must reproduce the above copyright
13185404Sbz *    notice, this list of conditions and the following disclaimer in the
14185404Sbz *    documentation and/or other materials provided with the distribution.
15185404Sbz *
16185404Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17185404Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18185404Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19185404Sbz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20185404Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21185404Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22185404Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23185404Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24185404Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25185404Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26185404Sbz * SUCH DAMAGE.
2746197Sphk */
2846155Sphk
29116182Sobrien#include <sys/cdefs.h>
30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 196019 2009-08-01 19:26:27Z rwatson $");
31116182Sobrien
32193066Sjamie#include "opt_compat.h"
33185435Sbz#include "opt_ddb.h"
34185435Sbz#include "opt_inet.h"
35185435Sbz#include "opt_inet6.h"
36131177Spjd
3746155Sphk#include <sys/param.h>
3846155Sphk#include <sys/types.h>
3946155Sphk#include <sys/kernel.h>
4046155Sphk#include <sys/systm.h>
4146155Sphk#include <sys/errno.h>
4246155Sphk#include <sys/sysproto.h>
4346155Sphk#include <sys/malloc.h>
44192895Sjamie#include <sys/osd.h>
45164032Srwatson#include <sys/priv.h>
4646155Sphk#include <sys/proc.h>
47124882Srwatson#include <sys/taskqueue.h>
48177785Skib#include <sys/fcntl.h>
4946155Sphk#include <sys/jail.h>
5087275Srwatson#include <sys/lock.h>
5187275Srwatson#include <sys/mutex.h>
52168401Spjd#include <sys/sx.h>
53193066Sjamie#include <sys/sysent.h>
54113275Smike#include <sys/namei.h>
55147185Spjd#include <sys/mount.h>
56113275Smike#include <sys/queue.h>
5746155Sphk#include <sys/socket.h>
58113275Smike#include <sys/syscallsubr.h>
5957163Srwatson#include <sys/sysctl.h>
60113275Smike#include <sys/vnode.h>
61196019Srwatson
6246155Sphk#include <net/if.h>
63196019Srwatson#include <net/vnet.h>
64196019Srwatson
6546155Sphk#include <netinet/in.h>
66196019Srwatson
67185435Sbz#ifdef DDB
68185435Sbz#include <ddb/ddb.h>
69185435Sbz#ifdef INET6
70185435Sbz#include <netinet6/in6_var.h>
71185435Sbz#endif /* INET6 */
72185435Sbz#endif /* DDB */
7346155Sphk
74163606Srwatson#include <security/mac/mac_framework.h>
75163606Srwatson
/* All-zero UUID string; used below as prison0's initial pr_hostuuid. */
76195944Sjamie#define	DEFAULT_HOSTUUID	"00000000-0000-0000-0000-000000000000"
77195944Sjamie
/*
 * malloc(9) type used for jail-related allocations in this file, e.g. the
 * per-jail IP address arrays built in kern_jail_set().
 */
7846155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
7946155Sphk
80192895Sjamie/* prison0 describes what is "real" about the system. */
/*
 * It is statically initialised with jail ID 0, name "0" and root path "/".
 * PR_HOST is set, every permission is granted (PR_ALLOW_ALL) and up to
 * JAIL_MAX child jails may be created beneath it.  Both reference counts
 * start at 1.
 */
81192895Sjamiestruct prison prison0 = {
82192895Sjamie	.pr_id		= 0,
83192895Sjamie	.pr_name	= "0",
84192895Sjamie	.pr_ref		= 1,
85192895Sjamie	.pr_uref	= 1,
86192895Sjamie	.pr_path	= "/",
87192895Sjamie	.pr_securelevel	= -1,
88194762Sjamie	.pr_childmax	= JAIL_MAX,
89195944Sjamie	.pr_hostuuid	= DEFAULT_HOSTUUID,
90192895Sjamie	.pr_children	= LIST_HEAD_INITIALIZER(&prison0.pr_children),
91193066Sjamie	.pr_flags	= PR_HOST,
92192895Sjamie	.pr_allow	= PR_ALLOW_ALL,
93192895Sjamie};
/* prison0's mutex cannot be set up by the initializer; register its init. */
94192895SjamieMTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
9557163Srwatson
96192895Sjamie/* allprison and lastprid are protected by allprison_lock. */
97168401Spjdstruct	sx allprison_lock;
98191673SjamieSX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
/* Tail queue of prisons; starts out empty. */
99191673Sjamiestruct	prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
/*
 * NOTE(review): presumably the most recently allocated jail ID -- confirm
 * against the ID allocation code, which is not visible in this section.
 */
100179881Sdelphijint	lastprid = 0;
101113275Smike
/*
 * Forward declarations of file-local helpers.  The IPv4 and IPv6 helpers
 * are only declared when the corresponding address family is configured.
 */
102191673Sjamiestatic int do_jail_attach(struct thread *td, struct prison *pr);
103190466Sjamiestatic void prison_complete(void *context, int pending);
104191673Sjamiestatic void prison_deref(struct prison *pr, int flags);
105192895Sjamiestatic char *prison_path(struct prison *pr1, struct prison *pr2);
106192895Sjamiestatic void prison_remove_one(struct prison *pr);
107185435Sbz#ifdef INET
108190466Sjamiestatic int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
109192895Sjamiestatic int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
110185435Sbz#endif
111185435Sbz#ifdef INET6
112190466Sjamiestatic int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
113192895Sjamiestatic int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
114185435Sbz#endif
115113275Smike
116191673Sjamie/* Flags for prison_deref */
/*
 * Each value is a distinct bit, so they can be OR-ed together in the
 * "flags" argument of prison_deref().
 * NOTE(review): the names suggest "drop a reference", "drop a user
 * reference", "prison mutex already held" and "allprison_lock already held
 * shared/exclusive" -- confirm against prison_deref(), which is not visible
 * in this section.
 */
117191673Sjamie#define	PD_DEREF	0x01
118191673Sjamie#define	PD_DEUREF	0x02
119191673Sjamie#define	PD_LOCKED	0x04
120191673Sjamie#define	PD_LIST_SLOCKED	0x08
121191673Sjamie#define	PD_LIST_XLOCKED	0x10
122113275Smike
123192895Sjamie/*
124192895Sjamie * Parameter names corresponding to PR_* flag values
125192895Sjamie */
/*
 * The array index is the bit position: kern_jail_set() maps entry [fi] to
 * the flag bit (1 << fi).  pr_flag_names[] holds the name that sets the
 * flag and pr_flag_nonames[] the name that clears it, so the two arrays
 * must stay in the same order.
 */
126192895Sjamiestatic char *pr_flag_names[] = {
127192895Sjamie	[0] = "persist",
128192895Sjamie};
129192895Sjamie
130192895Sjamiestatic char *pr_flag_nonames[] = {
131192895Sjamie	[0] = "nopersist",
132195870Sjamie};
133195870Sjamie
/*
 * Parameters that take a JAIL_SYS_* integer value rather than a boolean.
 * In kern_jail_set():
 *   - JAIL_SYS_DISABLE sets the "disable" bits; an entry whose "disable"
 *     field is 0 rejects that value with EINVAL;
 *   - JAIL_SYS_NEW sets the "new" bits;
 *   - JAIL_SYS_INHERIT sets neither.
 * In every case both masks are recorded as changed.
 */
134195870Sjamiestruct jailsys_flags {
135195870Sjamie	const char	*name;
136195870Sjamie	unsigned	 disable;
137195870Sjamie	unsigned	 new;
138195870Sjamie} pr_flag_jailsys[] = {
139195870Sjamie	{ "host", 0, PR_HOST },
140195870Sjamie#ifdef VIMAGE
141195870Sjamie	{ "vnet", 0, PR_VNET },
142195870Sjamie#endif
143192895Sjamie#ifdef INET
144195870Sjamie	{ "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER },
145192895Sjamie#endif
146192895Sjamie#ifdef INET6
147195870Sjamie	{ "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER },
148192895Sjamie#endif
149192895Sjamie};
150192895Sjamie
/*
 * Names of the per-jail permission parameters.  As with pr_flag_names[],
 * entry [fi] corresponds to permission bit (1 << fi); pr_allow_names[]
 * grants the permission and pr_allow_nonames[] revokes it.  Both arrays
 * must have the same length and order: kern_jail() and kern_jail_set()
 * index them in lock-step.
 */
151192895Sjamiestatic char *pr_allow_names[] = {
152192895Sjamie	"allow.set_hostname",
153192895Sjamie	"allow.sysvipc",
154192895Sjamie	"allow.raw_sockets",
155192895Sjamie	"allow.chflags",
156192895Sjamie	"allow.mount",
157192895Sjamie	"allow.quotas",
158192895Sjamie	"allow.socket_af",
159192895Sjamie};
160192895Sjamie
161192895Sjamiestatic char *pr_allow_nonames[] = {
162192895Sjamie	"allow.noset_hostname",
163192895Sjamie	"allow.nosysvipc",
164192895Sjamie	"allow.noraw_sockets",
165192895Sjamie	"allow.nochflags",
166192895Sjamie	"allow.nomount",
167192895Sjamie	"allow.noquotas",
168192895Sjamie	"allow.nosocket_af",
169192895Sjamie};
170192895Sjamie
/*
 * Defaults that kern_jail() applies when the caller is not itself jailed:
 * the permission bit mask and the enforce_statfs level.
 * NOTE(review): the comment in kern_jail() says these are set from
 * sysctls; the sysctl definitions are not visible in this section.
 */
171196002Sjamie#define	JAIL_DEFAULT_ALLOW		PR_ALLOW_SET_HOSTNAME
172196002Sjamie#define	JAIL_DEFAULT_ENFORCE_STATFS	2
173192895Sjamiestatic unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
174196002Sjamiestatic int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
175192895Sjamie#if defined(INET) || defined(INET6)
/* Upper bound on the number of addresses per address family in one jail. */
176193865Sjamiestatic unsigned jail_max_af_ips = 255;
177192895Sjamie#endif
178192895Sjamie
179192895Sjamie#ifdef INET
/*
 * qsort() comparator for IPv4 addresses.
 *
 * Both arguments point to a struct in_addr whose s_addr is in network byte
 * order.  The values are converted to host byte order before comparing so
 * that the resulting sort order is numeric.
 *
 * Returns -1, 0 or 1 when *ip1 is lower than, equal to or higher than *ip2.
 */
static int
qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs, *rhs;
	in_addr_t lhs_hbo, rhs_hbo;

	lhs = (const struct in_addr *)ip1;
	rhs = (const struct in_addr *)ip2;
	lhs_hbo = ntohl(lhs->s_addr);
	rhs_hbo = ntohl(rhs->s_addr);

	/*
	 * Compare explicitly instead of subtracting: the difference of two
	 * 32-bit unsigned values does not fit in an int.
	 */
	if (lhs_hbo < rhs_hbo)
		return (-1);
	if (lhs_hbo > rhs_hbo)
		return (1);
	return (0);
}
204185435Sbz#endif
205185435Sbz
206185435Sbz#ifdef INET6
/*
 * qsort() comparator for IPv6 addresses.
 *
 * The two struct in6_addr arguments are compared byte by byte, starting
 * with s6_addr[0]; the first differing byte decides the result.
 *
 * Returns -1, 0 or 1 when *ip1 is lower than, equal to or higher than *ip2.
 */
static int
qcmp_v6(const void *ip1, const void *ip2)
{
	const uint8_t *lhs, *rhs;
	size_t idx;

	lhs = ((const struct in6_addr *)ip1)->s6_addr;
	rhs = ((const struct in6_addr *)ip2)->s6_addr;

	for (idx = 0; idx < sizeof(struct in6_addr); idx++) {
		if (lhs[idx] != rhs[idx])
			return (lhs[idx] > rhs[idx] ? 1 : -1);
	}
	return (0);
}
225185435Sbz#endif
226185435Sbz
227191673Sjamie/*
228191673Sjamie * struct jail_args {
229191673Sjamie *	struct jail *jail;
230191673Sjamie * };
231191673Sjamie */
232191673Sjamieint
233191673Sjamiejail(struct thread *td, struct jail_args *uap)
234185435Sbz{
235191673Sjamie	uint32_t version;
236191673Sjamie	int error;
237192895Sjamie	struct jail j;
238185435Sbz
239191673Sjamie	error = copyin(uap->jail, &version, sizeof(uint32_t));
240191673Sjamie	if (error)
241191673Sjamie		return (error);
242185435Sbz
243191673Sjamie	switch (version) {
244191673Sjamie	case 0:
245191673Sjamie	{
246191673Sjamie		struct jail_v0 j0;
247185435Sbz
248192895Sjamie		/* FreeBSD single IPv4 jails. */
249192895Sjamie		bzero(&j, sizeof(struct jail));
250191673Sjamie		error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
251191673Sjamie		if (error)
252191673Sjamie			return (error);
253192895Sjamie		j.version = j0.version;
254192895Sjamie		j.path = j0.path;
255192895Sjamie		j.hostname = j0.hostname;
256192895Sjamie		j.ip4s = j0.ip_number;
257191673Sjamie		break;
258191673Sjamie	}
259191673Sjamie
260191673Sjamie	case 1:
261185435Sbz		/*
262191673Sjamie		 * Version 1 was used by multi-IPv4 jail implementations
263191673Sjamie		 * that never made it into the official kernel.
264185435Sbz		 */
265191673Sjamie		return (EINVAL);
266185435Sbz
267191673Sjamie	case 2:	/* JAIL_API_VERSION */
268191673Sjamie		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
269191673Sjamie		error = copyin(uap->jail, &j, sizeof(struct jail));
270191673Sjamie		if (error)
271191673Sjamie			return (error);
272192895Sjamie		break;
273192895Sjamie
274192895Sjamie	default:
275192895Sjamie		/* Sci-Fi jails are not supported, sorry. */
276192895Sjamie		return (EINVAL);
277192895Sjamie	}
278192895Sjamie	return (kern_jail(td, &j));
279192895Sjamie}
280192895Sjamie
281192895Sjamieint
282192895Sjamiekern_jail(struct thread *td, struct jail *j)
283192895Sjamie{
284193865Sjamie	struct iovec optiov[2 * (4
285193865Sjamie			    + sizeof(pr_allow_names) / sizeof(pr_allow_names[0])
286193865Sjamie#ifdef INET
287193865Sjamie			    + 1
288193865Sjamie#endif
289193865Sjamie#ifdef INET6
290193865Sjamie			    + 1
291193865Sjamie#endif
292193865Sjamie			    )];
293192895Sjamie	struct uio opt;
294192895Sjamie	char *u_path, *u_hostname, *u_name;
295185435Sbz#ifdef INET
296193865Sjamie	uint32_t ip4s;
297192895Sjamie	struct in_addr *u_ip4;
298192895Sjamie#endif
299192895Sjamie#ifdef INET6
300192895Sjamie	struct in6_addr *u_ip6;
301192895Sjamie#endif
302192895Sjamie	size_t tmplen;
303192895Sjamie	int error, enforce_statfs, fi;
304192895Sjamie
305192895Sjamie	bzero(&optiov, sizeof(optiov));
306192895Sjamie	opt.uio_iov = optiov;
307192895Sjamie	opt.uio_iovcnt = 0;
308192895Sjamie	opt.uio_offset = -1;
309192895Sjamie	opt.uio_resid = -1;
310192895Sjamie	opt.uio_segflg = UIO_SYSSPACE;
311192895Sjamie	opt.uio_rw = UIO_READ;
312192895Sjamie	opt.uio_td = td;
313192895Sjamie
314192895Sjamie	/* Set permissions for top-level jails from sysctls. */
315192895Sjamie	if (!jailed(td->td_ucred)) {
316192895Sjamie		for (fi = 0; fi < sizeof(pr_allow_names) /
317192895Sjamie		     sizeof(pr_allow_names[0]); fi++) {
318192895Sjamie			optiov[opt.uio_iovcnt].iov_base =
319192895Sjamie			    (jail_default_allow & (1 << fi))
320192895Sjamie			    ? pr_allow_names[fi] : pr_allow_nonames[fi];
321192895Sjamie			optiov[opt.uio_iovcnt].iov_len =
322192895Sjamie			    strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
323192895Sjamie			opt.uio_iovcnt += 2;
324192895Sjamie		}
325192895Sjamie		optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
326192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
327192895Sjamie		opt.uio_iovcnt++;
328192895Sjamie		enforce_statfs = jail_default_enforce_statfs;
329192895Sjamie		optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
330192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
331192895Sjamie		opt.uio_iovcnt++;
332192895Sjamie	}
333192895Sjamie
334192895Sjamie	tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
335192895Sjamie#ifdef INET
336192895Sjamie	ip4s = (j->version == 0) ? 1 : j->ip4s;
337192895Sjamie	if (ip4s > jail_max_af_ips)
338192895Sjamie		return (EINVAL);
339192895Sjamie	tmplen += ip4s * sizeof(struct in_addr);
340191673Sjamie#else
341192895Sjamie	if (j->ip4s > 0)
342192895Sjamie		return (EINVAL);
343191673Sjamie#endif
344191673Sjamie#ifdef INET6
345192895Sjamie	if (j->ip6s > jail_max_af_ips)
346192895Sjamie		return (EINVAL);
347192895Sjamie	tmplen += j->ip6s * sizeof(struct in6_addr);
348191673Sjamie#else
349192895Sjamie	if (j->ip6s > 0)
350192895Sjamie		return (EINVAL);
351191673Sjamie#endif
352192895Sjamie	u_path = malloc(tmplen, M_TEMP, M_WAITOK);
353192895Sjamie	u_hostname = u_path + MAXPATHLEN;
354192895Sjamie	u_name = u_hostname + MAXHOSTNAMELEN;
355191673Sjamie#ifdef INET
356192895Sjamie	u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
357191673Sjamie#endif
358191673Sjamie#ifdef INET6
359191673Sjamie#ifdef INET
360192895Sjamie	u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
361191673Sjamie#else
362192895Sjamie	u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
363191673Sjamie#endif
364191673Sjamie#endif
365192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "path";
366192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("path");
367192895Sjamie	opt.uio_iovcnt++;
368192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_path;
369192895Sjamie	error = copyinstr(j->path, u_path, MAXPATHLEN,
370192895Sjamie	    &optiov[opt.uio_iovcnt].iov_len);
371192895Sjamie	if (error) {
372192895Sjamie		free(u_path, M_TEMP);
373192895Sjamie		return (error);
374192895Sjamie	}
375192895Sjamie	opt.uio_iovcnt++;
376192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "host.hostname";
377192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
378192895Sjamie	opt.uio_iovcnt++;
379192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_hostname;
380192895Sjamie	error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
381192895Sjamie	    &optiov[opt.uio_iovcnt].iov_len);
382192895Sjamie	if (error) {
383192895Sjamie		free(u_path, M_TEMP);
384192895Sjamie		return (error);
385192895Sjamie	}
386192895Sjamie	opt.uio_iovcnt++;
387192895Sjamie	if (j->jailname != NULL) {
388192895Sjamie		optiov[opt.uio_iovcnt].iov_base = "name";
389192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof("name");
390192895Sjamie		opt.uio_iovcnt++;
391192895Sjamie		optiov[opt.uio_iovcnt].iov_base = u_name;
392192895Sjamie		error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
393192895Sjamie		    &optiov[opt.uio_iovcnt].iov_len);
394191673Sjamie		if (error) {
395191673Sjamie			free(u_path, M_TEMP);
396191673Sjamie			return (error);
397191673Sjamie		}
398192895Sjamie		opt.uio_iovcnt++;
399192895Sjamie	}
400191673Sjamie#ifdef INET
401192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
402192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
403192895Sjamie	opt.uio_iovcnt++;
404192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_ip4;
405192895Sjamie	optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
406192895Sjamie	if (j->version == 0)
407192895Sjamie		u_ip4->s_addr = j->ip4s;
408192895Sjamie	else {
409192895Sjamie		error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
410191673Sjamie		if (error) {
411191673Sjamie			free(u_path, M_TEMP);
412191673Sjamie			return (error);
413191673Sjamie		}
414192895Sjamie	}
415192895Sjamie	opt.uio_iovcnt++;
416185435Sbz#endif
417185435Sbz#ifdef INET6
418192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
419192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
420192895Sjamie	opt.uio_iovcnt++;
421192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_ip6;
422192895Sjamie	optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
423192895Sjamie	error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
424192895Sjamie	if (error) {
425192895Sjamie		free(u_path, M_TEMP);
426192895Sjamie		return (error);
427192895Sjamie	}
428192895Sjamie	opt.uio_iovcnt++;
429185435Sbz#endif
430192895Sjamie	KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]),
431192895Sjamie	    ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
432191673Sjamie	error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
433191673Sjamie	free(u_path, M_TEMP);
434191673Sjamie	return (error);
435185435Sbz}
436185435Sbz
437192895Sjamie
438191673Sjamie/*
439191673Sjamie * struct jail_set_args {
440191673Sjamie *	struct iovec *iovp;
441191673Sjamie *	unsigned int iovcnt;
442191673Sjamie *	int flags;
443191673Sjamie * };
444191673Sjamie */
445191673Sjamieint
446191673Sjamiejail_set(struct thread *td, struct jail_set_args *uap)
447185435Sbz{
448191673Sjamie	struct uio *auio;
449191673Sjamie	int error;
450191673Sjamie
451191673Sjamie	/* Check that we have an even number of iovecs. */
452191673Sjamie	if (uap->iovcnt & 1)
453191673Sjamie		return (EINVAL);
454191673Sjamie
455191673Sjamie	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
456191673Sjamie	if (error)
457191673Sjamie		return (error);
458191673Sjamie	error = kern_jail_set(td, auio, uap->flags);
459191673Sjamie	free(auio, M_IOV);
460191673Sjamie	return (error);
461191673Sjamie}
462191673Sjamie
463191673Sjamieint
464191673Sjamiekern_jail_set(struct thread *td, struct uio *optuio, int flags)
465191673Sjamie{
466191673Sjamie	struct nameidata nd;
467185435Sbz#ifdef INET
468190466Sjamie	struct in_addr *ip4;
469185435Sbz#endif
470185435Sbz#ifdef INET6
471185435Sbz	struct in6_addr *ip6;
472185435Sbz#endif
473191673Sjamie	struct vfsopt *opt;
474191673Sjamie	struct vfsoptlist *opts;
475195945Sjamie	struct prison *pr, *deadpr, *mypr, *ppr, *tpr, *tppr;
476191673Sjamie	struct vnode *root;
477193066Sjamie	char *domain, *errmsg, *host, *name, *p, *path, *uuid;
478192895Sjamie#if defined(INET) || defined(INET6)
479191673Sjamie	void *op;
480192895Sjamie#endif
481193066Sjamie	unsigned long hid;
482192895Sjamie	size_t namelen, onamelen;
483192895Sjamie	int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
484195870Sjamie	int gotchildmax, gotenforce, gothid, gotslevel;
485195870Sjamie	int fi, jid, jsys, len, level;
486194762Sjamie	int childmax, slevel, vfslocked;
487191673Sjamie#if defined(INET) || defined(INET6)
488192895Sjamie	int ii, ij;
489191673Sjamie#endif
490191673Sjamie#ifdef INET
491195974Sjamie	int ip4s, redo_ip4;
492191673Sjamie#endif
493191673Sjamie#ifdef INET6
494195974Sjamie	int ip6s, redo_ip6;
495191673Sjamie#endif
496191673Sjamie	unsigned pr_flags, ch_flags;
497192895Sjamie	unsigned pr_allow, ch_allow, tallow;
498191673Sjamie	char numbuf[12];
499185435Sbz
500191673Sjamie	error = priv_check(td, PRIV_JAIL_SET);
501191673Sjamie	if (!error && (flags & JAIL_ATTACH))
502191673Sjamie		error = priv_check(td, PRIV_JAIL_ATTACH);
503191673Sjamie	if (error)
504191673Sjamie		return (error);
505192895Sjamie	mypr = ppr = td->td_ucred->cr_prison;
506194762Sjamie	if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
507192895Sjamie		return (EPERM);
508191673Sjamie	if (flags & ~JAIL_SET_MASK)
509191673Sjamie		return (EINVAL);
510191673Sjamie
511185435Sbz	/*
512191673Sjamie	 * Check all the parameters before committing to anything.  Not all
513191673Sjamie	 * errors can be caught early, but we may as well try.  Also, this
514191673Sjamie	 * takes care of some expensive stuff (path lookup) before getting
515191673Sjamie	 * the allprison lock.
516185435Sbz	 *
517191673Sjamie	 * XXX Jails are not filesystems, and jail parameters are not mount
518191673Sjamie	 *     options.  But it makes more sense to re-use the vfsopt code
519191673Sjamie	 *     than duplicate it under a different name.
520185435Sbz	 */
521191673Sjamie	error = vfs_buildopts(optuio, &opts);
522191673Sjamie	if (error)
523191673Sjamie		return (error);
524185435Sbz#ifdef INET
525185435Sbz	ip4 = NULL;
526185435Sbz#endif
527185435Sbz#ifdef INET6
528185435Sbz	ip6 = NULL;
529185435Sbz#endif
530191673Sjamie
531191673Sjamie	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
532191673Sjamie	if (error == ENOENT)
533191673Sjamie		jid = 0;
534191673Sjamie	else if (error != 0)
535191673Sjamie		goto done_free;
536191673Sjamie
537191673Sjamie	error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
538191673Sjamie	if (error == ENOENT)
539191673Sjamie		gotslevel = 0;
540191673Sjamie	else if (error != 0)
541191673Sjamie		goto done_free;
542191673Sjamie	else
543191673Sjamie		gotslevel = 1;
544191673Sjamie
545194762Sjamie	error =
546194762Sjamie	    vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
547194762Sjamie	if (error == ENOENT)
548194762Sjamie		gotchildmax = 0;
549194762Sjamie	else if (error != 0)
550194762Sjamie		goto done_free;
551194762Sjamie	else
552194762Sjamie		gotchildmax = 1;
553194762Sjamie
554192895Sjamie	error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
555192895Sjamie	gotenforce = (error == 0);
556192895Sjamie	if (gotenforce) {
557192895Sjamie		if (enforce < 0 || enforce > 2)
558192895Sjamie			return (EINVAL);
559192895Sjamie	} else if (error != ENOENT)
560192895Sjamie		goto done_free;
561192895Sjamie
562191673Sjamie	pr_flags = ch_flags = 0;
563192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
564192895Sjamie	    fi++) {
565192895Sjamie		if (pr_flag_names[fi] == NULL)
566192895Sjamie			continue;
567192895Sjamie		vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi);
568192895Sjamie		vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi);
569192895Sjamie	}
570191673Sjamie	ch_flags |= pr_flags;
571195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
572195870Sjamie	    fi++) {
573195870Sjamie		error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys,
574195870Sjamie		    sizeof(jsys));
575195870Sjamie		if (error == ENOENT)
576195870Sjamie			continue;
577195870Sjamie		if (error != 0)
578195870Sjamie			goto done_free;
579195870Sjamie		switch (jsys) {
580195870Sjamie		case JAIL_SYS_DISABLE:
581195870Sjamie			if (!pr_flag_jailsys[fi].disable) {
582195870Sjamie				error = EINVAL;
583195870Sjamie				goto done_free;
584195870Sjamie			}
585195870Sjamie			pr_flags |= pr_flag_jailsys[fi].disable;
586195870Sjamie			break;
587195870Sjamie		case JAIL_SYS_NEW:
588195870Sjamie			pr_flags |= pr_flag_jailsys[fi].new;
589195870Sjamie			break;
590195870Sjamie		case JAIL_SYS_INHERIT:
591195870Sjamie			break;
592195870Sjamie		default:
593195870Sjamie			error = EINVAL;
594195870Sjamie			goto done_free;
595195870Sjamie		}
596195870Sjamie		ch_flags |=
597195870Sjamie		    pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable;
598195870Sjamie	}
599191673Sjamie	if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
600191673Sjamie	    && !(pr_flags & PR_PERSIST)) {
601191673Sjamie		error = EINVAL;
602191673Sjamie		vfs_opterror(opts, "new jail must persist or attach");
603191673Sjamie		goto done_errmsg;
604191673Sjamie	}
605194251Sjamie#ifdef VIMAGE
606194251Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
607194251Sjamie		error = EINVAL;
608194251Sjamie		vfs_opterror(opts, "vnet cannot be changed after creation");
609194251Sjamie		goto done_errmsg;
610194251Sjamie	}
611194251Sjamie#endif
612195974Sjamie#ifdef INET
613195974Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
614195974Sjamie		error = EINVAL;
615195974Sjamie		vfs_opterror(opts, "ip4 cannot be changed after creation");
616195974Sjamie		goto done_errmsg;
617195974Sjamie	}
618195974Sjamie#endif
619195974Sjamie#ifdef INET6
620195974Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
621195974Sjamie		error = EINVAL;
622195974Sjamie		vfs_opterror(opts, "ip6 cannot be changed after creation");
623195974Sjamie		goto done_errmsg;
624195974Sjamie	}
625195974Sjamie#endif
626191673Sjamie
627192895Sjamie	pr_allow = ch_allow = 0;
628192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
629192895Sjamie	    fi++) {
630192895Sjamie		vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi);
631192895Sjamie		vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi);
632192895Sjamie	}
633192895Sjamie	ch_allow |= pr_allow;
634192895Sjamie
635191673Sjamie	error = vfs_getopt(opts, "name", (void **)&name, &len);
636191673Sjamie	if (error == ENOENT)
637191673Sjamie		name = NULL;
638191673Sjamie	else if (error != 0)
639191673Sjamie		goto done_free;
640191673Sjamie	else {
641191673Sjamie		if (len == 0 || name[len - 1] != '\0') {
642191673Sjamie			error = EINVAL;
643191673Sjamie			goto done_free;
644191673Sjamie		}
645191673Sjamie		if (len > MAXHOSTNAMELEN) {
646191673Sjamie			error = ENAMETOOLONG;
647191673Sjamie			goto done_free;
648191673Sjamie		}
649191673Sjamie	}
650191673Sjamie
651191673Sjamie	error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
652191673Sjamie	if (error == ENOENT)
653191673Sjamie		host = NULL;
654191673Sjamie	else if (error != 0)
655191673Sjamie		goto done_free;
656191673Sjamie	else {
657193066Sjamie		ch_flags |= PR_HOST;
658193066Sjamie		pr_flags |= PR_HOST;
659191673Sjamie		if (len == 0 || host[len - 1] != '\0') {
660191673Sjamie			error = EINVAL;
661191673Sjamie			goto done_free;
662191673Sjamie		}
663191673Sjamie		if (len > MAXHOSTNAMELEN) {
664191673Sjamie			error = ENAMETOOLONG;
665191673Sjamie			goto done_free;
666191673Sjamie		}
667191673Sjamie	}
668191673Sjamie
669193066Sjamie	error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
670193066Sjamie	if (error == ENOENT)
671193066Sjamie		domain = NULL;
672193066Sjamie	else if (error != 0)
673193066Sjamie		goto done_free;
674193066Sjamie	else {
675193066Sjamie		ch_flags |= PR_HOST;
676193066Sjamie		pr_flags |= PR_HOST;
677193066Sjamie		if (len == 0 || domain[len - 1] != '\0') {
678193066Sjamie			error = EINVAL;
679193066Sjamie			goto done_free;
680193066Sjamie		}
681193066Sjamie		if (len > MAXHOSTNAMELEN) {
682193066Sjamie			error = ENAMETOOLONG;
683193066Sjamie			goto done_free;
684193066Sjamie		}
685193066Sjamie	}
686193066Sjamie
687193066Sjamie	error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
688193066Sjamie	if (error == ENOENT)
689193066Sjamie		uuid = NULL;
690193066Sjamie	else if (error != 0)
691193066Sjamie		goto done_free;
692193066Sjamie	else {
693193066Sjamie		ch_flags |= PR_HOST;
694193066Sjamie		pr_flags |= PR_HOST;
695193066Sjamie		if (len == 0 || uuid[len - 1] != '\0') {
696193066Sjamie			error = EINVAL;
697193066Sjamie			goto done_free;
698193066Sjamie		}
699193066Sjamie		if (len > HOSTUUIDLEN) {
700193066Sjamie			error = ENAMETOOLONG;
701193066Sjamie			goto done_free;
702193066Sjamie		}
703193066Sjamie	}
704193066Sjamie
705193066Sjamie#ifdef COMPAT_IA32
706193066Sjamie	if (td->td_proc->p_sysent->sv_flags & SV_IA32) {
707193066Sjamie		uint32_t hid32;
708193066Sjamie
709193066Sjamie		error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
710193066Sjamie		hid = hid32;
711193066Sjamie	} else
712193066Sjamie#endif
713193066Sjamie		error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
714193066Sjamie	if (error == ENOENT)
715193066Sjamie		gothid = 0;
716193066Sjamie	else if (error != 0)
717193066Sjamie		goto done_free;
718193066Sjamie	else {
719193066Sjamie		gothid = 1;
720193066Sjamie		ch_flags |= PR_HOST;
721193066Sjamie		pr_flags |= PR_HOST;
722193066Sjamie	}
723193066Sjamie
724185435Sbz#ifdef INET
725191673Sjamie	error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
726191673Sjamie	if (error == ENOENT)
727195870Sjamie		ip4s = (pr_flags & PR_IP4_DISABLE) ? 0 : -1;
728191673Sjamie	else if (error != 0)
729191673Sjamie		goto done_free;
730191673Sjamie	else if (ip4s & (sizeof(*ip4) - 1)) {
731191673Sjamie		error = EINVAL;
732191673Sjamie		goto done_free;
733192895Sjamie	} else {
734195870Sjamie		ch_flags |= PR_IP4_USER | PR_IP4_DISABLE;
735195870Sjamie		if (ip4s == 0)
736195870Sjamie			pr_flags |= PR_IP4_USER | PR_IP4_DISABLE;
737195870Sjamie		else {
738195870Sjamie			pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER;
739192895Sjamie			ip4s /= sizeof(*ip4);
740192895Sjamie			if (ip4s > jail_max_af_ips) {
741185435Sbz				error = EINVAL;
742192895Sjamie				vfs_opterror(opts, "too many IPv4 addresses");
743192895Sjamie				goto done_errmsg;
744185435Sbz			}
745195974Sjamie			ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
746192895Sjamie			bcopy(op, ip4, ip4s * sizeof(*ip4));
747192895Sjamie			/*
748192895Sjamie			 * IP addresses are all sorted but ip[0] to preserve
749192895Sjamie			 * the primary IP address as given from userland.
750192895Sjamie			 * This special IP is used for unbound outgoing
751192895Sjamie			 * connections as well for "loopback" traffic.
752192895Sjamie			 */
753192895Sjamie			if (ip4s > 1)
754192895Sjamie				qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4);
755192895Sjamie			/*
756192895Sjamie			 * Check for duplicate addresses and do some simple
757192895Sjamie			 * zero and broadcast checks. If users give other bogus
758192895Sjamie			 * addresses it is their problem.
759192895Sjamie			 *
760192895Sjamie			 * We do not have to care about byte order for these
761192895Sjamie			 * checks so we will do them in NBO.
762192895Sjamie			 */
763192895Sjamie			for (ii = 0; ii < ip4s; ii++) {
764192895Sjamie				if (ip4[ii].s_addr == INADDR_ANY ||
765192895Sjamie				    ip4[ii].s_addr == INADDR_BROADCAST) {
766192895Sjamie					error = EINVAL;
767192895Sjamie					goto done_free;
768192895Sjamie				}
769192895Sjamie				if ((ii+1) < ip4s &&
770192895Sjamie				    (ip4[0].s_addr == ip4[ii+1].s_addr ||
771192895Sjamie				     ip4[ii].s_addr == ip4[ii+1].s_addr)) {
772192895Sjamie					error = EINVAL;
773192895Sjamie					goto done_free;
774192895Sjamie				}
775192895Sjamie			}
776185435Sbz		}
777191673Sjamie	}
778191673Sjamie#endif
779185435Sbz
780185435Sbz#ifdef INET6
781191673Sjamie	error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
782191673Sjamie	if (error == ENOENT)
783195870Sjamie		ip6s = (pr_flags & PR_IP6_DISABLE) ? 0 : -1;
784191673Sjamie	else if (error != 0)
785191673Sjamie		goto done_free;
786191673Sjamie	else if (ip6s & (sizeof(*ip6) - 1)) {
787191673Sjamie		error = EINVAL;
788191673Sjamie		goto done_free;
789192895Sjamie	} else {
790195870Sjamie		ch_flags |= PR_IP6_USER | PR_IP6_DISABLE;
791195870Sjamie		if (ip6s == 0)
792195870Sjamie			pr_flags |= PR_IP6_USER | PR_IP6_DISABLE;
793195870Sjamie		else {
794195870Sjamie			pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER;
795192895Sjamie			ip6s /= sizeof(*ip6);
796192895Sjamie			if (ip6s > jail_max_af_ips) {
797185435Sbz				error = EINVAL;
798192895Sjamie				vfs_opterror(opts, "too many IPv6 addresses");
799192895Sjamie				goto done_errmsg;
800185435Sbz			}
801195974Sjamie			ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
802192895Sjamie			bcopy(op, ip6, ip6s * sizeof(*ip6));
803192895Sjamie			if (ip6s > 1)
804192895Sjamie				qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6);
805192895Sjamie			for (ii = 0; ii < ip6s; ii++) {
806192895Sjamie				if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
807192895Sjamie					error = EINVAL;
808192895Sjamie					goto done_free;
809192895Sjamie				}
810192895Sjamie				if ((ii+1) < ip6s &&
811192895Sjamie				    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
812192895Sjamie				     IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
813192895Sjamie				{
814192895Sjamie					error = EINVAL;
815192895Sjamie					goto done_free;
816192895Sjamie				}
817192895Sjamie			}
818185435Sbz		}
819191673Sjamie	}
820185435Sbz#endif
821185435Sbz
822195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6))
823195945Sjamie	if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
824195945Sjamie		error = EINVAL;
825195945Sjamie		vfs_opterror(opts,
826195945Sjamie		    "vnet jails cannot have IP address restrictions");
827195945Sjamie		goto done_errmsg;
828195945Sjamie	}
829195945Sjamie#endif
830195945Sjamie
831191673Sjamie	root = NULL;
832191673Sjamie	error = vfs_getopt(opts, "path", (void **)&path, &len);
833191673Sjamie	if (error == ENOENT)
834191673Sjamie		path = NULL;
835191673Sjamie	else if (error != 0)
836191673Sjamie		goto done_free;
837191673Sjamie	else {
838191673Sjamie		if (flags & JAIL_UPDATE) {
839191673Sjamie			error = EINVAL;
840191673Sjamie			vfs_opterror(opts,
841191673Sjamie			    "path cannot be changed after creation");
842191673Sjamie			goto done_errmsg;
843191673Sjamie		}
844191673Sjamie		if (len == 0 || path[len - 1] != '\0') {
845191673Sjamie			error = EINVAL;
846191673Sjamie			goto done_free;
847191673Sjamie		}
848191673Sjamie		if (len < 2 || (len == 2 && path[0] == '/'))
849191673Sjamie			path = NULL;
850191673Sjamie		else {
851192895Sjamie			/* Leave room for a real-root full pathname. */
852192895Sjamie			if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/")
853192895Sjamie			    ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) {
854192895Sjamie				error = ENAMETOOLONG;
855192895Sjamie				goto done_free;
856192895Sjamie			}
857191673Sjamie			NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE,
858191673Sjamie			    path, td);
859191673Sjamie			error = namei(&nd);
860191673Sjamie			if (error)
861191673Sjamie				goto done_free;
862191673Sjamie			vfslocked = NDHASGIANT(&nd);
863191673Sjamie			root = nd.ni_vp;
864191673Sjamie			NDFREE(&nd, NDF_ONLY_PNBUF);
865191673Sjamie			if (root->v_type != VDIR) {
866191673Sjamie				error = ENOTDIR;
867191673Sjamie				vrele(root);
868191673Sjamie				VFS_UNLOCK_GIANT(vfslocked);
869191673Sjamie				goto done_free;
870191673Sjamie			}
871191673Sjamie			VFS_UNLOCK_GIANT(vfslocked);
872191673Sjamie		}
873191673Sjamie	}
874185435Sbz
875191673Sjamie	/*
876191673Sjamie	 * Grab the allprison lock before letting modules check their
877191673Sjamie	 * parameters.  Once we have it, do not let go so we'll have a
878191673Sjamie	 * consistent view of the OSD list.
879191673Sjamie	 */
880191673Sjamie	sx_xlock(&allprison_lock);
881191673Sjamie	error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
882191673Sjamie	if (error)
883191673Sjamie		goto done_unlock_list;
884185435Sbz
885191673Sjamie	/* By now, all parameters should have been noted. */
886191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
887191673Sjamie		if (!opt->seen && strcmp(opt->name, "errmsg")) {
888191673Sjamie			error = EINVAL;
889191673Sjamie			vfs_opterror(opts, "unknown parameter: %s", opt->name);
890191673Sjamie			goto done_unlock_list;
891191673Sjamie		}
892191673Sjamie	}
893191673Sjamie
894185435Sbz	/*
895191673Sjamie	 * See if we are creating a new record or updating an existing one.
896191673Sjamie	 * This abuses the file error codes ENOENT and EEXIST.
897185435Sbz	 */
898191673Sjamie	cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
899191673Sjamie	if (!cuflags) {
900191673Sjamie		error = EINVAL;
901191673Sjamie		vfs_opterror(opts, "no valid operation (create or update)");
902191673Sjamie		goto done_unlock_list;
903191673Sjamie	}
904191673Sjamie	pr = NULL;
905191673Sjamie	if (jid != 0) {
906192895Sjamie		/*
907192895Sjamie		 * See if a requested jid already exists.  There is an
908192895Sjamie		 * information leak here if the jid exists but is not within
909192895Sjamie		 * the caller's jail hierarchy.  Jail creators will get EEXIST
910192895Sjamie		 * even though they cannot see the jail, and CREATE | UPDATE
911192895Sjamie		 * will return ENOENT which is not normally a valid error.
912192895Sjamie		 */
913191673Sjamie		if (jid < 0) {
914191673Sjamie			error = EINVAL;
915191673Sjamie			vfs_opterror(opts, "negative jid");
916191673Sjamie			goto done_unlock_list;
917191673Sjamie		}
918191673Sjamie		pr = prison_find(jid);
919191673Sjamie		if (pr != NULL) {
920192895Sjamie			ppr = pr->pr_parent;
921191673Sjamie			/* Create: jid must not exist. */
922191673Sjamie			if (cuflags == JAIL_CREATE) {
923191673Sjamie				mtx_unlock(&pr->pr_mtx);
924191673Sjamie				error = EEXIST;
925191673Sjamie				vfs_opterror(opts, "jail %d already exists",
926191673Sjamie				    jid);
927191673Sjamie				goto done_unlock_list;
928191673Sjamie			}
929192895Sjamie			if (!prison_ischild(mypr, pr)) {
930192895Sjamie				mtx_unlock(&pr->pr_mtx);
931192895Sjamie				pr = NULL;
932192895Sjamie			} else if (pr->pr_uref == 0) {
933191673Sjamie				if (!(flags & JAIL_DYING)) {
934191673Sjamie					mtx_unlock(&pr->pr_mtx);
935191673Sjamie					error = ENOENT;
936191673Sjamie					vfs_opterror(opts, "jail %d is dying",
937191673Sjamie					    jid);
938191673Sjamie					goto done_unlock_list;
939191673Sjamie				} else if ((flags & JAIL_ATTACH) ||
940191673Sjamie				    (pr_flags & PR_PERSIST)) {
941191673Sjamie					/*
942191673Sjamie					 * A dying jail might be resurrected
943191673Sjamie					 * (via attach or persist), but first
944191673Sjamie					 * it must determine if another jail
945191673Sjamie					 * has claimed its name.  Accomplish
946191673Sjamie					 * this by implicitly re-setting the
947191673Sjamie					 * name.
948191673Sjamie					 */
949191673Sjamie					if (name == NULL)
950192895Sjamie						name = prison_name(mypr, pr);
951191673Sjamie				}
952191673Sjamie			}
953191673Sjamie		}
954191673Sjamie		if (pr == NULL) {
955191673Sjamie			/* Update: jid must exist. */
956191673Sjamie			if (cuflags == JAIL_UPDATE) {
957191673Sjamie				error = ENOENT;
958191673Sjamie				vfs_opterror(opts, "jail %d not found", jid);
959191673Sjamie				goto done_unlock_list;
960191673Sjamie			}
961191673Sjamie		}
962191673Sjamie	}
963191673Sjamie	/*
964191673Sjamie	 * If the caller provided a name, look for a jail by that name.
965191673Sjamie	 * This has different semantics for creates and updates keyed by jid
966191673Sjamie	 * (where the name must not already exist in a different jail),
967191673Sjamie	 * and updates keyed by the name itself (where the name must exist
968191673Sjamie	 * because that is the jail being updated).
969191673Sjamie	 */
970191673Sjamie	if (name != NULL) {
971192895Sjamie		p = strrchr(name, '.');
972192895Sjamie		if (p != NULL) {
973192895Sjamie			/*
974192895Sjamie			 * This is a hierarchical name.  Split it into the
975192895Sjamie			 * parent and child names, and make sure the parent
976192895Sjamie			 * exists or matches an already found jail.
977192895Sjamie			 */
978192895Sjamie			*p = '\0';
979192895Sjamie			if (pr != NULL) {
980192895Sjamie				if (strncmp(name, ppr->pr_name, p - name) ||
981192895Sjamie				    ppr->pr_name[p - name] != '\0') {
982192895Sjamie					mtx_unlock(&pr->pr_mtx);
983192895Sjamie					error = EINVAL;
984192895Sjamie					vfs_opterror(opts,
985192895Sjamie					    "cannot change jail's parent");
986192895Sjamie					goto done_unlock_list;
987192895Sjamie				}
988192895Sjamie			} else {
989192895Sjamie				ppr = prison_find_name(mypr, name);
990192895Sjamie				if (ppr == NULL) {
991192895Sjamie					error = ENOENT;
992192895Sjamie					vfs_opterror(opts,
993192895Sjamie					    "jail \"%s\" not found", name);
994192895Sjamie					goto done_unlock_list;
995192895Sjamie				}
996192895Sjamie				mtx_unlock(&ppr->pr_mtx);
997192895Sjamie			}
998192895Sjamie			name = p + 1;
999192895Sjamie		}
1000191673Sjamie		if (name[0] != '\0') {
1001192895Sjamie			namelen =
1002192895Sjamie			    (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
1003192895Sjamie name_again:
1004191673Sjamie			deadpr = NULL;
1005192895Sjamie			FOREACH_PRISON_CHILD(ppr, tpr) {
1006191673Sjamie				if (tpr != pr && tpr->pr_ref > 0 &&
1007192895Sjamie				    !strcmp(tpr->pr_name + namelen, name)) {
1008191673Sjamie					if (pr == NULL &&
1009191673Sjamie					    cuflags != JAIL_CREATE) {
1010191673Sjamie						mtx_lock(&tpr->pr_mtx);
1011191673Sjamie						if (tpr->pr_ref > 0) {
1012191673Sjamie							/*
1013191673Sjamie							 * Use this jail
1014191673Sjamie							 * for updates.
1015191673Sjamie							 */
1016191673Sjamie							if (tpr->pr_uref > 0) {
1017191673Sjamie								pr = tpr;
1018191673Sjamie								break;
1019191673Sjamie							}
1020191673Sjamie							deadpr = tpr;
1021191673Sjamie						}
1022191673Sjamie						mtx_unlock(&tpr->pr_mtx);
1023191673Sjamie					} else if (tpr->pr_uref > 0) {
1024191673Sjamie						/*
1025191673Sjamie						 * Create, or update(jid):
1026191673Sjamie						 * name must not exist in an
1027192895Sjamie						 * active sibling jail.
1028191673Sjamie						 */
1029191673Sjamie						error = EEXIST;
1030191673Sjamie						if (pr != NULL)
1031191673Sjamie							mtx_unlock(&pr->pr_mtx);
1032191673Sjamie						vfs_opterror(opts,
1033191673Sjamie						   "jail \"%s\" already exists",
1034191673Sjamie						   name);
1035191673Sjamie						goto done_unlock_list;
1036191673Sjamie					}
1037191673Sjamie				}
1038191673Sjamie			}
1039191673Sjamie			/* If no active jail is found, use a dying one. */
1040191673Sjamie			if (deadpr != NULL && pr == NULL) {
1041191673Sjamie				if (flags & JAIL_DYING) {
1042191673Sjamie					mtx_lock(&deadpr->pr_mtx);
1043191673Sjamie					if (deadpr->pr_ref == 0) {
1044191673Sjamie						mtx_unlock(&deadpr->pr_mtx);
1045191673Sjamie						goto name_again;
1046191673Sjamie					}
1047191673Sjamie					pr = deadpr;
1048191673Sjamie				} else if (cuflags == JAIL_UPDATE) {
1049191673Sjamie					error = ENOENT;
1050191673Sjamie					vfs_opterror(opts,
1051191673Sjamie					    "jail \"%s\" is dying", name);
1052191673Sjamie					goto done_unlock_list;
1053191673Sjamie				}
1054191673Sjamie			}
1055191673Sjamie			/* Update: name must exist if no jid. */
1056191673Sjamie			else if (cuflags == JAIL_UPDATE && pr == NULL) {
1057191673Sjamie				error = ENOENT;
1058191673Sjamie				vfs_opterror(opts, "jail \"%s\" not found",
1059191673Sjamie				    name);
1060191673Sjamie				goto done_unlock_list;
1061191673Sjamie			}
1062191673Sjamie		}
1063191673Sjamie	}
1064191673Sjamie	/* Update: must provide a jid or name. */
1065191673Sjamie	else if (cuflags == JAIL_UPDATE && pr == NULL) {
1066191673Sjamie		error = ENOENT;
1067191673Sjamie		vfs_opterror(opts, "update specified no jail");
1068191673Sjamie		goto done_unlock_list;
1069191673Sjamie	}
1070185435Sbz
1071191673Sjamie	/* If there's no prison to update, create a new one and link it in. */
1072191673Sjamie	if (pr == NULL) {
1073194762Sjamie		for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
1074194762Sjamie			if (tpr->pr_childcount >= tpr->pr_childmax) {
1075194762Sjamie				error = EPERM;
1076194762Sjamie				vfs_opterror(opts, "prison limit exceeded");
1077194762Sjamie				goto done_unlock_list;
1078194762Sjamie			}
1079191673Sjamie		created = 1;
1080192895Sjamie		mtx_lock(&ppr->pr_mtx);
1081192895Sjamie		if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) {
1082192895Sjamie			mtx_unlock(&ppr->pr_mtx);
1083192895Sjamie			error = ENOENT;
1084192895Sjamie			vfs_opterror(opts, "parent jail went away!");
1085192895Sjamie			goto done_unlock_list;
1086192895Sjamie		}
1087192895Sjamie		ppr->pr_ref++;
1088192895Sjamie		ppr->pr_uref++;
1089192895Sjamie		mtx_unlock(&ppr->pr_mtx);
1090191673Sjamie		pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
1091191673Sjamie		if (jid == 0) {
1092191673Sjamie			/* Find the next free jid. */
1093191673Sjamie			jid = lastprid + 1;
1094191673Sjamie findnext:
1095191673Sjamie			if (jid == JAIL_MAX)
1096191673Sjamie				jid = 1;
1097191673Sjamie			TAILQ_FOREACH(tpr, &allprison, pr_list) {
1098191673Sjamie				if (tpr->pr_id < jid)
1099191673Sjamie					continue;
1100191673Sjamie				if (tpr->pr_id > jid || tpr->pr_ref == 0) {
1101191673Sjamie					TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
1102191673Sjamie					break;
1103191673Sjamie				}
1104191673Sjamie				if (jid == lastprid) {
1105191673Sjamie					error = EAGAIN;
1106191673Sjamie					vfs_opterror(opts,
1107191673Sjamie					    "no available jail IDs");
1108191673Sjamie					free(pr, M_PRISON);
1109192895Sjamie					prison_deref(ppr, PD_DEREF |
1110192895Sjamie					    PD_DEUREF | PD_LIST_XLOCKED);
1111192895Sjamie					goto done_releroot;
1112191673Sjamie				}
1113191673Sjamie				jid++;
1114191673Sjamie				goto findnext;
1115191673Sjamie			}
1116191673Sjamie			lastprid = jid;
1117191673Sjamie		} else {
1118191673Sjamie			/*
1119191673Sjamie			 * The jail already has a jid (that did not yet exist),
1120191673Sjamie			 * so just find where to insert it.
1121191673Sjamie			 */
1122191673Sjamie			TAILQ_FOREACH(tpr, &allprison, pr_list)
1123191673Sjamie				if (tpr->pr_id >= jid) {
1124191673Sjamie					TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
1125191673Sjamie					break;
1126191673Sjamie				}
1127191673Sjamie		}
1128191673Sjamie		if (tpr == NULL)
1129191673Sjamie			TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
1130192895Sjamie		LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
1131192895Sjamie		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
1132194762Sjamie			tpr->pr_childcount++;
1133185435Sbz
1134192895Sjamie		pr->pr_parent = ppr;
1135191673Sjamie		pr->pr_id = jid;
1136192895Sjamie
1137192895Sjamie		/* Set some default values, and inherit some from the parent. */
1138191673Sjamie		if (name == NULL)
1139191673Sjamie			name = "";
1140191673Sjamie		if (path == NULL) {
1141191673Sjamie			path = "/";
1142192895Sjamie			root = mypr->pr_root;
1143191673Sjamie			vref(root);
1144191673Sjamie		}
1145195944Sjamie		strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
1146195944Sjamie		pr->pr_flags |= PR_HOST;
1147195945Sjamie#if defined(INET) || defined(INET6)
1148195945Sjamie#ifdef VIMAGE
1149195945Sjamie		if (!(pr_flags & PR_VNET))
1150195945Sjamie#endif
1151195945Sjamie		{
1152192895Sjamie#ifdef INET
1153195974Sjamie			if (!(ch_flags & PR_IP4_USER))
1154195974Sjamie				pr->pr_flags |=
1155195974Sjamie				    PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE;
1156195974Sjamie			else if (!(pr_flags & PR_IP4_USER)) {
1157195974Sjamie				pr->pr_flags |= ppr->pr_flags & PR_IP4;
1158195974Sjamie				if (ppr->pr_ip4 != NULL) {
1159195974Sjamie					pr->pr_ip4s = ppr->pr_ip4s;
1160195974Sjamie					pr->pr_ip4 = malloc(pr->pr_ip4s *
1161195974Sjamie					    sizeof(struct in_addr), M_PRISON,
1162195974Sjamie					    M_WAITOK);
1163195974Sjamie					bcopy(ppr->pr_ip4, pr->pr_ip4,
1164195974Sjamie					    pr->pr_ip4s * sizeof(*pr->pr_ip4));
1165195974Sjamie				}
1166195974Sjamie			}
1167192895Sjamie#endif
1168192895Sjamie#ifdef INET6
1169195974Sjamie			if (!(ch_flags & PR_IP6_USER))
1170195974Sjamie				pr->pr_flags |=
1171195974Sjamie				    PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE;
1172195974Sjamie			else if (!(pr_flags & PR_IP6_USER)) {
1173195974Sjamie				pr->pr_flags |= ppr->pr_flags & PR_IP6;
1174195974Sjamie				if (ppr->pr_ip6 != NULL) {
1175195974Sjamie					pr->pr_ip6s = ppr->pr_ip6s;
1176195974Sjamie					pr->pr_ip6 = malloc(pr->pr_ip6s *
1177195974Sjamie					    sizeof(struct in6_addr), M_PRISON,
1178195974Sjamie					    M_WAITOK);
1179195974Sjamie					bcopy(ppr->pr_ip6, pr->pr_ip6,
1180195974Sjamie					    pr->pr_ip6s * sizeof(*pr->pr_ip6));
1181195974Sjamie				}
1182195974Sjamie			}
1183192895Sjamie#endif
1184195945Sjamie		}
1185195945Sjamie#endif
1186192895Sjamie		pr->pr_securelevel = ppr->pr_securelevel;
1187192895Sjamie		pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
1188196002Sjamie		pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
1189191673Sjamie
1190192895Sjamie		LIST_INIT(&pr->pr_children);
1191192895Sjamie		mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
1192191673Sjamie
1193194251Sjamie#ifdef VIMAGE
1194194251Sjamie		/* Allocate a new vnet if specified. */
1195194251Sjamie		pr->pr_vnet = (pr_flags & PR_VNET)
1196194251Sjamie		    ? vnet_alloc() : ppr->pr_vnet;
1197194251Sjamie#endif
1198185435Sbz		/*
1199191673Sjamie		 * Allocate a dedicated cpuset for each jail.
1200191673Sjamie		 * Unlike other initial settings, this may return an error.
1201185435Sbz		 */
1202192895Sjamie		error = cpuset_create_root(ppr, &pr->pr_cpuset);
1203191673Sjamie		if (error) {
1204191673Sjamie			prison_deref(pr, PD_LIST_XLOCKED);
1205191673Sjamie			goto done_releroot;
1206191673Sjamie		}
1207185435Sbz
1208191673Sjamie		mtx_lock(&pr->pr_mtx);
1209185435Sbz		/*
1210191673Sjamie		 * New prisons do not yet have a reference, because we do not
1211191673Sjamie		 * want others to see the incomplete prison once the
1212191673Sjamie		 * allprison_lock is downgraded.
1213185435Sbz		 */
1214191673Sjamie	} else {
1215191673Sjamie		created = 0;
1216195974Sjamie		/*
1217195974Sjamie		 * Grab a reference for existing prisons, to ensure they
1218195974Sjamie		 * continue to exist for the duration of the call.
1219195974Sjamie		 */
1220195974Sjamie		pr->pr_ref++;
1221195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6))
1222195945Sjamie		if ((pr->pr_flags & PR_VNET) &&
1223195945Sjamie		    (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
1224195945Sjamie			error = EINVAL;
1225195945Sjamie			vfs_opterror(opts,
1226195945Sjamie			    "vnet jails cannot have IP address restrictions");
1227195945Sjamie			goto done_deref_locked;
1228195945Sjamie		}
1229195945Sjamie#endif
1230195974Sjamie#ifdef INET
1231195974Sjamie		if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
1232195974Sjamie			error = EINVAL;
1233195974Sjamie			vfs_opterror(opts,
1234195974Sjamie			    "ip4 cannot be changed after creation");
1235195974Sjamie			goto done_deref_locked;
1236195974Sjamie		}
1237195974Sjamie#endif
1238195974Sjamie#ifdef INET6
1239195974Sjamie		if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
1240195974Sjamie			error = EINVAL;
1241195974Sjamie			vfs_opterror(opts,
1242195974Sjamie			    "ip6 cannot be changed after creation");
1243195974Sjamie			goto done_deref_locked;
1244195974Sjamie		}
1245195974Sjamie#endif
1246191673Sjamie	}
1247185435Sbz
1248191673Sjamie	/* Do final error checking before setting anything. */
1249192895Sjamie	if (gotslevel) {
1250192895Sjamie		if (slevel < ppr->pr_securelevel) {
1251192895Sjamie			error = EPERM;
1252192895Sjamie			goto done_deref_locked;
1253192895Sjamie		}
1254192895Sjamie	}
1255194762Sjamie	if (gotchildmax) {
1256194762Sjamie		if (childmax >= ppr->pr_childmax) {
1257194762Sjamie			error = EPERM;
1258194762Sjamie			goto done_deref_locked;
1259194762Sjamie		}
1260194762Sjamie	}
1261192895Sjamie	if (gotenforce) {
1262192895Sjamie		if (enforce < ppr->pr_enforce_statfs) {
1263192895Sjamie			error = EPERM;
1264192895Sjamie			goto done_deref_locked;
1265192895Sjamie		}
1266192895Sjamie	}
1267185435Sbz#ifdef INET
1268195974Sjamie	if (ip4s > 0) {
1269192895Sjamie		if (ppr->pr_flags & PR_IP4) {
1270195974Sjamie			/*
1271195974Sjamie			 * Make sure the new set of IP addresses is a
1272195974Sjamie			 * subset of the parent's list.  Don't worry
1273195974Sjamie			 * about the parent being unlocked, as any
1274195974Sjamie			 * setting is done with allprison_lock held.
1275195974Sjamie			 */
1276195974Sjamie			for (ij = 0; ij < ppr->pr_ip4s; ij++)
1277195974Sjamie				if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
1278195974Sjamie					break;
1279195974Sjamie			if (ij == ppr->pr_ip4s) {
1280195974Sjamie				error = EPERM;
1281195974Sjamie				goto done_deref_locked;
1282195974Sjamie			}
1283195974Sjamie			if (ip4s > 1) {
1284195974Sjamie				for (ii = ij = 1; ii < ip4s; ii++) {
1285195974Sjamie					if (ip4[ii].s_addr ==
1286195974Sjamie					    ppr->pr_ip4[0].s_addr)
1287195974Sjamie						continue;
1288195974Sjamie					for (; ij < ppr->pr_ip4s; ij++)
1289195974Sjamie						if (ip4[ii].s_addr ==
1290195974Sjamie						    ppr->pr_ip4[ij].s_addr)
1291195974Sjamie							break;
1292195974Sjamie					if (ij == ppr->pr_ip4s)
1293195974Sjamie						break;
1294192895Sjamie				}
1295192895Sjamie				if (ij == ppr->pr_ip4s) {
1296192895Sjamie					error = EPERM;
1297192895Sjamie					goto done_deref_locked;
1298192895Sjamie				}
1299192895Sjamie			}
1300192895Sjamie		}
1301195974Sjamie		/*
1302195974Sjamie		 * Check for conflicting IP addresses.  We permit them
1303195974Sjamie		 * if there is no more than one IP on each jail.  If
1304195974Sjamie		 * there is a duplicate on a jail with more than one
1305195974Sjamie		 * IP stop checking and return error.
1306195974Sjamie		 */
1307195974Sjamie		tppr = ppr;
1308195945Sjamie#ifdef VIMAGE
1309195974Sjamie		for (; tppr != &prison0; tppr = tppr->pr_parent)
1310195974Sjamie			if (tppr->pr_flags & PR_VNET)
1311195974Sjamie				break;
1312195945Sjamie#endif
1313195974Sjamie		FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
1314195974Sjamie			if (tpr == pr ||
1315195945Sjamie#ifdef VIMAGE
1316195974Sjamie			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
1317195945Sjamie#endif
1318195974Sjamie			    tpr->pr_uref == 0) {
1319192895Sjamie				descend = 0;
1320195974Sjamie				continue;
1321195974Sjamie			}
1322195974Sjamie			if (!(tpr->pr_flags & PR_IP4_USER))
1323195974Sjamie				continue;
1324195974Sjamie			descend = 0;
1325195974Sjamie			if (tpr->pr_ip4 == NULL ||
1326195974Sjamie			    (ip4s == 1 && tpr->pr_ip4s == 1))
1327195974Sjamie				continue;
1328195974Sjamie			for (ii = 0; ii < ip4s; ii++) {
1329195974Sjamie				if (_prison_check_ip4(tpr, &ip4[ii]) == 0) {
1330195974Sjamie					error = EADDRINUSE;
1331195974Sjamie					vfs_opterror(opts,
1332195974Sjamie					    "IPv4 addresses clash");
1333195974Sjamie					goto done_deref_locked;
1334192895Sjamie				}
1335192895Sjamie			}
1336192895Sjamie		}
1337192895Sjamie	}
1338185435Sbz#endif
1339191673Sjamie#ifdef INET6
1340195974Sjamie	if (ip6s > 0) {
1341192895Sjamie		if (ppr->pr_flags & PR_IP6) {
1342195974Sjamie			/*
1343195974Sjamie			 * Make sure the new set of IP addresses is a
1344195974Sjamie			 * subset of the parent's list.
1345195974Sjamie			 */
1346195974Sjamie			for (ij = 0; ij < ppr->pr_ip6s; ij++)
1347195974Sjamie				if (IN6_ARE_ADDR_EQUAL(&ip6[0],
1348195974Sjamie				    &ppr->pr_ip6[ij]))
1349195974Sjamie					break;
1350195974Sjamie			if (ij == ppr->pr_ip6s) {
1351195974Sjamie				error = EPERM;
1352195974Sjamie				goto done_deref_locked;
1353195974Sjamie			}
1354195974Sjamie			if (ip6s > 1) {
1355195974Sjamie				for (ii = ij = 1; ii < ip6s; ii++) {
1356195974Sjamie					if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
1357195974Sjamie					     &ppr->pr_ip6[0]))
1358195974Sjamie						continue;
1359195974Sjamie					for (; ij < ppr->pr_ip6s; ij++)
1360195974Sjamie						if (IN6_ARE_ADDR_EQUAL(
1361195974Sjamie						    &ip6[ii], &ppr->pr_ip6[ij]))
1362195974Sjamie							break;
1363195974Sjamie					if (ij == ppr->pr_ip6s)
1364195974Sjamie						break;
1365192895Sjamie				}
1366192895Sjamie				if (ij == ppr->pr_ip6s) {
1367192895Sjamie					error = EPERM;
1368192895Sjamie					goto done_deref_locked;
1369192895Sjamie				}
1370192895Sjamie			}
1371192895Sjamie		}
1372195974Sjamie		/* Check for conflicting IP addresses. */
1373195974Sjamie		tppr = ppr;
1374195945Sjamie#ifdef VIMAGE
1375195974Sjamie		for (; tppr != &prison0; tppr = tppr->pr_parent)
1376195974Sjamie			if (tppr->pr_flags & PR_VNET)
1377195974Sjamie				break;
1378195945Sjamie#endif
1379195974Sjamie		FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
1380195974Sjamie			if (tpr == pr ||
1381195945Sjamie#ifdef VIMAGE
1382195974Sjamie			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
1383195945Sjamie#endif
1384195974Sjamie			    tpr->pr_uref == 0) {
1385192895Sjamie				descend = 0;
1386195974Sjamie				continue;
1387195974Sjamie			}
1388195974Sjamie			if (!(tpr->pr_flags & PR_IP6_USER))
1389195974Sjamie				continue;
1390195974Sjamie			descend = 0;
1391195974Sjamie			if (tpr->pr_ip6 == NULL ||
1392195974Sjamie			    (ip6s == 1 && tpr->pr_ip6s == 1))
1393195974Sjamie				continue;
1394195974Sjamie			for (ii = 0; ii < ip6s; ii++) {
1395195974Sjamie				if (_prison_check_ip6(tpr, &ip6[ii]) == 0) {
1396195974Sjamie					error = EADDRINUSE;
1397195974Sjamie					vfs_opterror(opts,
1398195974Sjamie					    "IPv6 addresses clash");
1399195974Sjamie					goto done_deref_locked;
1400192895Sjamie				}
1401192895Sjamie			}
1402191673Sjamie		}
1403192895Sjamie	}
1404191673Sjamie#endif
1405192895Sjamie	onamelen = namelen = 0;
1406192895Sjamie	if (name != NULL) {
1407191673Sjamie		/* Give a default name of the jid. */
1408191673Sjamie		if (name[0] == '\0')
1409191673Sjamie			snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
1410191673Sjamie		else if (strtoul(name, &p, 10) != jid && *p == '\0') {
1411191673Sjamie			error = EINVAL;
1412191673Sjamie			vfs_opterror(opts, "name cannot be numeric");
1413192895Sjamie			goto done_deref_locked;
1414191673Sjamie		}
1415191673Sjamie		/*
1416192895Sjamie		 * Make sure the name isn't too long for the prison or its
1417192895Sjamie		 * children.
1418191673Sjamie		 */
1419192895Sjamie		onamelen = strlen(pr->pr_name);
1420192895Sjamie		namelen = strlen(name);
1421192895Sjamie		if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
1422192895Sjamie			error = ENAMETOOLONG;
1423192895Sjamie			goto done_deref_locked;
1424192895Sjamie		}
1425192895Sjamie		FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1426192895Sjamie			if (strlen(tpr->pr_name) + (namelen - onamelen) >=
1427192895Sjamie			    sizeof(pr->pr_name)) {
1428192895Sjamie				error = ENAMETOOLONG;
1429192895Sjamie				goto done_deref_locked;
1430192895Sjamie			}
1431192895Sjamie		}
1432191673Sjamie	}
1433192895Sjamie	if (pr_allow & ~ppr->pr_allow) {
1434192895Sjamie		error = EPERM;
1435192895Sjamie		goto done_deref_locked;
1436192895Sjamie	}
1437185435Sbz
1438191673Sjamie	/* Set the parameters of the prison. */
1439191673Sjamie#ifdef INET
1440192895Sjamie	redo_ip4 = 0;
1441195974Sjamie	if (pr_flags & PR_IP4_USER) {
1442195974Sjamie		pr->pr_flags |= PR_IP4;
1443195974Sjamie		free(pr->pr_ip4, M_PRISON);
1444195974Sjamie		pr->pr_ip4s = ip4s;
1445195974Sjamie		pr->pr_ip4 = ip4;
1446195974Sjamie		ip4 = NULL;
1447192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1448195945Sjamie#ifdef VIMAGE
1449195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1450195945Sjamie				descend = 0;
1451195945Sjamie				continue;
1452195945Sjamie			}
1453195945Sjamie#endif
1454192895Sjamie			if (prison_restrict_ip4(tpr, NULL)) {
1455192895Sjamie				redo_ip4 = 1;
1456192895Sjamie				descend = 0;
1457192895Sjamie			}
1458192895Sjamie		}
1459185435Sbz	}
1460191673Sjamie#endif
1461191673Sjamie#ifdef INET6
1462192895Sjamie	redo_ip6 = 0;
1463195974Sjamie	if (pr_flags & PR_IP6_USER) {
1464195974Sjamie		pr->pr_flags |= PR_IP6;
1465195974Sjamie		free(pr->pr_ip6, M_PRISON);
1466195974Sjamie		pr->pr_ip6s = ip6s;
1467195974Sjamie		pr->pr_ip6 = ip6;
1468195974Sjamie		ip6 = NULL;
1469192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1470195945Sjamie#ifdef VIMAGE
1471195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1472195945Sjamie				descend = 0;
1473195945Sjamie				continue;
1474195945Sjamie			}
1475195945Sjamie#endif
1476192895Sjamie			if (prison_restrict_ip6(tpr, NULL)) {
1477192895Sjamie				redo_ip6 = 1;
1478192895Sjamie				descend = 0;
1479192895Sjamie			}
1480192895Sjamie		}
1481191673Sjamie	}
1482191673Sjamie#endif
1483192895Sjamie	if (gotslevel) {
1484191673Sjamie		pr->pr_securelevel = slevel;
1485192895Sjamie		/* Set all child jails to be at least this level. */
1486192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1487192895Sjamie			if (tpr->pr_securelevel < slevel)
1488192895Sjamie				tpr->pr_securelevel = slevel;
1489192895Sjamie	}
1490194762Sjamie	if (gotchildmax) {
1491194762Sjamie		pr->pr_childmax = childmax;
1492194762Sjamie		/* Set all child jails to under this limit. */
1493194762Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
1494194762Sjamie			if (tpr->pr_childmax > childmax - level)
1495194762Sjamie				tpr->pr_childmax = childmax > level
1496194762Sjamie				    ? childmax - level : 0;
1497194762Sjamie	}
1498192895Sjamie	if (gotenforce) {
1499192895Sjamie		pr->pr_enforce_statfs = enforce;
1500192895Sjamie		/* Pass this restriction on to the children. */
1501192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1502192895Sjamie			if (tpr->pr_enforce_statfs < enforce)
1503192895Sjamie				tpr->pr_enforce_statfs = enforce;
1504192895Sjamie	}
1505192895Sjamie	if (name != NULL) {
1506192895Sjamie		if (ppr == &prison0)
1507192895Sjamie			strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
1508192895Sjamie		else
1509192895Sjamie			snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
1510192895Sjamie			    ppr->pr_name, name);
1511192895Sjamie		/* Change this component of child names. */
1512192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1513192895Sjamie			bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
1514192895Sjamie			    strlen(tpr->pr_name + onamelen) + 1);
1515192895Sjamie			bcopy(pr->pr_name, tpr->pr_name, namelen);
1516192895Sjamie		}
1517192895Sjamie	}
1518191673Sjamie	if (path != NULL) {
1519192895Sjamie		/* Try to keep a real-rooted full pathname. */
1520192895Sjamie		if (path[0] == '/' && strcmp(mypr->pr_path, "/"))
1521192895Sjamie			snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s",
1522192895Sjamie			    mypr->pr_path, path);
1523192895Sjamie		else
1524192895Sjamie			strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
1525191673Sjamie		pr->pr_root = root;
1526191673Sjamie	}
1527193066Sjamie	if (PR_HOST & ch_flags & ~pr_flags) {
1528193066Sjamie		if (pr->pr_flags & PR_HOST) {
1529193066Sjamie			/*
1530193066Sjamie			 * Copy the parent's host info.  As with pr_ip4 above,
1531193066Sjamie			 * the lack of a lock on the parent is not a problem;
1532193066Sjamie			 * it is always set with allprison_lock at least
1533193066Sjamie			 * shared, and is held exclusively here.
1534193066Sjamie			 */
1535194118Sjamie			strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
1536194118Sjamie			    sizeof(pr->pr_hostname));
1537194118Sjamie			strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
1538194118Sjamie			    sizeof(pr->pr_domainname));
1539194118Sjamie			strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
1540194118Sjamie			    sizeof(pr->pr_hostuuid));
1541193066Sjamie			pr->pr_hostid = pr->pr_parent->pr_hostid;
1542193066Sjamie		}
1543193066Sjamie	} else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
1544193066Sjamie		/* Set this prison, and any descendants without PR_HOST. */
1545193066Sjamie		if (host != NULL)
1546194118Sjamie			strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
1547193066Sjamie		if (domain != NULL)
1548194118Sjamie			strlcpy(pr->pr_domainname, domain,
1549194118Sjamie			    sizeof(pr->pr_domainname));
1550193066Sjamie		if (uuid != NULL)
1551194118Sjamie			strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
1552193066Sjamie		if (gothid)
1553193066Sjamie			pr->pr_hostid = hid;
1554193066Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1555193066Sjamie			if (tpr->pr_flags & PR_HOST)
1556193066Sjamie				descend = 0;
1557193066Sjamie			else {
1558193066Sjamie				if (host != NULL)
1559194118Sjamie					strlcpy(tpr->pr_hostname,
1560194118Sjamie					    pr->pr_hostname,
1561194118Sjamie					    sizeof(tpr->pr_hostname));
1562193066Sjamie				if (domain != NULL)
1563194118Sjamie					strlcpy(tpr->pr_domainname,
1564194118Sjamie					    pr->pr_domainname,
1565194118Sjamie					    sizeof(tpr->pr_domainname));
1566193066Sjamie				if (uuid != NULL)
1567194118Sjamie					strlcpy(tpr->pr_hostuuid,
1568194118Sjamie					    pr->pr_hostuuid,
1569194118Sjamie					    sizeof(tpr->pr_hostuuid));
1570193066Sjamie				if (gothid)
1571193066Sjamie					tpr->pr_hostid = hid;
1572193066Sjamie			}
1573193066Sjamie		}
1574193066Sjamie	}
1575192895Sjamie	if ((tallow = ch_allow & ~pr_allow)) {
1576192895Sjamie		/* Clear allow bits in all children. */
1577192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1578192895Sjamie			tpr->pr_allow &= ~tallow;
1579192895Sjamie	}
1580192895Sjamie	pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
1581191673Sjamie	/*
1582191673Sjamie	 * Persistent prisons get an extra reference, and prisons losing their
1583191673Sjamie	 * persist flag lose that reference.  Only do this for existing prisons
1584191673Sjamie	 * for now, so new ones will remain unseen until after the module
1585191673Sjamie	 * handlers have completed.
1586191673Sjamie	 */
1587191673Sjamie	if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
1588191673Sjamie		if (pr_flags & PR_PERSIST) {
1589191673Sjamie			pr->pr_ref++;
1590191673Sjamie			pr->pr_uref++;
1591191673Sjamie		} else {
1592191673Sjamie			pr->pr_ref--;
1593191673Sjamie			pr->pr_uref--;
1594191673Sjamie		}
1595191673Sjamie	}
1596191673Sjamie	pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
1597191673Sjamie	mtx_unlock(&pr->pr_mtx);
1598185435Sbz
1599192895Sjamie	/* Locks may have prevented a complete restriction of child IP
1600192895Sjamie	 * addresses.  If so, allocate some more memory and try again.
1601192895Sjamie	 */
1602192895Sjamie#ifdef INET
1603192895Sjamie	while (redo_ip4) {
1604192895Sjamie		ip4s = pr->pr_ip4s;
1605192895Sjamie		ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
1606192895Sjamie		mtx_lock(&pr->pr_mtx);
1607192895Sjamie		redo_ip4 = 0;
1608192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1609195945Sjamie#ifdef VIMAGE
1610195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1611195945Sjamie				descend = 0;
1612195945Sjamie				continue;
1613195945Sjamie			}
1614195945Sjamie#endif
1615192895Sjamie			if (prison_restrict_ip4(tpr, ip4)) {
1616192895Sjamie				if (ip4 != NULL)
1617192895Sjamie					ip4 = NULL;
1618192895Sjamie				else
1619192895Sjamie					redo_ip4 = 1;
1620192895Sjamie			}
1621192895Sjamie		}
1622192895Sjamie		mtx_unlock(&pr->pr_mtx);
1623192895Sjamie	}
1624192895Sjamie#endif
1625192895Sjamie#ifdef INET6
1626192895Sjamie	while (redo_ip6) {
1627192895Sjamie		ip6s = pr->pr_ip6s;
1628192895Sjamie		ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
1629192895Sjamie		mtx_lock(&pr->pr_mtx);
1630192895Sjamie		redo_ip6 = 0;
1631192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1632195945Sjamie#ifdef VIMAGE
1633195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1634195945Sjamie				descend = 0;
1635195945Sjamie				continue;
1636195945Sjamie			}
1637195945Sjamie#endif
1638192895Sjamie			if (prison_restrict_ip6(tpr, ip6)) {
1639192895Sjamie				if (ip6 != NULL)
1640192895Sjamie					ip6 = NULL;
1641192895Sjamie				else
1642192895Sjamie					redo_ip6 = 1;
1643192895Sjamie			}
1644192895Sjamie		}
1645192895Sjamie		mtx_unlock(&pr->pr_mtx);
1646192895Sjamie	}
1647192895Sjamie#endif
1648192895Sjamie
1649191673Sjamie	/* Let the modules do their work. */
1650191673Sjamie	sx_downgrade(&allprison_lock);
1651191673Sjamie	if (created) {
1652191673Sjamie		error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
1653191673Sjamie		if (error) {
1654191673Sjamie			prison_deref(pr, PD_LIST_SLOCKED);
1655191673Sjamie			goto done_errmsg;
1656191673Sjamie		}
1657191673Sjamie	}
1658191673Sjamie	error = osd_jail_call(pr, PR_METHOD_SET, opts);
1659191673Sjamie	if (error) {
1660191673Sjamie		prison_deref(pr, created
1661191673Sjamie		    ? PD_LIST_SLOCKED
1662191673Sjamie		    : PD_DEREF | PD_LIST_SLOCKED);
1663191673Sjamie		goto done_errmsg;
1664191673Sjamie	}
1665191673Sjamie
1666191673Sjamie	/* Attach this process to the prison if requested. */
1667191673Sjamie	if (flags & JAIL_ATTACH) {
1668191673Sjamie		mtx_lock(&pr->pr_mtx);
1669191673Sjamie		error = do_jail_attach(td, pr);
1670191673Sjamie		if (error) {
1671191673Sjamie			vfs_opterror(opts, "attach failed");
1672191673Sjamie			if (!created)
1673191673Sjamie				prison_deref(pr, PD_DEREF);
1674191673Sjamie			goto done_errmsg;
1675191673Sjamie		}
1676191673Sjamie	}
1677191673Sjamie
1678191673Sjamie	/*
1679191673Sjamie	 * Now that it is all there, drop the temporary reference from existing
1680191673Sjamie	 * prisons.  Or add a reference to newly created persistent prisons
1681191673Sjamie	 * (which was not done earlier so that the prison would not be publicly
1682191673Sjamie	 * visible).
1683191673Sjamie	 */
1684191673Sjamie	if (!created) {
1685191673Sjamie		prison_deref(pr, (flags & JAIL_ATTACH)
1686191673Sjamie		    ? PD_DEREF
1687191673Sjamie		    : PD_DEREF | PD_LIST_SLOCKED);
1688191673Sjamie	} else {
1689191673Sjamie		if (pr_flags & PR_PERSIST) {
1690191673Sjamie			mtx_lock(&pr->pr_mtx);
1691191673Sjamie			pr->pr_ref++;
1692191673Sjamie			pr->pr_uref++;
1693191673Sjamie			mtx_unlock(&pr->pr_mtx);
1694191673Sjamie		}
1695191673Sjamie		if (!(flags & JAIL_ATTACH))
1696191673Sjamie			sx_sunlock(&allprison_lock);
1697191673Sjamie	}
1698191673Sjamie	td->td_retval[0] = pr->pr_id;
1699191673Sjamie	goto done_errmsg;
1700191673Sjamie
1701192895Sjamie done_deref_locked:
1702192895Sjamie	prison_deref(pr, created
1703192895Sjamie	    ? PD_LOCKED | PD_LIST_XLOCKED
1704192895Sjamie	    : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
1705192895Sjamie	goto done_releroot;
1706191673Sjamie done_unlock_list:
1707191673Sjamie	sx_xunlock(&allprison_lock);
1708191673Sjamie done_releroot:
1709191673Sjamie	if (root != NULL) {
1710191673Sjamie		vfslocked = VFS_LOCK_GIANT(root->v_mount);
1711191673Sjamie		vrele(root);
1712191673Sjamie		VFS_UNLOCK_GIANT(vfslocked);
1713191673Sjamie	}
1714191673Sjamie done_errmsg:
1715191673Sjamie	if (error) {
1716191673Sjamie		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
1717191673Sjamie		if (errmsg_len > 0) {
1718191673Sjamie			errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
1719191673Sjamie			if (errmsg_pos > 0) {
1720191673Sjamie				if (optuio->uio_segflg == UIO_SYSSPACE)
1721191673Sjamie					bcopy(errmsg,
1722191673Sjamie					   optuio->uio_iov[errmsg_pos].iov_base,
1723191673Sjamie					   errmsg_len);
1724191673Sjamie				else
1725191673Sjamie					copyout(errmsg,
1726191673Sjamie					   optuio->uio_iov[errmsg_pos].iov_base,
1727191673Sjamie					   errmsg_len);
1728191673Sjamie			}
1729191673Sjamie		}
1730191673Sjamie	}
1731191673Sjamie done_free:
1732191673Sjamie#ifdef INET
1733191673Sjamie	free(ip4, M_PRISON);
1734191673Sjamie#endif
1735191673Sjamie#ifdef INET6
1736191673Sjamie	free(ip6, M_PRISON);
1737191673Sjamie#endif
1738191673Sjamie	vfs_freeopts(opts);
1739191673Sjamie	return (error);
1740191673Sjamie}
1741191673Sjamie
1742191673Sjamie
174382710Sdillon/*
1744191673Sjamie * struct jail_get_args {
1745191673Sjamie *	struct iovec *iovp;
1746191673Sjamie *	unsigned int iovcnt;
1747191673Sjamie *	int flags;
1748114168Smike * };
174982710Sdillon */
175046155Sphkint
1751191673Sjamiejail_get(struct thread *td, struct jail_get_args *uap)
175246155Sphk{
1753191673Sjamie	struct uio *auio;
1754185435Sbz	int error;
1755185435Sbz
1756191673Sjamie	/* Check that we have an even number of iovecs. */
1757191673Sjamie	if (uap->iovcnt & 1)
1758191673Sjamie		return (EINVAL);
1759191673Sjamie
1760191673Sjamie	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
1761185435Sbz	if (error)
1762185435Sbz		return (error);
1763191673Sjamie	error = kern_jail_get(td, auio, uap->flags);
1764191673Sjamie	if (error == 0)
1765191673Sjamie		error = copyout(auio->uio_iov, uap->iovp,
1766191673Sjamie		    uap->iovcnt * sizeof (struct iovec));
1767191673Sjamie	free(auio, M_IOV);
1768191673Sjamie	return (error);
1769191673Sjamie}
1770185435Sbz
/*
 * Fetch the parameters of one jail into the option list described by optuio.
 *
 * The jail is chosen by the first of the "lastjid", "jid" (when non-zero) or
 * "name" options that is present, and is looked up among the descendants of
 * the calling thread's prison.  A jail whose pr_uref is zero is only returned
 * when JAIL_DYING is set in flags.
 *
 * On success td->td_retval[0] holds the jail's id, every option with a
 * non-negative position has its value copied into the matching value iovec
 * (index 2 * pos + 1) and that iovec's iov_len is updated.  On failure the
 * contents of the "errmsg" option, if one was supplied, are copied to its
 * value iovec.  Returns 0 or an errno value.
 */
1771191673Sjamieint
1772191673Sjamiekern_jail_get(struct thread *td, struct uio *optuio, int flags)
1773191673Sjamie{
1774192895Sjamie	struct prison *pr, *mypr;
1775191673Sjamie	struct vfsopt *opt;
1776191673Sjamie	struct vfsoptlist *opts;
1777191673Sjamie	char *errmsg, *name;
1778192895Sjamie	int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;
1779185435Sbz
	/* Reject any flag bit outside JAIL_GET_MASK. */
1780191673Sjamie	if (flags & ~JAIL_GET_MASK)
1781191673Sjamie		return (EINVAL);
1782185435Sbz
1783191673Sjamie	/* Get the parameter list. */
1784191673Sjamie	error = vfs_buildopts(optuio, &opts);
1785191673Sjamie	if (error)
1786191673Sjamie		return (error);
	/* Remembered so "errmsg" can be skipped on write-back and filled on error. */
1787191673Sjamie	errmsg_pos = vfs_getopt_pos(opts, "errmsg");
1788192895Sjamie	mypr = td->td_ucred->cr_prison;
1789185435Sbz
1790191673Sjamie	/*
1791191673Sjamie	 * Find the prison specified by one of: lastjid, jid, name.
1792191673Sjamie	 */
1793191673Sjamie	sx_slock(&allprison_lock);
1794191673Sjamie	error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
1795191673Sjamie	if (error == 0) {
		/*
		 * Take the first prison in the allprison list with an id above
		 * jid that is a child of mypr and is alive (or merely
		 * referenced, with JAIL_DYING).  The loop is left with that
		 * prison's mutex held.
		 */
1796191673Sjamie		TAILQ_FOREACH(pr, &allprison, pr_list) {
1797192895Sjamie			if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
1798191673Sjamie				mtx_lock(&pr->pr_mtx);
1799191673Sjamie				if (pr->pr_ref > 0 &&
1800191673Sjamie				    (pr->pr_uref > 0 || (flags & JAIL_DYING)))
1801191673Sjamie					break;
1802191673Sjamie				mtx_unlock(&pr->pr_mtx);
1803191673Sjamie			}
1804191673Sjamie		}
1805191673Sjamie		if (pr != NULL)
1806191673Sjamie			goto found_prison;
1807191673Sjamie		error = ENOENT;
1808191673Sjamie		vfs_opterror(opts, "no jail after %d", jid);
1809191673Sjamie		goto done_unlock_list;
1810191673Sjamie	} else if (error != ENOENT)
1811191673Sjamie		goto done_unlock_list;
1812185435Sbz
1813191673Sjamie	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
1814191673Sjamie	if (error == 0) {
		/* A jid of zero is not looked up; fall through to "name". */
1815191673Sjamie		if (jid != 0) {
1816192895Sjamie			pr = prison_find_child(mypr, jid);
1817191673Sjamie			if (pr != NULL) {
1818191673Sjamie				if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
1819191673Sjamie					mtx_unlock(&pr->pr_mtx);
1820191673Sjamie					error = ENOENT;
1821191673Sjamie					vfs_opterror(opts, "jail %d is dying",
1822191673Sjamie					    jid);
1823191673Sjamie					goto done_unlock_list;
1824191673Sjamie				}
1825191673Sjamie				goto found_prison;
1826191673Sjamie			}
1827191673Sjamie			error = ENOENT;
1828191673Sjamie			vfs_opterror(opts, "jail %d not found", jid);
1829191673Sjamie			goto done_unlock_list;
1830191673Sjamie		}
1831191673Sjamie	} else if (error != ENOENT)
1832191673Sjamie		goto done_unlock_list;
183346155Sphk
1834191673Sjamie	error = vfs_getopt(opts, "name", (void **)&name, &len);
1835191673Sjamie	if (error == 0) {
		/* The name must be a non-empty, NUL-terminated string. */
1836191673Sjamie		if (len == 0 || name[len - 1] != '\0') {
1837191673Sjamie			error = EINVAL;
1838191673Sjamie			goto done_unlock_list;
1839191673Sjamie		}
1840192895Sjamie		pr = prison_find_name(mypr, name);
1841191673Sjamie		if (pr != NULL) {
1842191673Sjamie			if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
1843191673Sjamie				mtx_unlock(&pr->pr_mtx);
1844191673Sjamie				error = ENOENT;
1845191673Sjamie				vfs_opterror(opts, "jail \"%s\" is dying",
1846191673Sjamie				    name);
1847191673Sjamie				goto done_unlock_list;
1848191673Sjamie			}
1849191673Sjamie			goto found_prison;
1850191673Sjamie		}
1851191673Sjamie		error = ENOENT;
1852191673Sjamie		vfs_opterror(opts, "jail \"%s\" not found", name);
1853191673Sjamie		goto done_unlock_list;
1854191673Sjamie	} else if (error != ENOENT)
1855191673Sjamie		goto done_unlock_list;
1856185435Sbz
1857191673Sjamie	vfs_opterror(opts, "no jail specified");
1858191673Sjamie	error = ENOENT;
1859191673Sjamie	goto done_unlock_list;
1860191673Sjamie
1861191673Sjamie found_prison:
1862191673Sjamie	/* Get the parameters of the prison. */
	/*
	 * Every path to this label arrives with pr->pr_mtx held.  Take a
	 * reference for the duration of the fetch; "locked" records whether
	 * the mutex is still held so done_deref can tell prison_deref().
	 */
1863191673Sjamie	pr->pr_ref++;
1864191673Sjamie	locked = PD_LOCKED;
1865191673Sjamie	td->td_retval[0] = pr->pr_id;
	/*
	 * In the calls below ENOENT is tolerated and only other errors abort.
	 * NOTE(review): presumably ENOENT means the caller did not ask for
	 * that parameter - confirm against vfs_setopt().
	 */
1866191673Sjamie	error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
1867191673Sjamie	if (error != 0 && error != ENOENT)
1868191673Sjamie		goto done_deref;
	/* A direct child of the caller's prison reports a parent id of 0. */
1869192895Sjamie	i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
1870192895Sjamie	error = vfs_setopt(opts, "parent", &i, sizeof(i));
1871191673Sjamie	if (error != 0 && error != ENOENT)
1872191673Sjamie		goto done_deref;
1873192895Sjamie	error = vfs_setopts(opts, "name", prison_name(mypr, pr));
1874192895Sjamie	if (error != 0 && error != ENOENT)
1875192895Sjamie		goto done_deref;
1876192895Sjamie	error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
1877191673Sjamie	    sizeof(pr->pr_cpuset->cs_id));
1878191673Sjamie	if (error != 0 && error != ENOENT)
1879191673Sjamie		goto done_deref;
1880192895Sjamie	error = vfs_setopts(opts, "path", prison_path(mypr, pr));
1881191673Sjamie	if (error != 0 && error != ENOENT)
1882191673Sjamie		goto done_deref;
1883191673Sjamie#ifdef INET
1884191673Sjamie	error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
1885191673Sjamie	    pr->pr_ip4s * sizeof(*pr->pr_ip4));
1886191673Sjamie	if (error != 0 && error != ENOENT)
1887191673Sjamie		goto done_deref;
1888191673Sjamie#endif
1889191673Sjamie#ifdef INET6
1890191673Sjamie	error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
1891191673Sjamie	    pr->pr_ip6s * sizeof(*pr->pr_ip6));
1892191673Sjamie	if (error != 0 && error != ENOENT)
1893191673Sjamie		goto done_deref;
1894191673Sjamie#endif
1895191673Sjamie	error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
1896191673Sjamie	    sizeof(pr->pr_securelevel));
1897191673Sjamie	if (error != 0 && error != ENOENT)
1898191673Sjamie		goto done_deref;
1899194762Sjamie	error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
1900194762Sjamie	    sizeof(pr->pr_childcount));
1901194762Sjamie	if (error != 0 && error != ENOENT)
1902194762Sjamie		goto done_deref;
1903194762Sjamie	error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
1904194762Sjamie	    sizeof(pr->pr_childmax));
1905194762Sjamie	if (error != 0 && error != ENOENT)
1906194762Sjamie		goto done_deref;
1907194118Sjamie	error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
1908191673Sjamie	if (error != 0 && error != ENOENT)
1909191673Sjamie		goto done_deref;
1910194118Sjamie	error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
1911193066Sjamie	if (error != 0 && error != ENOENT)
1912193066Sjamie		goto done_deref;
1913194118Sjamie	error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
1914193066Sjamie	if (error != 0 && error != ENOENT)
1915193066Sjamie		goto done_deref;
	/* Processes flagged SV_IA32 receive the hostid as a uint32_t. */
1916193066Sjamie#ifdef COMPAT_IA32
1917193066Sjamie	if (td->td_proc->p_sysent->sv_flags & SV_IA32) {
1918193066Sjamie		uint32_t hid32 = pr->pr_hostid;
1919193066Sjamie
1920193066Sjamie		error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
1921193066Sjamie	} else
1922193066Sjamie#endif
1923193066Sjamie	error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
1924193066Sjamie	    sizeof(pr->pr_hostid));
1925193066Sjamie	if (error != 0 && error != ENOENT)
1926193066Sjamie		goto done_deref;
1927192895Sjamie	error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
1928192895Sjamie	    sizeof(pr->pr_enforce_statfs));
1929191673Sjamie	if (error != 0 && error != ENOENT)
1930191673Sjamie		goto done_deref;
	/*
	 * Bit fi of pr_flags is reported under pr_flag_names[fi] and, with
	 * the value inverted, under pr_flag_nonames[fi].
	 */
1931192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
1932192895Sjamie	    fi++) {
1933192895Sjamie		if (pr_flag_names[fi] == NULL)
1934192895Sjamie			continue;
1935192895Sjamie		i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
1936192895Sjamie		error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
1937192895Sjamie		if (error != 0 && error != ENOENT)
1938192895Sjamie			goto done_deref;
1939192895Sjamie		i = !i;
1940192895Sjamie		error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
1941192895Sjamie		if (error != 0 && error != ENOENT)
1942192895Sjamie			goto done_deref;
1943192895Sjamie	}
	/*
	 * For each pr_flag_jailsys entry, mask out its disable/new bits and
	 * map them: JAIL_SYS_DISABLE when the entry has a disable flag and
	 * exactly that is set, JAIL_SYS_NEW when exactly the new flag is
	 * set, JAIL_SYS_INHERIT otherwise.
	 */
1944195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
1945195870Sjamie	    fi++) {
1946195870Sjamie		i = pr->pr_flags &
1947195870Sjamie		    (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
1948195870Sjamie		i = pr_flag_jailsys[fi].disable &&
1949195870Sjamie		      (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
1950195870Sjamie		    : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
1951195870Sjamie		    : JAIL_SYS_INHERIT;
1952195870Sjamie		error =
1953195870Sjamie		    vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
1954195870Sjamie		if (error != 0 && error != ENOENT)
1955195870Sjamie			goto done_deref;
1956195870Sjamie	}
	/* Same name/"no"-name reporting as above, for the pr_allow bits. */
1957192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
1958192895Sjamie	    fi++) {
1959192895Sjamie		if (pr_allow_names[fi] == NULL)
1960192895Sjamie			continue;
1961192895Sjamie		i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
1962192895Sjamie		error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
1963192895Sjamie		if (error != 0 && error != ENOENT)
1964192895Sjamie			goto done_deref;
1965192895Sjamie		i = !i;
1966192895Sjamie		error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
1967192895Sjamie		if (error != 0 && error != ENOENT)
1968192895Sjamie			goto done_deref;
1969192895Sjamie	}
	/* "dying" is true when the prison has no user references left. */
1970191673Sjamie	i = (pr->pr_uref == 0);
1971191673Sjamie	error = vfs_setopt(opts, "dying", &i, sizeof(i));
1972191673Sjamie	if (error != 0 && error != ENOENT)
1973191673Sjamie		goto done_deref;
1974191673Sjamie	i = !i;
1975191673Sjamie	error = vfs_setopt(opts, "nodying", &i, sizeof(i));
1976191673Sjamie	if (error != 0 && error != ENOENT)
1977191673Sjamie		goto done_deref;
1978191673Sjamie
1979191673Sjamie	/* Get the module parameters. */
	/* The handlers run without pr_mtx; the reference taken above is kept. */
1980191673Sjamie	mtx_unlock(&pr->pr_mtx);
1981191673Sjamie	locked = 0;
1982191673Sjamie	error = osd_jail_call(pr, PR_METHOD_GET, opts);
198346155Sphk	if (error)
1984191673Sjamie		goto done_deref;
	/* Drop the temporary reference; this also releases allprison_lock. */
1985191673Sjamie	prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);
198684828Sjhb
1987191673Sjamie	/* By now, all parameters should have been noted. */
1988191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
1989191673Sjamie		if (!opt->seen && strcmp(opt->name, "errmsg")) {
1990191673Sjamie			error = EINVAL;
1991191673Sjamie			vfs_opterror(opts, "unknown parameter: %s", opt->name);
1992191673Sjamie			goto done_errmsg;
1993191673Sjamie		}
1994185435Sbz	}
1995191673Sjamie
1996191673Sjamie	/* Write the fetched parameters back to userspace. */
1997191673Sjamie	error = 0;
1998191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
1999191673Sjamie		if (opt->pos >= 0 && opt->pos != errmsg_pos) {
			/* Option n uses iovec 2n + 1 for its value. */
2000191673Sjamie			pos = 2 * opt->pos + 1;
2001191673Sjamie			optuio->uio_iov[pos].iov_len = opt->len;
2002191673Sjamie			if (opt->value != NULL) {
2003191673Sjamie				if (optuio->uio_segflg == UIO_SYSSPACE) {
2004191673Sjamie					bcopy(opt->value,
2005191673Sjamie					    optuio->uio_iov[pos].iov_base,
2006191673Sjamie					    opt->len);
2007191673Sjamie				} else {
2008191673Sjamie					error = copyout(opt->value,
2009191673Sjamie					    optuio->uio_iov[pos].iov_base,
2010191673Sjamie					    opt->len);
2011191673Sjamie					if (error)
2012191673Sjamie						break;
2013191673Sjamie				}
2014191673Sjamie			}
2015191673Sjamie		}
2016185435Sbz	}
2017191673Sjamie	goto done_errmsg;
2018191673Sjamie
	/* Error exit while still holding a reference (and maybe pr_mtx). */
2019191673Sjamie done_deref:
2020191673Sjamie	prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
2021191673Sjamie	goto done_errmsg;
2022191673Sjamie
	/* Error exit with only allprison_lock held. */
2023191673Sjamie done_unlock_list:
2024191673Sjamie	sx_sunlock(&allprison_lock);
	/*
	 * Common exit.  On error, copy the "errmsg" option's contents to its
	 * value iovec if the caller supplied one; the copyout() result is
	 * ignored, so delivery of the message is best-effort.
	 */
2025191673Sjamie done_errmsg:
2026191673Sjamie	if (error && errmsg_pos >= 0) {
2027191673Sjamie		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
2028191673Sjamie		errmsg_pos = 2 * errmsg_pos + 1;
2029191673Sjamie		if (errmsg_len > 0) {
2030191673Sjamie			if (optuio->uio_segflg == UIO_SYSSPACE)
2031191673Sjamie				bcopy(errmsg,
2032191673Sjamie				    optuio->uio_iov[errmsg_pos].iov_base,
2033191673Sjamie				    errmsg_len);
2034191673Sjamie			else
2035191673Sjamie				copyout(errmsg,
2036191673Sjamie				    optuio->uio_iov[errmsg_pos].iov_base,
2037191673Sjamie				    errmsg_len);
2038191673Sjamie		}
2039185435Sbz	}
2040191673Sjamie	vfs_freeopts(opts);
2041191673Sjamie	return (error);
2042191673Sjamie}
2043113275Smike
2044192895Sjamie
2045191673Sjamie/*
2046191673Sjamie * struct jail_remove_args {
2047191673Sjamie *	int jid;
2048191673Sjamie * };
2049191673Sjamie */
2050191673Sjamieint
2051191673Sjamiejail_remove(struct thread *td, struct jail_remove_args *uap)
2052191673Sjamie{
2053192895Sjamie	struct prison *pr, *cpr, *lpr, *tpr;
2054192895Sjamie	int descend, error;
2055185435Sbz
2056191673Sjamie	error = priv_check(td, PRIV_JAIL_REMOVE);
2057185435Sbz	if (error)
2058191673Sjamie		return (error);
2059185435Sbz
2060185435Sbz	sx_xlock(&allprison_lock);
2061192895Sjamie	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
2062191673Sjamie	if (pr == NULL) {
2063185435Sbz		sx_xunlock(&allprison_lock);
2064191673Sjamie		return (EINVAL);
2065185435Sbz	}
2066185435Sbz
2067192895Sjamie	/* Remove all descendants of this prison, then remove this prison. */
2068192895Sjamie	pr->pr_ref++;
2069192895Sjamie	pr->pr_flags |= PR_REMOVE;
2070192895Sjamie	if (!LIST_EMPTY(&pr->pr_children)) {
2071192895Sjamie		mtx_unlock(&pr->pr_mtx);
2072192895Sjamie		lpr = NULL;
2073192895Sjamie		FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
2074192895Sjamie			mtx_lock(&cpr->pr_mtx);
2075192895Sjamie			if (cpr->pr_ref > 0) {
2076192895Sjamie				tpr = cpr;
2077192895Sjamie				cpr->pr_ref++;
2078192895Sjamie				cpr->pr_flags |= PR_REMOVE;
2079192895Sjamie			} else {
2080192895Sjamie				/* Already removed - do not do it again. */
2081192895Sjamie				tpr = NULL;
2082192895Sjamie			}
2083192895Sjamie			mtx_unlock(&cpr->pr_mtx);
2084192895Sjamie			if (lpr != NULL) {
2085192895Sjamie				mtx_lock(&lpr->pr_mtx);
2086192895Sjamie				prison_remove_one(lpr);
2087192895Sjamie				sx_xlock(&allprison_lock);
2088192895Sjamie			}
2089192895Sjamie			lpr = tpr;
2090192895Sjamie		}
2091192895Sjamie		if (lpr != NULL) {
2092192895Sjamie			mtx_lock(&lpr->pr_mtx);
2093192895Sjamie			prison_remove_one(lpr);
2094192895Sjamie			sx_xlock(&allprison_lock);
2095192895Sjamie		}
2096192895Sjamie		mtx_lock(&pr->pr_mtx);
2097192895Sjamie	}
2098192895Sjamie	prison_remove_one(pr);
2099192895Sjamie	return (0);
2100192895Sjamie}
2101192895Sjamie
2102192895Sjamiestatic void
2103192895Sjamieprison_remove_one(struct prison *pr)
2104192895Sjamie{
2105192895Sjamie	struct proc *p;
2106192895Sjamie	int deuref;
2107192895Sjamie
2108191673Sjamie	/* If the prison was persistent, it is not anymore. */
2109191673Sjamie	deuref = 0;
2110191673Sjamie	if (pr->pr_flags & PR_PERSIST) {
2111191673Sjamie		pr->pr_ref--;
2112191673Sjamie		deuref = PD_DEUREF;
2113191673Sjamie		pr->pr_flags &= ~PR_PERSIST;
2114179881Sdelphij	}
2115113275Smike
2116192895Sjamie	/*
2117192895Sjamie	 * jail_remove added a reference.  If that's the only one, remove
2118192895Sjamie	 * the prison now.
2119192895Sjamie	 */
2120192895Sjamie	KASSERT(pr->pr_ref > 0,
2121192895Sjamie	    ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
2122192895Sjamie	if (pr->pr_ref == 1) {
2123191673Sjamie		prison_deref(pr,
2124191673Sjamie		    deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
2125192895Sjamie		return;
2126191673Sjamie	}
2127191673Sjamie
2128113275Smike	mtx_unlock(&pr->pr_mtx);
2129191673Sjamie	sx_xunlock(&allprison_lock);
2130191673Sjamie	/*
2131191673Sjamie	 * Kill all processes unfortunate enough to be attached to this prison.
2132191673Sjamie	 */
2133191673Sjamie	sx_slock(&allproc_lock);
2134191673Sjamie	LIST_FOREACH(p, &allproc, p_list) {
2135191673Sjamie		PROC_LOCK(p);
2136191673Sjamie		if (p->p_state != PRS_NEW && p->p_ucred &&
2137191673Sjamie		    p->p_ucred->cr_prison == pr)
2138191673Sjamie			psignal(p, SIGKILL);
2139191673Sjamie		PROC_UNLOCK(p);
2140191673Sjamie	}
2141191673Sjamie	sx_sunlock(&allproc_lock);
2142192895Sjamie	/* Remove the temporary reference added by jail_remove. */
2143191673Sjamie	prison_deref(pr, deuref | PD_DEREF);
2144113275Smike}
2145113275Smike
2146190466Sjamie
2147113275Smike/*
2148114168Smike * struct jail_attach_args {
2149114168Smike *	int jid;
2150114168Smike * };
2151113275Smike */
2152113275Smikeint
2153114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap)
2154113275Smike{
2155113275Smike	struct prison *pr;
2156191673Sjamie	int error;
2157167309Spjd
2158164032Srwatson	error = priv_check(td, PRIV_JAIL_ATTACH);
2159126023Snectar	if (error)
2160126023Snectar		return (error);
2161126023Snectar
2162168401Spjd	sx_slock(&allprison_lock);
2163192895Sjamie	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
2164113275Smike	if (pr == NULL) {
2165168401Spjd		sx_sunlock(&allprison_lock);
2166113275Smike		return (EINVAL);
2167113275Smike	}
2168185435Sbz
2169185435Sbz	/*
2170185435Sbz	 * Do not allow a process to attach to a prison that is not
2171191673Sjamie	 * considered to be "alive".
2172185435Sbz	 */
2173191673Sjamie	if (pr->pr_uref == 0) {
2174185435Sbz		mtx_unlock(&pr->pr_mtx);
2175185435Sbz		sx_sunlock(&allprison_lock);
2176185435Sbz		return (EINVAL);
2177185435Sbz	}
2178191673Sjamie
2179191673Sjamie	return (do_jail_attach(td, pr));
2180191673Sjamie}
2181191673Sjamie
2182191673Sjamiestatic int
2183191673Sjamiedo_jail_attach(struct thread *td, struct prison *pr)
2184191673Sjamie{
2185192895Sjamie	struct prison *ppr;
2186191673Sjamie	struct proc *p;
2187191673Sjamie	struct ucred *newcred, *oldcred;
2188191673Sjamie	int vfslocked, error;
2189191673Sjamie
2190191673Sjamie	/*
2191191673Sjamie	 * XXX: Note that there is a slight race here if two threads
2192191673Sjamie	 * in the same privileged process attempt to attach to two
2193191673Sjamie	 * different jails at the same time.  It is important for
2194191673Sjamie	 * user processes not to do this, or they might end up with
2195191673Sjamie	 * a process root from one prison, but attached to the jail
2196191673Sjamie	 * of another.
2197191673Sjamie	 */
2198113275Smike	pr->pr_ref++;
2199191673Sjamie	pr->pr_uref++;
2200113275Smike	mtx_unlock(&pr->pr_mtx);
2201191673Sjamie
2202191673Sjamie	/* Let modules do whatever they need to prepare for attaching. */
2203191673Sjamie	error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
2204191673Sjamie	if (error) {
2205191673Sjamie		prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
2206191673Sjamie		return (error);
2207191673Sjamie	}
2208168401Spjd	sx_sunlock(&allprison_lock);
2209113275Smike
2210185435Sbz	/*
2211185435Sbz	 * Reparent the newly attached process to this jail.
2212185435Sbz	 */
2213192895Sjamie	ppr = td->td_ucred->cr_prison;
2214191673Sjamie	p = td->td_proc;
2215185435Sbz	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
2216185435Sbz	if (error)
2217191673Sjamie		goto e_revert_osd;
2218185435Sbz
2219150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
2220175202Sattilio	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
2221113275Smike	if ((error = change_dir(pr->pr_root, td)) != 0)
2222113275Smike		goto e_unlock;
2223113275Smike#ifdef MAC
2224172930Srwatson	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
2225113275Smike		goto e_unlock;
2226113275Smike#endif
2227175294Sattilio	VOP_UNLOCK(pr->pr_root, 0);
2228191673Sjamie	if ((error = change_root(pr->pr_root, td)))
2229191673Sjamie		goto e_unlock_giant;
2230150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
2231113275Smike
223284828Sjhb	newcred = crget();
223384828Sjhb	PROC_LOCK(p);
223484828Sjhb	oldcred = p->p_ucred;
2235113275Smike	setsugid(p);
223684828Sjhb	crcopy(newcred, oldcred);
2237113630Sjhb	newcred->cr_prison = pr;
223884828Sjhb	p->p_ucred = newcred;
223984828Sjhb	PROC_UNLOCK(p);
224084828Sjhb	crfree(oldcred);
2241192895Sjamie	prison_deref(ppr, PD_DEREF | PD_DEUREF);
224246155Sphk	return (0);
2243191673Sjamie e_unlock:
2244175294Sattilio	VOP_UNLOCK(pr->pr_root, 0);
2245191673Sjamie e_unlock_giant:
2246150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
2247191673Sjamie e_revert_osd:
2248191673Sjamie	/* Tell modules this thread is still in its old jail after all. */
2249192895Sjamie	(void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
2250191673Sjamie	prison_deref(pr, PD_DEREF | PD_DEUREF);
225146155Sphk	return (error);
225246155Sphk}
225346155Sphk
2254192895Sjamie
2255113275Smike/*
2256113275Smike * Returns a locked prison instance, or NULL on failure.
2257113275Smike */
2258168399Spjdstruct prison *
2259113275Smikeprison_find(int prid)
2260113275Smike{
2261113275Smike	struct prison *pr;
2262113275Smike
2263168401Spjd	sx_assert(&allprison_lock, SX_LOCKED);
2264191673Sjamie	TAILQ_FOREACH(pr, &allprison, pr_list) {
2265113275Smike		if (pr->pr_id == prid) {
2266113275Smike			mtx_lock(&pr->pr_mtx);
2267191673Sjamie			if (pr->pr_ref > 0)
2268191673Sjamie				return (pr);
2269191673Sjamie			mtx_unlock(&pr->pr_mtx);
2270113275Smike		}
2271113275Smike	}
2272113275Smike	return (NULL);
2273113275Smike}
2274113275Smike
2275191673Sjamie/*
2276192895Sjamie * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
2277191673Sjamie */
2278191673Sjamiestruct prison *
2279192895Sjamieprison_find_child(struct prison *mypr, int prid)
2280191673Sjamie{
2281192895Sjamie	struct prison *pr;
2282192895Sjamie	int descend;
2283192895Sjamie
2284192895Sjamie	sx_assert(&allprison_lock, SX_LOCKED);
2285192895Sjamie	FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
2286192895Sjamie		if (pr->pr_id == prid) {
2287192895Sjamie			mtx_lock(&pr->pr_mtx);
2288192895Sjamie			if (pr->pr_ref > 0)
2289192895Sjamie				return (pr);
2290192895Sjamie			mtx_unlock(&pr->pr_mtx);
2291192895Sjamie		}
2292192895Sjamie	}
2293192895Sjamie	return (NULL);
2294192895Sjamie}
2295192895Sjamie
2296192895Sjamie/*
2297192895Sjamie * Look for the name relative to mypr.  Returns a locked prison or NULL.
2298192895Sjamie */
2299192895Sjamiestruct prison *
2300192895Sjamieprison_find_name(struct prison *mypr, const char *name)
2301192895Sjamie{
2302191673Sjamie	struct prison *pr, *deadpr;
2303192895Sjamie	size_t mylen;
2304192895Sjamie	int descend;
2305191673Sjamie
2306191673Sjamie	sx_assert(&allprison_lock, SX_LOCKED);
2307192895Sjamie	mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
2308191673Sjamie again:
2309191673Sjamie	deadpr = NULL;
2310192895Sjamie	FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
2311192895Sjamie		if (!strcmp(pr->pr_name + mylen, name)) {
2312191673Sjamie			mtx_lock(&pr->pr_mtx);
2313191673Sjamie			if (pr->pr_ref > 0) {
2314191673Sjamie				if (pr->pr_uref > 0)
2315191673Sjamie					return (pr);
2316191673Sjamie				deadpr = pr;
2317191673Sjamie			}
2318191673Sjamie			mtx_unlock(&pr->pr_mtx);
2319191673Sjamie		}
2320191673Sjamie	}
2321192895Sjamie	/* There was no valid prison - perhaps there was a dying one. */
2322191673Sjamie	if (deadpr != NULL) {
2323191673Sjamie		mtx_lock(&deadpr->pr_mtx);
2324191673Sjamie		if (deadpr->pr_ref == 0) {
2325191673Sjamie			mtx_unlock(&deadpr->pr_mtx);
2326191673Sjamie			goto again;
2327191673Sjamie		}
2328191673Sjamie	}
2329191673Sjamie	return (deadpr);
2330191673Sjamie}
2331191673Sjamie
2332191673Sjamie/*
2333192895Sjamie * See if a prison has the specific flag set.
2334192895Sjamie */
2335192895Sjamieint
2336192895Sjamieprison_flag(struct ucred *cred, unsigned flag)
2337192895Sjamie{
2338192895Sjamie
2339192895Sjamie	/* This is an atomic read, so no locking is necessary. */
2340192895Sjamie	return (cred->cr_prison->pr_flags & flag);
2341192895Sjamie}
2342192895Sjamie
2343192895Sjamieint
2344192895Sjamieprison_allow(struct ucred *cred, unsigned flag)
2345192895Sjamie{
2346192895Sjamie
2347192895Sjamie	/* This is an atomic read, so no locking is necessary. */
2348192895Sjamie	return (cred->cr_prison->pr_allow & flag);
2349192895Sjamie}
2350192895Sjamie
2351192895Sjamie/*
2352191673Sjamie * Remove a prison reference.  If that was the last reference, remove the
2353191673Sjamie * prison itself - but not in this context in case there are locks held.
2354191673Sjamie */
235572786Srwatsonvoid
2356185029Spjdprison_free_locked(struct prison *pr)
235772786Srwatson{
235872786Srwatson
2359185029Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
236072786Srwatson	pr->pr_ref--;
236172786Srwatson	if (pr->pr_ref == 0) {
2362168483Spjd		mtx_unlock(&pr->pr_mtx);
2363124882Srwatson		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
2364144660Sjeff		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
236587275Srwatson		return;
236672786Srwatson	}
236787275Srwatson	mtx_unlock(&pr->pr_mtx);
236872786Srwatson}
236972786Srwatson
2370185029Spjdvoid
2371185029Spjdprison_free(struct prison *pr)
2372185029Spjd{
2373185029Spjd
2374185029Spjd	mtx_lock(&pr->pr_mtx);
2375185029Spjd	prison_free_locked(pr);
2376185029Spjd}
2377185029Spjd
/*
 * Task handler for the task set up in prison_free_locked().  The context
 * pointer is the prison; it is passed to prison_deref() with no flags, so
 * no locks are assumed held and no further count is dropped.  The pending
 * argument is not used.
 */
static void
prison_complete(void *context, int pending)
{
	struct prison *pr;

	pr = (struct prison *)context;
	prison_deref(pr, 0);
}
2384191673Sjamie
2385191673Sjamie/*
2386191673Sjamie * Remove a prison reference (usually).  This internal version assumes no
2387191673Sjamie * mutexes are held, except perhaps the prison itself.  If there are no more
2388191673Sjamie * references, release and delist the prison.  On completion, the prison lock
2389191673Sjamie * and the allprison lock are both unlocked.
2390191673Sjamie */
/*
 * Flag bits tested by prison_deref():
 *   PD_DEREF        - decrement pr_ref on pr.
 *   PD_DEUREF       - decrement pr_uref on pr; whenever a count reaches
 *                     zero the parent's pr_uref is decremented as well.
 *   PD_LOCKED       - pr->pr_mtx is already held, so it is not taken here.
 *   PD_LIST_SLOCKED - allprison_lock is held shared; it is released (or
 *                     upgraded for the unlink) by this function.
 *   PD_LIST_XLOCKED - allprison_lock is held exclusive; likewise released
 *                     by this function.
 * With neither PD_DEREF nor PD_DEUREF no count is changed, but a prison
 * whose pr_ref is already zero is still delisted and freed.
 */
2391191673Sjamiestatic void
2392191673Sjamieprison_deref(struct prison *pr, int flags)
2393191673Sjamie{
2394192895Sjamie	struct prison *ppr, *tpr;
2395150652Scsjp	int vfslocked;
2396124882Srwatson
	/* Take the prison mutex unless the caller says it is already held. */
2397191673Sjamie	if (!(flags & PD_LOCKED))
2398191673Sjamie		mtx_lock(&pr->pr_mtx);
2399192895Sjamie	/* Decrement the user references in a separate loop. */
2400191673Sjamie	if (flags & PD_DEUREF) {
		/*
		 * Walk from pr toward the root.  pr itself is already locked;
		 * each further prison is locked before its count is touched.
		 * A prison whose pr_uref reached zero is unlocked before the
		 * walk moves to its parent, so on break only tpr is locked.
		 */
2401192895Sjamie		for (tpr = pr;; tpr = tpr->pr_parent) {
2402192895Sjamie			if (tpr != pr)
2403192895Sjamie				mtx_lock(&tpr->pr_mtx);
2404192895Sjamie			if (--tpr->pr_uref > 0)
2405192895Sjamie				break;
2406192895Sjamie			KASSERT(tpr != &prison0, ("prison0 pr_uref=0"));
2407192895Sjamie			mtx_unlock(&tpr->pr_mtx);
2408192895Sjamie		}
2409191673Sjamie		/* Done if there were only user references to remove. */
2410191673Sjamie		if (!(flags & PD_DEREF)) {
2411192895Sjamie			mtx_unlock(&tpr->pr_mtx);
2412191673Sjamie			if (flags & PD_LIST_SLOCKED)
2413191673Sjamie				sx_sunlock(&allprison_lock);
2414191673Sjamie			else if (flags & PD_LIST_XLOCKED)
2415191673Sjamie				sx_xunlock(&allprison_lock);
2416191673Sjamie			return;
2417191673Sjamie		}
		/*
		 * A pr_ref still has to be dropped on pr.  If the walk ended
		 * on an ancestor, pr was unlocked along the way: swap the
		 * ancestor's mutex for pr's again.
		 */
2418192895Sjamie		if (tpr != pr) {
2419192895Sjamie			mtx_unlock(&tpr->pr_mtx);
2420192895Sjamie			mtx_lock(&pr->pr_mtx);
2421192895Sjamie		}
2422191673Sjamie	}
2423124882Srwatson
	/*
	 * Each pass handles one prison with its mutex held.  A prison that
	 * ends up with no references is torn down and the loop continues
	 * with its parent.
	 */
2424192895Sjamie	for (;;) {
2425192895Sjamie		if (flags & PD_DEREF)
2426192895Sjamie			pr->pr_ref--;
2427192895Sjamie		/* If the prison still has references, nothing else to do. */
2428192895Sjamie		if (pr->pr_ref > 0) {
2429192895Sjamie			mtx_unlock(&pr->pr_mtx);
2430192895Sjamie			if (flags & PD_LIST_SLOCKED)
2431192895Sjamie				sx_sunlock(&allprison_lock);
2432192895Sjamie			else if (flags & PD_LIST_XLOCKED)
2433192895Sjamie				sx_xunlock(&allprison_lock);
2434192895Sjamie			return;
2435191673Sjamie		}
2436191673Sjamie
		/*
		 * No references remain.  Release the prison mutex, then get
		 * allprison_lock exclusively: upgrade a shared hold (falling
		 * back to unlock + xlock if the upgrade fails), keep an
		 * exclusive hold, or acquire it if it was not held at all.
		 */
2437192895Sjamie		mtx_unlock(&pr->pr_mtx);
2438192895Sjamie		if (flags & PD_LIST_SLOCKED) {
2439192895Sjamie			if (!sx_try_upgrade(&allprison_lock)) {
2440192895Sjamie				sx_sunlock(&allprison_lock);
2441192895Sjamie				sx_xlock(&allprison_lock);
2442192895Sjamie			}
2443192895Sjamie		} else if (!(flags & PD_LIST_XLOCKED))
2444192895Sjamie			sx_xlock(&allprison_lock);
2445168489Spjd
		/*
		 * Unlink from the global and sibling lists and take one off
		 * pr_childcount in every ancestor.
		 */
2446192895Sjamie		TAILQ_REMOVE(&allprison, pr, pr_list);
2447192895Sjamie		LIST_REMOVE(pr, pr_sibling);
2448192895Sjamie		ppr = pr->pr_parent;
2449192895Sjamie		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
2450194762Sjamie			tpr->pr_childcount--;
		/*
		 * The lock stays held shared from here on; this matches the
		 * PD_LIST_SLOCKED set in flags at the bottom of the loop.
		 */
2451192895Sjamie		sx_downgrade(&allprison_lock);
2452192895Sjamie
		/* Release everything the prison structure owns, then itself. */
2453194251Sjamie#ifdef VIMAGE
2454194251Sjamie		if (pr->pr_flags & PR_VNET)
2455194251Sjamie			vnet_destroy(pr->pr_vnet);
2456194251Sjamie#endif
2457192895Sjamie		if (pr->pr_root != NULL) {
2458192895Sjamie			vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
2459192895Sjamie			vrele(pr->pr_root);
2460192895Sjamie			VFS_UNLOCK_GIANT(vfslocked);
2461192895Sjamie		}
2462192895Sjamie		mtx_destroy(&pr->pr_mtx);
2463191673Sjamie#ifdef INET
2464192895Sjamie		free(pr->pr_ip4, M_PRISON);
2465191673Sjamie#endif
2466185435Sbz#ifdef INET6
2467192895Sjamie		free(pr->pr_ip6, M_PRISON);
2468185435Sbz#endif
2469192895Sjamie		if (pr->pr_cpuset != NULL)
2470192895Sjamie			cpuset_rel(pr->pr_cpuset);
2471192895Sjamie		osd_jail_exit(pr);
2472192895Sjamie		free(pr, M_PRISON);
2473192895Sjamie
2474192895Sjamie		/* Removing a prison frees a reference on its parent. */
		/*
		 * Next pass: parent locked, one pr_ref to drop, list lock
		 * held shared.
		 */
2475192895Sjamie		pr = ppr;
2476192895Sjamie		mtx_lock(&pr->pr_mtx);
2477192895Sjamie		flags = PD_DEREF | PD_LIST_SLOCKED;
2478192895Sjamie	}
2479124882Srwatson}
2480124882Srwatson
248172786Srwatsonvoid
2482185029Spjdprison_hold_locked(struct prison *pr)
248372786Srwatson{
248472786Srwatson
2485185029Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
2486168489Spjd	KASSERT(pr->pr_ref > 0,
2487191671Sjamie	    ("Trying to hold dead prison (jid=%d).", pr->pr_id));
248872786Srwatson	pr->pr_ref++;
2489185029Spjd}
2490185029Spjd
2491185029Spjdvoid
2492185029Spjdprison_hold(struct prison *pr)
2493185029Spjd{
2494185029Spjd
2495185029Spjd	mtx_lock(&pr->pr_mtx);
2496185029Spjd	prison_hold_locked(pr);
249787275Srwatson	mtx_unlock(&pr->pr_mtx);
249872786Srwatson}
249972786Srwatson
2500185435Sbzvoid
2501185435Sbzprison_proc_hold(struct prison *pr)
250287275Srwatson{
250387275Srwatson
2504185435Sbz	mtx_lock(&pr->pr_mtx);
2505191673Sjamie	KASSERT(pr->pr_uref > 0,
2506191673Sjamie	    ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
2507191673Sjamie	pr->pr_uref++;
2508185435Sbz	mtx_unlock(&pr->pr_mtx);
250987275Srwatson}
251087275Srwatson
2511185435Sbzvoid
2512185435Sbzprison_proc_free(struct prison *pr)
2513185435Sbz{
2514185435Sbz
2515185435Sbz	mtx_lock(&pr->pr_mtx);
2516191673Sjamie	KASSERT(pr->pr_uref > 0,
2517191673Sjamie	    ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
2518191673Sjamie	prison_deref(pr, PD_DEUREF | PD_LOCKED);
2519185435Sbz}
2520185435Sbz
2521185435Sbz
2522185435Sbz#ifdef INET
2523185435Sbz/*
2524192895Sjamie * Restrict a prison's IP address list with its parent's, possibly replacing
2525192895Sjamie * it.  Return true if the replacement buffer was used (or would have been).
2526192895Sjamie */
2527192895Sjamiestatic int
2528192895Sjamieprison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
2529192895Sjamie{
2530192895Sjamie	int ii, ij, used;
2531192895Sjamie	struct prison *ppr;
2532192895Sjamie
2533192895Sjamie	ppr = pr->pr_parent;
2534192895Sjamie	if (!(pr->pr_flags & PR_IP4_USER)) {
2535192895Sjamie		/* This has no user settings, so just copy the parent's list. */
2536192895Sjamie		if (pr->pr_ip4s < ppr->pr_ip4s) {
2537192895Sjamie			/*
2538192895Sjamie			 * There's no room for the parent's list.  Use the
2539192895Sjamie			 * new list buffer, which is assumed to be big enough
2540192895Sjamie			 * (if it was passed).  If there's no buffer, try to
2541192895Sjamie			 * allocate one.
2542192895Sjamie			 */
2543192895Sjamie			used = 1;
2544192895Sjamie			if (newip4 == NULL) {
2545192895Sjamie				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
2546192895Sjamie				    M_PRISON, M_NOWAIT);
2547192895Sjamie				if (newip4 != NULL)
2548192895Sjamie					used = 0;
2549192895Sjamie			}
2550192895Sjamie			if (newip4 != NULL) {
2551192895Sjamie				bcopy(ppr->pr_ip4, newip4,
2552192895Sjamie				    ppr->pr_ip4s * sizeof(*newip4));
2553192895Sjamie				free(pr->pr_ip4, M_PRISON);
2554192895Sjamie				pr->pr_ip4 = newip4;
2555192895Sjamie				pr->pr_ip4s = ppr->pr_ip4s;
2556192895Sjamie			}
2557192895Sjamie			return (used);
2558192895Sjamie		}
2559192895Sjamie		pr->pr_ip4s = ppr->pr_ip4s;
2560192895Sjamie		if (pr->pr_ip4s > 0)
2561192895Sjamie			bcopy(ppr->pr_ip4, pr->pr_ip4,
2562192895Sjamie			    pr->pr_ip4s * sizeof(*newip4));
2563192895Sjamie		else if (pr->pr_ip4 != NULL) {
2564192895Sjamie			free(pr->pr_ip4, M_PRISON);
2565192895Sjamie			pr->pr_ip4 = NULL;
2566192895Sjamie		}
2567195974Sjamie	} else if (pr->pr_ip4s > 0) {
2568192895Sjamie		/* Remove addresses that aren't in the parent. */
2569192895Sjamie		for (ij = 0; ij < ppr->pr_ip4s; ij++)
2570192895Sjamie			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
2571192895Sjamie				break;
2572192895Sjamie		if (ij < ppr->pr_ip4s)
2573192895Sjamie			ii = 1;
2574192895Sjamie		else {
2575192895Sjamie			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
2576192895Sjamie			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
2577192895Sjamie			ii = 0;
2578192895Sjamie		}
2579192895Sjamie		for (ij = 1; ii < pr->pr_ip4s; ) {
2580192895Sjamie			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
2581192895Sjamie				ii++;
2582192895Sjamie				continue;
2583192895Sjamie			}
2584192895Sjamie			switch (ij >= ppr->pr_ip4s ? -1 :
2585192895Sjamie				qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
2586192895Sjamie			case -1:
2587192895Sjamie				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
2588192895Sjamie				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
2589192895Sjamie				break;
2590192895Sjamie			case 0:
2591192895Sjamie				ii++;
2592192895Sjamie				ij++;
2593192895Sjamie				break;
2594192895Sjamie			case 1:
2595192895Sjamie				ij++;
2596192895Sjamie				break;
2597192895Sjamie			}
2598192895Sjamie		}
2599192895Sjamie		if (pr->pr_ip4s == 0) {
2600195870Sjamie			pr->pr_flags |= PR_IP4_DISABLE;
2601192895Sjamie			free(pr->pr_ip4, M_PRISON);
2602192895Sjamie			pr->pr_ip4 = NULL;
2603192895Sjamie		}
2604192895Sjamie	}
2605192895Sjamie	return (0);
2606192895Sjamie}
2607192895Sjamie
2608192895Sjamie/*
2609185435Sbz * Pass back primary IPv4 address of this jail.
2610185435Sbz *
2611192895Sjamie * If not restricted return success but do not alter the address.  Caller has
2612192895Sjamie * to make sure to initialize it correctly (e.g. INADDR_ANY).
2613185435Sbz *
2614188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
2615188144Sjamie * Address returned in NBO.
2616185435Sbz */
261746155Sphkint
2618187684Sbzprison_get_ip4(struct ucred *cred, struct in_addr *ia)
261946155Sphk{
2620191673Sjamie	struct prison *pr;
262146155Sphk
2622185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2623185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2624185435Sbz
2625192895Sjamie	pr = cred->cr_prison;
2626192895Sjamie	if (!(pr->pr_flags & PR_IP4))
262746155Sphk		return (0);
2628191673Sjamie	mtx_lock(&pr->pr_mtx);
2629192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2630192895Sjamie		mtx_unlock(&pr->pr_mtx);
2631192895Sjamie		return (0);
2632192895Sjamie	}
2633191673Sjamie	if (pr->pr_ip4 == NULL) {
2634191673Sjamie		mtx_unlock(&pr->pr_mtx);
2635188144Sjamie		return (EAFNOSUPPORT);
2636191673Sjamie	}
2637185435Sbz
2638191673Sjamie	ia->s_addr = pr->pr_ip4[0].s_addr;
2639191673Sjamie	mtx_unlock(&pr->pr_mtx);
2640185435Sbz	return (0);
2641185435Sbz}
2642185435Sbz
2643185435Sbz/*
2644192895Sjamie * Return true if pr1 and pr2 have the same IPv4 address restrictions.
2645192895Sjamie */
2646192895Sjamieint
2647192895Sjamieprison_equal_ip4(struct prison *pr1, struct prison *pr2)
2648192895Sjamie{
2649192895Sjamie
2650192895Sjamie	if (pr1 == pr2)
2651192895Sjamie		return (1);
2652192895Sjamie
2653192895Sjamie	/*
2654195974Sjamie	 * No need to lock since the PR_IP4_USER flag can't be altered for
2655195974Sjamie	 * existing prisons.
2656192895Sjamie	 */
2657195945Sjamie	while (pr1 != &prison0 &&
2658195945Sjamie#ifdef VIMAGE
2659195945Sjamie	       !(pr1->pr_flags & PR_VNET) &&
2660195945Sjamie#endif
2661195945Sjamie	       !(pr1->pr_flags & PR_IP4_USER))
2662192895Sjamie		pr1 = pr1->pr_parent;
2663195945Sjamie	while (pr2 != &prison0 &&
2664195945Sjamie#ifdef VIMAGE
2665195945Sjamie	       !(pr2->pr_flags & PR_VNET) &&
2666195945Sjamie#endif
2667195945Sjamie	       !(pr2->pr_flags & PR_IP4_USER))
2668192895Sjamie		pr2 = pr2->pr_parent;
2669192895Sjamie	return (pr1 == pr2);
2670192895Sjamie}
2671192895Sjamie
2672192895Sjamie/*
2673185435Sbz * Make sure our (source) address is set to something meaningful to this
2674185435Sbz * jail.
2675185435Sbz *
2676192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
2677192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
2678192895Sjamie * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
2679185435Sbz */
2680185435Sbzint
2681185435Sbzprison_local_ip4(struct ucred *cred, struct in_addr *ia)
2682185435Sbz{
2683191673Sjamie	struct prison *pr;
2684185435Sbz	struct in_addr ia0;
2685191673Sjamie	int error;
2686185435Sbz
2687185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2688185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2689185435Sbz
2690192895Sjamie	pr = cred->cr_prison;
2691192895Sjamie	if (!(pr->pr_flags & PR_IP4))
269246155Sphk		return (0);
2693191673Sjamie	mtx_lock(&pr->pr_mtx);
2694192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2695192895Sjamie		mtx_unlock(&pr->pr_mtx);
2696192895Sjamie		return (0);
2697192895Sjamie	}
2698191673Sjamie	if (pr->pr_ip4 == NULL) {
2699191673Sjamie		mtx_unlock(&pr->pr_mtx);
2700188144Sjamie		return (EAFNOSUPPORT);
2701191673Sjamie	}
2702185435Sbz
2703185435Sbz	ia0.s_addr = ntohl(ia->s_addr);
2704185435Sbz	if (ia0.s_addr == INADDR_LOOPBACK) {
2705191673Sjamie		ia->s_addr = pr->pr_ip4[0].s_addr;
2706191673Sjamie		mtx_unlock(&pr->pr_mtx);
2707185435Sbz		return (0);
270846155Sphk	}
2709185435Sbz
2710188144Sjamie	if (ia0.s_addr == INADDR_ANY) {
2711188144Sjamie		/*
2712188144Sjamie		 * In case there is only 1 IPv4 address, bind directly.
2713188144Sjamie		 */
2714191673Sjamie		if (pr->pr_ip4s == 1)
2715191673Sjamie			ia->s_addr = pr->pr_ip4[0].s_addr;
2716191673Sjamie		mtx_unlock(&pr->pr_mtx);
2717185435Sbz		return (0);
2718185435Sbz	}
2719185435Sbz
2720191673Sjamie	error = _prison_check_ip4(pr, ia);
2721191673Sjamie	mtx_unlock(&pr->pr_mtx);
2722191673Sjamie	return (error);
2723185435Sbz}
2724185435Sbz
2725185435Sbz/*
2726185435Sbz * Rewrite destination address in case we will connect to loopback address.
2727185435Sbz *
2728188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
2729188144Sjamie * Address passed in in NBO and returned in NBO.
2730185435Sbz */
2731185435Sbzint
2732185435Sbzprison_remote_ip4(struct ucred *cred, struct in_addr *ia)
2733185435Sbz{
2734191673Sjamie	struct prison *pr;
2735185435Sbz
2736185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2737185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2738185435Sbz
2739192895Sjamie	pr = cred->cr_prison;
2740192895Sjamie	if (!(pr->pr_flags & PR_IP4))
2741185435Sbz		return (0);
2742191673Sjamie	mtx_lock(&pr->pr_mtx);
2743192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2744192895Sjamie		mtx_unlock(&pr->pr_mtx);
2745192895Sjamie		return (0);
2746192895Sjamie	}
2747191673Sjamie	if (pr->pr_ip4 == NULL) {
2748191673Sjamie		mtx_unlock(&pr->pr_mtx);
2749188144Sjamie		return (EAFNOSUPPORT);
2750191673Sjamie	}
2751188144Sjamie
2752185435Sbz	if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
2753191673Sjamie		ia->s_addr = pr->pr_ip4[0].s_addr;
2754191673Sjamie		mtx_unlock(&pr->pr_mtx);
2755185435Sbz		return (0);
2756185435Sbz	}
2757185435Sbz
2758185435Sbz	/*
2759185435Sbz	 * Return success because nothing had to be changed.
2760185435Sbz	 */
2761191673Sjamie	mtx_unlock(&pr->pr_mtx);
2762185435Sbz	return (0);
2763185435Sbz}
2764185435Sbz
2765185435Sbz/*
2766188144Sjamie * Check if given address belongs to the jail referenced by cred/prison.
2767185435Sbz *
2768192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
2769192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
2770192895Sjamie * doesn't allow IPv4.  Address passed in in NBO.
2771185435Sbz */
2772185435Sbzstatic int
2773185435Sbz_prison_check_ip4(struct prison *pr, struct in_addr *ia)
2774185435Sbz{
2775185435Sbz	int i, a, z, d;
2776185435Sbz
2777185435Sbz	/*
2778185435Sbz	 * Check the primary IP.
2779185435Sbz	 */
2780185435Sbz	if (pr->pr_ip4[0].s_addr == ia->s_addr)
2781188144Sjamie		return (0);
2782185435Sbz
2783185435Sbz	/*
2784185435Sbz	 * All the other IPs are sorted so we can do a binary search.
2785185435Sbz	 */
2786185435Sbz	a = 0;
2787185435Sbz	z = pr->pr_ip4s - 2;
2788185435Sbz	while (a <= z) {
2789185435Sbz		i = (a + z) / 2;
2790185435Sbz		d = qcmp_v4(&pr->pr_ip4[i+1], ia);
2791185435Sbz		if (d > 0)
2792185435Sbz			z = i - 1;
2793185435Sbz		else if (d < 0)
2794185435Sbz			a = i + 1;
279581114Srwatson		else
2796188144Sjamie			return (0);
2797185435Sbz	}
2798188144Sjamie
2799188144Sjamie	return (EADDRNOTAVAIL);
2800185435Sbz}
2801185435Sbz
2802185435Sbzint
2803185435Sbzprison_check_ip4(struct ucred *cred, struct in_addr *ia)
2804185435Sbz{
2805191673Sjamie	struct prison *pr;
2806191673Sjamie	int error;
2807185435Sbz
2808185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2809185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2810185435Sbz
2811192895Sjamie	pr = cred->cr_prison;
2812192895Sjamie	if (!(pr->pr_flags & PR_IP4))
2813188144Sjamie		return (0);
2814191673Sjamie	mtx_lock(&pr->pr_mtx);
2815192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2816192895Sjamie		mtx_unlock(&pr->pr_mtx);
2817192895Sjamie		return (0);
2818192895Sjamie	}
2819191673Sjamie	if (pr->pr_ip4 == NULL) {
2820191673Sjamie		mtx_unlock(&pr->pr_mtx);
2821188144Sjamie		return (EAFNOSUPPORT);
2822191673Sjamie	}
2823185435Sbz
2824191673Sjamie	error = _prison_check_ip4(pr, ia);
2825191673Sjamie	mtx_unlock(&pr->pr_mtx);
2826191673Sjamie	return (error);
2827185435Sbz}
2828185435Sbz#endif
2829185435Sbz
2830185435Sbz#ifdef INET6
2831192895Sjamiestatic int
2832192895Sjamieprison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
2833192895Sjamie{
2834192895Sjamie	int ii, ij, used;
2835192895Sjamie	struct prison *ppr;
2836192895Sjamie
2837192895Sjamie	ppr = pr->pr_parent;
2838192895Sjamie	if (!(pr->pr_flags & PR_IP6_USER)) {
2839192895Sjamie		/* This has no user settings, so just copy the parent's list. */
2840192895Sjamie		if (pr->pr_ip6s < ppr->pr_ip6s) {
2841192895Sjamie			/*
2842192895Sjamie			 * There's no room for the parent's list.  Use the
2843192895Sjamie			 * new list buffer, which is assumed to be big enough
2844192895Sjamie			 * (if it was passed).  If there's no buffer, try to
2845192895Sjamie			 * allocate one.
2846192895Sjamie			 */
2847192895Sjamie			used = 1;
2848192895Sjamie			if (newip6 == NULL) {
2849192895Sjamie				newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
2850192895Sjamie				    M_PRISON, M_NOWAIT);
2851192895Sjamie				if (newip6 != NULL)
2852192895Sjamie					used = 0;
2853192895Sjamie			}
2854192895Sjamie			if (newip6 != NULL) {
2855192895Sjamie				bcopy(ppr->pr_ip6, newip6,
2856192895Sjamie				    ppr->pr_ip6s * sizeof(*newip6));
2857192895Sjamie				free(pr->pr_ip6, M_PRISON);
2858192895Sjamie				pr->pr_ip6 = newip6;
2859192895Sjamie				pr->pr_ip6s = ppr->pr_ip6s;
2860192895Sjamie			}
2861192895Sjamie			return (used);
2862192895Sjamie		}
2863192895Sjamie		pr->pr_ip6s = ppr->pr_ip6s;
2864192895Sjamie		if (pr->pr_ip6s > 0)
2865192895Sjamie			bcopy(ppr->pr_ip6, pr->pr_ip6,
2866192895Sjamie			    pr->pr_ip6s * sizeof(*newip6));
2867192895Sjamie		else if (pr->pr_ip6 != NULL) {
2868192895Sjamie			free(pr->pr_ip6, M_PRISON);
2869192895Sjamie			pr->pr_ip6 = NULL;
2870192895Sjamie		}
2871195974Sjamie	} else if (pr->pr_ip6s > 0) {
2872192895Sjamie		/* Remove addresses that aren't in the parent. */
2873192895Sjamie		for (ij = 0; ij < ppr->pr_ip6s; ij++)
2874192895Sjamie			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
2875192895Sjamie			    &ppr->pr_ip6[ij]))
2876192895Sjamie				break;
2877192895Sjamie		if (ij < ppr->pr_ip6s)
2878192895Sjamie			ii = 1;
2879192895Sjamie		else {
2880192895Sjamie			bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
2881192895Sjamie			    --pr->pr_ip6s * sizeof(*pr->pr_ip6));
2882192895Sjamie			ii = 0;
2883192895Sjamie		}
2884192895Sjamie		for (ij = 1; ii < pr->pr_ip6s; ) {
2885192895Sjamie			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
2886192895Sjamie			    &ppr->pr_ip6[0])) {
2887192895Sjamie				ii++;
2888192895Sjamie				continue;
2889192895Sjamie			}
2890192895Sjamie			switch (ij >= ppr->pr_ip4s ? -1 :
2891192895Sjamie				qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
2892192895Sjamie			case -1:
2893192895Sjamie				bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
2894192895Sjamie				    (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
2895192895Sjamie				break;
2896192895Sjamie			case 0:
2897192895Sjamie				ii++;
2898192895Sjamie				ij++;
2899192895Sjamie				break;
2900192895Sjamie			case 1:
2901192895Sjamie				ij++;
2902192895Sjamie				break;
2903192895Sjamie			}
2904192895Sjamie		}
2905192895Sjamie		if (pr->pr_ip6s == 0) {
2906195870Sjamie			pr->pr_flags |= PR_IP6_DISABLE;
2907192895Sjamie			free(pr->pr_ip6, M_PRISON);
2908192895Sjamie			pr->pr_ip6 = NULL;
2909192895Sjamie		}
2910192895Sjamie	}
2911192895Sjamie	return 0;
2912192895Sjamie}
2913192895Sjamie
2914185435Sbz/*
2915185435Sbz * Pass back primary IPv6 address for this jail.
2916185435Sbz *
2917192895Sjamie * If not restricted return success but do not alter the address.  Caller has
2918192895Sjamie * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
2919185435Sbz *
2920188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
2921185435Sbz */
2922185435Sbzint
2923187684Sbzprison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
2924185435Sbz{
2925191673Sjamie	struct prison *pr;
2926185435Sbz
2927185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2928185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
2929185435Sbz
2930192895Sjamie	pr = cred->cr_prison;
2931192895Sjamie	if (!(pr->pr_flags & PR_IP6))
293281114Srwatson		return (0);
2933191673Sjamie	mtx_lock(&pr->pr_mtx);
2934192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
2935192895Sjamie		mtx_unlock(&pr->pr_mtx);
2936192895Sjamie		return (0);
2937192895Sjamie	}
2938191673Sjamie	if (pr->pr_ip6 == NULL) {
2939191673Sjamie		mtx_unlock(&pr->pr_mtx);
2940188144Sjamie		return (EAFNOSUPPORT);
2941191673Sjamie	}
2942188144Sjamie
2943191673Sjamie	bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
2944191673Sjamie	mtx_unlock(&pr->pr_mtx);
2945185435Sbz	return (0);
2946185435Sbz}
2947185435Sbz
2948185435Sbz/*
2949192895Sjamie * Return true if pr1 and pr2 have the same IPv6 address restrictions.
2950192895Sjamie */
2951192895Sjamieint
2952192895Sjamieprison_equal_ip6(struct prison *pr1, struct prison *pr2)
2953192895Sjamie{
2954192895Sjamie
2955192895Sjamie	if (pr1 == pr2)
2956192895Sjamie		return (1);
2957192895Sjamie
2958195945Sjamie	while (pr1 != &prison0 &&
2959195945Sjamie#ifdef VIMAGE
2960195945Sjamie	       !(pr1->pr_flags & PR_VNET) &&
2961195945Sjamie#endif
2962195945Sjamie	       !(pr1->pr_flags & PR_IP6_USER))
2963192895Sjamie		pr1 = pr1->pr_parent;
2964195945Sjamie	while (pr2 != &prison0 &&
2965195945Sjamie#ifdef VIMAGE
2966195945Sjamie	       !(pr2->pr_flags & PR_VNET) &&
2967195945Sjamie#endif
2968195945Sjamie	       !(pr2->pr_flags & PR_IP6_USER))
2969192895Sjamie		pr2 = pr2->pr_parent;
2970192895Sjamie	return (pr1 == pr2);
2971192895Sjamie}
2972192895Sjamie
2973192895Sjamie/*
2974185435Sbz * Make sure our (source) address is set to something meaningful to this jail.
2975185435Sbz *
2976185435Sbz * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
2977185435Sbz * when needed while binding.
2978185435Sbz *
2979192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
2980192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
2981192895Sjamie * doesn't allow IPv6.
2982185435Sbz */
2983185435Sbzint
2984185435Sbzprison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
2985185435Sbz{
2986191673Sjamie	struct prison *pr;
2987191673Sjamie	int error;
2988185435Sbz
2989185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2990185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
2991185435Sbz
2992192895Sjamie	pr = cred->cr_prison;
2993192895Sjamie	if (!(pr->pr_flags & PR_IP6))
2994185435Sbz		return (0);
2995191673Sjamie	mtx_lock(&pr->pr_mtx);
2996192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
2997192895Sjamie		mtx_unlock(&pr->pr_mtx);
2998192895Sjamie		return (0);
2999192895Sjamie	}
3000191673Sjamie	if (pr->pr_ip6 == NULL) {
3001191673Sjamie		mtx_unlock(&pr->pr_mtx);
3002188144Sjamie		return (EAFNOSUPPORT);
3003191673Sjamie	}
3004188144Sjamie
3005185435Sbz	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
3006191673Sjamie		bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3007191673Sjamie		mtx_unlock(&pr->pr_mtx);
3008185435Sbz		return (0);
300981114Srwatson	}
3010185435Sbz
3011188144Sjamie	if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
3012188144Sjamie		/*
3013188144Sjamie		 * In case there is only 1 IPv6 address, and v6only is true,
3014188144Sjamie		 * then bind directly.
3015188144Sjamie		 */
3016191673Sjamie		if (v6only != 0 && pr->pr_ip6s == 1)
3017191673Sjamie			bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3018191673Sjamie		mtx_unlock(&pr->pr_mtx);
3019185435Sbz		return (0);
3020185435Sbz	}
3021188144Sjamie
3022191673Sjamie	error = _prison_check_ip6(pr, ia6);
3023191673Sjamie	mtx_unlock(&pr->pr_mtx);
3024191673Sjamie	return (error);
3025185435Sbz}
3026185435Sbz
3027185435Sbz/*
3028185435Sbz * Rewrite destination address in case we will connect to loopback address.
3029185435Sbz *
3030188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
3031185435Sbz */
3032185435Sbzint
3033185435Sbzprison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
3034185435Sbz{
3035191673Sjamie	struct prison *pr;
3036185435Sbz
3037185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3038185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3039185435Sbz
3040192895Sjamie	pr = cred->cr_prison;
3041192895Sjamie	if (!(pr->pr_flags & PR_IP6))
3042185435Sbz		return (0);
3043191673Sjamie	mtx_lock(&pr->pr_mtx);
3044192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3045192895Sjamie		mtx_unlock(&pr->pr_mtx);
3046192895Sjamie		return (0);
3047192895Sjamie	}
3048191673Sjamie	if (pr->pr_ip6 == NULL) {
3049191673Sjamie		mtx_unlock(&pr->pr_mtx);
3050188144Sjamie		return (EAFNOSUPPORT);
3051191673Sjamie	}
3052188144Sjamie
3053185435Sbz	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
3054191673Sjamie		bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3055191673Sjamie		mtx_unlock(&pr->pr_mtx);
3056185435Sbz		return (0);
3057185435Sbz	}
3058185435Sbz
3059185435Sbz	/*
3060185435Sbz	 * Return success because nothing had to be changed.
3061185435Sbz	 */
3062191673Sjamie	mtx_unlock(&pr->pr_mtx);
306346155Sphk	return (0);
306446155Sphk}
306546155Sphk
3066185435Sbz/*
3067188144Sjamie * Check if given address belongs to the jail referenced by cred/prison.
3068185435Sbz *
3069192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
3070192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
3071192895Sjamie * doesn't allow IPv6.
3072185435Sbz */
3073185435Sbzstatic int
3074185435Sbz_prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
307546155Sphk{
3076185435Sbz	int i, a, z, d;
307746155Sphk
3078185435Sbz	/*
3079185435Sbz	 * Check the primary IP.
3080185435Sbz	 */
3081185435Sbz	if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
3082188144Sjamie		return (0);
3083185435Sbz
3084185435Sbz	/*
3085185435Sbz	 * All the other IPs are sorted so we can do a binary search.
3086185435Sbz	 */
3087185435Sbz	a = 0;
3088185435Sbz	z = pr->pr_ip6s - 2;
3089185435Sbz	while (a <= z) {
3090185435Sbz		i = (a + z) / 2;
3091185435Sbz		d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
3092185435Sbz		if (d > 0)
3093185435Sbz			z = i - 1;
3094185435Sbz		else if (d < 0)
3095185435Sbz			a = i + 1;
309646155Sphk		else
3097188144Sjamie			return (0);
309846155Sphk	}
3099188144Sjamie
3100188144Sjamie	return (EADDRNOTAVAIL);
310146155Sphk}
310246155Sphk
310346155Sphkint
3104185435Sbzprison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
3105185435Sbz{
3106191673Sjamie	struct prison *pr;
3107191673Sjamie	int error;
3108185435Sbz
3109185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3110185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3111185435Sbz
3112192895Sjamie	pr = cred->cr_prison;
3113192895Sjamie	if (!(pr->pr_flags & PR_IP6))
3114188144Sjamie		return (0);
3115191673Sjamie	mtx_lock(&pr->pr_mtx);
3116192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3117192895Sjamie		mtx_unlock(&pr->pr_mtx);
3118192895Sjamie		return (0);
3119192895Sjamie	}
3120191673Sjamie	if (pr->pr_ip6 == NULL) {
3121191673Sjamie		mtx_unlock(&pr->pr_mtx);
3122188144Sjamie		return (EAFNOSUPPORT);
3123191673Sjamie	}
3124185435Sbz
3125191673Sjamie	error = _prison_check_ip6(pr, ia6);
3126191673Sjamie	mtx_unlock(&pr->pr_mtx);
3127191673Sjamie	return (error);
3128185435Sbz}
3129185435Sbz#endif
3130185435Sbz
3131185435Sbz/*
3132188146Sjamie * Check if a jail supports the given address family.
3133188146Sjamie *
3134188146Sjamie * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
3135188146Sjamie * if not.
3136188146Sjamie */
3137188146Sjamieint
3138188146Sjamieprison_check_af(struct ucred *cred, int af)
3139188146Sjamie{
3140192895Sjamie	struct prison *pr;
3141188146Sjamie	int error;
3142188146Sjamie
3143188146Sjamie	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3144188146Sjamie
3145192895Sjamie	pr = cred->cr_prison;
3146194923Sjamie#ifdef VIMAGE
3147194915Sjamie	/* Prisons with their own network stack are not limited. */
3148194915Sjamie	if (pr->pr_flags & PR_VNET)
3149194915Sjamie		return (0);
3150194923Sjamie#endif
3151194915Sjamie
3152188146Sjamie	error = 0;
3153188146Sjamie	switch (af)
3154188146Sjamie	{
3155188146Sjamie#ifdef INET
3156188146Sjamie	case AF_INET:
3157192895Sjamie		if (pr->pr_flags & PR_IP4)
3158192895Sjamie		{
3159192895Sjamie			mtx_lock(&pr->pr_mtx);
3160192895Sjamie			if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
3161192895Sjamie				error = EAFNOSUPPORT;
3162192895Sjamie			mtx_unlock(&pr->pr_mtx);
3163192895Sjamie		}
3164188146Sjamie		break;
3165188146Sjamie#endif
3166188146Sjamie#ifdef INET6
3167188146Sjamie	case AF_INET6:
3168192895Sjamie		if (pr->pr_flags & PR_IP6)
3169192895Sjamie		{
3170192895Sjamie			mtx_lock(&pr->pr_mtx);
3171192895Sjamie			if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
3172192895Sjamie				error = EAFNOSUPPORT;
3173192895Sjamie			mtx_unlock(&pr->pr_mtx);
3174192895Sjamie		}
3175188146Sjamie		break;
3176188146Sjamie#endif
3177188146Sjamie	case AF_LOCAL:
3178188146Sjamie	case AF_ROUTE:
3179188146Sjamie		break;
3180188146Sjamie	default:
3181192895Sjamie		if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
3182188146Sjamie			error = EAFNOSUPPORT;
3183188146Sjamie	}
3184188146Sjamie	return (error);
3185188146Sjamie}
3186188146Sjamie
3187188146Sjamie/*
3188185435Sbz * Check if given address belongs to the jail referenced by cred (wrapper to
3189185435Sbz * prison_check_ip[46]).
3190185435Sbz *
3191192895Sjamie * Returns 0 if jail doesn't restrict the address family or if address belongs
3192192895Sjamie * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
3193192895Sjamie * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
3194185435Sbz */
3195185435Sbzint
319672786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa)
319746155Sphk{
3198185435Sbz#ifdef INET
3199114168Smike	struct sockaddr_in *sai;
3200185435Sbz#endif
3201185435Sbz#ifdef INET6
3202185435Sbz	struct sockaddr_in6 *sai6;
3203185435Sbz#endif
3204188144Sjamie	int error;
320546155Sphk
3206185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3207185435Sbz	KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
3208185435Sbz
3209188144Sjamie	error = 0;
3210188144Sjamie	switch (sa->sa_family)
3211185435Sbz	{
3212185435Sbz#ifdef INET
3213185435Sbz	case AF_INET:
3214185435Sbz		sai = (struct sockaddr_in *)sa;
3215188144Sjamie		error = prison_check_ip4(cred, &sai->sin_addr);
3216185435Sbz		break;
3217185435Sbz#endif
3218185435Sbz#ifdef INET6
3219185435Sbz	case AF_INET6:
3220185435Sbz		sai6 = (struct sockaddr_in6 *)sa;
3221188144Sjamie		error = prison_check_ip6(cred, &sai6->sin6_addr);
3222185435Sbz		break;
3223185435Sbz#endif
3224185435Sbz	default:
3225192895Sjamie		if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
3226188144Sjamie			error = EAFNOSUPPORT;
3227185435Sbz	}
3228188144Sjamie	return (error);
322946155Sphk}
323072786Srwatson
323172786Srwatson/*
323272786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
323372786Srwatson */
323472786Srwatsonint
3235114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2)
323672786Srwatson{
323772786Srwatson
3238192895Sjamie	return ((cred1->cr_prison == cred2->cr_prison ||
3239192895Sjamie	    prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
3240192895Sjamie}
324172786Srwatson
3242192895Sjamie/*
3243192895Sjamie * Return 1 if p2 is a child of p1, otherwise 0.
3244192895Sjamie */
3245192895Sjamieint
3246192895Sjamieprison_ischild(struct prison *pr1, struct prison *pr2)
3247192895Sjamie{
3248192895Sjamie
3249192895Sjamie	for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
3250192895Sjamie		if (pr1 == pr2)
3251192895Sjamie			return (1);
325272786Srwatson	return (0);
325372786Srwatson}
325472786Srwatson
325572786Srwatson/*
325672786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0.
325772786Srwatson */
325872786Srwatsonint
3259114168Smikejailed(struct ucred *cred)
326072786Srwatson{
326172786Srwatson
3262192895Sjamie	return (cred->cr_prison != &prison0);
326372786Srwatson}
326491384Srobert
326591384Srobert/*
3266194090Sjamie * Return the correct hostname (domainname, et al) for the passed credential.
326791384Srobert */
326891391Srobertvoid
3269114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size)
327091384Srobert{
3271193066Sjamie	struct prison *pr;
327291384Srobert
3273194090Sjamie	/*
3274194090Sjamie	 * A NULL credential can be used to shortcut to the physical
3275194090Sjamie	 * system's hostname.
3276194090Sjamie	 */
3277193066Sjamie	pr = (cred != NULL) ? cred->cr_prison : &prison0;
3278193066Sjamie	mtx_lock(&pr->pr_mtx);
3279194118Sjamie	strlcpy(buf, pr->pr_hostname, size);
3280193066Sjamie	mtx_unlock(&pr->pr_mtx);
328191384Srobert}
3282113275Smike
3283194090Sjamievoid
3284194090Sjamiegetcreddomainname(struct ucred *cred, char *buf, size_t size)
3285194090Sjamie{
3286194090Sjamie
3287194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3288194118Sjamie	strlcpy(buf, cred->cr_prison->pr_domainname, size);
3289194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3290194090Sjamie}
3291194090Sjamie
3292194090Sjamievoid
3293194090Sjamiegetcredhostuuid(struct ucred *cred, char *buf, size_t size)
3294194090Sjamie{
3295194090Sjamie
3296194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3297194118Sjamie	strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
3298194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3299194090Sjamie}
3300194090Sjamie
3301194090Sjamievoid
3302194090Sjamiegetcredhostid(struct ucred *cred, unsigned long *hostid)
3303194090Sjamie{
3304194090Sjamie
3305194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3306194090Sjamie	*hostid = cred->cr_prison->pr_hostid;
3307194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3308194090Sjamie}
3309194090Sjamie
3310125804Srwatson/*
3311147185Spjd * Determine whether the subject represented by cred can "see"
3312147185Spjd * status of a mount point.
3313147185Spjd * Returns: 0 for permitted, ENOENT otherwise.
3314147185Spjd * XXX: This function should be called cr_canseemount() and should be
3315147185Spjd *      placed in kern_prot.c.
3316125804Srwatson */
3317125804Srwatsonint
3318147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp)
3319125804Srwatson{
3320147185Spjd	struct prison *pr;
3321147185Spjd	struct statfs *sp;
3322147185Spjd	size_t len;
3323125804Srwatson
3324192895Sjamie	pr = cred->cr_prison;
3325192895Sjamie	if (pr->pr_enforce_statfs == 0)
3326147185Spjd		return (0);
3327147185Spjd	if (pr->pr_root->v_mount == mp)
3328147185Spjd		return (0);
3329192895Sjamie	if (pr->pr_enforce_statfs == 2)
3330147185Spjd		return (ENOENT);
3331147185Spjd	/*
3332147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
3333147185Spjd	 * all mount-points from inside a jail.
3334147185Spjd	 * This is ugly check, but this is the only situation when jail's
3335147185Spjd	 * directory ends with '/'.
3336147185Spjd	 */
3337147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
3338147185Spjd		return (0);
3339147185Spjd	len = strlen(pr->pr_path);
3340147185Spjd	sp = &mp->mnt_stat;
3341147185Spjd	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
3342147185Spjd		return (ENOENT);
3343147185Spjd	/*
3344147185Spjd	 * Be sure that we don't have situation where jail's root directory
3345147185Spjd	 * is "/some/path" and mount point is "/some/pathpath".
3346147185Spjd	 */
3347147185Spjd	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
3348147185Spjd		return (ENOENT);
3349147185Spjd	return (0);
3350147185Spjd}
3351147185Spjd
3352147185Spjdvoid
3353147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
3354147185Spjd{
3355147185Spjd	char jpath[MAXPATHLEN];
3356147185Spjd	struct prison *pr;
3357147185Spjd	size_t len;
3358147185Spjd
3359192895Sjamie	pr = cred->cr_prison;
3360192895Sjamie	if (pr->pr_enforce_statfs == 0)
3361147185Spjd		return;
3362147185Spjd	if (prison_canseemount(cred, mp) != 0) {
3363147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3364147185Spjd		strlcpy(sp->f_mntonname, "[restricted]",
3365147185Spjd		    sizeof(sp->f_mntonname));
3366147185Spjd		return;
3367125804Srwatson	}
3368147185Spjd	if (pr->pr_root->v_mount == mp) {
3369147185Spjd		/*
3370147185Spjd		 * Clear current buffer data, so we are sure nothing from
3371147185Spjd		 * the valid path left there.
3372147185Spjd		 */
3373147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3374147185Spjd		*sp->f_mntonname = '/';
3375147185Spjd		return;
3376147185Spjd	}
3377147185Spjd	/*
3378147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
3379147185Spjd	 * all mount-points from inside a jail.
3380147185Spjd	 */
3381147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
3382147185Spjd		return;
3383147185Spjd	len = strlen(pr->pr_path);
3384147185Spjd	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
3385147185Spjd	/*
3386147185Spjd	 * Clear current buffer data, so we are sure nothing from
3387147185Spjd	 * the valid path left there.
3388147185Spjd	 */
3389147185Spjd	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3390147185Spjd	if (*jpath == '\0') {
3391147185Spjd		/* Should never happen. */
3392147185Spjd		*sp->f_mntonname = '/';
3393147185Spjd	} else {
3394147185Spjd		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
3395147185Spjd	}
3396125804Srwatson}
3397125804Srwatson
3398164032Srwatson/*
3399164032Srwatson * Check with permission for a specific privilege is granted within jail.  We
3400164032Srwatson * have a specific list of accepted privileges; the rest are denied.
3401164032Srwatson */
3402164032Srwatsonint
3403164032Srwatsonprison_priv_check(struct ucred *cred, int priv)
3404164032Srwatson{
3405164032Srwatson
3406164032Srwatson	if (!jailed(cred))
3407164032Srwatson		return (0);
3408164032Srwatson
3409194915Sjamie#ifdef VIMAGE
3410194915Sjamie	/*
3411194915Sjamie	 * Privileges specific to prisons with a virtual network stack.
3412194915Sjamie	 * There might be a duplicate entry here in case the privilege
3413194915Sjamie	 * is only granted conditionally in the legacy jail case.
3414194915Sjamie	 */
3415164032Srwatson	switch (priv) {
3416194915Sjamie#ifdef notyet
3417194915Sjamie		/*
3418194915Sjamie		 * NFS-specific privileges.
3419194915Sjamie		 */
3420194915Sjamie	case PRIV_NFS_DAEMON:
3421194915Sjamie	case PRIV_NFS_LOCKD:
3422194915Sjamie#endif
3423194915Sjamie		/*
3424194915Sjamie		 * Network stack privileges.
3425194915Sjamie		 */
3426194915Sjamie	case PRIV_NET_BRIDGE:
3427194915Sjamie	case PRIV_NET_GRE:
3428194915Sjamie	case PRIV_NET_BPF:
3429194915Sjamie	case PRIV_NET_RAW:		/* Dup, cond. in legacy jail case. */
3430194915Sjamie	case PRIV_NET_ROUTE:
3431194915Sjamie	case PRIV_NET_TAP:
3432194915Sjamie	case PRIV_NET_SETIFMTU:
3433194915Sjamie	case PRIV_NET_SETIFFLAGS:
3434194915Sjamie	case PRIV_NET_SETIFCAP:
3435194915Sjamie	case PRIV_NET_SETIFNAME	:
3436194915Sjamie	case PRIV_NET_SETIFMETRIC:
3437194915Sjamie	case PRIV_NET_SETIFPHYS:
3438194915Sjamie	case PRIV_NET_SETIFMAC:
3439194915Sjamie	case PRIV_NET_ADDMULTI:
3440194915Sjamie	case PRIV_NET_DELMULTI:
3441194915Sjamie	case PRIV_NET_HWIOCTL:
3442194915Sjamie	case PRIV_NET_SETLLADDR:
3443194915Sjamie	case PRIV_NET_ADDIFGROUP:
3444194915Sjamie	case PRIV_NET_DELIFGROUP:
3445194915Sjamie	case PRIV_NET_IFCREATE:
3446194915Sjamie	case PRIV_NET_IFDESTROY:
3447194915Sjamie	case PRIV_NET_ADDIFADDR:
3448194915Sjamie	case PRIV_NET_DELIFADDR:
3449194915Sjamie	case PRIV_NET_LAGG:
3450194915Sjamie	case PRIV_NET_GIF:
3451194915Sjamie	case PRIV_NET_SETIFVNET:
3452164032Srwatson
3453164032Srwatson		/*
3454194915Sjamie		 * 802.11-related privileges.
3455194915Sjamie		 */
3456194915Sjamie	case PRIV_NET80211_GETKEY:
3457194915Sjamie#ifdef notyet
3458194915Sjamie	case PRIV_NET80211_MANAGE:		/* XXX-BZ discuss with sam@ */
3459194915Sjamie#endif
3460194915Sjamie
3461194915Sjamie#ifdef notyet
3462194915Sjamie		/*
3463194915Sjamie		 * AppleTalk privileges.
3464194915Sjamie		 */
3465194915Sjamie	case PRIV_NETATALK_RESERVEDPORT:
3466194915Sjamie
3467194915Sjamie		/*
3468194915Sjamie		 * ATM privileges.
3469194915Sjamie		 */
3470194915Sjamie	case PRIV_NETATM_CFG:
3471194915Sjamie	case PRIV_NETATM_ADD:
3472194915Sjamie	case PRIV_NETATM_DEL:
3473194915Sjamie	case PRIV_NETATM_SET:
3474194915Sjamie
3475194915Sjamie		/*
3476194915Sjamie		 * Bluetooth privileges.
3477194915Sjamie		 */
3478194915Sjamie	case PRIV_NETBLUETOOTH_RAW:
3479194915Sjamie#endif
3480194915Sjamie
3481194915Sjamie		/*
3482194915Sjamie		 * Netgraph and netgraph module privileges.
3483194915Sjamie		 */
3484194915Sjamie	case PRIV_NETGRAPH_CONTROL:
3485194915Sjamie#ifdef notyet
3486194915Sjamie	case PRIV_NETGRAPH_TTY:
3487194915Sjamie#endif
3488194915Sjamie
3489194915Sjamie		/*
3490194915Sjamie		 * IPv4 and IPv6 privileges.
3491194915Sjamie		 */
3492194915Sjamie	case PRIV_NETINET_IPFW:
3493194915Sjamie	case PRIV_NETINET_DIVERT:
3494194915Sjamie	case PRIV_NETINET_PF:
3495194915Sjamie	case PRIV_NETINET_DUMMYNET:
3496194915Sjamie	case PRIV_NETINET_CARP:
3497194915Sjamie	case PRIV_NETINET_MROUTE:
3498194915Sjamie	case PRIV_NETINET_RAW:
3499194915Sjamie	case PRIV_NETINET_ADDRCTRL6:
3500194915Sjamie	case PRIV_NETINET_ND6:
3501194915Sjamie	case PRIV_NETINET_SCOPE6:
3502194915Sjamie	case PRIV_NETINET_ALIFETIME6:
3503194915Sjamie	case PRIV_NETINET_IPSEC:
3504194915Sjamie	case PRIV_NETINET_BINDANY:
3505194915Sjamie
3506194915Sjamie#ifdef notyet
3507194915Sjamie		/*
3508194915Sjamie		 * IPX/SPX privileges.
3509194915Sjamie		 */
3510194915Sjamie	case PRIV_NETIPX_RESERVEDPORT:
3511194915Sjamie	case PRIV_NETIPX_RAW:
3512194915Sjamie
3513194915Sjamie		/*
3514194915Sjamie		 * NCP privileges.
3515194915Sjamie		 */
3516194915Sjamie	case PRIV_NETNCP:
3517194915Sjamie
3518194915Sjamie		/*
3519194915Sjamie		 * SMB privileges.
3520194915Sjamie		 */
3521194915Sjamie	case PRIV_NETSMB:
3522194915Sjamie#endif
3523194915Sjamie
3524194915Sjamie	/*
3525194915Sjamie	 * No default: or deny here.
3526194915Sjamie	 * In case of no permit fall through to next switch().
3527194915Sjamie	 */
3528194915Sjamie		if (cred->cr_prison->pr_flags & PR_VNET)
3529194915Sjamie			return (0);
3530194915Sjamie	}
3531194915Sjamie#endif /* VIMAGE */
3532194915Sjamie
3533194915Sjamie	switch (priv) {
3534194915Sjamie
3535194915Sjamie		/*
3536164032Srwatson		 * Allow ktrace privileges for root in jail.
3537164032Srwatson		 */
3538164032Srwatson	case PRIV_KTRACE:
3539164032Srwatson
3540166827Srwatson#if 0
3541164032Srwatson		/*
3542164032Srwatson		 * Allow jailed processes to configure audit identity and
3543164032Srwatson		 * submit audit records (login, etc).  In the future we may
3544164032Srwatson		 * want to further refine the relationship between audit and
3545164032Srwatson		 * jail.
3546164032Srwatson		 */
3547164032Srwatson	case PRIV_AUDIT_GETAUDIT:
3548164032Srwatson	case PRIV_AUDIT_SETAUDIT:
3549164032Srwatson	case PRIV_AUDIT_SUBMIT:
3550166827Srwatson#endif
3551164032Srwatson
3552164032Srwatson		/*
3553164032Srwatson		 * Allow jailed processes to manipulate process UNIX
3554164032Srwatson		 * credentials in any way they see fit.
3555164032Srwatson		 */
3556164032Srwatson	case PRIV_CRED_SETUID:
3557164032Srwatson	case PRIV_CRED_SETEUID:
3558164032Srwatson	case PRIV_CRED_SETGID:
3559164032Srwatson	case PRIV_CRED_SETEGID:
3560164032Srwatson	case PRIV_CRED_SETGROUPS:
3561164032Srwatson	case PRIV_CRED_SETREUID:
3562164032Srwatson	case PRIV_CRED_SETREGID:
3563164032Srwatson	case PRIV_CRED_SETRESUID:
3564164032Srwatson	case PRIV_CRED_SETRESGID:
3565164032Srwatson
3566164032Srwatson		/*
3567164032Srwatson		 * Jail implements visibility constraints already, so allow
3568164032Srwatson		 * jailed root to override uid/gid-based constraints.
3569164032Srwatson		 */
3570164032Srwatson	case PRIV_SEEOTHERGIDS:
3571164032Srwatson	case PRIV_SEEOTHERUIDS:
3572164032Srwatson
3573164032Srwatson		/*
3574164032Srwatson		 * Jail implements inter-process debugging limits already, so
3575164032Srwatson		 * allow jailed root various debugging privileges.
3576164032Srwatson		 */
3577164032Srwatson	case PRIV_DEBUG_DIFFCRED:
3578164032Srwatson	case PRIV_DEBUG_SUGID:
3579164032Srwatson	case PRIV_DEBUG_UNPRIV:
3580164032Srwatson
3581164032Srwatson		/*
3582164032Srwatson		 * Allow jail to set various resource limits and login
3583164032Srwatson		 * properties, and for now, exceed process resource limits.
3584164032Srwatson		 */
3585164032Srwatson	case PRIV_PROC_LIMIT:
3586164032Srwatson	case PRIV_PROC_SETLOGIN:
3587164032Srwatson	case PRIV_PROC_SETRLIMIT:
3588164032Srwatson
3589164032Srwatson		/*
3590164032Srwatson		 * System V and POSIX IPC privileges are granted in jail.
3591164032Srwatson		 */
3592164032Srwatson	case PRIV_IPC_READ:
3593164032Srwatson	case PRIV_IPC_WRITE:
3594164032Srwatson	case PRIV_IPC_ADMIN:
3595164032Srwatson	case PRIV_IPC_MSGSIZE:
3596164032Srwatson	case PRIV_MQ_ADMIN:
3597164032Srwatson
3598164032Srwatson		/*
3599192895Sjamie		 * Jail operations within a jail work on child jails.
3600192895Sjamie		 */
3601192895Sjamie	case PRIV_JAIL_ATTACH:
3602192895Sjamie	case PRIV_JAIL_SET:
3603192895Sjamie	case PRIV_JAIL_REMOVE:
3604192895Sjamie
3605192895Sjamie		/*
3606164032Srwatson		 * Jail implements its own inter-process limits, so allow
3607164032Srwatson		 * root processes in jail to change scheduling on other
3608164032Srwatson		 * processes in the same jail.  Likewise for signalling.
3609164032Srwatson		 */
3610164032Srwatson	case PRIV_SCHED_DIFFCRED:
3611185435Sbz	case PRIV_SCHED_CPUSET:
3612164032Srwatson	case PRIV_SIGNAL_DIFFCRED:
3613164032Srwatson	case PRIV_SIGNAL_SUGID:
3614164032Srwatson
3615164032Srwatson		/*
3616164032Srwatson		 * Allow jailed processes to write to sysctls marked as jail
3617164032Srwatson		 * writable.
3618164032Srwatson		 */
3619164032Srwatson	case PRIV_SYSCTL_WRITEJAIL:
3620164032Srwatson
3621164032Srwatson		/*
3622164032Srwatson		 * Allow root in jail to manage a variety of quota
3623166831Srwatson		 * properties.  These should likely be conditional on a
3624166831Srwatson		 * configuration option.
3625164032Srwatson		 */
3626166832Srwatson	case PRIV_VFS_GETQUOTA:
3627166832Srwatson	case PRIV_VFS_SETQUOTA:
3628164032Srwatson
3629164032Srwatson		/*
3630164032Srwatson		 * Since Jail relies on chroot() to implement file system
3631164032Srwatson		 * protections, grant many VFS privileges to root in jail.
3632164032Srwatson		 * Be careful to exclude mount-related and NFS-related
3633164032Srwatson		 * privileges.
3634164032Srwatson		 */
3635164032Srwatson	case PRIV_VFS_READ:
3636164032Srwatson	case PRIV_VFS_WRITE:
3637164032Srwatson	case PRIV_VFS_ADMIN:
3638164032Srwatson	case PRIV_VFS_EXEC:
3639164032Srwatson	case PRIV_VFS_LOOKUP:
3640164032Srwatson	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
3641164032Srwatson	case PRIV_VFS_CHFLAGS_DEV:
3642164032Srwatson	case PRIV_VFS_CHOWN:
3643164032Srwatson	case PRIV_VFS_CHROOT:
3644167152Spjd	case PRIV_VFS_RETAINSUGID:
3645164032Srwatson	case PRIV_VFS_FCHROOT:
3646164032Srwatson	case PRIV_VFS_LINK:
3647164032Srwatson	case PRIV_VFS_SETGID:
3648172860Srwatson	case PRIV_VFS_STAT:
3649164032Srwatson	case PRIV_VFS_STICKYFILE:
3650164032Srwatson		return (0);
3651164032Srwatson
3652164032Srwatson		/*
3653164032Srwatson		 * Depending on the global setting, allow privilege of
3654164032Srwatson		 * setting system flags.
3655164032Srwatson		 */
3656164032Srwatson	case PRIV_VFS_SYSFLAGS:
3657192895Sjamie		if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
3658164032Srwatson			return (0);
3659164032Srwatson		else
3660164032Srwatson			return (EPERM);
3661164032Srwatson
3662164032Srwatson		/*
3663168396Spjd		 * Depending on the global setting, allow privilege of
3664168396Spjd		 * mounting/unmounting file systems.
3665168396Spjd		 */
3666168396Spjd	case PRIV_VFS_MOUNT:
3667168396Spjd	case PRIV_VFS_UNMOUNT:
3668168396Spjd	case PRIV_VFS_MOUNT_NONUSER:
3669168699Spjd	case PRIV_VFS_MOUNT_OWNER:
3670192895Sjamie		if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT)
3671168396Spjd			return (0);
3672168396Spjd		else
3673168396Spjd			return (EPERM);
3674168396Spjd
3675168396Spjd		/*
3676168591Srwatson		 * Allow jailed root to bind reserved ports and reuse in-use
3677168591Srwatson		 * ports.
3678164032Srwatson		 */
3679164032Srwatson	case PRIV_NETINET_RESERVEDPORT:
3680168591Srwatson	case PRIV_NETINET_REUSEPORT:
3681164032Srwatson		return (0);
3682164032Srwatson
3683164032Srwatson		/*
3684175630Sbz		 * Allow jailed root to set certian IPv4/6 (option) headers.
3685175630Sbz		 */
3686175630Sbz	case PRIV_NETINET_SETHDROPTS:
3687175630Sbz		return (0);
3688175630Sbz
3689175630Sbz		/*
3690164032Srwatson		 * Conditionally allow creating raw sockets in jail.
3691164032Srwatson		 */
3692164032Srwatson	case PRIV_NETINET_RAW:
3693192895Sjamie		if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
3694164032Srwatson			return (0);
3695164032Srwatson		else
3696164032Srwatson			return (EPERM);
3697164032Srwatson
3698164032Srwatson		/*
3699164032Srwatson		 * Since jail implements its own visibility limits on netstat
3700164032Srwatson		 * sysctls, allow getcred.  This allows identd to work in
3701164032Srwatson		 * jail.
3702164032Srwatson		 */
3703164032Srwatson	case PRIV_NETINET_GETCRED:
3704164032Srwatson		return (0);
3705164032Srwatson
3706164032Srwatson	default:
3707164032Srwatson		/*
3708164032Srwatson		 * In all remaining cases, deny the privilege request.  This
3709164032Srwatson		 * includes almost all network privileges, many system
3710164032Srwatson		 * configuration privileges.
3711164032Srwatson		 */
3712164032Srwatson		return (EPERM);
3713164032Srwatson	}
3714164032Srwatson}
3715164032Srwatson
3716192895Sjamie/*
3717192895Sjamie * Return the part of pr2's name that is relative to pr1, or the whole name
3718192895Sjamie * if it does not directly follow.
3719192895Sjamie */
3720192895Sjamie
3721192895Sjamiechar *
3722192895Sjamieprison_name(struct prison *pr1, struct prison *pr2)
3723192895Sjamie{
3724192895Sjamie	char *name;
3725192895Sjamie
3726192895Sjamie	/* Jails see themselves as "0" (if they see themselves at all). */
3727192895Sjamie	if (pr1 == pr2)
3728192895Sjamie		return "0";
3729192895Sjamie	name = pr2->pr_name;
3730192895Sjamie	if (prison_ischild(pr1, pr2)) {
3731192895Sjamie		/*
3732192895Sjamie		 * pr1 isn't locked (and allprison_lock may not be either)
3733192895Sjamie		 * so its length can't be counted on.  But the number of dots
3734192895Sjamie		 * can be counted on - and counted.
3735192895Sjamie		 */
3736192895Sjamie		for (; pr1 != &prison0; pr1 = pr1->pr_parent)
3737192895Sjamie			name = strchr(name, '.') + 1;
3738192895Sjamie	}
3739192895Sjamie	return (name);
3740192895Sjamie}
3741192895Sjamie
3742192895Sjamie/*
3743192895Sjamie * Return the part of pr2's path that is relative to pr1, or the whole path
3744192895Sjamie * if it does not directly follow.
3745192895Sjamie */
3746192895Sjamiestatic char *
3747192895Sjamieprison_path(struct prison *pr1, struct prison *pr2)
3748192895Sjamie{
3749192895Sjamie	char *path1, *path2;
3750192895Sjamie	int len1;
3751192895Sjamie
3752192895Sjamie	path1 = pr1->pr_path;
3753192895Sjamie	path2 = pr2->pr_path;
3754192895Sjamie	if (!strcmp(path1, "/"))
3755192895Sjamie		return (path2);
3756192895Sjamie	len1 = strlen(path1);
3757192895Sjamie	if (strncmp(path1, path2, len1))
3758192895Sjamie		return (path2);
3759192895Sjamie	if (path2[len1] == '\0')
3760192895Sjamie		return "/";
3761192895Sjamie	if (path2[len1] == '/')
3762192895Sjamie		return (path2 + len1);
3763192895Sjamie	return (path2);
3764192895Sjamie}
3765192895Sjamie
3766192895Sjamie
3767192895Sjamie/*
3768192895Sjamie * Jail-related sysctls.
3769192895Sjamie */
3770192895SjamieSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
3771192895Sjamie    "Jails");
3772192895Sjamie
3773113275Smikestatic int
3774113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS)
3775113275Smike{
3776191673Sjamie	struct xprison *xp;
3777192895Sjamie	struct prison *pr, *cpr;
3778191673Sjamie#ifdef INET
3779191673Sjamie	struct in_addr *ip4 = NULL;
3780191673Sjamie	int ip4s = 0;
3781191673Sjamie#endif
3782191673Sjamie#ifdef INET6
3783191673Sjamie	struct in_addr *ip6 = NULL;
3784191673Sjamie	int ip6s = 0;
3785191673Sjamie#endif
3786192895Sjamie	int descend, error;
3787113275Smike
3788191673Sjamie	xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
3789192895Sjamie	pr = req->td->td_ucred->cr_prison;
3790191673Sjamie	error = 0;
3791168401Spjd	sx_slock(&allprison_lock);
3792192895Sjamie	FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
3793192895Sjamie#if defined(INET) || defined(INET6)
3794191673Sjamie again:
3795192895Sjamie#endif
3796192895Sjamie		mtx_lock(&cpr->pr_mtx);
3797185435Sbz#ifdef INET
3798192895Sjamie		if (cpr->pr_ip4s > 0) {
3799192895Sjamie			if (ip4s < cpr->pr_ip4s) {
3800192895Sjamie				ip4s = cpr->pr_ip4s;
3801192895Sjamie				mtx_unlock(&cpr->pr_mtx);
3802191673Sjamie				ip4 = realloc(ip4, ip4s *
3803191673Sjamie				    sizeof(struct in_addr), M_TEMP, M_WAITOK);
3804191673Sjamie				goto again;
3805191673Sjamie			}
3806192895Sjamie			bcopy(cpr->pr_ip4, ip4,
3807192895Sjamie			    cpr->pr_ip4s * sizeof(struct in_addr));
3808191673Sjamie		}
3809185435Sbz#endif
3810185435Sbz#ifdef INET6
3811192895Sjamie		if (cpr->pr_ip6s > 0) {
3812192895Sjamie			if (ip6s < cpr->pr_ip6s) {
3813192895Sjamie				ip6s = cpr->pr_ip6s;
3814192895Sjamie				mtx_unlock(&cpr->pr_mtx);
3815191673Sjamie				ip6 = realloc(ip6, ip6s *
3816191673Sjamie				    sizeof(struct in6_addr), M_TEMP, M_WAITOK);
3817191673Sjamie				goto again;
3818191673Sjamie			}
3819192895Sjamie			bcopy(cpr->pr_ip6, ip6,
3820192895Sjamie			    cpr->pr_ip6s * sizeof(struct in6_addr));
3821191673Sjamie		}
3822185435Sbz#endif
3823192895Sjamie		if (cpr->pr_ref == 0) {
3824192895Sjamie			mtx_unlock(&cpr->pr_mtx);
3825191673Sjamie			continue;
3826191673Sjamie		}
3827191673Sjamie		bzero(xp, sizeof(*xp));
3828113275Smike		xp->pr_version = XPRISON_VERSION;
3829192895Sjamie		xp->pr_id = cpr->pr_id;
3830192895Sjamie		xp->pr_state = cpr->pr_uref > 0
3831191673Sjamie		    ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
3832192895Sjamie		strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
3833194118Sjamie		strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
3834192895Sjamie		strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
3835185435Sbz#ifdef INET
3836192895Sjamie		xp->pr_ip4s = cpr->pr_ip4s;
3837185435Sbz#endif
3838185435Sbz#ifdef INET6
3839192895Sjamie		xp->pr_ip6s = cpr->pr_ip6s;
3840185435Sbz#endif
3841192895Sjamie		mtx_unlock(&cpr->pr_mtx);
3842191673Sjamie		error = SYSCTL_OUT(req, xp, sizeof(*xp));
3843191673Sjamie		if (error)
3844191673Sjamie			break;
3845185435Sbz#ifdef INET
3846191673Sjamie		if (xp->pr_ip4s > 0) {
3847191673Sjamie			error = SYSCTL_OUT(req, ip4,
3848191673Sjamie			    xp->pr_ip4s * sizeof(struct in_addr));
3849191673Sjamie			if (error)
3850191673Sjamie				break;
3851185435Sbz		}
3852185435Sbz#endif
3853185435Sbz#ifdef INET6
3854191673Sjamie		if (xp->pr_ip6s > 0) {
3855191673Sjamie			error = SYSCTL_OUT(req, ip6,
3856191673Sjamie			    xp->pr_ip6s * sizeof(struct in6_addr));
3857191673Sjamie			if (error)
3858191673Sjamie				break;
3859185435Sbz		}
3860185435Sbz#endif
3861113275Smike	}
3862168401Spjd	sx_sunlock(&allprison_lock);
3863191673Sjamie	free(xp, M_TEMP);
3864191673Sjamie#ifdef INET
3865191673Sjamie	free(ip4, M_TEMP);
3866191673Sjamie#endif
3867191673Sjamie#ifdef INET6
3868191673Sjamie	free(ip6, M_TEMP);
3869191673Sjamie#endif
3870167354Spjd	return (error);
3871113275Smike}
3872113275Smike
3873187864SedSYSCTL_OID(_security_jail, OID_AUTO, list,
3874187864Sed    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
3875187864Sed    sysctl_jail_list, "S", "List of active jails");
3876126004Spjd
3877126004Spjdstatic int
3878126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
3879126004Spjd{
3880126004Spjd	int error, injail;
3881126004Spjd
3882126004Spjd	injail = jailed(req->td->td_ucred);
3883126004Spjd	error = SYSCTL_OUT(req, &injail, sizeof(injail));
3884126004Spjd
3885126004Spjd	return (error);
3886126004Spjd}
3887192895Sjamie
3888187864SedSYSCTL_PROC(_security_jail, OID_AUTO, jailed,
3889187864Sed    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
3890187864Sed    sysctl_jail_jailed, "I", "Process in jail?");
3891185435Sbz
3892192895Sjamie#if defined(INET) || defined(INET6)
3893193865SjamieSYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
3894192895Sjamie    &jail_max_af_ips, 0,
3895192895Sjamie    "Number of IP addresses a jail may have at most per address family");
3896192895Sjamie#endif
3897192895Sjamie
3898192895Sjamie/*
3899192895Sjamie * Default parameters for jail(2) compatability.  For historical reasons,
3900192895Sjamie * the sysctl names have varying similarity to the parameter names.  Prisons
3901192895Sjamie * just see their own parameters, and can't change them.
3902192895Sjamie */
3903192895Sjamiestatic int
3904192895Sjamiesysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
3905192895Sjamie{
3906192895Sjamie	struct prison *pr;
3907192895Sjamie	int allow, error, i;
3908192895Sjamie
3909192895Sjamie	pr = req->td->td_ucred->cr_prison;
3910192895Sjamie	allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow;
3911192895Sjamie
3912192895Sjamie	/* Get the current flag value, and convert it to a boolean. */
3913192895Sjamie	i = (allow & arg2) ? 1 : 0;
3914192895Sjamie	if (arg1 != NULL)
3915192895Sjamie		i = !i;
3916192895Sjamie	error = sysctl_handle_int(oidp, &i, 0, req);
3917192895Sjamie	if (error || !req->newptr)
3918192895Sjamie		return (error);
3919192895Sjamie	i = i ? arg2 : 0;
3920192895Sjamie	if (arg1 != NULL)
3921192895Sjamie		i ^= arg2;
3922192895Sjamie	/*
3923192895Sjamie	 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
3924192895Sjamie	 * for writing.
3925192895Sjamie	 */
3926192895Sjamie	mtx_lock(&prison0.pr_mtx);
3927192895Sjamie	jail_default_allow = (jail_default_allow & ~arg2) | i;
3928192895Sjamie	mtx_unlock(&prison0.pr_mtx);
3929192895Sjamie	return (0);
3930192895Sjamie}
3931192895Sjamie
3932192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
3933192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3934192895Sjamie    NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
3935192895Sjamie    "Processes in jail can set their hostnames");
3936192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
3937192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3938192895Sjamie    (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
3939192895Sjamie    "Processes in jail are limited to creating UNIX/IP/route sockets only");
3940192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
3941192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3942192895Sjamie    NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
3943192895Sjamie    "Processes in jail can use System V IPC primitives");
3944192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
3945192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3946192895Sjamie    NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
3947192895Sjamie    "Prison root can create raw sockets");
3948192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
3949192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3950192895Sjamie    NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
3951192895Sjamie    "Processes in jail can alter system file flags");
3952192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
3953192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3954192895Sjamie    NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
3955192895Sjamie    "Processes in jail can mount/unmount jail-friendly file systems");
3956192895Sjamie
3957192895Sjamiestatic int
3958192895Sjamiesysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
3959192895Sjamie{
3960192895Sjamie	struct prison *pr;
3961192895Sjamie	int level, error;
3962192895Sjamie
3963192895Sjamie	pr = req->td->td_ucred->cr_prison;
3964192895Sjamie	level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
3965192895Sjamie	error = sysctl_handle_int(oidp, &level, 0, req);
3966192895Sjamie	if (error || !req->newptr)
3967192895Sjamie		return (error);
3968192895Sjamie	*(int *)arg1 = level;
3969192895Sjamie	return (0);
3970192895Sjamie}
3971192895Sjamie
3972192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
3973192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
3974192895Sjamie    &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
3975192895Sjamie    sysctl_jail_default_level, "I",
3976192895Sjamie    "Processes in jail cannot see all mounted file systems");
3977192895Sjamie
3978192895Sjamie/*
3979192895Sjamie * Nodes to describe jail parameters.  Maximum length of string parameters
3980192895Sjamie * is returned in the string itself, and the other parameters exist merely
3981192895Sjamie * to make themselves and their types known.
3982192895Sjamie */
3983192895SjamieSYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0,
3984192895Sjamie    "Jail parameters");
3985192895Sjamie
3986192895Sjamieint
3987192895Sjamiesysctl_jail_param(SYSCTL_HANDLER_ARGS)
3988192895Sjamie{
3989192895Sjamie	int i;
3990192895Sjamie	long l;
3991192895Sjamie	size_t s;
3992192895Sjamie	char numbuf[12];
3993192895Sjamie
3994192895Sjamie	switch (oidp->oid_kind & CTLTYPE)
3995192895Sjamie	{
3996192895Sjamie	case CTLTYPE_LONG:
3997192895Sjamie	case CTLTYPE_ULONG:
3998192895Sjamie		l = 0;
3999192895Sjamie#ifdef SCTL_MASK32
4000192895Sjamie		if (!(req->flags & SCTL_MASK32))
4001192895Sjamie#endif
4002192895Sjamie			return (SYSCTL_OUT(req, &l, sizeof(l)));
4003192895Sjamie	case CTLTYPE_INT:
4004192895Sjamie	case CTLTYPE_UINT:
4005192895Sjamie		i = 0;
4006192895Sjamie		return (SYSCTL_OUT(req, &i, sizeof(i)));
4007192895Sjamie	case CTLTYPE_STRING:
4008192895Sjamie		snprintf(numbuf, sizeof(numbuf), "%d", arg2);
4009192895Sjamie		return
4010192895Sjamie		    (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
4011192895Sjamie	case CTLTYPE_STRUCT:
4012192895Sjamie		s = (size_t)arg2;
4013192895Sjamie		return (SYSCTL_OUT(req, &s, sizeof(s)));
4014192895Sjamie	}
4015192895Sjamie	return (0);
4016192895Sjamie}
4017192895Sjamie
/*
 * Parameter declarations.  Each invocation names a parameter, its sysctl
 * type/access flags, a format string and a description.
 * NOTE(review): the SYSCTL_JAIL_PARAM* macros are defined outside this part
 * of the file; an empty first argument presumably places the parameter at
 * the top level of the parameter tree -- confirm against the definitions.
 */

/* Top-level parameters. */
4018192895SjamieSYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
4019192895SjamieSYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
4020192895SjamieSYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
4021192895SjamieSYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
4022192895SjamieSYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
4023192895Sjamie    "I", "Jail secure level");
4024192895SjamieSYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
4025192895Sjamie    "I", "Jail cannot see all mounted file systems");
4026192895SjamieSYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
4027192895Sjamie    "B", "Jail persistence");
/* The vnet parameter is only declared in VIMAGE kernels. */
4028194251Sjamie#ifdef VIMAGE
4029194251SjamieSYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
4030195870Sjamie    "E,jailsys", "Virtual network stack");
4031194251Sjamie#endif
4032192895SjamieSYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
4033192895Sjamie    "B", "Jail is in the process of shutting down");
4034192895Sjamie
/* children.*: current (read-only) and maximum (read-write) child counts. */
4035194762SjamieSYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
4036194762SjamieSYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
4037194762Sjamie    "I", "Current number of child jails");
4038194762SjamieSYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
4039194762Sjamie    "I", "Maximum number of child jails");
4040194762Sjamie
/* host.*: host name, NIS domain name, host UUID and host ID. */
4041195870SjamieSYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
4042192895SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
4043192895Sjamie    "Jail hostname");
4044193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
4045193066Sjamie    "Jail NIS domainname");
4046193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
4047193066Sjamie    "Jail host UUID");
4048193066SjamieSYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
4049193066Sjamie    "LU", "Jail host ID");
4050192895Sjamie
/* cpuset.*: read-only cpuset ID. */
4051192895SjamieSYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
4052192895SjamieSYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");
4053192895Sjamie
/*
 * ip4.* / ip6.*: declared only when the matching address family is compiled
 * in.  The size given for each addr parameter is that of a single address
 * structure.
 */
4054192895Sjamie#ifdef INET
4055195974SjamieSYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
4056195974Sjamie    "Jail IPv4 address virtualization");
4057192895SjamieSYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr),
4058192895Sjamie    "S,in_addr,a", "Jail IPv4 addresses");
4059192895Sjamie#endif
4060192895Sjamie#ifdef INET6
4061195974SjamieSYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
4062195974Sjamie    "Jail IPv6 address virtualization");
4063192895SjamieSYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr),
4064192895Sjamie    "S,in6_addr,a", "Jail IPv6 addresses");
4065192895Sjamie#endif
4066192895Sjamie
/* allow.*: read-write permission flags, all declared with the "B" format. */
4067192895SjamieSYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
4068192895SjamieSYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
4069192895Sjamie    "B", "Jail may set hostname");
4070192895SjamieSYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
4071192895Sjamie    "B", "Jail may use SYSV IPC");
4072192895SjamieSYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
4073192895Sjamie    "B", "Jail may create raw sockets");
4074192895SjamieSYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
4075192895Sjamie    "B", "Jail may alter system file flags");
4076192895SjamieSYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW,
4077192895Sjamie    "B", "Jail may mount/unmount jail-friendly file systems");
4078192895SjamieSYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
4079192895Sjamie    "B", "Jail may set file quotas");
4080192895SjamieSYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
4081192895Sjamie    "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
4082192895Sjamie
4083192895Sjamie
4084185435Sbz#ifdef DDB
4085191673Sjamie
4086191673Sjamiestatic void
4087191673Sjamiedb_show_prison(struct prison *pr)
4088185435Sbz{
4089192895Sjamie	int fi;
4090191673Sjamie#if defined(INET) || defined(INET6)
4091191673Sjamie	int ii;
4092185435Sbz#endif
4093195870Sjamie	unsigned jsf;
4094185435Sbz#ifdef INET6
4095185435Sbz	char ip6buf[INET6_ADDRSTRLEN];
4096185435Sbz#endif
4097185435Sbz
4098191673Sjamie	db_printf("prison %p:\n", pr);
4099191673Sjamie	db_printf(" jid             = %d\n", pr->pr_id);
4100191673Sjamie	db_printf(" name            = %s\n", pr->pr_name);
4101192895Sjamie	db_printf(" parent          = %p\n", pr->pr_parent);
4102191673Sjamie	db_printf(" ref             = %d\n", pr->pr_ref);
4103191673Sjamie	db_printf(" uref            = %d\n", pr->pr_uref);
4104191673Sjamie	db_printf(" path            = %s\n", pr->pr_path);
4105191673Sjamie	db_printf(" cpuset          = %d\n", pr->pr_cpuset
4106191673Sjamie	    ? pr->pr_cpuset->cs_id : -1);
4107194251Sjamie#ifdef VIMAGE
4108194251Sjamie	db_printf(" vnet            = %p\n", pr->pr_vnet);
4109194251Sjamie#endif
4110191673Sjamie	db_printf(" root            = %p\n", pr->pr_root);
4111191673Sjamie	db_printf(" securelevel     = %d\n", pr->pr_securelevel);
4112194762Sjamie	db_printf(" childcount      = %d\n", pr->pr_childcount);
4113192895Sjamie	db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
4114192895Sjamie	db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
4115191673Sjamie	db_printf(" flags           = %x", pr->pr_flags);
4116192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
4117192895Sjamie	    fi++)
4118192895Sjamie		if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi)))
4119192895Sjamie			db_printf(" %s", pr_flag_names[fi]);
4120195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
4121195870Sjamie	    fi++) {
4122195870Sjamie		jsf = pr->pr_flags &
4123195870Sjamie		    (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
4124195870Sjamie		db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name,
4125195870Sjamie		    pr_flag_jailsys[fi].disable &&
4126195870Sjamie		      (jsf == pr_flag_jailsys[fi].disable) ? "disable"
4127195870Sjamie		    : (jsf == pr_flag_jailsys[fi].new) ? "new"
4128195870Sjamie		    : "inherit");
4129195870Sjamie	}
4130192895Sjamie	db_printf(" allow           = %x", pr->pr_allow);
4131192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
4132192895Sjamie	    fi++)
4133192895Sjamie		if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi)))
4134192895Sjamie			db_printf(" %s", pr_allow_names[fi]);
4135191673Sjamie	db_printf("\n");
4136192895Sjamie	db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
4137194118Sjamie	db_printf(" host.hostname   = %s\n", pr->pr_hostname);
4138194118Sjamie	db_printf(" host.domainname = %s\n", pr->pr_domainname);
4139194118Sjamie	db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
4140193066Sjamie	db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
4141185435Sbz#ifdef INET
4142191673Sjamie	db_printf(" ip4s            = %d\n", pr->pr_ip4s);
4143191673Sjamie	for (ii = 0; ii < pr->pr_ip4s; ii++)
4144191673Sjamie		db_printf(" %s %s\n",
4145191673Sjamie		    ii == 0 ? "ip4             =" : "                 ",
4146191673Sjamie		    inet_ntoa(pr->pr_ip4[ii]));
4147185435Sbz#endif
4148185435Sbz#ifdef INET6
4149191673Sjamie	db_printf(" ip6s            = %d\n", pr->pr_ip6s);
4150191673Sjamie	for (ii = 0; ii < pr->pr_ip6s; ii++)
4151191673Sjamie		db_printf(" %s %s\n",
4152191673Sjamie		    ii == 0 ? "ip6             =" : "                 ",
4153191673Sjamie		    ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
4154191673Sjamie#endif
4155191673Sjamie}
4156191673Sjamie
4157191673SjamieDB_SHOW_COMMAND(prison, db_show_prison_command)
4158191673Sjamie{
4159191673Sjamie	struct prison *pr;
4160191673Sjamie
4161191673Sjamie	if (!have_addr) {
4162192895Sjamie		/*
4163192895Sjamie		 * Show all prisons in the list, and prison0 which is not
4164192895Sjamie		 * listed.
4165192895Sjamie		 */
4166192895Sjamie		db_show_prison(&prison0);
4167192895Sjamie		if (!db_pager_quit) {
4168192895Sjamie			TAILQ_FOREACH(pr, &allprison, pr_list) {
4169192895Sjamie				db_show_prison(pr);
4170192895Sjamie				if (db_pager_quit)
4171192895Sjamie					break;
4172192895Sjamie			}
4173191673Sjamie		}
4174191673Sjamie		return;
4175191673Sjamie	}
4176191673Sjamie
4177192895Sjamie	if (addr == 0)
4178192895Sjamie		pr = &prison0;
4179192895Sjamie	else {
4180192895Sjamie		/* Look for a prison with the ID and with references. */
4181191673Sjamie		TAILQ_FOREACH(pr, &allprison, pr_list)
4182192895Sjamie			if (pr->pr_id == addr && pr->pr_ref > 0)
4183191673Sjamie				break;
4184192895Sjamie		if (pr == NULL)
4185192895Sjamie			/* Look again, without requiring a reference. */
4186192895Sjamie			TAILQ_FOREACH(pr, &allprison, pr_list)
4187192895Sjamie				if (pr->pr_id == addr)
4188192895Sjamie					break;
4189192895Sjamie		if (pr == NULL)
4190192895Sjamie			/* Assume address points to a valid prison. */
4191192895Sjamie			pr = (struct prison *)addr;
4192192895Sjamie	}
4193191673Sjamie	db_show_prison(pr);
4194185435Sbz}
4195191673Sjamie
4196185435Sbz#endif /* DDB */
4197