kern_jail.c revision 168401
/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */
946155Sphk
10116182Sobrien#include <sys/cdefs.h>
11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 168401 2007-04-05 23:19:13Z pjd $");
12116182Sobrien
13131177Spjd#include "opt_mac.h"
14131177Spjd
1546155Sphk#include <sys/param.h>
1646155Sphk#include <sys/types.h>
1746155Sphk#include <sys/kernel.h>
1846155Sphk#include <sys/systm.h>
1946155Sphk#include <sys/errno.h>
2046155Sphk#include <sys/sysproto.h>
2146155Sphk#include <sys/malloc.h>
22164032Srwatson#include <sys/priv.h>
2346155Sphk#include <sys/proc.h>
24124882Srwatson#include <sys/taskqueue.h>
2546155Sphk#include <sys/jail.h>
2687275Srwatson#include <sys/lock.h>
2787275Srwatson#include <sys/mutex.h>
28168401Spjd#include <sys/sx.h>
29113275Smike#include <sys/namei.h>
30147185Spjd#include <sys/mount.h>
31113275Smike#include <sys/queue.h>
3246155Sphk#include <sys/socket.h>
33113275Smike#include <sys/syscallsubr.h>
3457163Srwatson#include <sys/sysctl.h>
35113275Smike#include <sys/vnode.h>
3646155Sphk#include <net/if.h>
3746155Sphk#include <netinet/in.h>
3846155Sphk
39163606Srwatson#include <security/mac/mac_framework.h>
40163606Srwatson
4146155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
4246155Sphk
4389414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
4457163Srwatson    "Jail rules");
4557163Srwatson
4657163Srwatsonint	jail_set_hostname_allowed = 1;
4789414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
4857163Srwatson    &jail_set_hostname_allowed, 0,
4957163Srwatson    "Processes in jail can set their hostnames");
5057163Srwatson
5161235Srwatsonint	jail_socket_unixiproute_only = 1;
5289414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
5361235Srwatson    &jail_socket_unixiproute_only, 0,
5461235Srwatson    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
5561235Srwatson
5668024Srwatsonint	jail_sysvipc_allowed = 0;
5789414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
5868024Srwatson    &jail_sysvipc_allowed, 0,
5968024Srwatson    "Processes in jail can use System V IPC primitives");
6068024Srwatson
61147185Spjdstatic int jail_enforce_statfs = 2;
62147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
63147185Spjd    &jail_enforce_statfs, 0,
64147185Spjd    "Processes in jail cannot see all mounted file systems");
65125804Srwatson
66128664Sbmilekicint	jail_allow_raw_sockets = 0;
67128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
68128664Sbmilekic    &jail_allow_raw_sockets, 0,
69128664Sbmilekic    "Prison root can create raw sockets");
70128664Sbmilekic
71141543Scpercivaint	jail_chflags_allowed = 0;
72141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
73141543Scperciva    &jail_chflags_allowed, 0,
74141543Scperciva    "Processes in jail can alter system file flags");
75141543Scperciva
76168396Spjdint	jail_mount_allowed = 0;
77168396SpjdSYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
78168396Spjd    &jail_mount_allowed, 0,
79168396Spjd    "Processes in jail can mount/unmount jail-friendly file systems");
80168396Spjd
81168401Spjd/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
82113275Smikestruct	prisonlist allprison;
83168401Spjdstruct	sx allprison_lock;
84113275Smikeint	lastprid = 0;
85113275Smikeint	prisoncount = 0;
86113275Smike
87168401Spjd/*
88168401Spjd * List of jail services. Protected by allprison_lock.
89168401Spjd */
90168401SpjdTAILQ_HEAD(prison_services_head, prison_service);
91168401Spjdstatic struct prison_services_head prison_services =
92168401Spjd    TAILQ_HEAD_INITIALIZER(prison_services);
93168401Spjdstatic int prison_service_slots = 0;
94168401Spjd
95168401Spjdstruct prison_service {
96168401Spjd	prison_create_t ps_create;
97168401Spjd	prison_destroy_t ps_destroy;
98168401Spjd	int		ps_slotno;
99168401Spjd	TAILQ_ENTRY(prison_service) ps_next;
100168401Spjd	char	ps_name[0];
101168401Spjd};
102168401Spjd
103113275Smikestatic void		 init_prison(void *);
104124882Srwatsonstatic void		 prison_complete(void *context, int pending);
105113275Smikestatic int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
106113275Smike
107113275Smikestatic void
108113275Smikeinit_prison(void *data __unused)
109113275Smike{
110113275Smike
111168401Spjd	sx_init(&allprison_lock, "allprison");
112113275Smike	LIST_INIT(&allprison);
113113275Smike}
114113275Smike
115113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
116113275Smike
11782710Sdillon/*
118114168Smike * struct jail_args {
119114168Smike *	struct jail *jail;
120114168Smike * };
12182710Sdillon */
12246155Sphkint
123114168Smikejail(struct thread *td, struct jail_args *uap)
12446155Sphk{
125113275Smike	struct nameidata nd;
126113275Smike	struct prison *pr, *tpr;
127168401Spjd	struct prison_service *psrv;
12846155Sphk	struct jail j;
129113275Smike	struct jail_attach_args jaa;
130150652Scsjp	int vfslocked, error, tryprid;
13146155Sphk
132114168Smike	error = copyin(uap->jail, &j, sizeof(j));
13346155Sphk	if (error)
13484828Sjhb		return (error);
13584828Sjhb	if (j.version != 0)
13684828Sjhb		return (EINVAL);
13784828Sjhb
138114168Smike	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
13993818Sjhb	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
140113275Smike	pr->pr_ref = 1;
141114168Smike	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
142113275Smike	if (error)
143113275Smike		goto e_killmtx;
144150652Scsjp	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
145150652Scsjp	    pr->pr_path, td);
146113275Smike	error = namei(&nd);
147150652Scsjp	if (error)
148113275Smike		goto e_killmtx;
149150652Scsjp	vfslocked = NDHASGIANT(&nd);
150113275Smike	pr->pr_root = nd.ni_vp;
151113275Smike	VOP_UNLOCK(nd.ni_vp, 0, td);
152113275Smike	NDFREE(&nd, NDF_ONLY_PNBUF);
153150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
154114168Smike	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
15584828Sjhb	if (error)
156113275Smike		goto e_dropvnref;
157113275Smike	pr->pr_ip = j.ip_number;
158113275Smike	pr->pr_linux = NULL;
159113275Smike	pr->pr_securelevel = securelevel;
160168401Spjd	if (prison_service_slots == 0)
161168401Spjd		pr->pr_slots = NULL;
162168401Spjd	else {
163168401Spjd		pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
164168401Spjd		    M_PRISON, M_ZERO | M_WAITOK);
165168401Spjd	}
166113275Smike
167113275Smike	/* Determine next pr_id and add prison to allprison list. */
168168401Spjd	sx_xlock(&allprison_lock);
169113275Smike	tryprid = lastprid + 1;
170113275Smike	if (tryprid == JAIL_MAX)
171113275Smike		tryprid = 1;
172113275Smikenext:
173113275Smike	LIST_FOREACH(tpr, &allprison, pr_list) {
174113275Smike		if (tpr->pr_id == tryprid) {
175113275Smike			tryprid++;
176113275Smike			if (tryprid == JAIL_MAX) {
177168401Spjd				sx_xunlock(&allprison_lock);
178113275Smike				error = EAGAIN;
179113275Smike				goto e_dropvnref;
180113275Smike			}
181113275Smike			goto next;
182113275Smike		}
183113275Smike	}
184113275Smike	pr->pr_id = jaa.jid = lastprid = tryprid;
185113275Smike	LIST_INSERT_HEAD(&allprison, pr, pr_list);
186113275Smike	prisoncount++;
187168401Spjd	sx_downgrade(&allprison_lock);
188168401Spjd	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
189168401Spjd		psrv->ps_create(psrv, pr);
190168401Spjd	}
191168401Spjd	sx_sunlock(&allprison_lock);
192113275Smike
193113275Smike	error = jail_attach(td, &jaa);
194113275Smike	if (error)
195113275Smike		goto e_dropprref;
196113275Smike	mtx_lock(&pr->pr_mtx);
197113275Smike	pr->pr_ref--;
198113275Smike	mtx_unlock(&pr->pr_mtx);
199113275Smike	td->td_retval[0] = jaa.jid;
200113275Smike	return (0);
201113275Smikee_dropprref:
202168401Spjd	sx_xlock(&allprison_lock);
203113275Smike	LIST_REMOVE(pr, pr_list);
204113275Smike	prisoncount--;
205168401Spjd	sx_downgrade(&allprison_lock);
206168401Spjd	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
207168401Spjd		psrv->ps_destroy(psrv, pr);
208168401Spjd	}
209168401Spjd	sx_sunlock(&allprison_lock);
210113275Smikee_dropvnref:
211150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
212113275Smike	vrele(pr->pr_root);
213150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
214113275Smikee_killmtx:
215113275Smike	mtx_destroy(&pr->pr_mtx);
216113275Smike	FREE(pr, M_PRISON);
217113275Smike	return (error);
218113275Smike}
219113275Smike
220113275Smike/*
221114168Smike * struct jail_attach_args {
222114168Smike *	int jid;
223114168Smike * };
224113275Smike */
225113275Smikeint
226114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap)
227113275Smike{
228113275Smike	struct proc *p;
229113275Smike	struct ucred *newcred, *oldcred;
230113275Smike	struct prison *pr;
231150652Scsjp	int vfslocked, error;
232167309Spjd
233126023Snectar	/*
234126023Snectar	 * XXX: Note that there is a slight race here if two threads
235126023Snectar	 * in the same privileged process attempt to attach to two
236126023Snectar	 * different jails at the same time.  It is important for
237126023Snectar	 * user processes not to do this, or they might end up with
238126023Snectar	 * a process root from one prison, but attached to the jail
239126023Snectar	 * of another.
240126023Snectar	 */
241164032Srwatson	error = priv_check(td, PRIV_JAIL_ATTACH);
242126023Snectar	if (error)
243126023Snectar		return (error);
244126023Snectar
245113275Smike	p = td->td_proc;
246168401Spjd	sx_slock(&allprison_lock);
247113275Smike	pr = prison_find(uap->jid);
248113275Smike	if (pr == NULL) {
249168401Spjd		sx_sunlock(&allprison_lock);
250113275Smike		return (EINVAL);
251113275Smike	}
252113275Smike	pr->pr_ref++;
253113275Smike	mtx_unlock(&pr->pr_mtx);
254168401Spjd	sx_sunlock(&allprison_lock);
255113275Smike
256150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
257113275Smike	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
258113275Smike	if ((error = change_dir(pr->pr_root, td)) != 0)
259113275Smike		goto e_unlock;
260113275Smike#ifdef MAC
261113275Smike	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
262113275Smike		goto e_unlock;
263113275Smike#endif
264113275Smike	VOP_UNLOCK(pr->pr_root, 0, td);
265113275Smike	change_root(pr->pr_root, td);
266150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
267113275Smike
26884828Sjhb	newcred = crget();
26984828Sjhb	PROC_LOCK(p);
27084828Sjhb	oldcred = p->p_ucred;
271113275Smike	setsugid(p);
27284828Sjhb	crcopy(newcred, oldcred);
273113630Sjhb	newcred->cr_prison = pr;
27484828Sjhb	p->p_ucred = newcred;
27584828Sjhb	PROC_UNLOCK(p);
27684828Sjhb	crfree(oldcred);
27746155Sphk	return (0);
278113275Smikee_unlock:
279113275Smike	VOP_UNLOCK(pr->pr_root, 0, td);
280150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
281113275Smike	mtx_lock(&pr->pr_mtx);
282113275Smike	pr->pr_ref--;
283113275Smike	mtx_unlock(&pr->pr_mtx);
28446155Sphk	return (error);
28546155Sphk}
28646155Sphk
287113275Smike/*
288113275Smike * Returns a locked prison instance, or NULL on failure.
289113275Smike */
290168399Spjdstruct prison *
291113275Smikeprison_find(int prid)
292113275Smike{
293113275Smike	struct prison *pr;
294113275Smike
295168401Spjd	sx_assert(&allprison_lock, SX_LOCKED);
296113275Smike	LIST_FOREACH(pr, &allprison, pr_list) {
297113275Smike		if (pr->pr_id == prid) {
298113275Smike			mtx_lock(&pr->pr_mtx);
299113275Smike			return (pr);
300113275Smike		}
301113275Smike	}
302113275Smike	return (NULL);
303113275Smike}
304113275Smike
30572786Srwatsonvoid
30672786Srwatsonprison_free(struct prison *pr)
30772786Srwatson{
308168401Spjd	struct prison_service *psrv;
30972786Srwatson
310168401Spjd	sx_xlock(&allprison_lock);
31187275Srwatson	mtx_lock(&pr->pr_mtx);
31272786Srwatson	pr->pr_ref--;
31372786Srwatson	if (pr->pr_ref == 0) {
314113275Smike		LIST_REMOVE(pr, pr_list);
31587275Srwatson		mtx_unlock(&pr->pr_mtx);
316113275Smike		prisoncount--;
317168401Spjd		sx_downgrade(&allprison_lock);
318168401Spjd		TAILQ_FOREACH(psrv, &prison_services, ps_next) {
319168401Spjd			psrv->ps_destroy(psrv, pr);
320168401Spjd		}
321168401Spjd		sx_sunlock(&allprison_lock);
322124882Srwatson
323124882Srwatson		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
324144660Sjeff		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
32587275Srwatson		return;
32672786Srwatson	}
32787275Srwatson	mtx_unlock(&pr->pr_mtx);
328168401Spjd	sx_xunlock(&allprison_lock);
32972786Srwatson}
33072786Srwatson
331124882Srwatsonstatic void
332124882Srwatsonprison_complete(void *context, int pending)
333124882Srwatson{
334124882Srwatson	struct prison *pr;
335150652Scsjp	int vfslocked;
336124882Srwatson
337124882Srwatson	pr = (struct prison *)context;
338124882Srwatson
339150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
340124882Srwatson	vrele(pr->pr_root);
341150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
342124882Srwatson
343124882Srwatson	mtx_destroy(&pr->pr_mtx);
344124882Srwatson	if (pr->pr_linux != NULL)
345124882Srwatson		FREE(pr->pr_linux, M_PRISON);
346124882Srwatson	FREE(pr, M_PRISON);
347124882Srwatson}
348124882Srwatson
34972786Srwatsonvoid
35072786Srwatsonprison_hold(struct prison *pr)
35172786Srwatson{
35272786Srwatson
35387275Srwatson	mtx_lock(&pr->pr_mtx);
35472786Srwatson	pr->pr_ref++;
35587275Srwatson	mtx_unlock(&pr->pr_mtx);
35672786Srwatson}
35772786Srwatson
35887275Srwatsonu_int32_t
35987275Srwatsonprison_getip(struct ucred *cred)
36087275Srwatson{
36187275Srwatson
36287275Srwatson	return (cred->cr_prison->pr_ip);
36387275Srwatson}
36487275Srwatson
36546155Sphkint
36672786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip)
36746155Sphk{
36846155Sphk	u_int32_t tmp;
36946155Sphk
37072786Srwatson	if (!jailed(cred))
37146155Sphk		return (0);
372167309Spjd	if (flag)
37346155Sphk		tmp = *ip;
37446155Sphk	else
37546155Sphk		tmp = ntohl(*ip);
37646155Sphk	if (tmp == INADDR_ANY) {
377167309Spjd		if (flag)
37872786Srwatson			*ip = cred->cr_prison->pr_ip;
37946155Sphk		else
38072786Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
38146155Sphk		return (0);
38246155Sphk	}
38381114Srwatson	if (tmp == INADDR_LOOPBACK) {
38481114Srwatson		if (flag)
38581114Srwatson			*ip = cred->cr_prison->pr_ip;
38681114Srwatson		else
38781114Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
38881114Srwatson		return (0);
38981114Srwatson	}
39072786Srwatson	if (cred->cr_prison->pr_ip != tmp)
39146155Sphk		return (1);
39246155Sphk	return (0);
39346155Sphk}
39446155Sphk
39546155Sphkvoid
39672786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
39746155Sphk{
39846155Sphk	u_int32_t tmp;
39946155Sphk
40072786Srwatson	if (!jailed(cred))
40146155Sphk		return;
40246155Sphk	if (flag)
40346155Sphk		tmp = *ip;
40446155Sphk	else
40546155Sphk		tmp = ntohl(*ip);
40681114Srwatson	if (tmp == INADDR_LOOPBACK) {
40746155Sphk		if (flag)
40872786Srwatson			*ip = cred->cr_prison->pr_ip;
40946155Sphk		else
41072786Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
41146155Sphk		return;
41246155Sphk	}
41346155Sphk	return;
41446155Sphk}
41546155Sphk
41646155Sphkint
41772786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa)
41846155Sphk{
419114168Smike	struct sockaddr_in *sai;
42046155Sphk	int ok;
42146155Sphk
422114168Smike	sai = (struct sockaddr_in *)sa;
42361235Srwatson	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
42461235Srwatson		ok = 1;
42561235Srwatson	else if (sai->sin_family != AF_INET)
42646155Sphk		ok = 0;
42772786Srwatson	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
42846155Sphk		ok = 1;
42946155Sphk	else
43046155Sphk		ok = 0;
43146155Sphk	return (ok);
43246155Sphk}
43372786Srwatson
43472786Srwatson/*
43572786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
43672786Srwatson */
43772786Srwatsonint
438114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2)
43972786Srwatson{
44072786Srwatson
44172786Srwatson	if (jailed(cred1)) {
44272786Srwatson		if (!jailed(cred2))
44372786Srwatson			return (ESRCH);
44472786Srwatson		if (cred2->cr_prison != cred1->cr_prison)
44572786Srwatson			return (ESRCH);
44672786Srwatson	}
44772786Srwatson
44872786Srwatson	return (0);
44972786Srwatson}
45072786Srwatson
45172786Srwatson/*
45272786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0.
45372786Srwatson */
45472786Srwatsonint
455114168Smikejailed(struct ucred *cred)
45672786Srwatson{
45772786Srwatson
45872786Srwatson	return (cred->cr_prison != NULL);
45972786Srwatson}
46091384Srobert
46191384Srobert/*
46291384Srobert * Return the correct hostname for the passed credential.
46391384Srobert */
46491391Srobertvoid
465114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size)
46691384Srobert{
46791384Srobert
46891391Srobert	if (jailed(cred)) {
46991391Srobert		mtx_lock(&cred->cr_prison->pr_mtx);
470105354Srobert		strlcpy(buf, cred->cr_prison->pr_host, size);
47191391Srobert		mtx_unlock(&cred->cr_prison->pr_mtx);
472114168Smike	} else
473105354Srobert		strlcpy(buf, hostname, size);
47491384Srobert}
475113275Smike
476125804Srwatson/*
477147185Spjd * Determine whether the subject represented by cred can "see"
478147185Spjd * status of a mount point.
479147185Spjd * Returns: 0 for permitted, ENOENT otherwise.
480147185Spjd * XXX: This function should be called cr_canseemount() and should be
481147185Spjd *      placed in kern_prot.c.
482125804Srwatson */
483125804Srwatsonint
484147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp)
485125804Srwatson{
486147185Spjd	struct prison *pr;
487147185Spjd	struct statfs *sp;
488147185Spjd	size_t len;
489125804Srwatson
490147185Spjd	if (!jailed(cred) || jail_enforce_statfs == 0)
491147185Spjd		return (0);
492147185Spjd	pr = cred->cr_prison;
493147185Spjd	if (pr->pr_root->v_mount == mp)
494147185Spjd		return (0);
495147185Spjd	if (jail_enforce_statfs == 2)
496147185Spjd		return (ENOENT);
497147185Spjd	/*
498147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
499147185Spjd	 * all mount-points from inside a jail.
500147185Spjd	 * This is ugly check, but this is the only situation when jail's
501147185Spjd	 * directory ends with '/'.
502147185Spjd	 */
503147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
504147185Spjd		return (0);
505147185Spjd	len = strlen(pr->pr_path);
506147185Spjd	sp = &mp->mnt_stat;
507147185Spjd	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
508147185Spjd		return (ENOENT);
509147185Spjd	/*
510147185Spjd	 * Be sure that we don't have situation where jail's root directory
511147185Spjd	 * is "/some/path" and mount point is "/some/pathpath".
512147185Spjd	 */
513147185Spjd	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
514147185Spjd		return (ENOENT);
515147185Spjd	return (0);
516147185Spjd}
517147185Spjd
518147185Spjdvoid
519147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
520147185Spjd{
521147185Spjd	char jpath[MAXPATHLEN];
522147185Spjd	struct prison *pr;
523147185Spjd	size_t len;
524147185Spjd
525147185Spjd	if (!jailed(cred) || jail_enforce_statfs == 0)
526147185Spjd		return;
527147185Spjd	pr = cred->cr_prison;
528147185Spjd	if (prison_canseemount(cred, mp) != 0) {
529147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
530147185Spjd		strlcpy(sp->f_mntonname, "[restricted]",
531147185Spjd		    sizeof(sp->f_mntonname));
532147185Spjd		return;
533125804Srwatson	}
534147185Spjd	if (pr->pr_root->v_mount == mp) {
535147185Spjd		/*
536147185Spjd		 * Clear current buffer data, so we are sure nothing from
537147185Spjd		 * the valid path left there.
538147185Spjd		 */
539147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
540147185Spjd		*sp->f_mntonname = '/';
541147185Spjd		return;
542147185Spjd	}
543147185Spjd	/*
544147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
545147185Spjd	 * all mount-points from inside a jail.
546147185Spjd	 */
547147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
548147185Spjd		return;
549147185Spjd	len = strlen(pr->pr_path);
550147185Spjd	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
551147185Spjd	/*
552147185Spjd	 * Clear current buffer data, so we are sure nothing from
553147185Spjd	 * the valid path left there.
554147185Spjd	 */
555147185Spjd	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
556147185Spjd	if (*jpath == '\0') {
557147185Spjd		/* Should never happen. */
558147185Spjd		*sp->f_mntonname = '/';
559147185Spjd	} else {
560147185Spjd		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
561147185Spjd	}
562125804Srwatson}
563125804Srwatson
564164032Srwatson/*
565164032Srwatson * Check with permission for a specific privilege is granted within jail.  We
566164032Srwatson * have a specific list of accepted privileges; the rest are denied.
567164032Srwatson */
568164032Srwatsonint
569164032Srwatsonprison_priv_check(struct ucred *cred, int priv)
570164032Srwatson{
571164032Srwatson
572164032Srwatson	if (!jailed(cred))
573164032Srwatson		return (0);
574164032Srwatson
575164032Srwatson	switch (priv) {
576164032Srwatson
577164032Srwatson		/*
578164032Srwatson		 * Allow ktrace privileges for root in jail.
579164032Srwatson		 */
580164032Srwatson	case PRIV_KTRACE:
581164032Srwatson
582166827Srwatson#if 0
583164032Srwatson		/*
584164032Srwatson		 * Allow jailed processes to configure audit identity and
585164032Srwatson		 * submit audit records (login, etc).  In the future we may
586164032Srwatson		 * want to further refine the relationship between audit and
587164032Srwatson		 * jail.
588164032Srwatson		 */
589164032Srwatson	case PRIV_AUDIT_GETAUDIT:
590164032Srwatson	case PRIV_AUDIT_SETAUDIT:
591164032Srwatson	case PRIV_AUDIT_SUBMIT:
592166827Srwatson#endif
593164032Srwatson
594164032Srwatson		/*
595164032Srwatson		 * Allow jailed processes to manipulate process UNIX
596164032Srwatson		 * credentials in any way they see fit.
597164032Srwatson		 */
598164032Srwatson	case PRIV_CRED_SETUID:
599164032Srwatson	case PRIV_CRED_SETEUID:
600164032Srwatson	case PRIV_CRED_SETGID:
601164032Srwatson	case PRIV_CRED_SETEGID:
602164032Srwatson	case PRIV_CRED_SETGROUPS:
603164032Srwatson	case PRIV_CRED_SETREUID:
604164032Srwatson	case PRIV_CRED_SETREGID:
605164032Srwatson	case PRIV_CRED_SETRESUID:
606164032Srwatson	case PRIV_CRED_SETRESGID:
607164032Srwatson
608164032Srwatson		/*
609164032Srwatson		 * Jail implements visibility constraints already, so allow
610164032Srwatson		 * jailed root to override uid/gid-based constraints.
611164032Srwatson		 */
612164032Srwatson	case PRIV_SEEOTHERGIDS:
613164032Srwatson	case PRIV_SEEOTHERUIDS:
614164032Srwatson
615164032Srwatson		/*
616164032Srwatson		 * Jail implements inter-process debugging limits already, so
617164032Srwatson		 * allow jailed root various debugging privileges.
618164032Srwatson		 */
619164032Srwatson	case PRIV_DEBUG_DIFFCRED:
620164032Srwatson	case PRIV_DEBUG_SUGID:
621164032Srwatson	case PRIV_DEBUG_UNPRIV:
622164032Srwatson
623164032Srwatson		/*
624164032Srwatson		 * Allow jail to set various resource limits and login
625164032Srwatson		 * properties, and for now, exceed process resource limits.
626164032Srwatson		 */
627164032Srwatson	case PRIV_PROC_LIMIT:
628164032Srwatson	case PRIV_PROC_SETLOGIN:
629164032Srwatson	case PRIV_PROC_SETRLIMIT:
630164032Srwatson
631164032Srwatson		/*
632164032Srwatson		 * System V and POSIX IPC privileges are granted in jail.
633164032Srwatson		 */
634164032Srwatson	case PRIV_IPC_READ:
635164032Srwatson	case PRIV_IPC_WRITE:
636164032Srwatson	case PRIV_IPC_ADMIN:
637164032Srwatson	case PRIV_IPC_MSGSIZE:
638164032Srwatson	case PRIV_MQ_ADMIN:
639164032Srwatson
640164032Srwatson		/*
641164032Srwatson		 * Jail implements its own inter-process limits, so allow
642164032Srwatson		 * root processes in jail to change scheduling on other
643164032Srwatson		 * processes in the same jail.  Likewise for signalling.
644164032Srwatson		 */
645164032Srwatson	case PRIV_SCHED_DIFFCRED:
646164032Srwatson	case PRIV_SIGNAL_DIFFCRED:
647164032Srwatson	case PRIV_SIGNAL_SUGID:
648164032Srwatson
649164032Srwatson		/*
650164032Srwatson		 * Allow jailed processes to write to sysctls marked as jail
651164032Srwatson		 * writable.
652164032Srwatson		 */
653164032Srwatson	case PRIV_SYSCTL_WRITEJAIL:
654164032Srwatson
655164032Srwatson		/*
656164032Srwatson		 * Allow root in jail to manage a variety of quota
657166831Srwatson		 * properties.  These should likely be conditional on a
658166831Srwatson		 * configuration option.
659164032Srwatson		 */
660166832Srwatson	case PRIV_VFS_GETQUOTA:
661166832Srwatson	case PRIV_VFS_SETQUOTA:
662164032Srwatson
663164032Srwatson		/*
664164032Srwatson		 * Since Jail relies on chroot() to implement file system
665164032Srwatson		 * protections, grant many VFS privileges to root in jail.
666164032Srwatson		 * Be careful to exclude mount-related and NFS-related
667164032Srwatson		 * privileges.
668164032Srwatson		 */
669164032Srwatson	case PRIV_VFS_READ:
670164032Srwatson	case PRIV_VFS_WRITE:
671164032Srwatson	case PRIV_VFS_ADMIN:
672164032Srwatson	case PRIV_VFS_EXEC:
673164032Srwatson	case PRIV_VFS_LOOKUP:
674164032Srwatson	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
675164032Srwatson	case PRIV_VFS_CHFLAGS_DEV:
676164032Srwatson	case PRIV_VFS_CHOWN:
677164032Srwatson	case PRIV_VFS_CHROOT:
678167152Spjd	case PRIV_VFS_RETAINSUGID:
679164032Srwatson	case PRIV_VFS_FCHROOT:
680164032Srwatson	case PRIV_VFS_LINK:
681164032Srwatson	case PRIV_VFS_SETGID:
682164032Srwatson	case PRIV_VFS_STICKYFILE:
683164032Srwatson		return (0);
684164032Srwatson
685164032Srwatson		/*
686164032Srwatson		 * Depending on the global setting, allow privilege of
687164032Srwatson		 * setting system flags.
688164032Srwatson		 */
689164032Srwatson	case PRIV_VFS_SYSFLAGS:
690164032Srwatson		if (jail_chflags_allowed)
691164032Srwatson			return (0);
692164032Srwatson		else
693164032Srwatson			return (EPERM);
694164032Srwatson
695164032Srwatson		/*
696168396Spjd		 * Depending on the global setting, allow privilege of
697168396Spjd		 * mounting/unmounting file systems.
698168396Spjd		 */
699168396Spjd	case PRIV_VFS_MOUNT:
700168396Spjd	case PRIV_VFS_UNMOUNT:
701168396Spjd	case PRIV_VFS_MOUNT_NONUSER:
702168396Spjd		if (jail_mount_allowed)
703168396Spjd			return (0);
704168396Spjd		else
705168396Spjd			return (EPERM);
706168396Spjd
707168396Spjd		/*
708164032Srwatson		 * Allow jailed root to bind reserved ports.
709164032Srwatson		 */
710164032Srwatson	case PRIV_NETINET_RESERVEDPORT:
711164032Srwatson		return (0);
712164032Srwatson
713164032Srwatson		/*
714164032Srwatson		 * Conditionally allow creating raw sockets in jail.
715164032Srwatson		 */
716164032Srwatson	case PRIV_NETINET_RAW:
717164032Srwatson		if (jail_allow_raw_sockets)
718164032Srwatson			return (0);
719164032Srwatson		else
720164032Srwatson			return (EPERM);
721164032Srwatson
722164032Srwatson		/*
723164032Srwatson		 * Since jail implements its own visibility limits on netstat
724164032Srwatson		 * sysctls, allow getcred.  This allows identd to work in
725164032Srwatson		 * jail.
726164032Srwatson		 */
727164032Srwatson	case PRIV_NETINET_GETCRED:
728164032Srwatson		return (0);
729164032Srwatson
730164032Srwatson	default:
731164032Srwatson		/*
732164032Srwatson		 * In all remaining cases, deny the privilege request.  This
733164032Srwatson		 * includes almost all network privileges, many system
734164032Srwatson		 * configuration privileges.
735164032Srwatson		 */
736164032Srwatson		return (EPERM);
737164032Srwatson	}
738164032Srwatson}
739164032Srwatson
740168401Spjd/*
741168401Spjd * Register jail service. Provides 'create' and 'destroy' methods.
742168401Spjd * 'create' method will be called for every existing jail and all
743168401Spjd * jails in the future as they beeing created.
744168401Spjd * 'destroy' method will be called for every jail going away and
745168401Spjd * for all existing jails at the time of service deregistration.
746168401Spjd */
747168401Spjdstruct prison_service *
748168401Spjdprison_service_register(const char *name, prison_create_t create,
749168401Spjd    prison_destroy_t destroy)
750168401Spjd{
751168401Spjd	struct prison_service *psrv, *psrv2;
752168401Spjd	struct prison *pr;
753168401Spjd	int reallocate = 1, slotno = 0;
754168401Spjd	void **slots, **oldslots;
755168401Spjd
756168401Spjd	psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
757168401Spjd	    M_WAITOK | M_ZERO);
758168401Spjd	psrv->ps_create = create;
759168401Spjd	psrv->ps_destroy = destroy;
760168401Spjd	strcpy(psrv->ps_name, name);
761168401Spjd	/*
762168401Spjd	 * Grab the allprison_lock here, so we won't miss any jail
763168401Spjd	 * creation/destruction.
764168401Spjd	 */
765168401Spjd	sx_xlock(&allprison_lock);
766168401Spjd#ifdef INVARIANTS
767168401Spjd	/*
768168401Spjd	 * Verify if service is not already registered.
769168401Spjd	 */
770168401Spjd	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
771168401Spjd		KASSERT(strcmp(psrv2->ps_name, name) != 0,
772168401Spjd		    ("jail service %s already registered", name));
773168401Spjd	}
774168401Spjd#endif
775168401Spjd	/*
776168401Spjd	 * Find free slot. When there is no existing free slot available,
777168401Spjd	 * allocate one at the end.
778168401Spjd	 */
779168401Spjd	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
780168401Spjd		if (psrv2->ps_slotno != slotno) {
781168401Spjd			KASSERT(slotno < psrv2->ps_slotno,
782168401Spjd			    ("Invalid slotno (slotno=%d >= ps_slotno=%d",
783168401Spjd			    slotno, psrv2->ps_slotno));
784168401Spjd			/* We found free slot. */
785168401Spjd			reallocate = 0;
786168401Spjd			break;
787168401Spjd		}
788168401Spjd		slotno++;
789168401Spjd	}
790168401Spjd	psrv->ps_slotno = slotno;
791168401Spjd	/*
792168401Spjd	 * Keep the list sorted by slot number.
793168401Spjd	 */
794168401Spjd	if (psrv2 != NULL) {
795168401Spjd		KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
796168401Spjd		TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
797168401Spjd	} else {
798168401Spjd		KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
799168401Spjd		TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
800168401Spjd	}
801168401Spjd	prison_service_slots++;
802168401Spjd	sx_downgrade(&allprison_lock);
803168401Spjd	/*
804168401Spjd	 * Allocate memory for new slot if we didn't found empty one.
805168401Spjd	 * Do not use realloc(9), because pr_slots is protected with a mutex,
806168401Spjd	 * so we can't sleep.
807168401Spjd	 */
808168401Spjd	LIST_FOREACH(pr, &allprison, pr_list) {
809168401Spjd		if (reallocate) {
810168401Spjd			/* First allocate memory with M_WAITOK. */
811168401Spjd			slots = malloc(sizeof(*slots) * prison_service_slots,
812168401Spjd			    M_PRISON, M_WAITOK);
813168401Spjd			/* Now grab the mutex and replace pr_slots. */
814168401Spjd			mtx_lock(&pr->pr_mtx);
815168401Spjd			oldslots = pr->pr_slots;
816168401Spjd			if (psrv->ps_slotno > 0) {
817168401Spjd				bcopy(oldslots, slots,
818168401Spjd				    sizeof(*slots) * (prison_service_slots - 1));
819168401Spjd			}
820168401Spjd			slots[psrv->ps_slotno] = NULL;
821168401Spjd			pr->pr_slots = slots;
822168401Spjd			mtx_unlock(&pr->pr_mtx);
823168401Spjd			if (oldslots != NULL)
824168401Spjd				free(oldslots, M_PRISON);
825168401Spjd		}
826168401Spjd		/*
827168401Spjd		 * Call 'create' method for each existing jail.
828168401Spjd		 */
829168401Spjd		psrv->ps_create(psrv, pr);
830168401Spjd	}
831168401Spjd	sx_sunlock(&allprison_lock);
832168401Spjd
833168401Spjd	return (psrv);
834168401Spjd}
835168401Spjd
836168401Spjdvoid
837168401Spjdprison_service_deregister(struct prison_service *psrv)
838168401Spjd{
839168401Spjd	struct prison *pr;
840168401Spjd	void **slots, **oldslots;
841168401Spjd	int last = 0;
842168401Spjd
843168401Spjd	sx_xlock(&allprison_lock);
844168401Spjd	if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
845168401Spjd		last = 1;
846168401Spjd	TAILQ_REMOVE(&prison_services, psrv, ps_next);
847168401Spjd	prison_service_slots--;
848168401Spjd	sx_downgrade(&allprison_lock);
849168401Spjd	LIST_FOREACH(pr, &allprison, pr_list) {
850168401Spjd		/*
851168401Spjd		 * Call 'destroy' method for every currently existing jail.
852168401Spjd		 */
853168401Spjd		psrv->ps_destroy(psrv, pr);
854168401Spjd		/*
855168401Spjd		 * If this is the last slot, free the memory allocated for it.
856168401Spjd		 */
857168401Spjd		if (last) {
858168401Spjd			if (prison_service_slots == 0)
859168401Spjd				slots = NULL;
860168401Spjd			else {
861168401Spjd				slots = malloc(sizeof(*slots) * prison_service_slots,
862168401Spjd				    M_PRISON, M_WAITOK);
863168401Spjd			}
864168401Spjd			mtx_lock(&pr->pr_mtx);
865168401Spjd			oldslots = pr->pr_slots;
866168401Spjd			/*
867168401Spjd			 * We require setting slot to NULL after freeing it,
868168401Spjd			 * this way we can check for memory leaks here.
869168401Spjd			 */
870168401Spjd			KASSERT(oldslots[psrv->ps_slotno] == NULL,
871168401Spjd			    ("Slot %d (service %s, jailid=%d) still contains data?",
872168401Spjd			     psrv->ps_slotno, psrv->ps_name, pr->pr_id));
873168401Spjd			if (psrv->ps_slotno > 0) {
874168401Spjd				bcopy(oldslots, slots,
875168401Spjd				    sizeof(*slots) * prison_service_slots);
876168401Spjd			}
877168401Spjd			pr->pr_slots = slots;
878168401Spjd			mtx_unlock(&pr->pr_mtx);
879168401Spjd			KASSERT(oldslots != NULL, ("oldslots == NULL"));
880168401Spjd			free(oldslots, M_PRISON);
881168401Spjd		}
882168401Spjd	}
883168401Spjd	sx_sunlock(&allprison_lock);
884168401Spjd	free(psrv, M_PRISON);
885168401Spjd}
886168401Spjd
887168401Spjd/*
888168401Spjd * Function sets data for the given jail in slot assigned for the given
889168401Spjd * jail service.
890168401Spjd */
891168401Spjdvoid
892168401Spjdprison_service_data_set(struct prison_service *psrv, struct prison *pr,
893168401Spjd    void *data)
894168401Spjd{
895168401Spjd
896168401Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
897168401Spjd	pr->pr_slots[psrv->ps_slotno] = data;
898168401Spjd}
899168401Spjd
900168401Spjd/*
901168401Spjd * Function clears slots assigned for the given jail service in the given
902168401Spjd * prison structure and returns current slot data.
903168401Spjd */
904168401Spjdvoid *
905168401Spjdprison_service_data_del(struct prison_service *psrv, struct prison *pr)
906168401Spjd{
907168401Spjd	void *data;
908168401Spjd
909168401Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
910168401Spjd	data = pr->pr_slots[psrv->ps_slotno];
911168401Spjd	pr->pr_slots[psrv->ps_slotno] = NULL;
912168401Spjd	return (data);
913168401Spjd}
914168401Spjd
915168401Spjd/*
916168401Spjd * Function returns current data from the slot assigned to the given jail
917168401Spjd * service for the given jail.
918168401Spjd */
919168401Spjdvoid *
920168401Spjdprison_service_data_get(struct prison_service *psrv, struct prison *pr)
921168401Spjd{
922168401Spjd
923168401Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
924168401Spjd	return (pr->pr_slots[psrv->ps_slotno]);
925168401Spjd}
926168401Spjd
927113275Smikestatic int
928113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS)
929113275Smike{
930113275Smike	struct xprison *xp, *sxp;
931113275Smike	struct prison *pr;
932113275Smike	int count, error;
933113275Smike
934127020Spjd	if (jailed(req->td->td_ucred))
935125806Srwatson		return (0);
936113275Smike
937168401Spjd	sx_slock(&allprison_lock);
938168401Spjd	if ((count = prisoncount) == 0) {
939168401Spjd		sx_sunlock(&allprison_lock);
940113275Smike		return (0);
941168401Spjd	}
942113275Smike
943113275Smike	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
944167309Spjd
945113275Smike	LIST_FOREACH(pr, &allprison, pr_list) {
946113275Smike		mtx_lock(&pr->pr_mtx);
947113275Smike		xp->pr_version = XPRISON_VERSION;
948113275Smike		xp->pr_id = pr->pr_id;
949113275Smike		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
950113275Smike		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
951113275Smike		xp->pr_ip = pr->pr_ip;
952113275Smike		mtx_unlock(&pr->pr_mtx);
953113275Smike		xp++;
954113275Smike	}
955168401Spjd	sx_sunlock(&allprison_lock);
956113275Smike
957113275Smike	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
958113275Smike	free(sxp, M_TEMP);
959167354Spjd	return (error);
960113275Smike}
961113275Smike
962113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
963113275Smike    NULL, 0, sysctl_jail_list, "S", "List of active jails");
964126004Spjd
965126004Spjdstatic int
966126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
967126004Spjd{
968126004Spjd	int error, injail;
969126004Spjd
970126004Spjd	injail = jailed(req->td->td_ucred);
971126004Spjd	error = SYSCTL_OUT(req, &injail, sizeof(injail));
972126004Spjd
973126004Spjd	return (error);
974126004Spjd}
975126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
976126004Spjd    NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
977