Deleted Added
full compact
kern_jail.c (179881) kern_jail.c (180291)
1/*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 */
9
10#include <sys/cdefs.h>
1/*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 */
9
10#include <sys/cdefs.h>
11__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 179881 2008-06-19 21:41:57Z delphij $");
11__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 180291 2008-07-05 13:10:10Z rwatson $");
12
13#include "opt_mac.h"
14
15#include <sys/param.h>
16#include <sys/types.h>
17#include <sys/kernel.h>
18#include <sys/systm.h>
19#include <sys/errno.h>
20#include <sys/sysproto.h>
21#include <sys/malloc.h>
22#include <sys/priv.h>
23#include <sys/proc.h>
24#include <sys/taskqueue.h>
25#include <sys/fcntl.h>
26#include <sys/jail.h>
27#include <sys/lock.h>
28#include <sys/mutex.h>
29#include <sys/sx.h>
30#include <sys/namei.h>
31#include <sys/mount.h>
32#include <sys/queue.h>
33#include <sys/socket.h>
34#include <sys/syscallsubr.h>
35#include <sys/sysctl.h>
36#include <sys/vnode.h>
37#include <net/if.h>
38#include <netinet/in.h>
39
40#include <security/mac/mac_framework.h>
41
42MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
43
44SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
45 "Jail rules");
46
47int jail_set_hostname_allowed = 1;
48SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
49 &jail_set_hostname_allowed, 0,
50 "Processes in jail can set their hostnames");
51
52int jail_socket_unixiproute_only = 1;
53SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
54 &jail_socket_unixiproute_only, 0,
55 "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
56
57int jail_sysvipc_allowed = 0;
58SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
59 &jail_sysvipc_allowed, 0,
60 "Processes in jail can use System V IPC primitives");
61
62static int jail_enforce_statfs = 2;
63SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
64 &jail_enforce_statfs, 0,
65 "Processes in jail cannot see all mounted file systems");
66
67int jail_allow_raw_sockets = 0;
68SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
69 &jail_allow_raw_sockets, 0,
70 "Prison root can create raw sockets");
71
72int jail_chflags_allowed = 0;
73SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
74 &jail_chflags_allowed, 0,
75 "Processes in jail can alter system file flags");
76
77int jail_mount_allowed = 0;
78SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
79 &jail_mount_allowed, 0,
80 "Processes in jail can mount/unmount jail-friendly file systems");
81
82/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
83struct prisonlist allprison;
84struct sx allprison_lock;
85int lastprid = 0;
86int prisoncount = 0;
87
88/*
89 * List of jail services. Protected by allprison_lock.
90 */
91TAILQ_HEAD(prison_services_head, prison_service);
92static struct prison_services_head prison_services =
93 TAILQ_HEAD_INITIALIZER(prison_services);
94static int prison_service_slots = 0;
95
96struct prison_service {
97 prison_create_t ps_create;
98 prison_destroy_t ps_destroy;
99 int ps_slotno;
100 TAILQ_ENTRY(prison_service) ps_next;
101 char ps_name[0];
102};
103
104static void init_prison(void *);
105static void prison_complete(void *context, int pending);
106static int sysctl_jail_list(SYSCTL_HANDLER_ARGS);
107
108static void
109init_prison(void *data __unused)
110{
111
112 sx_init(&allprison_lock, "allprison");
113 LIST_INIT(&allprison);
114}
115
116SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
117
118/*
119 * struct jail_args {
120 * struct jail *jail;
121 * };
122 */
123int
124jail(struct thread *td, struct jail_args *uap)
125{
126 struct nameidata nd;
127 struct prison *pr, *tpr;
128 struct prison_service *psrv;
129 struct jail j;
130 struct jail_attach_args jaa;
131 int vfslocked, error, tryprid;
132
133 error = copyin(uap->jail, &j, sizeof(j));
134 if (error)
135 return (error);
136 if (j.version != 0)
137 return (EINVAL);
138
139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
141 pr->pr_ref = 1;
142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
143 if (error)
144 goto e_killmtx;
145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
146 pr->pr_path, td);
147 error = namei(&nd);
148 if (error)
149 goto e_killmtx;
150 vfslocked = NDHASGIANT(&nd);
151 pr->pr_root = nd.ni_vp;
152 VOP_UNLOCK(nd.ni_vp, 0);
153 NDFREE(&nd, NDF_ONLY_PNBUF);
154 VFS_UNLOCK_GIANT(vfslocked);
155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
156 if (error)
157 goto e_dropvnref;
158 pr->pr_ip = j.ip_number;
159 pr->pr_linux = NULL;
160 pr->pr_securelevel = securelevel;
161 if (prison_service_slots == 0)
162 pr->pr_slots = NULL;
163 else {
164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
165 M_PRISON, M_ZERO | M_WAITOK);
166 }
167
168 /* Determine next pr_id and add prison to allprison list. */
169 sx_xlock(&allprison_lock);
170 tryprid = lastprid + 1;
171 if (tryprid == JAIL_MAX)
172 tryprid = 1;
173next:
174 LIST_FOREACH(tpr, &allprison, pr_list) {
175 if (tpr->pr_id == tryprid) {
176 tryprid++;
177 if (tryprid == JAIL_MAX) {
178 sx_xunlock(&allprison_lock);
179 error = EAGAIN;
180 goto e_dropvnref;
181 }
182 goto next;
183 }
184 }
185 pr->pr_id = jaa.jid = lastprid = tryprid;
186 LIST_INSERT_HEAD(&allprison, pr, pr_list);
187 prisoncount++;
188 sx_downgrade(&allprison_lock);
189 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
190 psrv->ps_create(psrv, pr);
191 }
192 sx_sunlock(&allprison_lock);
193
194 error = jail_attach(td, &jaa);
195 if (error)
196 goto e_dropprref;
197 mtx_lock(&pr->pr_mtx);
198 pr->pr_ref--;
199 mtx_unlock(&pr->pr_mtx);
200 td->td_retval[0] = jaa.jid;
201 return (0);
202e_dropprref:
203 sx_xlock(&allprison_lock);
204 LIST_REMOVE(pr, pr_list);
205 prisoncount--;
206 sx_downgrade(&allprison_lock);
207 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
208 psrv->ps_destroy(psrv, pr);
209 }
210 sx_sunlock(&allprison_lock);
211e_dropvnref:
212 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
213 vrele(pr->pr_root);
214 VFS_UNLOCK_GIANT(vfslocked);
215e_killmtx:
216 mtx_destroy(&pr->pr_mtx);
217 FREE(pr, M_PRISON);
218 return (error);
219}
220
221/*
222 * struct jail_attach_args {
223 * int jid;
224 * };
225 */
226int
227jail_attach(struct thread *td, struct jail_attach_args *uap)
228{
229 struct proc *p;
230 struct ucred *newcred, *oldcred;
231 struct prison *pr;
232 int vfslocked, error;
233
234 /*
235 * XXX: Note that there is a slight race here if two threads
236 * in the same privileged process attempt to attach to two
237 * different jails at the same time. It is important for
238 * user processes not to do this, or they might end up with
239 * a process root from one prison, but attached to the jail
240 * of another.
241 */
242 error = priv_check(td, PRIV_JAIL_ATTACH);
243 if (error)
244 return (error);
245
246 p = td->td_proc;
247 sx_slock(&allprison_lock);
248 pr = prison_find(uap->jid);
249 if (pr == NULL) {
250 sx_sunlock(&allprison_lock);
251 return (EINVAL);
252 }
253 pr->pr_ref++;
254 mtx_unlock(&pr->pr_mtx);
255 sx_sunlock(&allprison_lock);
256
257 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
258 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
259 if ((error = change_dir(pr->pr_root, td)) != 0)
260 goto e_unlock;
261#ifdef MAC
262 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
263 goto e_unlock;
264#endif
265 VOP_UNLOCK(pr->pr_root, 0);
266 change_root(pr->pr_root, td);
267 VFS_UNLOCK_GIANT(vfslocked);
268
269 newcred = crget();
270 PROC_LOCK(p);
271 oldcred = p->p_ucred;
272 setsugid(p);
273 crcopy(newcred, oldcred);
274 newcred->cr_prison = pr;
275 p->p_ucred = newcred;
276 PROC_UNLOCK(p);
277 crfree(oldcred);
278 return (0);
279e_unlock:
280 VOP_UNLOCK(pr->pr_root, 0);
281 VFS_UNLOCK_GIANT(vfslocked);
282 mtx_lock(&pr->pr_mtx);
283 pr->pr_ref--;
284 mtx_unlock(&pr->pr_mtx);
285 return (error);
286}
287
288/*
289 * Returns a locked prison instance, or NULL on failure.
290 */
291struct prison *
292prison_find(int prid)
293{
294 struct prison *pr;
295
296 sx_assert(&allprison_lock, SX_LOCKED);
297 LIST_FOREACH(pr, &allprison, pr_list) {
298 if (pr->pr_id == prid) {
299 mtx_lock(&pr->pr_mtx);
300 if (pr->pr_ref == 0) {
301 mtx_unlock(&pr->pr_mtx);
302 break;
303 }
304 return (pr);
305 }
306 }
307 return (NULL);
308}
309
310void
311prison_free(struct prison *pr)
312{
313
314 mtx_lock(&pr->pr_mtx);
315 pr->pr_ref--;
316 if (pr->pr_ref == 0) {
317 mtx_unlock(&pr->pr_mtx);
318 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
319 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
320 return;
321 }
322 mtx_unlock(&pr->pr_mtx);
323}
324
325static void
326prison_complete(void *context, int pending)
327{
328 struct prison_service *psrv;
329 struct prison *pr;
330 int vfslocked;
331
332 pr = (struct prison *)context;
333
334 sx_xlock(&allprison_lock);
335 LIST_REMOVE(pr, pr_list);
336 prisoncount--;
337 sx_downgrade(&allprison_lock);
338 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
339 psrv->ps_destroy(psrv, pr);
340 }
341 sx_sunlock(&allprison_lock);
342
343 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
344 vrele(pr->pr_root);
345 VFS_UNLOCK_GIANT(vfslocked);
346
347 mtx_destroy(&pr->pr_mtx);
348 if (pr->pr_linux != NULL)
349 FREE(pr->pr_linux, M_PRISON);
350 FREE(pr, M_PRISON);
351}
352
353void
354prison_hold(struct prison *pr)
355{
356
357 mtx_lock(&pr->pr_mtx);
358 KASSERT(pr->pr_ref > 0,
359 ("Trying to hold dead prison (id=%d).", pr->pr_id));
360 pr->pr_ref++;
361 mtx_unlock(&pr->pr_mtx);
362}
363
364u_int32_t
365prison_getip(struct ucred *cred)
366{
367
368 return (cred->cr_prison->pr_ip);
369}
370
371int
372prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
373{
374 u_int32_t tmp;
375
376 if (!jailed(cred))
377 return (0);
378 if (flag)
379 tmp = *ip;
380 else
381 tmp = ntohl(*ip);
382 if (tmp == INADDR_ANY) {
383 if (flag)
384 *ip = cred->cr_prison->pr_ip;
385 else
386 *ip = htonl(cred->cr_prison->pr_ip);
387 return (0);
388 }
389 if (tmp == INADDR_LOOPBACK) {
390 if (flag)
391 *ip = cred->cr_prison->pr_ip;
392 else
393 *ip = htonl(cred->cr_prison->pr_ip);
394 return (0);
395 }
396 if (cred->cr_prison->pr_ip != tmp)
397 return (1);
398 return (0);
399}
400
401void
402prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
403{
404 u_int32_t tmp;
405
406 if (!jailed(cred))
407 return;
408 if (flag)
409 tmp = *ip;
410 else
411 tmp = ntohl(*ip);
412 if (tmp == INADDR_LOOPBACK) {
413 if (flag)
414 *ip = cred->cr_prison->pr_ip;
415 else
416 *ip = htonl(cred->cr_prison->pr_ip);
417 return;
418 }
419 return;
420}
421
422int
423prison_if(struct ucred *cred, struct sockaddr *sa)
424{
425 struct sockaddr_in *sai;
426 int ok;
427
428 sai = (struct sockaddr_in *)sa;
429 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
430 ok = 1;
431 else if (sai->sin_family != AF_INET)
432 ok = 0;
433 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
434 ok = 1;
435 else
436 ok = 0;
437 return (ok);
438}
439
440/*
441 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
442 */
443int
444prison_check(struct ucred *cred1, struct ucred *cred2)
445{
446
447 if (jailed(cred1)) {
448 if (!jailed(cred2))
449 return (ESRCH);
450 if (cred2->cr_prison != cred1->cr_prison)
451 return (ESRCH);
452 }
453
454 return (0);
455}
456
457/*
458 * Return 1 if the passed credential is in a jail, otherwise 0.
459 */
460int
461jailed(struct ucred *cred)
462{
463
464 return (cred->cr_prison != NULL);
465}
466
467/*
468 * Return the correct hostname for the passed credential.
469 */
470void
471getcredhostname(struct ucred *cred, char *buf, size_t size)
472{
473
474 if (jailed(cred)) {
475 mtx_lock(&cred->cr_prison->pr_mtx);
476 strlcpy(buf, cred->cr_prison->pr_host, size);
477 mtx_unlock(&cred->cr_prison->pr_mtx);
12
13#include "opt_mac.h"
14
15#include <sys/param.h>
16#include <sys/types.h>
17#include <sys/kernel.h>
18#include <sys/systm.h>
19#include <sys/errno.h>
20#include <sys/sysproto.h>
21#include <sys/malloc.h>
22#include <sys/priv.h>
23#include <sys/proc.h>
24#include <sys/taskqueue.h>
25#include <sys/fcntl.h>
26#include <sys/jail.h>
27#include <sys/lock.h>
28#include <sys/mutex.h>
29#include <sys/sx.h>
30#include <sys/namei.h>
31#include <sys/mount.h>
32#include <sys/queue.h>
33#include <sys/socket.h>
34#include <sys/syscallsubr.h>
35#include <sys/sysctl.h>
36#include <sys/vnode.h>
37#include <net/if.h>
38#include <netinet/in.h>
39
40#include <security/mac/mac_framework.h>
41
42MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
43
44SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
45 "Jail rules");
46
47int jail_set_hostname_allowed = 1;
48SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
49 &jail_set_hostname_allowed, 0,
50 "Processes in jail can set their hostnames");
51
52int jail_socket_unixiproute_only = 1;
53SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
54 &jail_socket_unixiproute_only, 0,
55 "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
56
57int jail_sysvipc_allowed = 0;
58SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
59 &jail_sysvipc_allowed, 0,
60 "Processes in jail can use System V IPC primitives");
61
62static int jail_enforce_statfs = 2;
63SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
64 &jail_enforce_statfs, 0,
65 "Processes in jail cannot see all mounted file systems");
66
67int jail_allow_raw_sockets = 0;
68SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
69 &jail_allow_raw_sockets, 0,
70 "Prison root can create raw sockets");
71
72int jail_chflags_allowed = 0;
73SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
74 &jail_chflags_allowed, 0,
75 "Processes in jail can alter system file flags");
76
77int jail_mount_allowed = 0;
78SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
79 &jail_mount_allowed, 0,
80 "Processes in jail can mount/unmount jail-friendly file systems");
81
82/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
83struct prisonlist allprison;
84struct sx allprison_lock;
85int lastprid = 0;
86int prisoncount = 0;
87
88/*
89 * List of jail services. Protected by allprison_lock.
90 */
91TAILQ_HEAD(prison_services_head, prison_service);
92static struct prison_services_head prison_services =
93 TAILQ_HEAD_INITIALIZER(prison_services);
94static int prison_service_slots = 0;
95
96struct prison_service {
97 prison_create_t ps_create;
98 prison_destroy_t ps_destroy;
99 int ps_slotno;
100 TAILQ_ENTRY(prison_service) ps_next;
101 char ps_name[0];
102};
103
104static void init_prison(void *);
105static void prison_complete(void *context, int pending);
106static int sysctl_jail_list(SYSCTL_HANDLER_ARGS);
107
108static void
109init_prison(void *data __unused)
110{
111
112 sx_init(&allprison_lock, "allprison");
113 LIST_INIT(&allprison);
114}
115
116SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
117
118/*
119 * struct jail_args {
120 * struct jail *jail;
121 * };
122 */
123int
124jail(struct thread *td, struct jail_args *uap)
125{
126 struct nameidata nd;
127 struct prison *pr, *tpr;
128 struct prison_service *psrv;
129 struct jail j;
130 struct jail_attach_args jaa;
131 int vfslocked, error, tryprid;
132
133 error = copyin(uap->jail, &j, sizeof(j));
134 if (error)
135 return (error);
136 if (j.version != 0)
137 return (EINVAL);
138
139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
141 pr->pr_ref = 1;
142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
143 if (error)
144 goto e_killmtx;
145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
146 pr->pr_path, td);
147 error = namei(&nd);
148 if (error)
149 goto e_killmtx;
150 vfslocked = NDHASGIANT(&nd);
151 pr->pr_root = nd.ni_vp;
152 VOP_UNLOCK(nd.ni_vp, 0);
153 NDFREE(&nd, NDF_ONLY_PNBUF);
154 VFS_UNLOCK_GIANT(vfslocked);
155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
156 if (error)
157 goto e_dropvnref;
158 pr->pr_ip = j.ip_number;
159 pr->pr_linux = NULL;
160 pr->pr_securelevel = securelevel;
161 if (prison_service_slots == 0)
162 pr->pr_slots = NULL;
163 else {
164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
165 M_PRISON, M_ZERO | M_WAITOK);
166 }
167
168 /* Determine next pr_id and add prison to allprison list. */
169 sx_xlock(&allprison_lock);
170 tryprid = lastprid + 1;
171 if (tryprid == JAIL_MAX)
172 tryprid = 1;
173next:
174 LIST_FOREACH(tpr, &allprison, pr_list) {
175 if (tpr->pr_id == tryprid) {
176 tryprid++;
177 if (tryprid == JAIL_MAX) {
178 sx_xunlock(&allprison_lock);
179 error = EAGAIN;
180 goto e_dropvnref;
181 }
182 goto next;
183 }
184 }
185 pr->pr_id = jaa.jid = lastprid = tryprid;
186 LIST_INSERT_HEAD(&allprison, pr, pr_list);
187 prisoncount++;
188 sx_downgrade(&allprison_lock);
189 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
190 psrv->ps_create(psrv, pr);
191 }
192 sx_sunlock(&allprison_lock);
193
194 error = jail_attach(td, &jaa);
195 if (error)
196 goto e_dropprref;
197 mtx_lock(&pr->pr_mtx);
198 pr->pr_ref--;
199 mtx_unlock(&pr->pr_mtx);
200 td->td_retval[0] = jaa.jid;
201 return (0);
202e_dropprref:
203 sx_xlock(&allprison_lock);
204 LIST_REMOVE(pr, pr_list);
205 prisoncount--;
206 sx_downgrade(&allprison_lock);
207 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
208 psrv->ps_destroy(psrv, pr);
209 }
210 sx_sunlock(&allprison_lock);
211e_dropvnref:
212 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
213 vrele(pr->pr_root);
214 VFS_UNLOCK_GIANT(vfslocked);
215e_killmtx:
216 mtx_destroy(&pr->pr_mtx);
217 FREE(pr, M_PRISON);
218 return (error);
219}
220
221/*
222 * struct jail_attach_args {
223 * int jid;
224 * };
225 */
226int
227jail_attach(struct thread *td, struct jail_attach_args *uap)
228{
229 struct proc *p;
230 struct ucred *newcred, *oldcred;
231 struct prison *pr;
232 int vfslocked, error;
233
234 /*
235 * XXX: Note that there is a slight race here if two threads
236 * in the same privileged process attempt to attach to two
237 * different jails at the same time. It is important for
238 * user processes not to do this, or they might end up with
239 * a process root from one prison, but attached to the jail
240 * of another.
241 */
242 error = priv_check(td, PRIV_JAIL_ATTACH);
243 if (error)
244 return (error);
245
246 p = td->td_proc;
247 sx_slock(&allprison_lock);
248 pr = prison_find(uap->jid);
249 if (pr == NULL) {
250 sx_sunlock(&allprison_lock);
251 return (EINVAL);
252 }
253 pr->pr_ref++;
254 mtx_unlock(&pr->pr_mtx);
255 sx_sunlock(&allprison_lock);
256
257 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
258 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
259 if ((error = change_dir(pr->pr_root, td)) != 0)
260 goto e_unlock;
261#ifdef MAC
262 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
263 goto e_unlock;
264#endif
265 VOP_UNLOCK(pr->pr_root, 0);
266 change_root(pr->pr_root, td);
267 VFS_UNLOCK_GIANT(vfslocked);
268
269 newcred = crget();
270 PROC_LOCK(p);
271 oldcred = p->p_ucred;
272 setsugid(p);
273 crcopy(newcred, oldcred);
274 newcred->cr_prison = pr;
275 p->p_ucred = newcred;
276 PROC_UNLOCK(p);
277 crfree(oldcred);
278 return (0);
279e_unlock:
280 VOP_UNLOCK(pr->pr_root, 0);
281 VFS_UNLOCK_GIANT(vfslocked);
282 mtx_lock(&pr->pr_mtx);
283 pr->pr_ref--;
284 mtx_unlock(&pr->pr_mtx);
285 return (error);
286}
287
288/*
289 * Returns a locked prison instance, or NULL on failure.
290 */
291struct prison *
292prison_find(int prid)
293{
294 struct prison *pr;
295
296 sx_assert(&allprison_lock, SX_LOCKED);
297 LIST_FOREACH(pr, &allprison, pr_list) {
298 if (pr->pr_id == prid) {
299 mtx_lock(&pr->pr_mtx);
300 if (pr->pr_ref == 0) {
301 mtx_unlock(&pr->pr_mtx);
302 break;
303 }
304 return (pr);
305 }
306 }
307 return (NULL);
308}
309
310void
311prison_free(struct prison *pr)
312{
313
314 mtx_lock(&pr->pr_mtx);
315 pr->pr_ref--;
316 if (pr->pr_ref == 0) {
317 mtx_unlock(&pr->pr_mtx);
318 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
319 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
320 return;
321 }
322 mtx_unlock(&pr->pr_mtx);
323}
324
325static void
326prison_complete(void *context, int pending)
327{
328 struct prison_service *psrv;
329 struct prison *pr;
330 int vfslocked;
331
332 pr = (struct prison *)context;
333
334 sx_xlock(&allprison_lock);
335 LIST_REMOVE(pr, pr_list);
336 prisoncount--;
337 sx_downgrade(&allprison_lock);
338 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
339 psrv->ps_destroy(psrv, pr);
340 }
341 sx_sunlock(&allprison_lock);
342
343 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
344 vrele(pr->pr_root);
345 VFS_UNLOCK_GIANT(vfslocked);
346
347 mtx_destroy(&pr->pr_mtx);
348 if (pr->pr_linux != NULL)
349 FREE(pr->pr_linux, M_PRISON);
350 FREE(pr, M_PRISON);
351}
352
353void
354prison_hold(struct prison *pr)
355{
356
357 mtx_lock(&pr->pr_mtx);
358 KASSERT(pr->pr_ref > 0,
359 ("Trying to hold dead prison (id=%d).", pr->pr_id));
360 pr->pr_ref++;
361 mtx_unlock(&pr->pr_mtx);
362}
363
364u_int32_t
365prison_getip(struct ucred *cred)
366{
367
368 return (cred->cr_prison->pr_ip);
369}
370
371int
372prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
373{
374 u_int32_t tmp;
375
376 if (!jailed(cred))
377 return (0);
378 if (flag)
379 tmp = *ip;
380 else
381 tmp = ntohl(*ip);
382 if (tmp == INADDR_ANY) {
383 if (flag)
384 *ip = cred->cr_prison->pr_ip;
385 else
386 *ip = htonl(cred->cr_prison->pr_ip);
387 return (0);
388 }
389 if (tmp == INADDR_LOOPBACK) {
390 if (flag)
391 *ip = cred->cr_prison->pr_ip;
392 else
393 *ip = htonl(cred->cr_prison->pr_ip);
394 return (0);
395 }
396 if (cred->cr_prison->pr_ip != tmp)
397 return (1);
398 return (0);
399}
400
401void
402prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
403{
404 u_int32_t tmp;
405
406 if (!jailed(cred))
407 return;
408 if (flag)
409 tmp = *ip;
410 else
411 tmp = ntohl(*ip);
412 if (tmp == INADDR_LOOPBACK) {
413 if (flag)
414 *ip = cred->cr_prison->pr_ip;
415 else
416 *ip = htonl(cred->cr_prison->pr_ip);
417 return;
418 }
419 return;
420}
421
422int
423prison_if(struct ucred *cred, struct sockaddr *sa)
424{
425 struct sockaddr_in *sai;
426 int ok;
427
428 sai = (struct sockaddr_in *)sa;
429 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
430 ok = 1;
431 else if (sai->sin_family != AF_INET)
432 ok = 0;
433 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
434 ok = 1;
435 else
436 ok = 0;
437 return (ok);
438}
439
440/*
441 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
442 */
443int
444prison_check(struct ucred *cred1, struct ucred *cred2)
445{
446
447 if (jailed(cred1)) {
448 if (!jailed(cred2))
449 return (ESRCH);
450 if (cred2->cr_prison != cred1->cr_prison)
451 return (ESRCH);
452 }
453
454 return (0);
455}
456
457/*
458 * Return 1 if the passed credential is in a jail, otherwise 0.
459 */
460int
461jailed(struct ucred *cred)
462{
463
464 return (cred->cr_prison != NULL);
465}
466
467/*
468 * Return the correct hostname for the passed credential.
469 */
470void
471getcredhostname(struct ucred *cred, char *buf, size_t size)
472{
473
474 if (jailed(cred)) {
475 mtx_lock(&cred->cr_prison->pr_mtx);
476 strlcpy(buf, cred->cr_prison->pr_host, size);
477 mtx_unlock(&cred->cr_prison->pr_mtx);
478 } else
478 } else {
479 mtx_lock(&hostname_mtx);
479 strlcpy(buf, hostname, size);
480 strlcpy(buf, hostname, size);
481 mtx_unlock(&hostname_mtx);
482 }
480}
481
482/*
483 * Determine whether the subject represented by cred can "see"
484 * status of a mount point.
485 * Returns: 0 for permitted, ENOENT otherwise.
486 * XXX: This function should be called cr_canseemount() and should be
487 * placed in kern_prot.c.
488 */
489int
490prison_canseemount(struct ucred *cred, struct mount *mp)
491{
492 struct prison *pr;
493 struct statfs *sp;
494 size_t len;
495
496 if (!jailed(cred) || jail_enforce_statfs == 0)
497 return (0);
498 pr = cred->cr_prison;
499 if (pr->pr_root->v_mount == mp)
500 return (0);
501 if (jail_enforce_statfs == 2)
502 return (ENOENT);
503 /*
504 * If jail's chroot directory is set to "/" we should be able to see
505 * all mount-points from inside a jail.
506 * This is ugly check, but this is the only situation when jail's
507 * directory ends with '/'.
508 */
509 if (strcmp(pr->pr_path, "/") == 0)
510 return (0);
511 len = strlen(pr->pr_path);
512 sp = &mp->mnt_stat;
513 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
514 return (ENOENT);
515 /*
516 * Be sure that we don't have situation where jail's root directory
517 * is "/some/path" and mount point is "/some/pathpath".
518 */
519 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
520 return (ENOENT);
521 return (0);
522}
523
524void
525prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
526{
527 char jpath[MAXPATHLEN];
528 struct prison *pr;
529 size_t len;
530
531 if (!jailed(cred) || jail_enforce_statfs == 0)
532 return;
533 pr = cred->cr_prison;
534 if (prison_canseemount(cred, mp) != 0) {
535 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
536 strlcpy(sp->f_mntonname, "[restricted]",
537 sizeof(sp->f_mntonname));
538 return;
539 }
540 if (pr->pr_root->v_mount == mp) {
541 /*
542 * Clear current buffer data, so we are sure nothing from
543 * the valid path left there.
544 */
545 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
546 *sp->f_mntonname = '/';
547 return;
548 }
549 /*
550 * If jail's chroot directory is set to "/" we should be able to see
551 * all mount-points from inside a jail.
552 */
553 if (strcmp(pr->pr_path, "/") == 0)
554 return;
555 len = strlen(pr->pr_path);
556 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
557 /*
558 * Clear current buffer data, so we are sure nothing from
559 * the valid path left there.
560 */
561 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
562 if (*jpath == '\0') {
563 /* Should never happen. */
564 *sp->f_mntonname = '/';
565 } else {
566 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
567 }
568}
569
570/*
571 * Check with permission for a specific privilege is granted within jail. We
572 * have a specific list of accepted privileges; the rest are denied.
573 */
574int
575prison_priv_check(struct ucred *cred, int priv)
576{
577
578 if (!jailed(cred))
579 return (0);
580
581 switch (priv) {
582
583 /*
584 * Allow ktrace privileges for root in jail.
585 */
586 case PRIV_KTRACE:
587
588#if 0
589 /*
590 * Allow jailed processes to configure audit identity and
591 * submit audit records (login, etc). In the future we may
592 * want to further refine the relationship between audit and
593 * jail.
594 */
595 case PRIV_AUDIT_GETAUDIT:
596 case PRIV_AUDIT_SETAUDIT:
597 case PRIV_AUDIT_SUBMIT:
598#endif
599
600 /*
601 * Allow jailed processes to manipulate process UNIX
602 * credentials in any way they see fit.
603 */
604 case PRIV_CRED_SETUID:
605 case PRIV_CRED_SETEUID:
606 case PRIV_CRED_SETGID:
607 case PRIV_CRED_SETEGID:
608 case PRIV_CRED_SETGROUPS:
609 case PRIV_CRED_SETREUID:
610 case PRIV_CRED_SETREGID:
611 case PRIV_CRED_SETRESUID:
612 case PRIV_CRED_SETRESGID:
613
614 /*
615 * Jail implements visibility constraints already, so allow
616 * jailed root to override uid/gid-based constraints.
617 */
618 case PRIV_SEEOTHERGIDS:
619 case PRIV_SEEOTHERUIDS:
620
621 /*
622 * Jail implements inter-process debugging limits already, so
623 * allow jailed root various debugging privileges.
624 */
625 case PRIV_DEBUG_DIFFCRED:
626 case PRIV_DEBUG_SUGID:
627 case PRIV_DEBUG_UNPRIV:
628
629 /*
630 * Allow jail to set various resource limits and login
631 * properties, and for now, exceed process resource limits.
632 */
633 case PRIV_PROC_LIMIT:
634 case PRIV_PROC_SETLOGIN:
635 case PRIV_PROC_SETRLIMIT:
636
637 /*
638 * System V and POSIX IPC privileges are granted in jail.
639 */
640 case PRIV_IPC_READ:
641 case PRIV_IPC_WRITE:
642 case PRIV_IPC_ADMIN:
643 case PRIV_IPC_MSGSIZE:
644 case PRIV_MQ_ADMIN:
645
646 /*
647 * Jail implements its own inter-process limits, so allow
648 * root processes in jail to change scheduling on other
649 * processes in the same jail. Likewise for signalling.
650 */
651 case PRIV_SCHED_DIFFCRED:
652 case PRIV_SIGNAL_DIFFCRED:
653 case PRIV_SIGNAL_SUGID:
654
655 /*
656 * Allow jailed processes to write to sysctls marked as jail
657 * writable.
658 */
659 case PRIV_SYSCTL_WRITEJAIL:
660
661 /*
662 * Allow root in jail to manage a variety of quota
663 * properties. These should likely be conditional on a
664 * configuration option.
665 */
666 case PRIV_VFS_GETQUOTA:
667 case PRIV_VFS_SETQUOTA:
668
669 /*
670 * Since Jail relies on chroot() to implement file system
671 * protections, grant many VFS privileges to root in jail.
672 * Be careful to exclude mount-related and NFS-related
673 * privileges.
674 */
675 case PRIV_VFS_READ:
676 case PRIV_VFS_WRITE:
677 case PRIV_VFS_ADMIN:
678 case PRIV_VFS_EXEC:
679 case PRIV_VFS_LOOKUP:
680 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */
681 case PRIV_VFS_CHFLAGS_DEV:
682 case PRIV_VFS_CHOWN:
683 case PRIV_VFS_CHROOT:
684 case PRIV_VFS_RETAINSUGID:
685 case PRIV_VFS_FCHROOT:
686 case PRIV_VFS_LINK:
687 case PRIV_VFS_SETGID:
688 case PRIV_VFS_STAT:
689 case PRIV_VFS_STICKYFILE:
690 return (0);
691
692 /*
693 * Depending on the global setting, allow privilege of
694 * setting system flags.
695 */
696 case PRIV_VFS_SYSFLAGS:
697 if (jail_chflags_allowed)
698 return (0);
699 else
700 return (EPERM);
701
702 /*
703 * Depending on the global setting, allow privilege of
704 * mounting/unmounting file systems.
705 */
706 case PRIV_VFS_MOUNT:
707 case PRIV_VFS_UNMOUNT:
708 case PRIV_VFS_MOUNT_NONUSER:
709 case PRIV_VFS_MOUNT_OWNER:
710 if (jail_mount_allowed)
711 return (0);
712 else
713 return (EPERM);
714
715 /*
716 * Allow jailed root to bind reserved ports and reuse in-use
717 * ports.
718 */
719 case PRIV_NETINET_RESERVEDPORT:
720 case PRIV_NETINET_REUSEPORT:
721 return (0);
722
723 /*
724 * Allow jailed root to set certian IPv4/6 (option) headers.
725 */
726 case PRIV_NETINET_SETHDROPTS:
727 return (0);
728
729 /*
730 * Conditionally allow creating raw sockets in jail.
731 */
732 case PRIV_NETINET_RAW:
733 if (jail_allow_raw_sockets)
734 return (0);
735 else
736 return (EPERM);
737
738 /*
739 * Since jail implements its own visibility limits on netstat
740 * sysctls, allow getcred. This allows identd to work in
741 * jail.
742 */
743 case PRIV_NETINET_GETCRED:
744 return (0);
745
746 default:
747 /*
748 * In all remaining cases, deny the privilege request. This
749 * includes almost all network privileges, many system
750 * configuration privileges.
751 */
752 return (EPERM);
753 }
754}
755
756/*
757 * Register jail service. Provides 'create' and 'destroy' methods.
758 * 'create' method will be called for every existing jail and all
759 * jails in the future as they are being created.
760 * 'destroy' method will be called for every jail going away and
761 * for all existing jails at the time of service deregistration.
762 */
763struct prison_service *
764prison_service_register(const char *name, prison_create_t create,
765 prison_destroy_t destroy)
766{
767 struct prison_service *psrv, *psrv2;
768 struct prison *pr;
769 int reallocate = 1, slotno = 0;
770 void **slots, **oldslots;
771
772 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
773 M_WAITOK | M_ZERO);
774 psrv->ps_create = create;
775 psrv->ps_destroy = destroy;
776 strcpy(psrv->ps_name, name);
777 /*
778 * Grab the allprison_lock here, so we won't miss any jail
779 * creation/destruction.
780 */
781 sx_xlock(&allprison_lock);
782#ifdef INVARIANTS
783 /*
784 * Verify if service is not already registered.
785 */
786 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
787 KASSERT(strcmp(psrv2->ps_name, name) != 0,
788 ("jail service %s already registered", name));
789 }
790#endif
791 /*
792 * Find free slot. When there is no existing free slot available,
793 * allocate one at the end.
794 */
795 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
796 if (psrv2->ps_slotno != slotno) {
797 KASSERT(slotno < psrv2->ps_slotno,
798 ("Invalid slotno (slotno=%d >= ps_slotno=%d",
799 slotno, psrv2->ps_slotno));
800 /* We found free slot. */
801 reallocate = 0;
802 break;
803 }
804 slotno++;
805 }
806 psrv->ps_slotno = slotno;
807 /*
808 * Keep the list sorted by slot number.
809 */
810 if (psrv2 != NULL) {
811 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
812 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
813 } else {
814 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
815 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
816 }
817 prison_service_slots++;
818 sx_downgrade(&allprison_lock);
819 /*
820 * Allocate memory for new slot if we didn't found empty one.
821 * Do not use realloc(9), because pr_slots is protected with a mutex,
822 * so we can't sleep.
823 */
824 LIST_FOREACH(pr, &allprison, pr_list) {
825 if (reallocate) {
826 /* First allocate memory with M_WAITOK. */
827 slots = malloc(sizeof(*slots) * prison_service_slots,
828 M_PRISON, M_WAITOK);
829 /* Now grab the mutex and replace pr_slots. */
830 mtx_lock(&pr->pr_mtx);
831 oldslots = pr->pr_slots;
832 if (psrv->ps_slotno > 0) {
833 bcopy(oldslots, slots,
834 sizeof(*slots) * (prison_service_slots - 1));
835 }
836 slots[psrv->ps_slotno] = NULL;
837 pr->pr_slots = slots;
838 mtx_unlock(&pr->pr_mtx);
839 if (oldslots != NULL)
840 free(oldslots, M_PRISON);
841 }
842 /*
843 * Call 'create' method for each existing jail.
844 */
845 psrv->ps_create(psrv, pr);
846 }
847 sx_sunlock(&allprison_lock);
848
849 return (psrv);
850}
851
852void
853prison_service_deregister(struct prison_service *psrv)
854{
855 struct prison *pr;
856 void **slots, **oldslots;
857 int last = 0;
858
859 sx_xlock(&allprison_lock);
860 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
861 last = 1;
862 TAILQ_REMOVE(&prison_services, psrv, ps_next);
863 prison_service_slots--;
864 sx_downgrade(&allprison_lock);
865 LIST_FOREACH(pr, &allprison, pr_list) {
866 /*
867 * Call 'destroy' method for every currently existing jail.
868 */
869 psrv->ps_destroy(psrv, pr);
870 /*
871 * If this is the last slot, free the memory allocated for it.
872 */
873 if (last) {
874 if (prison_service_slots == 0)
875 slots = NULL;
876 else {
877 slots = malloc(sizeof(*slots) * prison_service_slots,
878 M_PRISON, M_WAITOK);
879 }
880 mtx_lock(&pr->pr_mtx);
881 oldslots = pr->pr_slots;
882 /*
883 * We require setting slot to NULL after freeing it,
884 * this way we can check for memory leaks here.
885 */
886 KASSERT(oldslots[psrv->ps_slotno] == NULL,
887 ("Slot %d (service %s, jailid=%d) still contains data?",
888 psrv->ps_slotno, psrv->ps_name, pr->pr_id));
889 if (psrv->ps_slotno > 0) {
890 bcopy(oldslots, slots,
891 sizeof(*slots) * prison_service_slots);
892 }
893 pr->pr_slots = slots;
894 mtx_unlock(&pr->pr_mtx);
895 KASSERT(oldslots != NULL, ("oldslots == NULL"));
896 free(oldslots, M_PRISON);
897 }
898 }
899 sx_sunlock(&allprison_lock);
900 free(psrv, M_PRISON);
901}
902
903/*
904 * Function sets data for the given jail in slot assigned for the given
905 * jail service.
906 */
907void
908prison_service_data_set(struct prison_service *psrv, struct prison *pr,
909 void *data)
910{
911
912 mtx_assert(&pr->pr_mtx, MA_OWNED);
913 pr->pr_slots[psrv->ps_slotno] = data;
914}
915
916/*
917 * Function clears slots assigned for the given jail service in the given
918 * prison structure and returns current slot data.
919 */
920void *
921prison_service_data_del(struct prison_service *psrv, struct prison *pr)
922{
923 void *data;
924
925 mtx_assert(&pr->pr_mtx, MA_OWNED);
926 data = pr->pr_slots[psrv->ps_slotno];
927 pr->pr_slots[psrv->ps_slotno] = NULL;
928 return (data);
929}
930
931/*
932 * Function returns current data from the slot assigned to the given jail
933 * service for the given jail.
934 */
935void *
936prison_service_data_get(struct prison_service *psrv, struct prison *pr)
937{
938
939 mtx_assert(&pr->pr_mtx, MA_OWNED);
940 return (pr->pr_slots[psrv->ps_slotno]);
941}
942
943static int
944sysctl_jail_list(SYSCTL_HANDLER_ARGS)
945{
946 struct xprison *xp, *sxp;
947 struct prison *pr;
948 int count, error;
949
950 if (jailed(req->td->td_ucred))
951 return (0);
952
953 sx_slock(&allprison_lock);
954 if ((count = prisoncount) == 0) {
955 sx_sunlock(&allprison_lock);
956 return (0);
957 }
958
959 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
960
961 LIST_FOREACH(pr, &allprison, pr_list) {
962 xp->pr_version = XPRISON_VERSION;
963 xp->pr_id = pr->pr_id;
964 xp->pr_ip = pr->pr_ip;
965 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
966 mtx_lock(&pr->pr_mtx);
967 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
968 mtx_unlock(&pr->pr_mtx);
969 xp++;
970 }
971 sx_sunlock(&allprison_lock);
972
973 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
974 free(sxp, M_TEMP);
975 return (error);
976}
977
978SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
979 NULL, 0, sysctl_jail_list, "S", "List of active jails");
980
981static int
982sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
983{
984 int error, injail;
985
986 injail = jailed(req->td->td_ucred);
987 error = SYSCTL_OUT(req, &injail, sizeof(injail));
988
989 return (error);
990}
991SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
992 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
483}
484
485/*
486 * Determine whether the subject represented by cred can "see"
487 * status of a mount point.
488 * Returns: 0 for permitted, ENOENT otherwise.
489 * XXX: This function should be called cr_canseemount() and should be
490 * placed in kern_prot.c.
491 */
492int
493prison_canseemount(struct ucred *cred, struct mount *mp)
494{
495 struct prison *pr;
496 struct statfs *sp;
497 size_t len;
498
499 if (!jailed(cred) || jail_enforce_statfs == 0)
500 return (0);
501 pr = cred->cr_prison;
502 if (pr->pr_root->v_mount == mp)
503 return (0);
504 if (jail_enforce_statfs == 2)
505 return (ENOENT);
506 /*
507 * If jail's chroot directory is set to "/" we should be able to see
508 * all mount-points from inside a jail.
509 * This is ugly check, but this is the only situation when jail's
510 * directory ends with '/'.
511 */
512 if (strcmp(pr->pr_path, "/") == 0)
513 return (0);
514 len = strlen(pr->pr_path);
515 sp = &mp->mnt_stat;
516 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
517 return (ENOENT);
518 /*
519 * Be sure that we don't have situation where jail's root directory
520 * is "/some/path" and mount point is "/some/pathpath".
521 */
522 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
523 return (ENOENT);
524 return (0);
525}
526
527void
528prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
529{
530 char jpath[MAXPATHLEN];
531 struct prison *pr;
532 size_t len;
533
534 if (!jailed(cred) || jail_enforce_statfs == 0)
535 return;
536 pr = cred->cr_prison;
537 if (prison_canseemount(cred, mp) != 0) {
538 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
539 strlcpy(sp->f_mntonname, "[restricted]",
540 sizeof(sp->f_mntonname));
541 return;
542 }
543 if (pr->pr_root->v_mount == mp) {
544 /*
545 * Clear current buffer data, so we are sure nothing from
546 * the valid path left there.
547 */
548 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
549 *sp->f_mntonname = '/';
550 return;
551 }
552 /*
553 * If jail's chroot directory is set to "/" we should be able to see
554 * all mount-points from inside a jail.
555 */
556 if (strcmp(pr->pr_path, "/") == 0)
557 return;
558 len = strlen(pr->pr_path);
559 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
560 /*
561 * Clear current buffer data, so we are sure nothing from
562 * the valid path left there.
563 */
564 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
565 if (*jpath == '\0') {
566 /* Should never happen. */
567 *sp->f_mntonname = '/';
568 } else {
569 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
570 }
571}
572
573/*
574 * Check with permission for a specific privilege is granted within jail. We
575 * have a specific list of accepted privileges; the rest are denied.
576 */
577int
578prison_priv_check(struct ucred *cred, int priv)
579{
580
581 if (!jailed(cred))
582 return (0);
583
584 switch (priv) {
585
586 /*
587 * Allow ktrace privileges for root in jail.
588 */
589 case PRIV_KTRACE:
590
591#if 0
592 /*
593 * Allow jailed processes to configure audit identity and
594 * submit audit records (login, etc). In the future we may
595 * want to further refine the relationship between audit and
596 * jail.
597 */
598 case PRIV_AUDIT_GETAUDIT:
599 case PRIV_AUDIT_SETAUDIT:
600 case PRIV_AUDIT_SUBMIT:
601#endif
602
603 /*
604 * Allow jailed processes to manipulate process UNIX
605 * credentials in any way they see fit.
606 */
607 case PRIV_CRED_SETUID:
608 case PRIV_CRED_SETEUID:
609 case PRIV_CRED_SETGID:
610 case PRIV_CRED_SETEGID:
611 case PRIV_CRED_SETGROUPS:
612 case PRIV_CRED_SETREUID:
613 case PRIV_CRED_SETREGID:
614 case PRIV_CRED_SETRESUID:
615 case PRIV_CRED_SETRESGID:
616
617 /*
618 * Jail implements visibility constraints already, so allow
619 * jailed root to override uid/gid-based constraints.
620 */
621 case PRIV_SEEOTHERGIDS:
622 case PRIV_SEEOTHERUIDS:
623
624 /*
625 * Jail implements inter-process debugging limits already, so
626 * allow jailed root various debugging privileges.
627 */
628 case PRIV_DEBUG_DIFFCRED:
629 case PRIV_DEBUG_SUGID:
630 case PRIV_DEBUG_UNPRIV:
631
632 /*
633 * Allow jail to set various resource limits and login
634 * properties, and for now, exceed process resource limits.
635 */
636 case PRIV_PROC_LIMIT:
637 case PRIV_PROC_SETLOGIN:
638 case PRIV_PROC_SETRLIMIT:
639
640 /*
641 * System V and POSIX IPC privileges are granted in jail.
642 */
643 case PRIV_IPC_READ:
644 case PRIV_IPC_WRITE:
645 case PRIV_IPC_ADMIN:
646 case PRIV_IPC_MSGSIZE:
647 case PRIV_MQ_ADMIN:
648
649 /*
650 * Jail implements its own inter-process limits, so allow
651 * root processes in jail to change scheduling on other
652 * processes in the same jail. Likewise for signalling.
653 */
654 case PRIV_SCHED_DIFFCRED:
655 case PRIV_SIGNAL_DIFFCRED:
656 case PRIV_SIGNAL_SUGID:
657
658 /*
659 * Allow jailed processes to write to sysctls marked as jail
660 * writable.
661 */
662 case PRIV_SYSCTL_WRITEJAIL:
663
664 /*
665 * Allow root in jail to manage a variety of quota
666 * properties. These should likely be conditional on a
667 * configuration option.
668 */
669 case PRIV_VFS_GETQUOTA:
670 case PRIV_VFS_SETQUOTA:
671
672 /*
673 * Since Jail relies on chroot() to implement file system
674 * protections, grant many VFS privileges to root in jail.
675 * Be careful to exclude mount-related and NFS-related
676 * privileges.
677 */
678 case PRIV_VFS_READ:
679 case PRIV_VFS_WRITE:
680 case PRIV_VFS_ADMIN:
681 case PRIV_VFS_EXEC:
682 case PRIV_VFS_LOOKUP:
683 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */
684 case PRIV_VFS_CHFLAGS_DEV:
685 case PRIV_VFS_CHOWN:
686 case PRIV_VFS_CHROOT:
687 case PRIV_VFS_RETAINSUGID:
688 case PRIV_VFS_FCHROOT:
689 case PRIV_VFS_LINK:
690 case PRIV_VFS_SETGID:
691 case PRIV_VFS_STAT:
692 case PRIV_VFS_STICKYFILE:
693 return (0);
694
695 /*
696 * Depending on the global setting, allow privilege of
697 * setting system flags.
698 */
699 case PRIV_VFS_SYSFLAGS:
700 if (jail_chflags_allowed)
701 return (0);
702 else
703 return (EPERM);
704
705 /*
706 * Depending on the global setting, allow privilege of
707 * mounting/unmounting file systems.
708 */
709 case PRIV_VFS_MOUNT:
710 case PRIV_VFS_UNMOUNT:
711 case PRIV_VFS_MOUNT_NONUSER:
712 case PRIV_VFS_MOUNT_OWNER:
713 if (jail_mount_allowed)
714 return (0);
715 else
716 return (EPERM);
717
718 /*
719 * Allow jailed root to bind reserved ports and reuse in-use
720 * ports.
721 */
722 case PRIV_NETINET_RESERVEDPORT:
723 case PRIV_NETINET_REUSEPORT:
724 return (0);
725
726 /*
727 * Allow jailed root to set certian IPv4/6 (option) headers.
728 */
729 case PRIV_NETINET_SETHDROPTS:
730 return (0);
731
732 /*
733 * Conditionally allow creating raw sockets in jail.
734 */
735 case PRIV_NETINET_RAW:
736 if (jail_allow_raw_sockets)
737 return (0);
738 else
739 return (EPERM);
740
741 /*
742 * Since jail implements its own visibility limits on netstat
743 * sysctls, allow getcred. This allows identd to work in
744 * jail.
745 */
746 case PRIV_NETINET_GETCRED:
747 return (0);
748
749 default:
750 /*
751 * In all remaining cases, deny the privilege request. This
752 * includes almost all network privileges, many system
753 * configuration privileges.
754 */
755 return (EPERM);
756 }
757}
758
759/*
760 * Register jail service. Provides 'create' and 'destroy' methods.
761 * 'create' method will be called for every existing jail and all
762 * jails in the future as they beeing created.
763 * 'destroy' method will be called for every jail going away and
764 * for all existing jails at the time of service deregistration.
765 */
766struct prison_service *
767prison_service_register(const char *name, prison_create_t create,
768 prison_destroy_t destroy)
769{
770 struct prison_service *psrv, *psrv2;
771 struct prison *pr;
772 int reallocate = 1, slotno = 0;
773 void **slots, **oldslots;
774
775 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
776 M_WAITOK | M_ZERO);
777 psrv->ps_create = create;
778 psrv->ps_destroy = destroy;
779 strcpy(psrv->ps_name, name);
780 /*
781 * Grab the allprison_lock here, so we won't miss any jail
782 * creation/destruction.
783 */
784 sx_xlock(&allprison_lock);
785#ifdef INVARIANTS
786 /*
787 * Verify if service is not already registered.
788 */
789 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
790 KASSERT(strcmp(psrv2->ps_name, name) != 0,
791 ("jail service %s already registered", name));
792 }
793#endif
794 /*
795 * Find free slot. When there is no existing free slot available,
796 * allocate one at the end.
797 */
798 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
799 if (psrv2->ps_slotno != slotno) {
800 KASSERT(slotno < psrv2->ps_slotno,
801 ("Invalid slotno (slotno=%d >= ps_slotno=%d",
802 slotno, psrv2->ps_slotno));
803 /* We found free slot. */
804 reallocate = 0;
805 break;
806 }
807 slotno++;
808 }
809 psrv->ps_slotno = slotno;
810 /*
811 * Keep the list sorted by slot number.
812 */
813 if (psrv2 != NULL) {
814 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
815 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
816 } else {
817 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
818 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
819 }
820 prison_service_slots++;
821 sx_downgrade(&allprison_lock);
822 /*
823 * Allocate memory for new slot if we didn't found empty one.
824 * Do not use realloc(9), because pr_slots is protected with a mutex,
825 * so we can't sleep.
826 */
827 LIST_FOREACH(pr, &allprison, pr_list) {
828 if (reallocate) {
829 /* First allocate memory with M_WAITOK. */
830 slots = malloc(sizeof(*slots) * prison_service_slots,
831 M_PRISON, M_WAITOK);
832 /* Now grab the mutex and replace pr_slots. */
833 mtx_lock(&pr->pr_mtx);
834 oldslots = pr->pr_slots;
835 if (psrv->ps_slotno > 0) {
836 bcopy(oldslots, slots,
837 sizeof(*slots) * (prison_service_slots - 1));
838 }
839 slots[psrv->ps_slotno] = NULL;
840 pr->pr_slots = slots;
841 mtx_unlock(&pr->pr_mtx);
842 if (oldslots != NULL)
843 free(oldslots, M_PRISON);
844 }
845 /*
846 * Call 'create' method for each existing jail.
847 */
848 psrv->ps_create(psrv, pr);
849 }
850 sx_sunlock(&allprison_lock);
851
852 return (psrv);
853}
854
855void
856prison_service_deregister(struct prison_service *psrv)
857{
858 struct prison *pr;
859 void **slots, **oldslots;
860 int last = 0;
861
862 sx_xlock(&allprison_lock);
863 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
864 last = 1;
865 TAILQ_REMOVE(&prison_services, psrv, ps_next);
866 prison_service_slots--;
867 sx_downgrade(&allprison_lock);
868 LIST_FOREACH(pr, &allprison, pr_list) {
869 /*
870 * Call 'destroy' method for every currently existing jail.
871 */
872 psrv->ps_destroy(psrv, pr);
873 /*
874 * If this is the last slot, free the memory allocated for it.
875 */
876 if (last) {
877 if (prison_service_slots == 0)
878 slots = NULL;
879 else {
880 slots = malloc(sizeof(*slots) * prison_service_slots,
881 M_PRISON, M_WAITOK);
882 }
883 mtx_lock(&pr->pr_mtx);
884 oldslots = pr->pr_slots;
885 /*
886 * We require setting slot to NULL after freeing it,
887 * this way we can check for memory leaks here.
888 */
889 KASSERT(oldslots[psrv->ps_slotno] == NULL,
890 ("Slot %d (service %s, jailid=%d) still contains data?",
891 psrv->ps_slotno, psrv->ps_name, pr->pr_id));
892 if (psrv->ps_slotno > 0) {
893 bcopy(oldslots, slots,
894 sizeof(*slots) * prison_service_slots);
895 }
896 pr->pr_slots = slots;
897 mtx_unlock(&pr->pr_mtx);
898 KASSERT(oldslots != NULL, ("oldslots == NULL"));
899 free(oldslots, M_PRISON);
900 }
901 }
902 sx_sunlock(&allprison_lock);
903 free(psrv, M_PRISON);
904}
905
906/*
907 * Function sets data for the given jail in slot assigned for the given
908 * jail service.
909 */
910void
911prison_service_data_set(struct prison_service *psrv, struct prison *pr,
912 void *data)
913{
914
915 mtx_assert(&pr->pr_mtx, MA_OWNED);
916 pr->pr_slots[psrv->ps_slotno] = data;
917}
918
919/*
920 * Function clears slots assigned for the given jail service in the given
921 * prison structure and returns current slot data.
922 */
923void *
924prison_service_data_del(struct prison_service *psrv, struct prison *pr)
925{
926 void *data;
927
928 mtx_assert(&pr->pr_mtx, MA_OWNED);
929 data = pr->pr_slots[psrv->ps_slotno];
930 pr->pr_slots[psrv->ps_slotno] = NULL;
931 return (data);
932}
933
934/*
935 * Function returns current data from the slot assigned to the given jail
936 * service for the given jail.
937 */
938void *
939prison_service_data_get(struct prison_service *psrv, struct prison *pr)
940{
941
942 mtx_assert(&pr->pr_mtx, MA_OWNED);
943 return (pr->pr_slots[psrv->ps_slotno]);
944}
945
946static int
947sysctl_jail_list(SYSCTL_HANDLER_ARGS)
948{
949 struct xprison *xp, *sxp;
950 struct prison *pr;
951 int count, error;
952
953 if (jailed(req->td->td_ucred))
954 return (0);
955
956 sx_slock(&allprison_lock);
957 if ((count = prisoncount) == 0) {
958 sx_sunlock(&allprison_lock);
959 return (0);
960 }
961
962 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
963
964 LIST_FOREACH(pr, &allprison, pr_list) {
965 xp->pr_version = XPRISON_VERSION;
966 xp->pr_id = pr->pr_id;
967 xp->pr_ip = pr->pr_ip;
968 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
969 mtx_lock(&pr->pr_mtx);
970 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
971 mtx_unlock(&pr->pr_mtx);
972 xp++;
973 }
974 sx_sunlock(&allprison_lock);
975
976 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
977 free(sxp, M_TEMP);
978 return (error);
979}
980
981SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
982 NULL, 0, sysctl_jail_list, "S", "List of active jails");
983
984static int
985sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
986{
987 int error, injail;
988
989 injail = jailed(req->td->td_ucred);
990 error = SYSCTL_OUT(req, &injail, sizeof(injail));
991
992 return (error);
993}
994SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
995 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");