1/*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 */ 9 10#include <sys/cdefs.h>
| 1/*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 */ 9 10#include <sys/cdefs.h>
|
11__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 179881 2008-06-19 21:41:57Z delphij $");
| 11__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 180291 2008-07-05 13:10:10Z rwatson $");
|
12 13#include "opt_mac.h" 14 15#include <sys/param.h> 16#include <sys/types.h> 17#include <sys/kernel.h> 18#include <sys/systm.h> 19#include <sys/errno.h> 20#include <sys/sysproto.h> 21#include <sys/malloc.h> 22#include <sys/priv.h> 23#include <sys/proc.h> 24#include <sys/taskqueue.h> 25#include <sys/fcntl.h> 26#include <sys/jail.h> 27#include <sys/lock.h> 28#include <sys/mutex.h> 29#include <sys/sx.h> 30#include <sys/namei.h> 31#include <sys/mount.h> 32#include <sys/queue.h> 33#include <sys/socket.h> 34#include <sys/syscallsubr.h> 35#include <sys/sysctl.h> 36#include <sys/vnode.h> 37#include <net/if.h> 38#include <netinet/in.h> 39 40#include <security/mac/mac_framework.h> 41 42MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 43 44SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 45 "Jail rules"); 46 47int jail_set_hostname_allowed = 1; 48SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 49 &jail_set_hostname_allowed, 0, 50 "Processes in jail can set their hostnames"); 51 52int jail_socket_unixiproute_only = 1; 53SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 54 &jail_socket_unixiproute_only, 0, 55 "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 56 57int jail_sysvipc_allowed = 0; 58SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 59 &jail_sysvipc_allowed, 0, 60 "Processes in jail can use System V IPC primitives"); 61 62static int jail_enforce_statfs = 2; 63SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 64 &jail_enforce_statfs, 0, 65 "Processes in jail cannot see all mounted file systems"); 66 67int jail_allow_raw_sockets = 0; 68SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 69 &jail_allow_raw_sockets, 0, 70 "Prison root can create raw sockets"); 71 72int jail_chflags_allowed = 0; 73SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 74 &jail_chflags_allowed, 0, 75 "Processes in jail can alter system file flags"); 76 77int jail_mount_allowed = 0; 78SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 79 &jail_mount_allowed, 0, 80 "Processes in jail can mount/unmount jail-friendly file systems"); 81 82/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 83struct prisonlist allprison; 84struct sx allprison_lock; 85int lastprid = 0; 86int prisoncount = 0; 87 88/* 89 * List of jail services. Protected by allprison_lock. 90 */ 91TAILQ_HEAD(prison_services_head, prison_service); 92static struct prison_services_head prison_services = 93 TAILQ_HEAD_INITIALIZER(prison_services); 94static int prison_service_slots = 0; 95 96struct prison_service { 97 prison_create_t ps_create; 98 prison_destroy_t ps_destroy; 99 int ps_slotno; 100 TAILQ_ENTRY(prison_service) ps_next; 101 char ps_name[0]; 102}; 103 104static void init_prison(void *); 105static void prison_complete(void *context, int pending); 106static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 107 108static void 109init_prison(void *data __unused) 110{ 111 112 sx_init(&allprison_lock, "allprison"); 113 LIST_INIT(&allprison); 114} 115 116SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 117 118/* 119 * struct jail_args { 120 * struct jail *jail; 121 * }; 122 */ 123int 124jail(struct thread *td, struct jail_args *uap) 125{ 126 struct nameidata nd; 127 struct prison *pr, *tpr; 128 struct prison_service *psrv; 129 struct jail j; 130 struct jail_attach_args jaa; 131 int vfslocked, error, tryprid; 132 133 error = copyin(uap->jail, &j, sizeof(j)); 134 if (error) 135 return (error); 136 if (j.version != 0) 137 return (EINVAL); 138 139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 141 pr->pr_ref = 1; 142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 143 if (error) 144 goto e_killmtx; 145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 146 pr->pr_path, td); 147 error = namei(&nd); 148 if (error) 149 goto e_killmtx; 150 vfslocked = NDHASGIANT(&nd); 151 pr->pr_root = nd.ni_vp; 152 VOP_UNLOCK(nd.ni_vp, 0); 153 NDFREE(&nd, NDF_ONLY_PNBUF); 154 VFS_UNLOCK_GIANT(vfslocked); 155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 156 if (error) 157 goto e_dropvnref; 158 pr->pr_ip = j.ip_number; 159 pr->pr_linux = NULL; 160 pr->pr_securelevel = securelevel; 161 if (prison_service_slots == 0) 162 pr->pr_slots = NULL; 163 else { 164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 165 M_PRISON, M_ZERO | M_WAITOK); 166 } 167 168 /* Determine next pr_id and add prison to allprison list. */ 169 sx_xlock(&allprison_lock); 170 tryprid = lastprid + 1; 171 if (tryprid == JAIL_MAX) 172 tryprid = 1; 173next: 174 LIST_FOREACH(tpr, &allprison, pr_list) { 175 if (tpr->pr_id == tryprid) { 176 tryprid++; 177 if (tryprid == JAIL_MAX) { 178 sx_xunlock(&allprison_lock); 179 error = EAGAIN; 180 goto e_dropvnref; 181 } 182 goto next; 183 } 184 } 185 pr->pr_id = jaa.jid = lastprid = tryprid; 186 LIST_INSERT_HEAD(&allprison, pr, pr_list); 187 prisoncount++; 188 sx_downgrade(&allprison_lock); 189 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 190 psrv->ps_create(psrv, pr); 191 } 192 sx_sunlock(&allprison_lock); 193 194 error = jail_attach(td, &jaa); 195 if (error) 196 goto e_dropprref; 197 mtx_lock(&pr->pr_mtx); 198 pr->pr_ref--; 199 mtx_unlock(&pr->pr_mtx); 200 td->td_retval[0] = jaa.jid; 201 return (0); 202e_dropprref: 203 sx_xlock(&allprison_lock); 204 LIST_REMOVE(pr, pr_list); 205 prisoncount--; 206 sx_downgrade(&allprison_lock); 207 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 208 psrv->ps_destroy(psrv, pr); 209 } 210 sx_sunlock(&allprison_lock); 211e_dropvnref: 212 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 213 vrele(pr->pr_root); 214 VFS_UNLOCK_GIANT(vfslocked); 215e_killmtx: 216 mtx_destroy(&pr->pr_mtx); 217 FREE(pr, M_PRISON); 218 return (error); 219} 220 221/* 222 * struct jail_attach_args { 223 * int jid; 224 * }; 225 */ 226int 227jail_attach(struct thread *td, struct jail_attach_args *uap) 228{ 229 struct proc *p; 230 struct ucred *newcred, *oldcred; 231 struct prison *pr; 232 int vfslocked, error; 233 234 /* 235 * XXX: Note that there is a slight race here if two threads 236 * in the same privileged process attempt to attach to two 237 * different jails at the same time. It is important for 238 * user processes not to do this, or they might end up with 239 * a process root from one prison, but attached to the jail 240 * of another. 241 */ 242 error = priv_check(td, PRIV_JAIL_ATTACH); 243 if (error) 244 return (error); 245 246 p = td->td_proc; 247 sx_slock(&allprison_lock); 248 pr = prison_find(uap->jid); 249 if (pr == NULL) { 250 sx_sunlock(&allprison_lock); 251 return (EINVAL); 252 } 253 pr->pr_ref++; 254 mtx_unlock(&pr->pr_mtx); 255 sx_sunlock(&allprison_lock); 256 257 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 258 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 259 if ((error = change_dir(pr->pr_root, td)) != 0) 260 goto e_unlock; 261#ifdef MAC 262 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 263 goto e_unlock; 264#endif 265 VOP_UNLOCK(pr->pr_root, 0); 266 change_root(pr->pr_root, td); 267 VFS_UNLOCK_GIANT(vfslocked); 268 269 newcred = crget(); 270 PROC_LOCK(p); 271 oldcred = p->p_ucred; 272 setsugid(p); 273 crcopy(newcred, oldcred); 274 newcred->cr_prison = pr; 275 p->p_ucred = newcred; 276 PROC_UNLOCK(p); 277 crfree(oldcred); 278 return (0); 279e_unlock: 280 VOP_UNLOCK(pr->pr_root, 0); 281 VFS_UNLOCK_GIANT(vfslocked); 282 mtx_lock(&pr->pr_mtx); 283 pr->pr_ref--; 284 mtx_unlock(&pr->pr_mtx); 285 return (error); 286} 287 288/* 289 * Returns a locked prison instance, or NULL on failure. 290 */ 291struct prison * 292prison_find(int prid) 293{ 294 struct prison *pr; 295 296 sx_assert(&allprison_lock, SX_LOCKED); 297 LIST_FOREACH(pr, &allprison, pr_list) { 298 if (pr->pr_id == prid) { 299 mtx_lock(&pr->pr_mtx); 300 if (pr->pr_ref == 0) { 301 mtx_unlock(&pr->pr_mtx); 302 break; 303 } 304 return (pr); 305 } 306 } 307 return (NULL); 308} 309 310void 311prison_free(struct prison *pr) 312{ 313 314 mtx_lock(&pr->pr_mtx); 315 pr->pr_ref--; 316 if (pr->pr_ref == 0) { 317 mtx_unlock(&pr->pr_mtx); 318 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 319 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 320 return; 321 } 322 mtx_unlock(&pr->pr_mtx); 323} 324 325static void 326prison_complete(void *context, int pending) 327{ 328 struct prison_service *psrv; 329 struct prison *pr; 330 int vfslocked; 331 332 pr = (struct prison *)context; 333 334 sx_xlock(&allprison_lock); 335 LIST_REMOVE(pr, pr_list); 336 prisoncount--; 337 sx_downgrade(&allprison_lock); 338 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 339 psrv->ps_destroy(psrv, pr); 340 } 341 sx_sunlock(&allprison_lock); 342 343 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 344 vrele(pr->pr_root); 345 VFS_UNLOCK_GIANT(vfslocked); 346 347 mtx_destroy(&pr->pr_mtx); 348 if (pr->pr_linux != NULL) 349 FREE(pr->pr_linux, M_PRISON); 350 FREE(pr, M_PRISON); 351} 352 353void 354prison_hold(struct prison *pr) 355{ 356 357 mtx_lock(&pr->pr_mtx); 358 KASSERT(pr->pr_ref > 0, 359 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 360 pr->pr_ref++; 361 mtx_unlock(&pr->pr_mtx); 362} 363 364u_int32_t 365prison_getip(struct ucred *cred) 366{ 367 368 return (cred->cr_prison->pr_ip); 369} 370 371int 372prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 373{ 374 u_int32_t tmp; 375 376 if (!jailed(cred)) 377 return (0); 378 if (flag) 379 tmp = *ip; 380 else 381 tmp = ntohl(*ip); 382 if (tmp == INADDR_ANY) { 383 if (flag) 384 *ip = cred->cr_prison->pr_ip; 385 else 386 *ip = htonl(cred->cr_prison->pr_ip); 387 return (0); 388 } 389 if (tmp == INADDR_LOOPBACK) { 390 if (flag) 391 *ip = cred->cr_prison->pr_ip; 392 else 393 *ip = htonl(cred->cr_prison->pr_ip); 394 return (0); 395 } 396 if (cred->cr_prison->pr_ip != tmp) 397 return (1); 398 return (0); 399} 400 401void 402prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 403{ 404 u_int32_t tmp; 405 406 if (!jailed(cred)) 407 return; 408 if (flag) 409 tmp = *ip; 410 else 411 tmp = ntohl(*ip); 412 if (tmp == INADDR_LOOPBACK) { 413 if (flag) 414 *ip = cred->cr_prison->pr_ip; 415 else 416 *ip = htonl(cred->cr_prison->pr_ip); 417 return; 418 } 419 return; 420} 421 422int 423prison_if(struct ucred *cred, struct sockaddr *sa) 424{ 425 struct sockaddr_in *sai; 426 int ok; 427 428 sai = (struct sockaddr_in *)sa; 429 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 430 ok = 1; 431 else if (sai->sin_family != AF_INET) 432 ok = 0; 433 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 434 ok = 1; 435 else 436 ok = 0; 437 return (ok); 438} 439 440/* 441 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 442 */ 443int 444prison_check(struct ucred *cred1, struct ucred *cred2) 445{ 446 447 if (jailed(cred1)) { 448 if (!jailed(cred2)) 449 return (ESRCH); 450 if (cred2->cr_prison != cred1->cr_prison) 451 return (ESRCH); 452 } 453 454 return (0); 455} 456 457/* 458 * Return 1 if the passed credential is in a jail, otherwise 0. 459 */ 460int 461jailed(struct ucred *cred) 462{ 463 464 return (cred->cr_prison != NULL); 465} 466 467/* 468 * Return the correct hostname for the passed credential. 469 */ 470void 471getcredhostname(struct ucred *cred, char *buf, size_t size) 472{ 473 474 if (jailed(cred)) { 475 mtx_lock(&cred->cr_prison->pr_mtx); 476 strlcpy(buf, cred->cr_prison->pr_host, size); 477 mtx_unlock(&cred->cr_prison->pr_mtx);
| 12 13#include "opt_mac.h" 14 15#include <sys/param.h> 16#include <sys/types.h> 17#include <sys/kernel.h> 18#include <sys/systm.h> 19#include <sys/errno.h> 20#include <sys/sysproto.h> 21#include <sys/malloc.h> 22#include <sys/priv.h> 23#include <sys/proc.h> 24#include <sys/taskqueue.h> 25#include <sys/fcntl.h> 26#include <sys/jail.h> 27#include <sys/lock.h> 28#include <sys/mutex.h> 29#include <sys/sx.h> 30#include <sys/namei.h> 31#include <sys/mount.h> 32#include <sys/queue.h> 33#include <sys/socket.h> 34#include <sys/syscallsubr.h> 35#include <sys/sysctl.h> 36#include <sys/vnode.h> 37#include <net/if.h> 38#include <netinet/in.h> 39 40#include <security/mac/mac_framework.h> 41 42MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 43 44SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 45 "Jail rules"); 46 47int jail_set_hostname_allowed = 1; 48SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 49 &jail_set_hostname_allowed, 0, 50 "Processes in jail can set their hostnames"); 51 52int jail_socket_unixiproute_only = 1; 53SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 54 &jail_socket_unixiproute_only, 0, 55 "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 56 57int jail_sysvipc_allowed = 0; 58SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 59 &jail_sysvipc_allowed, 0, 60 "Processes in jail can use System V IPC primitives"); 61 62static int jail_enforce_statfs = 2; 63SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 64 &jail_enforce_statfs, 0, 65 "Processes in jail cannot see all mounted file systems"); 66 67int jail_allow_raw_sockets = 0; 68SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 69 &jail_allow_raw_sockets, 0, 70 "Prison root can create raw sockets"); 71 72int jail_chflags_allowed = 0; 73SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 74 &jail_chflags_allowed, 0, 75 "Processes in jail can alter system file flags"); 76 77int jail_mount_allowed = 0; 78SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 79 &jail_mount_allowed, 0, 80 "Processes in jail can mount/unmount jail-friendly file systems"); 81 82/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 83struct prisonlist allprison; 84struct sx allprison_lock; 85int lastprid = 0; 86int prisoncount = 0; 87 88/* 89 * List of jail services. Protected by allprison_lock. 90 */ 91TAILQ_HEAD(prison_services_head, prison_service); 92static struct prison_services_head prison_services = 93 TAILQ_HEAD_INITIALIZER(prison_services); 94static int prison_service_slots = 0; 95 96struct prison_service { 97 prison_create_t ps_create; 98 prison_destroy_t ps_destroy; 99 int ps_slotno; 100 TAILQ_ENTRY(prison_service) ps_next; 101 char ps_name[0]; 102}; 103 104static void init_prison(void *); 105static void prison_complete(void *context, int pending); 106static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 107 108static void 109init_prison(void *data __unused) 110{ 111 112 sx_init(&allprison_lock, "allprison"); 113 LIST_INIT(&allprison); 114} 115 116SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 117 118/* 119 * struct jail_args { 120 * struct jail *jail; 121 * }; 122 */ 123int 124jail(struct thread *td, struct jail_args *uap) 125{ 126 struct nameidata nd; 127 struct prison *pr, *tpr; 128 struct prison_service *psrv; 129 struct jail j; 130 struct jail_attach_args jaa; 131 int vfslocked, error, tryprid; 132 133 error = copyin(uap->jail, &j, sizeof(j)); 134 if (error) 135 return (error); 136 if (j.version != 0) 137 return (EINVAL); 138 139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 141 pr->pr_ref = 1; 142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 143 if (error) 144 goto e_killmtx; 145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 146 pr->pr_path, td); 147 error = namei(&nd); 148 if (error) 149 goto e_killmtx; 150 vfslocked = NDHASGIANT(&nd); 151 pr->pr_root = nd.ni_vp; 152 VOP_UNLOCK(nd.ni_vp, 0); 153 NDFREE(&nd, NDF_ONLY_PNBUF); 154 VFS_UNLOCK_GIANT(vfslocked); 155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 156 if (error) 157 goto e_dropvnref; 158 pr->pr_ip = j.ip_number; 159 pr->pr_linux = NULL; 160 pr->pr_securelevel = securelevel; 161 if (prison_service_slots == 0) 162 pr->pr_slots = NULL; 163 else { 164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 165 M_PRISON, M_ZERO | M_WAITOK); 166 } 167 168 /* Determine next pr_id and add prison to allprison list. */ 169 sx_xlock(&allprison_lock); 170 tryprid = lastprid + 1; 171 if (tryprid == JAIL_MAX) 172 tryprid = 1; 173next: 174 LIST_FOREACH(tpr, &allprison, pr_list) { 175 if (tpr->pr_id == tryprid) { 176 tryprid++; 177 if (tryprid == JAIL_MAX) { 178 sx_xunlock(&allprison_lock); 179 error = EAGAIN; 180 goto e_dropvnref; 181 } 182 goto next; 183 } 184 } 185 pr->pr_id = jaa.jid = lastprid = tryprid; 186 LIST_INSERT_HEAD(&allprison, pr, pr_list); 187 prisoncount++; 188 sx_downgrade(&allprison_lock); 189 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 190 psrv->ps_create(psrv, pr); 191 } 192 sx_sunlock(&allprison_lock); 193 194 error = jail_attach(td, &jaa); 195 if (error) 196 goto e_dropprref; 197 mtx_lock(&pr->pr_mtx); 198 pr->pr_ref--; 199 mtx_unlock(&pr->pr_mtx); 200 td->td_retval[0] = jaa.jid; 201 return (0); 202e_dropprref: 203 sx_xlock(&allprison_lock); 204 LIST_REMOVE(pr, pr_list); 205 prisoncount--; 206 sx_downgrade(&allprison_lock); 207 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 208 psrv->ps_destroy(psrv, pr); 209 } 210 sx_sunlock(&allprison_lock); 211e_dropvnref: 212 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 213 vrele(pr->pr_root); 214 VFS_UNLOCK_GIANT(vfslocked); 215e_killmtx: 216 mtx_destroy(&pr->pr_mtx); 217 FREE(pr, M_PRISON); 218 return (error); 219} 220 221/* 222 * struct jail_attach_args { 223 * int jid; 224 * }; 225 */ 226int 227jail_attach(struct thread *td, struct jail_attach_args *uap) 228{ 229 struct proc *p; 230 struct ucred *newcred, *oldcred; 231 struct prison *pr; 232 int vfslocked, error; 233 234 /* 235 * XXX: Note that there is a slight race here if two threads 236 * in the same privileged process attempt to attach to two 237 * different jails at the same time. It is important for 238 * user processes not to do this, or they might end up with 239 * a process root from one prison, but attached to the jail 240 * of another. 241 */ 242 error = priv_check(td, PRIV_JAIL_ATTACH); 243 if (error) 244 return (error); 245 246 p = td->td_proc; 247 sx_slock(&allprison_lock); 248 pr = prison_find(uap->jid); 249 if (pr == NULL) { 250 sx_sunlock(&allprison_lock); 251 return (EINVAL); 252 } 253 pr->pr_ref++; 254 mtx_unlock(&pr->pr_mtx); 255 sx_sunlock(&allprison_lock); 256 257 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 258 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 259 if ((error = change_dir(pr->pr_root, td)) != 0) 260 goto e_unlock; 261#ifdef MAC 262 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 263 goto e_unlock; 264#endif 265 VOP_UNLOCK(pr->pr_root, 0); 266 change_root(pr->pr_root, td); 267 VFS_UNLOCK_GIANT(vfslocked); 268 269 newcred = crget(); 270 PROC_LOCK(p); 271 oldcred = p->p_ucred; 272 setsugid(p); 273 crcopy(newcred, oldcred); 274 newcred->cr_prison = pr; 275 p->p_ucred = newcred; 276 PROC_UNLOCK(p); 277 crfree(oldcred); 278 return (0); 279e_unlock: 280 VOP_UNLOCK(pr->pr_root, 0); 281 VFS_UNLOCK_GIANT(vfslocked); 282 mtx_lock(&pr->pr_mtx); 283 pr->pr_ref--; 284 mtx_unlock(&pr->pr_mtx); 285 return (error); 286} 287 288/* 289 * Returns a locked prison instance, or NULL on failure. 290 */ 291struct prison * 292prison_find(int prid) 293{ 294 struct prison *pr; 295 296 sx_assert(&allprison_lock, SX_LOCKED); 297 LIST_FOREACH(pr, &allprison, pr_list) { 298 if (pr->pr_id == prid) { 299 mtx_lock(&pr->pr_mtx); 300 if (pr->pr_ref == 0) { 301 mtx_unlock(&pr->pr_mtx); 302 break; 303 } 304 return (pr); 305 } 306 } 307 return (NULL); 308} 309 310void 311prison_free(struct prison *pr) 312{ 313 314 mtx_lock(&pr->pr_mtx); 315 pr->pr_ref--; 316 if (pr->pr_ref == 0) { 317 mtx_unlock(&pr->pr_mtx); 318 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 319 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 320 return; 321 } 322 mtx_unlock(&pr->pr_mtx); 323} 324 325static void 326prison_complete(void *context, int pending) 327{ 328 struct prison_service *psrv; 329 struct prison *pr; 330 int vfslocked; 331 332 pr = (struct prison *)context; 333 334 sx_xlock(&allprison_lock); 335 LIST_REMOVE(pr, pr_list); 336 prisoncount--; 337 sx_downgrade(&allprison_lock); 338 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 339 psrv->ps_destroy(psrv, pr); 340 } 341 sx_sunlock(&allprison_lock); 342 343 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 344 vrele(pr->pr_root); 345 VFS_UNLOCK_GIANT(vfslocked); 346 347 mtx_destroy(&pr->pr_mtx); 348 if (pr->pr_linux != NULL) 349 FREE(pr->pr_linux, M_PRISON); 350 FREE(pr, M_PRISON); 351} 352 353void 354prison_hold(struct prison *pr) 355{ 356 357 mtx_lock(&pr->pr_mtx); 358 KASSERT(pr->pr_ref > 0, 359 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 360 pr->pr_ref++; 361 mtx_unlock(&pr->pr_mtx); 362} 363 364u_int32_t 365prison_getip(struct ucred *cred) 366{ 367 368 return (cred->cr_prison->pr_ip); 369} 370 371int 372prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 373{ 374 u_int32_t tmp; 375 376 if (!jailed(cred)) 377 return (0); 378 if (flag) 379 tmp = *ip; 380 else 381 tmp = ntohl(*ip); 382 if (tmp == INADDR_ANY) { 383 if (flag) 384 *ip = cred->cr_prison->pr_ip; 385 else 386 *ip = htonl(cred->cr_prison->pr_ip); 387 return (0); 388 } 389 if (tmp == INADDR_LOOPBACK) { 390 if (flag) 391 *ip = cred->cr_prison->pr_ip; 392 else 393 *ip = htonl(cred->cr_prison->pr_ip); 394 return (0); 395 } 396 if (cred->cr_prison->pr_ip != tmp) 397 return (1); 398 return (0); 399} 400 401void 402prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 403{ 404 u_int32_t tmp; 405 406 if (!jailed(cred)) 407 return; 408 if (flag) 409 tmp = *ip; 410 else 411 tmp = ntohl(*ip); 412 if (tmp == INADDR_LOOPBACK) { 413 if (flag) 414 *ip = cred->cr_prison->pr_ip; 415 else 416 *ip = htonl(cred->cr_prison->pr_ip); 417 return; 418 } 419 return; 420} 421 422int 423prison_if(struct ucred *cred, struct sockaddr *sa) 424{ 425 struct sockaddr_in *sai; 426 int ok; 427 428 sai = (struct sockaddr_in *)sa; 429 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 430 ok = 1; 431 else if (sai->sin_family != AF_INET) 432 ok = 0; 433 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 434 ok = 1; 435 else 436 ok = 0; 437 return (ok); 438} 439 440/* 441 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 442 */ 443int 444prison_check(struct ucred *cred1, struct ucred *cred2) 445{ 446 447 if (jailed(cred1)) { 448 if (!jailed(cred2)) 449 return (ESRCH); 450 if (cred2->cr_prison != cred1->cr_prison) 451 return (ESRCH); 452 } 453 454 return (0); 455} 456 457/* 458 * Return 1 if the passed credential is in a jail, otherwise 0. 459 */ 460int 461jailed(struct ucred *cred) 462{ 463 464 return (cred->cr_prison != NULL); 465} 466 467/* 468 * Return the correct hostname for the passed credential. 469 */ 470void 471getcredhostname(struct ucred *cred, char *buf, size_t size) 472{ 473 474 if (jailed(cred)) { 475 mtx_lock(&cred->cr_prison->pr_mtx); 476 strlcpy(buf, cred->cr_prison->pr_host, size); 477 mtx_unlock(&cred->cr_prison->pr_mtx);
|
478 } else
| 478 } else { 479 mtx_lock(&hostname_mtx);
|
479 strlcpy(buf, hostname, size);
| 480 strlcpy(buf, hostname, size);
|
| 481 mtx_unlock(&hostname_mtx); 482 }
|
480} 481 482/* 483 * Determine whether the subject represented by cred can "see" 484 * status of a mount point. 485 * Returns: 0 for permitted, ENOENT otherwise. 486 * XXX: This function should be called cr_canseemount() and should be 487 * placed in kern_prot.c. 488 */ 489int 490prison_canseemount(struct ucred *cred, struct mount *mp) 491{ 492 struct prison *pr; 493 struct statfs *sp; 494 size_t len; 495 496 if (!jailed(cred) || jail_enforce_statfs == 0) 497 return (0); 498 pr = cred->cr_prison; 499 if (pr->pr_root->v_mount == mp) 500 return (0); 501 if (jail_enforce_statfs == 2) 502 return (ENOENT); 503 /* 504 * If jail's chroot directory is set to "/" we should be able to see 505 * all mount-points from inside a jail. 506 * This is ugly check, but this is the only situation when jail's 507 * directory ends with '/'. 508 */ 509 if (strcmp(pr->pr_path, "/") == 0) 510 return (0); 511 len = strlen(pr->pr_path); 512 sp = &mp->mnt_stat; 513 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 514 return (ENOENT); 515 /* 516 * Be sure that we don't have situation where jail's root directory 517 * is "/some/path" and mount point is "/some/pathpath". 518 */ 519 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 520 return (ENOENT); 521 return (0); 522} 523 524void 525prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 526{ 527 char jpath[MAXPATHLEN]; 528 struct prison *pr; 529 size_t len; 530 531 if (!jailed(cred) || jail_enforce_statfs == 0) 532 return; 533 pr = cred->cr_prison; 534 if (prison_canseemount(cred, mp) != 0) { 535 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 536 strlcpy(sp->f_mntonname, "[restricted]", 537 sizeof(sp->f_mntonname)); 538 return; 539 } 540 if (pr->pr_root->v_mount == mp) { 541 /* 542 * Clear current buffer data, so we are sure nothing from 543 * the valid path left there. 544 */ 545 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 546 *sp->f_mntonname = '/'; 547 return; 548 } 549 /* 550 * If jail's chroot directory is set to "/" we should be able to see 551 * all mount-points from inside a jail. 552 */ 553 if (strcmp(pr->pr_path, "/") == 0) 554 return; 555 len = strlen(pr->pr_path); 556 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 557 /* 558 * Clear current buffer data, so we are sure nothing from 559 * the valid path left there. 560 */ 561 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 562 if (*jpath == '\0') { 563 /* Should never happen. */ 564 *sp->f_mntonname = '/'; 565 } else { 566 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 567 } 568} 569 570/* 571 * Check with permission for a specific privilege is granted within jail. We 572 * have a specific list of accepted privileges; the rest are denied. 573 */ 574int 575prison_priv_check(struct ucred *cred, int priv) 576{ 577 578 if (!jailed(cred)) 579 return (0); 580 581 switch (priv) { 582 583 /* 584 * Allow ktrace privileges for root in jail. 585 */ 586 case PRIV_KTRACE: 587 588#if 0 589 /* 590 * Allow jailed processes to configure audit identity and 591 * submit audit records (login, etc). In the future we may 592 * want to further refine the relationship between audit and 593 * jail. 594 */ 595 case PRIV_AUDIT_GETAUDIT: 596 case PRIV_AUDIT_SETAUDIT: 597 case PRIV_AUDIT_SUBMIT: 598#endif 599 600 /* 601 * Allow jailed processes to manipulate process UNIX 602 * credentials in any way they see fit. 603 */ 604 case PRIV_CRED_SETUID: 605 case PRIV_CRED_SETEUID: 606 case PRIV_CRED_SETGID: 607 case PRIV_CRED_SETEGID: 608 case PRIV_CRED_SETGROUPS: 609 case PRIV_CRED_SETREUID: 610 case PRIV_CRED_SETREGID: 611 case PRIV_CRED_SETRESUID: 612 case PRIV_CRED_SETRESGID: 613 614 /* 615 * Jail implements visibility constraints already, so allow 616 * jailed root to override uid/gid-based constraints. 617 */ 618 case PRIV_SEEOTHERGIDS: 619 case PRIV_SEEOTHERUIDS: 620 621 /* 622 * Jail implements inter-process debugging limits already, so 623 * allow jailed root various debugging privileges. 624 */ 625 case PRIV_DEBUG_DIFFCRED: 626 case PRIV_DEBUG_SUGID: 627 case PRIV_DEBUG_UNPRIV: 628 629 /* 630 * Allow jail to set various resource limits and login 631 * properties, and for now, exceed process resource limits. 632 */ 633 case PRIV_PROC_LIMIT: 634 case PRIV_PROC_SETLOGIN: 635 case PRIV_PROC_SETRLIMIT: 636 637 /* 638 * System V and POSIX IPC privileges are granted in jail. 639 */ 640 case PRIV_IPC_READ: 641 case PRIV_IPC_WRITE: 642 case PRIV_IPC_ADMIN: 643 case PRIV_IPC_MSGSIZE: 644 case PRIV_MQ_ADMIN: 645 646 /* 647 * Jail implements its own inter-process limits, so allow 648 * root processes in jail to change scheduling on other 649 * processes in the same jail. Likewise for signalling. 650 */ 651 case PRIV_SCHED_DIFFCRED: 652 case PRIV_SIGNAL_DIFFCRED: 653 case PRIV_SIGNAL_SUGID: 654 655 /* 656 * Allow jailed processes to write to sysctls marked as jail 657 * writable. 658 */ 659 case PRIV_SYSCTL_WRITEJAIL: 660 661 /* 662 * Allow root in jail to manage a variety of quota 663 * properties. These should likely be conditional on a 664 * configuration option. 665 */ 666 case PRIV_VFS_GETQUOTA: 667 case PRIV_VFS_SETQUOTA: 668 669 /* 670 * Since Jail relies on chroot() to implement file system 671 * protections, grant many VFS privileges to root in jail. 672 * Be careful to exclude mount-related and NFS-related 673 * privileges. 674 */ 675 case PRIV_VFS_READ: 676 case PRIV_VFS_WRITE: 677 case PRIV_VFS_ADMIN: 678 case PRIV_VFS_EXEC: 679 case PRIV_VFS_LOOKUP: 680 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 681 case PRIV_VFS_CHFLAGS_DEV: 682 case PRIV_VFS_CHOWN: 683 case PRIV_VFS_CHROOT: 684 case PRIV_VFS_RETAINSUGID: 685 case PRIV_VFS_FCHROOT: 686 case PRIV_VFS_LINK: 687 case PRIV_VFS_SETGID: 688 case PRIV_VFS_STAT: 689 case PRIV_VFS_STICKYFILE: 690 return (0); 691 692 /* 693 * Depending on the global setting, allow privilege of 694 * setting system flags. 695 */ 696 case PRIV_VFS_SYSFLAGS: 697 if (jail_chflags_allowed) 698 return (0); 699 else 700 return (EPERM); 701 702 /* 703 * Depending on the global setting, allow privilege of 704 * mounting/unmounting file systems. 705 */ 706 case PRIV_VFS_MOUNT: 707 case PRIV_VFS_UNMOUNT: 708 case PRIV_VFS_MOUNT_NONUSER: 709 case PRIV_VFS_MOUNT_OWNER: 710 if (jail_mount_allowed) 711 return (0); 712 else 713 return (EPERM); 714 715 /* 716 * Allow jailed root to bind reserved ports and reuse in-use 717 * ports. 718 */ 719 case PRIV_NETINET_RESERVEDPORT: 720 case PRIV_NETINET_REUSEPORT: 721 return (0); 722 723 /* 724 * Allow jailed root to set certian IPv4/6 (option) headers. 725 */ 726 case PRIV_NETINET_SETHDROPTS: 727 return (0); 728 729 /* 730 * Conditionally allow creating raw sockets in jail. 731 */ 732 case PRIV_NETINET_RAW: 733 if (jail_allow_raw_sockets) 734 return (0); 735 else 736 return (EPERM); 737 738 /* 739 * Since jail implements its own visibility limits on netstat 740 * sysctls, allow getcred. This allows identd to work in 741 * jail. 742 */ 743 case PRIV_NETINET_GETCRED: 744 return (0); 745 746 default: 747 /* 748 * In all remaining cases, deny the privilege request. This 749 * includes almost all network privileges, many system 750 * configuration privileges. 751 */ 752 return (EPERM); 753 } 754} 755 756/* 757 * Register jail service. Provides 'create' and 'destroy' methods. 758 * 'create' method will be called for every existing jail and all 759 * jails in the future as they beeing created. 760 * 'destroy' method will be called for every jail going away and 761 * for all existing jails at the time of service deregistration. 762 */ 763struct prison_service * 764prison_service_register(const char *name, prison_create_t create, 765 prison_destroy_t destroy) 766{ 767 struct prison_service *psrv, *psrv2; 768 struct prison *pr; 769 int reallocate = 1, slotno = 0; 770 void **slots, **oldslots; 771 772 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 773 M_WAITOK | M_ZERO); 774 psrv->ps_create = create; 775 psrv->ps_destroy = destroy; 776 strcpy(psrv->ps_name, name); 777 /* 778 * Grab the allprison_lock here, so we won't miss any jail 779 * creation/destruction. 780 */ 781 sx_xlock(&allprison_lock); 782#ifdef INVARIANTS 783 /* 784 * Verify if service is not already registered. 785 */ 786 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 787 KASSERT(strcmp(psrv2->ps_name, name) != 0, 788 ("jail service %s already registered", name)); 789 } 790#endif 791 /* 792 * Find free slot. When there is no existing free slot available, 793 * allocate one at the end. 794 */ 795 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 796 if (psrv2->ps_slotno != slotno) { 797 KASSERT(slotno < psrv2->ps_slotno, 798 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 799 slotno, psrv2->ps_slotno)); 800 /* We found free slot. */ 801 reallocate = 0; 802 break; 803 } 804 slotno++; 805 } 806 psrv->ps_slotno = slotno; 807 /* 808 * Keep the list sorted by slot number. 809 */ 810 if (psrv2 != NULL) { 811 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 812 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 813 } else { 814 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 815 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 816 } 817 prison_service_slots++; 818 sx_downgrade(&allprison_lock); 819 /* 820 * Allocate memory for new slot if we didn't found empty one. 821 * Do not use realloc(9), because pr_slots is protected with a mutex, 822 * so we can't sleep. 823 */ 824 LIST_FOREACH(pr, &allprison, pr_list) { 825 if (reallocate) { 826 /* First allocate memory with M_WAITOK. */ 827 slots = malloc(sizeof(*slots) * prison_service_slots, 828 M_PRISON, M_WAITOK); 829 /* Now grab the mutex and replace pr_slots. */ 830 mtx_lock(&pr->pr_mtx); 831 oldslots = pr->pr_slots; 832 if (psrv->ps_slotno > 0) { 833 bcopy(oldslots, slots, 834 sizeof(*slots) * (prison_service_slots - 1)); 835 } 836 slots[psrv->ps_slotno] = NULL; 837 pr->pr_slots = slots; 838 mtx_unlock(&pr->pr_mtx); 839 if (oldslots != NULL) 840 free(oldslots, M_PRISON); 841 } 842 /* 843 * Call 'create' method for each existing jail. 844 */ 845 psrv->ps_create(psrv, pr); 846 } 847 sx_sunlock(&allprison_lock); 848 849 return (psrv); 850} 851 852void 853prison_service_deregister(struct prison_service *psrv) 854{ 855 struct prison *pr; 856 void **slots, **oldslots; 857 int last = 0; 858 859 sx_xlock(&allprison_lock); 860 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 861 last = 1; 862 TAILQ_REMOVE(&prison_services, psrv, ps_next); 863 prison_service_slots--; 864 sx_downgrade(&allprison_lock); 865 LIST_FOREACH(pr, &allprison, pr_list) { 866 /* 867 * Call 'destroy' method for every currently existing jail. 868 */ 869 psrv->ps_destroy(psrv, pr); 870 /* 871 * If this is the last slot, free the memory allocated for it. 872 */ 873 if (last) { 874 if (prison_service_slots == 0) 875 slots = NULL; 876 else { 877 slots = malloc(sizeof(*slots) * prison_service_slots, 878 M_PRISON, M_WAITOK); 879 } 880 mtx_lock(&pr->pr_mtx); 881 oldslots = pr->pr_slots; 882 /* 883 * We require setting slot to NULL after freeing it, 884 * this way we can check for memory leaks here. 885 */ 886 KASSERT(oldslots[psrv->ps_slotno] == NULL, 887 ("Slot %d (service %s, jailid=%d) still contains data?", 888 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 889 if (psrv->ps_slotno > 0) { 890 bcopy(oldslots, slots, 891 sizeof(*slots) * prison_service_slots); 892 } 893 pr->pr_slots = slots; 894 mtx_unlock(&pr->pr_mtx); 895 KASSERT(oldslots != NULL, ("oldslots == NULL")); 896 free(oldslots, M_PRISON); 897 } 898 } 899 sx_sunlock(&allprison_lock); 900 free(psrv, M_PRISON); 901} 902 903/* 904 * Function sets data for the given jail in slot assigned for the given 905 * jail service. 906 */ 907void 908prison_service_data_set(struct prison_service *psrv, struct prison *pr, 909 void *data) 910{ 911 912 mtx_assert(&pr->pr_mtx, MA_OWNED); 913 pr->pr_slots[psrv->ps_slotno] = data; 914} 915 916/* 917 * Function clears slots assigned for the given jail service in the given 918 * prison structure and returns current slot data. 919 */ 920void * 921prison_service_data_del(struct prison_service *psrv, struct prison *pr) 922{ 923 void *data; 924 925 mtx_assert(&pr->pr_mtx, MA_OWNED); 926 data = pr->pr_slots[psrv->ps_slotno]; 927 pr->pr_slots[psrv->ps_slotno] = NULL; 928 return (data); 929} 930 931/* 932 * Function returns current data from the slot assigned to the given jail 933 * service for the given jail. 934 */ 935void * 936prison_service_data_get(struct prison_service *psrv, struct prison *pr) 937{ 938 939 mtx_assert(&pr->pr_mtx, MA_OWNED); 940 return (pr->pr_slots[psrv->ps_slotno]); 941} 942 943static int 944sysctl_jail_list(SYSCTL_HANDLER_ARGS) 945{ 946 struct xprison *xp, *sxp; 947 struct prison *pr; 948 int count, error; 949 950 if (jailed(req->td->td_ucred)) 951 return (0); 952 953 sx_slock(&allprison_lock); 954 if ((count = prisoncount) == 0) { 955 sx_sunlock(&allprison_lock); 956 return (0); 957 } 958 959 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 960 961 LIST_FOREACH(pr, &allprison, pr_list) { 962 xp->pr_version = XPRISON_VERSION; 963 xp->pr_id = pr->pr_id; 964 xp->pr_ip = pr->pr_ip; 965 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 966 mtx_lock(&pr->pr_mtx); 967 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 968 mtx_unlock(&pr->pr_mtx); 969 xp++; 970 } 971 sx_sunlock(&allprison_lock); 972 973 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 974 free(sxp, M_TEMP); 975 return (error); 976} 977 978SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 979 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 980 981static int 982sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 983{ 984 int error, injail; 985 986 injail = jailed(req->td->td_ucred); 987 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 988 989 return (error); 990} 991SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 992 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
| 483} 484 485/* 486 * Determine whether the subject represented by cred can "see" 487 * status of a mount point. 488 * Returns: 0 for permitted, ENOENT otherwise. 489 * XXX: This function should be called cr_canseemount() and should be 490 * placed in kern_prot.c. 491 */ 492int 493prison_canseemount(struct ucred *cred, struct mount *mp) 494{ 495 struct prison *pr; 496 struct statfs *sp; 497 size_t len; 498 499 if (!jailed(cred) || jail_enforce_statfs == 0) 500 return (0); 501 pr = cred->cr_prison; 502 if (pr->pr_root->v_mount == mp) 503 return (0); 504 if (jail_enforce_statfs == 2) 505 return (ENOENT); 506 /* 507 * If jail's chroot directory is set to "/" we should be able to see 508 * all mount-points from inside a jail. 509 * This is ugly check, but this is the only situation when jail's 510 * directory ends with '/'. 511 */ 512 if (strcmp(pr->pr_path, "/") == 0) 513 return (0); 514 len = strlen(pr->pr_path); 515 sp = &mp->mnt_stat; 516 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 517 return (ENOENT); 518 /* 519 * Be sure that we don't have situation where jail's root directory 520 * is "/some/path" and mount point is "/some/pathpath". 521 */ 522 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 523 return (ENOENT); 524 return (0); 525} 526 527void 528prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 529{ 530 char jpath[MAXPATHLEN]; 531 struct prison *pr; 532 size_t len; 533 534 if (!jailed(cred) || jail_enforce_statfs == 0) 535 return; 536 pr = cred->cr_prison; 537 if (prison_canseemount(cred, mp) != 0) { 538 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 539 strlcpy(sp->f_mntonname, "[restricted]", 540 sizeof(sp->f_mntonname)); 541 return; 542 } 543 if (pr->pr_root->v_mount == mp) { 544 /* 545 * Clear current buffer data, so we are sure nothing from 546 * the valid path left there. 547 */ 548 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 549 *sp->f_mntonname = '/'; 550 return; 551 } 552 /* 553 * If jail's chroot directory is set to "/" we should be able to see 554 * all mount-points from inside a jail. 555 */ 556 if (strcmp(pr->pr_path, "/") == 0) 557 return; 558 len = strlen(pr->pr_path); 559 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 560 /* 561 * Clear current buffer data, so we are sure nothing from 562 * the valid path left there. 563 */ 564 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 565 if (*jpath == '\0') { 566 /* Should never happen. */ 567 *sp->f_mntonname = '/'; 568 } else { 569 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 570 } 571} 572 573/* 574 * Check with permission for a specific privilege is granted within jail. We 575 * have a specific list of accepted privileges; the rest are denied. 576 */ 577int 578prison_priv_check(struct ucred *cred, int priv) 579{ 580 581 if (!jailed(cred)) 582 return (0); 583 584 switch (priv) { 585 586 /* 587 * Allow ktrace privileges for root in jail. 588 */ 589 case PRIV_KTRACE: 590 591#if 0 592 /* 593 * Allow jailed processes to configure audit identity and 594 * submit audit records (login, etc). In the future we may 595 * want to further refine the relationship between audit and 596 * jail. 597 */ 598 case PRIV_AUDIT_GETAUDIT: 599 case PRIV_AUDIT_SETAUDIT: 600 case PRIV_AUDIT_SUBMIT: 601#endif 602 603 /* 604 * Allow jailed processes to manipulate process UNIX 605 * credentials in any way they see fit. 606 */ 607 case PRIV_CRED_SETUID: 608 case PRIV_CRED_SETEUID: 609 case PRIV_CRED_SETGID: 610 case PRIV_CRED_SETEGID: 611 case PRIV_CRED_SETGROUPS: 612 case PRIV_CRED_SETREUID: 613 case PRIV_CRED_SETREGID: 614 case PRIV_CRED_SETRESUID: 615 case PRIV_CRED_SETRESGID: 616 617 /* 618 * Jail implements visibility constraints already, so allow 619 * jailed root to override uid/gid-based constraints. 620 */ 621 case PRIV_SEEOTHERGIDS: 622 case PRIV_SEEOTHERUIDS: 623 624 /* 625 * Jail implements inter-process debugging limits already, so 626 * allow jailed root various debugging privileges. 627 */ 628 case PRIV_DEBUG_DIFFCRED: 629 case PRIV_DEBUG_SUGID: 630 case PRIV_DEBUG_UNPRIV: 631 632 /* 633 * Allow jail to set various resource limits and login 634 * properties, and for now, exceed process resource limits. 635 */ 636 case PRIV_PROC_LIMIT: 637 case PRIV_PROC_SETLOGIN: 638 case PRIV_PROC_SETRLIMIT: 639 640 /* 641 * System V and POSIX IPC privileges are granted in jail. 642 */ 643 case PRIV_IPC_READ: 644 case PRIV_IPC_WRITE: 645 case PRIV_IPC_ADMIN: 646 case PRIV_IPC_MSGSIZE: 647 case PRIV_MQ_ADMIN: 648 649 /* 650 * Jail implements its own inter-process limits, so allow 651 * root processes in jail to change scheduling on other 652 * processes in the same jail. Likewise for signalling. 653 */ 654 case PRIV_SCHED_DIFFCRED: 655 case PRIV_SIGNAL_DIFFCRED: 656 case PRIV_SIGNAL_SUGID: 657 658 /* 659 * Allow jailed processes to write to sysctls marked as jail 660 * writable. 661 */ 662 case PRIV_SYSCTL_WRITEJAIL: 663 664 /* 665 * Allow root in jail to manage a variety of quota 666 * properties. These should likely be conditional on a 667 * configuration option. 668 */ 669 case PRIV_VFS_GETQUOTA: 670 case PRIV_VFS_SETQUOTA: 671 672 /* 673 * Since Jail relies on chroot() to implement file system 674 * protections, grant many VFS privileges to root in jail. 675 * Be careful to exclude mount-related and NFS-related 676 * privileges. 677 */ 678 case PRIV_VFS_READ: 679 case PRIV_VFS_WRITE: 680 case PRIV_VFS_ADMIN: 681 case PRIV_VFS_EXEC: 682 case PRIV_VFS_LOOKUP: 683 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 684 case PRIV_VFS_CHFLAGS_DEV: 685 case PRIV_VFS_CHOWN: 686 case PRIV_VFS_CHROOT: 687 case PRIV_VFS_RETAINSUGID: 688 case PRIV_VFS_FCHROOT: 689 case PRIV_VFS_LINK: 690 case PRIV_VFS_SETGID: 691 case PRIV_VFS_STAT: 692 case PRIV_VFS_STICKYFILE: 693 return (0); 694 695 /* 696 * Depending on the global setting, allow privilege of 697 * setting system flags. 698 */ 699 case PRIV_VFS_SYSFLAGS: 700 if (jail_chflags_allowed) 701 return (0); 702 else 703 return (EPERM); 704 705 /* 706 * Depending on the global setting, allow privilege of 707 * mounting/unmounting file systems. 708 */ 709 case PRIV_VFS_MOUNT: 710 case PRIV_VFS_UNMOUNT: 711 case PRIV_VFS_MOUNT_NONUSER: 712 case PRIV_VFS_MOUNT_OWNER: 713 if (jail_mount_allowed) 714 return (0); 715 else 716 return (EPERM); 717 718 /* 719 * Allow jailed root to bind reserved ports and reuse in-use 720 * ports. 721 */ 722 case PRIV_NETINET_RESERVEDPORT: 723 case PRIV_NETINET_REUSEPORT: 724 return (0); 725 726 /* 727 * Allow jailed root to set certian IPv4/6 (option) headers. 728 */ 729 case PRIV_NETINET_SETHDROPTS: 730 return (0); 731 732 /* 733 * Conditionally allow creating raw sockets in jail. 734 */ 735 case PRIV_NETINET_RAW: 736 if (jail_allow_raw_sockets) 737 return (0); 738 else 739 return (EPERM); 740 741 /* 742 * Since jail implements its own visibility limits on netstat 743 * sysctls, allow getcred. This allows identd to work in 744 * jail. 745 */ 746 case PRIV_NETINET_GETCRED: 747 return (0); 748 749 default: 750 /* 751 * In all remaining cases, deny the privilege request. This 752 * includes almost all network privileges, many system 753 * configuration privileges. 754 */ 755 return (EPERM); 756 } 757} 758 759/* 760 * Register jail service. Provides 'create' and 'destroy' methods. 761 * 'create' method will be called for every existing jail and all 762 * jails in the future as they beeing created. 763 * 'destroy' method will be called for every jail going away and 764 * for all existing jails at the time of service deregistration. 765 */ 766struct prison_service * 767prison_service_register(const char *name, prison_create_t create, 768 prison_destroy_t destroy) 769{ 770 struct prison_service *psrv, *psrv2; 771 struct prison *pr; 772 int reallocate = 1, slotno = 0; 773 void **slots, **oldslots; 774 775 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 776 M_WAITOK | M_ZERO); 777 psrv->ps_create = create; 778 psrv->ps_destroy = destroy; 779 strcpy(psrv->ps_name, name); 780 /* 781 * Grab the allprison_lock here, so we won't miss any jail 782 * creation/destruction. 783 */ 784 sx_xlock(&allprison_lock); 785#ifdef INVARIANTS 786 /* 787 * Verify if service is not already registered. 788 */ 789 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 790 KASSERT(strcmp(psrv2->ps_name, name) != 0, 791 ("jail service %s already registered", name)); 792 } 793#endif 794 /* 795 * Find free slot. When there is no existing free slot available, 796 * allocate one at the end. 797 */ 798 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 799 if (psrv2->ps_slotno != slotno) { 800 KASSERT(slotno < psrv2->ps_slotno, 801 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 802 slotno, psrv2->ps_slotno)); 803 /* We found free slot. */ 804 reallocate = 0; 805 break; 806 } 807 slotno++; 808 } 809 psrv->ps_slotno = slotno; 810 /* 811 * Keep the list sorted by slot number. 812 */ 813 if (psrv2 != NULL) { 814 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 815 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 816 } else { 817 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 818 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 819 } 820 prison_service_slots++; 821 sx_downgrade(&allprison_lock); 822 /* 823 * Allocate memory for new slot if we didn't found empty one. 824 * Do not use realloc(9), because pr_slots is protected with a mutex, 825 * so we can't sleep. 826 */ 827 LIST_FOREACH(pr, &allprison, pr_list) { 828 if (reallocate) { 829 /* First allocate memory with M_WAITOK. */ 830 slots = malloc(sizeof(*slots) * prison_service_slots, 831 M_PRISON, M_WAITOK); 832 /* Now grab the mutex and replace pr_slots. */ 833 mtx_lock(&pr->pr_mtx); 834 oldslots = pr->pr_slots; 835 if (psrv->ps_slotno > 0) { 836 bcopy(oldslots, slots, 837 sizeof(*slots) * (prison_service_slots - 1)); 838 } 839 slots[psrv->ps_slotno] = NULL; 840 pr->pr_slots = slots; 841 mtx_unlock(&pr->pr_mtx); 842 if (oldslots != NULL) 843 free(oldslots, M_PRISON); 844 } 845 /* 846 * Call 'create' method for each existing jail. 847 */ 848 psrv->ps_create(psrv, pr); 849 } 850 sx_sunlock(&allprison_lock); 851 852 return (psrv); 853} 854 855void 856prison_service_deregister(struct prison_service *psrv) 857{ 858 struct prison *pr; 859 void **slots, **oldslots; 860 int last = 0; 861 862 sx_xlock(&allprison_lock); 863 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 864 last = 1; 865 TAILQ_REMOVE(&prison_services, psrv, ps_next); 866 prison_service_slots--; 867 sx_downgrade(&allprison_lock); 868 LIST_FOREACH(pr, &allprison, pr_list) { 869 /* 870 * Call 'destroy' method for every currently existing jail. 871 */ 872 psrv->ps_destroy(psrv, pr); 873 /* 874 * If this is the last slot, free the memory allocated for it. 875 */ 876 if (last) { 877 if (prison_service_slots == 0) 878 slots = NULL; 879 else { 880 slots = malloc(sizeof(*slots) * prison_service_slots, 881 M_PRISON, M_WAITOK); 882 } 883 mtx_lock(&pr->pr_mtx); 884 oldslots = pr->pr_slots; 885 /* 886 * We require setting slot to NULL after freeing it, 887 * this way we can check for memory leaks here. 888 */ 889 KASSERT(oldslots[psrv->ps_slotno] == NULL, 890 ("Slot %d (service %s, jailid=%d) still contains data?", 891 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 892 if (psrv->ps_slotno > 0) { 893 bcopy(oldslots, slots, 894 sizeof(*slots) * prison_service_slots); 895 } 896 pr->pr_slots = slots; 897 mtx_unlock(&pr->pr_mtx); 898 KASSERT(oldslots != NULL, ("oldslots == NULL")); 899 free(oldslots, M_PRISON); 900 } 901 } 902 sx_sunlock(&allprison_lock); 903 free(psrv, M_PRISON); 904} 905 906/* 907 * Function sets data for the given jail in slot assigned for the given 908 * jail service. 909 */ 910void 911prison_service_data_set(struct prison_service *psrv, struct prison *pr, 912 void *data) 913{ 914 915 mtx_assert(&pr->pr_mtx, MA_OWNED); 916 pr->pr_slots[psrv->ps_slotno] = data; 917} 918 919/* 920 * Function clears slots assigned for the given jail service in the given 921 * prison structure and returns current slot data. 922 */ 923void * 924prison_service_data_del(struct prison_service *psrv, struct prison *pr) 925{ 926 void *data; 927 928 mtx_assert(&pr->pr_mtx, MA_OWNED); 929 data = pr->pr_slots[psrv->ps_slotno]; 930 pr->pr_slots[psrv->ps_slotno] = NULL; 931 return (data); 932} 933 934/* 935 * Function returns current data from the slot assigned to the given jail 936 * service for the given jail. 937 */ 938void * 939prison_service_data_get(struct prison_service *psrv, struct prison *pr) 940{ 941 942 mtx_assert(&pr->pr_mtx, MA_OWNED); 943 return (pr->pr_slots[psrv->ps_slotno]); 944} 945 946static int 947sysctl_jail_list(SYSCTL_HANDLER_ARGS) 948{ 949 struct xprison *xp, *sxp; 950 struct prison *pr; 951 int count, error; 952 953 if (jailed(req->td->td_ucred)) 954 return (0); 955 956 sx_slock(&allprison_lock); 957 if ((count = prisoncount) == 0) { 958 sx_sunlock(&allprison_lock); 959 return (0); 960 } 961 962 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 963 964 LIST_FOREACH(pr, &allprison, pr_list) { 965 xp->pr_version = XPRISON_VERSION; 966 xp->pr_id = pr->pr_id; 967 xp->pr_ip = pr->pr_ip; 968 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 969 mtx_lock(&pr->pr_mtx); 970 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 971 mtx_unlock(&pr->pr_mtx); 972 xp++; 973 } 974 sx_sunlock(&allprison_lock); 975 976 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 977 free(sxp, M_TEMP); 978 return (error); 979} 980 981SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 982 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 983 984static int 985sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 986{ 987 int error, injail; 988 989 injail = jailed(req->td->td_ucred); 990 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 991 992 return (error); 993} 994SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 995 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
|