kern_jail.c revision 168401
1139804Simp/*- 246197Sphk * ---------------------------------------------------------------------------- 346197Sphk * "THE BEER-WARE LICENSE" (Revision 42): 446197Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 546197Sphk * can do whatever you want with this stuff. If we meet some day, and you think 646197Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 746197Sphk * ---------------------------------------------------------------------------- 846197Sphk */ 946155Sphk 10116182Sobrien#include <sys/cdefs.h> 11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 168401 2007-04-05 23:19:13Z pjd $"); 12116182Sobrien 13131177Spjd#include "opt_mac.h" 14131177Spjd 1546155Sphk#include <sys/param.h> 1646155Sphk#include <sys/types.h> 1746155Sphk#include <sys/kernel.h> 1846155Sphk#include <sys/systm.h> 1946155Sphk#include <sys/errno.h> 2046155Sphk#include <sys/sysproto.h> 2146155Sphk#include <sys/malloc.h> 22164032Srwatson#include <sys/priv.h> 2346155Sphk#include <sys/proc.h> 24124882Srwatson#include <sys/taskqueue.h> 2546155Sphk#include <sys/jail.h> 2687275Srwatson#include <sys/lock.h> 2787275Srwatson#include <sys/mutex.h> 28168401Spjd#include <sys/sx.h> 29113275Smike#include <sys/namei.h> 30147185Spjd#include <sys/mount.h> 31113275Smike#include <sys/queue.h> 3246155Sphk#include <sys/socket.h> 33113275Smike#include <sys/syscallsubr.h> 3457163Srwatson#include <sys/sysctl.h> 35113275Smike#include <sys/vnode.h> 3646155Sphk#include <net/if.h> 3746155Sphk#include <netinet/in.h> 3846155Sphk 39163606Srwatson#include <security/mac/mac_framework.h> 40163606Srwatson 4146155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 4246155Sphk 4389414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4457163Srwatson "Jail rules"); 4557163Srwatson 4657163Srwatsonint jail_set_hostname_allowed = 1; 4789414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 4857163Srwatson &jail_set_hostname_allowed, 0, 4957163Srwatson "Processes in jail can set their hostnames"); 5057163Srwatson 5161235Srwatsonint jail_socket_unixiproute_only = 1; 5289414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 5361235Srwatson &jail_socket_unixiproute_only, 0, 5461235Srwatson "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 5561235Srwatson 5668024Srwatsonint jail_sysvipc_allowed = 0; 5789414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 5868024Srwatson &jail_sysvipc_allowed, 0, 5968024Srwatson "Processes in jail can use System V IPC primitives"); 6068024Srwatson 61147185Spjdstatic int jail_enforce_statfs = 2; 62147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 63147185Spjd &jail_enforce_statfs, 0, 64147185Spjd "Processes in jail cannot see all mounted file systems"); 65125804Srwatson 66128664Sbmilekicint jail_allow_raw_sockets = 0; 67128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 68128664Sbmilekic &jail_allow_raw_sockets, 0, 69128664Sbmilekic "Prison root can create raw sockets"); 70128664Sbmilekic 71141543Scpercivaint jail_chflags_allowed = 0; 72141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 73141543Scperciva &jail_chflags_allowed, 0, 74141543Scperciva "Processes in jail can alter system file flags"); 75141543Scperciva 76168396Spjdint jail_mount_allowed = 0; 77168396SpjdSYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 78168396Spjd &jail_mount_allowed, 0, 79168396Spjd "Processes in jail can mount/unmount jail-friendly file systems"); 80168396Spjd 81168401Spjd/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 82113275Smikestruct prisonlist allprison; 83168401Spjdstruct sx allprison_lock; 84113275Smikeint lastprid = 0; 85113275Smikeint prisoncount = 0; 86113275Smike 87168401Spjd/* 88168401Spjd * List of jail services. Protected by allprison_lock. 89168401Spjd */ 90168401SpjdTAILQ_HEAD(prison_services_head, prison_service); 91168401Spjdstatic struct prison_services_head prison_services = 92168401Spjd TAILQ_HEAD_INITIALIZER(prison_services); 93168401Spjdstatic int prison_service_slots = 0; 94168401Spjd 95168401Spjdstruct prison_service { 96168401Spjd prison_create_t ps_create; 97168401Spjd prison_destroy_t ps_destroy; 98168401Spjd int ps_slotno; 99168401Spjd TAILQ_ENTRY(prison_service) ps_next; 100168401Spjd char ps_name[0]; 101168401Spjd}; 102168401Spjd 103113275Smikestatic void init_prison(void *); 104124882Srwatsonstatic void prison_complete(void *context, int pending); 105113275Smikestatic int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 106113275Smike 107113275Smikestatic void 108113275Smikeinit_prison(void *data __unused) 109113275Smike{ 110113275Smike 111168401Spjd sx_init(&allprison_lock, "allprison"); 112113275Smike LIST_INIT(&allprison); 113113275Smike} 114113275Smike 115113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 116113275Smike 11782710Sdillon/* 118114168Smike * struct jail_args { 119114168Smike * struct jail *jail; 120114168Smike * }; 12182710Sdillon */ 12246155Sphkint 123114168Smikejail(struct thread *td, struct jail_args *uap) 12446155Sphk{ 125113275Smike struct nameidata nd; 126113275Smike struct prison *pr, *tpr; 127168401Spjd struct prison_service *psrv; 12846155Sphk struct jail j; 129113275Smike struct jail_attach_args jaa; 130150652Scsjp int vfslocked, error, tryprid; 13146155Sphk 132114168Smike error = copyin(uap->jail, &j, sizeof(j)); 13346155Sphk if (error) 13484828Sjhb return (error); 13584828Sjhb if (j.version != 0) 13684828Sjhb return (EINVAL); 13784828Sjhb 138114168Smike MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 13993818Sjhb mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 140113275Smike pr->pr_ref = 1; 141114168Smike error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 142113275Smike if (error) 143113275Smike goto e_killmtx; 144150652Scsjp NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 145150652Scsjp pr->pr_path, td); 146113275Smike error = namei(&nd); 147150652Scsjp if (error) 148113275Smike goto e_killmtx; 149150652Scsjp vfslocked = NDHASGIANT(&nd); 150113275Smike pr->pr_root = nd.ni_vp; 151113275Smike VOP_UNLOCK(nd.ni_vp, 0, td); 152113275Smike NDFREE(&nd, NDF_ONLY_PNBUF); 153150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 154114168Smike error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 15584828Sjhb if (error) 156113275Smike goto e_dropvnref; 157113275Smike pr->pr_ip = j.ip_number; 158113275Smike pr->pr_linux = NULL; 159113275Smike pr->pr_securelevel = securelevel; 160168401Spjd if (prison_service_slots == 0) 161168401Spjd pr->pr_slots = NULL; 162168401Spjd else { 163168401Spjd pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 164168401Spjd M_PRISON, M_ZERO | M_WAITOK); 165168401Spjd } 166113275Smike 167113275Smike /* Determine next pr_id and add prison to allprison list. */ 168168401Spjd sx_xlock(&allprison_lock); 169113275Smike tryprid = lastprid + 1; 170113275Smike if (tryprid == JAIL_MAX) 171113275Smike tryprid = 1; 172113275Smikenext: 173113275Smike LIST_FOREACH(tpr, &allprison, pr_list) { 174113275Smike if (tpr->pr_id == tryprid) { 175113275Smike tryprid++; 176113275Smike if (tryprid == JAIL_MAX) { 177168401Spjd sx_xunlock(&allprison_lock); 178113275Smike error = EAGAIN; 179113275Smike goto e_dropvnref; 180113275Smike } 181113275Smike goto next; 182113275Smike } 183113275Smike } 184113275Smike pr->pr_id = jaa.jid = lastprid = tryprid; 185113275Smike LIST_INSERT_HEAD(&allprison, pr, pr_list); 186113275Smike prisoncount++; 187168401Spjd sx_downgrade(&allprison_lock); 188168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 189168401Spjd psrv->ps_create(psrv, pr); 190168401Spjd } 191168401Spjd sx_sunlock(&allprison_lock); 192113275Smike 193113275Smike error = jail_attach(td, &jaa); 194113275Smike if (error) 195113275Smike goto e_dropprref; 196113275Smike mtx_lock(&pr->pr_mtx); 197113275Smike pr->pr_ref--; 198113275Smike mtx_unlock(&pr->pr_mtx); 199113275Smike td->td_retval[0] = jaa.jid; 200113275Smike return (0); 201113275Smikee_dropprref: 202168401Spjd sx_xlock(&allprison_lock); 203113275Smike LIST_REMOVE(pr, pr_list); 204113275Smike prisoncount--; 205168401Spjd sx_downgrade(&allprison_lock); 206168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 207168401Spjd psrv->ps_destroy(psrv, pr); 208168401Spjd } 209168401Spjd sx_sunlock(&allprison_lock); 210113275Smikee_dropvnref: 211150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 212113275Smike vrele(pr->pr_root); 213150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 214113275Smikee_killmtx: 215113275Smike mtx_destroy(&pr->pr_mtx); 216113275Smike FREE(pr, M_PRISON); 217113275Smike return (error); 218113275Smike} 219113275Smike 220113275Smike/* 221114168Smike * struct jail_attach_args { 222114168Smike * int jid; 223114168Smike * }; 224113275Smike */ 225113275Smikeint 226114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 227113275Smike{ 228113275Smike struct proc *p; 229113275Smike struct ucred *newcred, *oldcred; 230113275Smike struct prison *pr; 231150652Scsjp int vfslocked, error; 232167309Spjd 233126023Snectar /* 234126023Snectar * XXX: Note that there is a slight race here if two threads 235126023Snectar * in the same privileged process attempt to attach to two 236126023Snectar * different jails at the same time. It is important for 237126023Snectar * user processes not to do this, or they might end up with 238126023Snectar * a process root from one prison, but attached to the jail 239126023Snectar * of another. 240126023Snectar */ 241164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 242126023Snectar if (error) 243126023Snectar return (error); 244126023Snectar 245113275Smike p = td->td_proc; 246168401Spjd sx_slock(&allprison_lock); 247113275Smike pr = prison_find(uap->jid); 248113275Smike if (pr == NULL) { 249168401Spjd sx_sunlock(&allprison_lock); 250113275Smike return (EINVAL); 251113275Smike } 252113275Smike pr->pr_ref++; 253113275Smike mtx_unlock(&pr->pr_mtx); 254168401Spjd sx_sunlock(&allprison_lock); 255113275Smike 256150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 257113275Smike vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td); 258113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 259113275Smike goto e_unlock; 260113275Smike#ifdef MAC 261113275Smike if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root))) 262113275Smike goto e_unlock; 263113275Smike#endif 264113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 265113275Smike change_root(pr->pr_root, td); 266150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 267113275Smike 26884828Sjhb newcred = crget(); 26984828Sjhb PROC_LOCK(p); 27084828Sjhb oldcred = p->p_ucred; 271113275Smike setsugid(p); 27284828Sjhb crcopy(newcred, oldcred); 273113630Sjhb newcred->cr_prison = pr; 27484828Sjhb p->p_ucred = newcred; 27584828Sjhb PROC_UNLOCK(p); 27684828Sjhb crfree(oldcred); 27746155Sphk return (0); 278113275Smikee_unlock: 279113275Smike VOP_UNLOCK(pr->pr_root, 0, td); 280150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 281113275Smike mtx_lock(&pr->pr_mtx); 282113275Smike pr->pr_ref--; 283113275Smike mtx_unlock(&pr->pr_mtx); 28446155Sphk return (error); 28546155Sphk} 28646155Sphk 287113275Smike/* 288113275Smike * Returns a locked prison instance, or NULL on failure. 289113275Smike */ 290168399Spjdstruct prison * 291113275Smikeprison_find(int prid) 292113275Smike{ 293113275Smike struct prison *pr; 294113275Smike 295168401Spjd sx_assert(&allprison_lock, SX_LOCKED); 296113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 297113275Smike if (pr->pr_id == prid) { 298113275Smike mtx_lock(&pr->pr_mtx); 299113275Smike return (pr); 300113275Smike } 301113275Smike } 302113275Smike return (NULL); 303113275Smike} 304113275Smike 30572786Srwatsonvoid 30672786Srwatsonprison_free(struct prison *pr) 30772786Srwatson{ 308168401Spjd struct prison_service *psrv; 30972786Srwatson 310168401Spjd sx_xlock(&allprison_lock); 31187275Srwatson mtx_lock(&pr->pr_mtx); 31272786Srwatson pr->pr_ref--; 31372786Srwatson if (pr->pr_ref == 0) { 314113275Smike LIST_REMOVE(pr, pr_list); 31587275Srwatson mtx_unlock(&pr->pr_mtx); 316113275Smike prisoncount--; 317168401Spjd sx_downgrade(&allprison_lock); 318168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 319168401Spjd psrv->ps_destroy(psrv, pr); 320168401Spjd } 321168401Spjd sx_sunlock(&allprison_lock); 322124882Srwatson 323124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 324144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 32587275Srwatson return; 32672786Srwatson } 32787275Srwatson mtx_unlock(&pr->pr_mtx); 328168401Spjd sx_xunlock(&allprison_lock); 32972786Srwatson} 33072786Srwatson 331124882Srwatsonstatic void 332124882Srwatsonprison_complete(void *context, int pending) 333124882Srwatson{ 334124882Srwatson struct prison *pr; 335150652Scsjp int vfslocked; 336124882Srwatson 337124882Srwatson pr = (struct prison *)context; 338124882Srwatson 339150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 340124882Srwatson vrele(pr->pr_root); 341150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 342124882Srwatson 343124882Srwatson mtx_destroy(&pr->pr_mtx); 344124882Srwatson if (pr->pr_linux != NULL) 345124882Srwatson FREE(pr->pr_linux, M_PRISON); 346124882Srwatson FREE(pr, M_PRISON); 347124882Srwatson} 348124882Srwatson 34972786Srwatsonvoid 35072786Srwatsonprison_hold(struct prison *pr) 35172786Srwatson{ 35272786Srwatson 35387275Srwatson mtx_lock(&pr->pr_mtx); 35472786Srwatson pr->pr_ref++; 35587275Srwatson mtx_unlock(&pr->pr_mtx); 35672786Srwatson} 35772786Srwatson 35887275Srwatsonu_int32_t 35987275Srwatsonprison_getip(struct ucred *cred) 36087275Srwatson{ 36187275Srwatson 36287275Srwatson return (cred->cr_prison->pr_ip); 36387275Srwatson} 36487275Srwatson 36546155Sphkint 36672786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip) 36746155Sphk{ 36846155Sphk u_int32_t tmp; 36946155Sphk 37072786Srwatson if (!jailed(cred)) 37146155Sphk return (0); 372167309Spjd if (flag) 37346155Sphk tmp = *ip; 37446155Sphk else 37546155Sphk tmp = ntohl(*ip); 37646155Sphk if (tmp == INADDR_ANY) { 377167309Spjd if (flag) 37872786Srwatson *ip = cred->cr_prison->pr_ip; 37946155Sphk else 38072786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 38146155Sphk return (0); 38246155Sphk } 38381114Srwatson if (tmp == INADDR_LOOPBACK) { 38481114Srwatson if (flag) 38581114Srwatson *ip = cred->cr_prison->pr_ip; 38681114Srwatson else 38781114Srwatson *ip = htonl(cred->cr_prison->pr_ip); 38881114Srwatson return (0); 38981114Srwatson } 39072786Srwatson if (cred->cr_prison->pr_ip != tmp) 39146155Sphk return (1); 39246155Sphk return (0); 39346155Sphk} 39446155Sphk 39546155Sphkvoid 39672786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 39746155Sphk{ 39846155Sphk u_int32_t tmp; 39946155Sphk 40072786Srwatson if (!jailed(cred)) 40146155Sphk return; 40246155Sphk if (flag) 40346155Sphk tmp = *ip; 40446155Sphk else 40546155Sphk tmp = ntohl(*ip); 40681114Srwatson if (tmp == INADDR_LOOPBACK) { 40746155Sphk if (flag) 40872786Srwatson *ip = cred->cr_prison->pr_ip; 40946155Sphk else 41072786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 41146155Sphk return; 41246155Sphk } 41346155Sphk return; 41446155Sphk} 41546155Sphk 41646155Sphkint 41772786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 41846155Sphk{ 419114168Smike struct sockaddr_in *sai; 42046155Sphk int ok; 42146155Sphk 422114168Smike sai = (struct sockaddr_in *)sa; 42361235Srwatson if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 42461235Srwatson ok = 1; 42561235Srwatson else if (sai->sin_family != AF_INET) 42646155Sphk ok = 0; 42772786Srwatson else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 42846155Sphk ok = 1; 42946155Sphk else 43046155Sphk ok = 0; 43146155Sphk return (ok); 43246155Sphk} 43372786Srwatson 43472786Srwatson/* 43572786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 43672786Srwatson */ 43772786Srwatsonint 438114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 43972786Srwatson{ 44072786Srwatson 44172786Srwatson if (jailed(cred1)) { 44272786Srwatson if (!jailed(cred2)) 44372786Srwatson return (ESRCH); 44472786Srwatson if (cred2->cr_prison != cred1->cr_prison) 44572786Srwatson return (ESRCH); 44672786Srwatson } 44772786Srwatson 44872786Srwatson return (0); 44972786Srwatson} 45072786Srwatson 45172786Srwatson/* 45272786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 45372786Srwatson */ 45472786Srwatsonint 455114168Smikejailed(struct ucred *cred) 45672786Srwatson{ 45772786Srwatson 45872786Srwatson return (cred->cr_prison != NULL); 45972786Srwatson} 46091384Srobert 46191384Srobert/* 46291384Srobert * Return the correct hostname for the passed credential. 46391384Srobert */ 46491391Srobertvoid 465114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 46691384Srobert{ 46791384Srobert 46891391Srobert if (jailed(cred)) { 46991391Srobert mtx_lock(&cred->cr_prison->pr_mtx); 470105354Srobert strlcpy(buf, cred->cr_prison->pr_host, size); 47191391Srobert mtx_unlock(&cred->cr_prison->pr_mtx); 472114168Smike } else 473105354Srobert strlcpy(buf, hostname, size); 47491384Srobert} 475113275Smike 476125804Srwatson/* 477147185Spjd * Determine whether the subject represented by cred can "see" 478147185Spjd * status of a mount point. 479147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 480147185Spjd * XXX: This function should be called cr_canseemount() and should be 481147185Spjd * placed in kern_prot.c. 482125804Srwatson */ 483125804Srwatsonint 484147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 485125804Srwatson{ 486147185Spjd struct prison *pr; 487147185Spjd struct statfs *sp; 488147185Spjd size_t len; 489125804Srwatson 490147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 491147185Spjd return (0); 492147185Spjd pr = cred->cr_prison; 493147185Spjd if (pr->pr_root->v_mount == mp) 494147185Spjd return (0); 495147185Spjd if (jail_enforce_statfs == 2) 496147185Spjd return (ENOENT); 497147185Spjd /* 498147185Spjd * If jail's chroot directory is set to "/" we should be able to see 499147185Spjd * all mount-points from inside a jail. 500147185Spjd * This is ugly check, but this is the only situation when jail's 501147185Spjd * directory ends with '/'. 502147185Spjd */ 503147185Spjd if (strcmp(pr->pr_path, "/") == 0) 504147185Spjd return (0); 505147185Spjd len = strlen(pr->pr_path); 506147185Spjd sp = &mp->mnt_stat; 507147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 508147185Spjd return (ENOENT); 509147185Spjd /* 510147185Spjd * Be sure that we don't have situation where jail's root directory 511147185Spjd * is "/some/path" and mount point is "/some/pathpath". 512147185Spjd */ 513147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 514147185Spjd return (ENOENT); 515147185Spjd return (0); 516147185Spjd} 517147185Spjd 518147185Spjdvoid 519147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 520147185Spjd{ 521147185Spjd char jpath[MAXPATHLEN]; 522147185Spjd struct prison *pr; 523147185Spjd size_t len; 524147185Spjd 525147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 526147185Spjd return; 527147185Spjd pr = cred->cr_prison; 528147185Spjd if (prison_canseemount(cred, mp) != 0) { 529147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 530147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 531147185Spjd sizeof(sp->f_mntonname)); 532147185Spjd return; 533125804Srwatson } 534147185Spjd if (pr->pr_root->v_mount == mp) { 535147185Spjd /* 536147185Spjd * Clear current buffer data, so we are sure nothing from 537147185Spjd * the valid path left there. 538147185Spjd */ 539147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 540147185Spjd *sp->f_mntonname = '/'; 541147185Spjd return; 542147185Spjd } 543147185Spjd /* 544147185Spjd * If jail's chroot directory is set to "/" we should be able to see 545147185Spjd * all mount-points from inside a jail. 546147185Spjd */ 547147185Spjd if (strcmp(pr->pr_path, "/") == 0) 548147185Spjd return; 549147185Spjd len = strlen(pr->pr_path); 550147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 551147185Spjd /* 552147185Spjd * Clear current buffer data, so we are sure nothing from 553147185Spjd * the valid path left there. 554147185Spjd */ 555147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 556147185Spjd if (*jpath == '\0') { 557147185Spjd /* Should never happen. */ 558147185Spjd *sp->f_mntonname = '/'; 559147185Spjd } else { 560147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 561147185Spjd } 562125804Srwatson} 563125804Srwatson 564164032Srwatson/* 565164032Srwatson * Check with permission for a specific privilege is granted within jail. We 566164032Srwatson * have a specific list of accepted privileges; the rest are denied. 567164032Srwatson */ 568164032Srwatsonint 569164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 570164032Srwatson{ 571164032Srwatson 572164032Srwatson if (!jailed(cred)) 573164032Srwatson return (0); 574164032Srwatson 575164032Srwatson switch (priv) { 576164032Srwatson 577164032Srwatson /* 578164032Srwatson * Allow ktrace privileges for root in jail. 579164032Srwatson */ 580164032Srwatson case PRIV_KTRACE: 581164032Srwatson 582166827Srwatson#if 0 583164032Srwatson /* 584164032Srwatson * Allow jailed processes to configure audit identity and 585164032Srwatson * submit audit records (login, etc). In the future we may 586164032Srwatson * want to further refine the relationship between audit and 587164032Srwatson * jail. 588164032Srwatson */ 589164032Srwatson case PRIV_AUDIT_GETAUDIT: 590164032Srwatson case PRIV_AUDIT_SETAUDIT: 591164032Srwatson case PRIV_AUDIT_SUBMIT: 592166827Srwatson#endif 593164032Srwatson 594164032Srwatson /* 595164032Srwatson * Allow jailed processes to manipulate process UNIX 596164032Srwatson * credentials in any way they see fit. 597164032Srwatson */ 598164032Srwatson case PRIV_CRED_SETUID: 599164032Srwatson case PRIV_CRED_SETEUID: 600164032Srwatson case PRIV_CRED_SETGID: 601164032Srwatson case PRIV_CRED_SETEGID: 602164032Srwatson case PRIV_CRED_SETGROUPS: 603164032Srwatson case PRIV_CRED_SETREUID: 604164032Srwatson case PRIV_CRED_SETREGID: 605164032Srwatson case PRIV_CRED_SETRESUID: 606164032Srwatson case PRIV_CRED_SETRESGID: 607164032Srwatson 608164032Srwatson /* 609164032Srwatson * Jail implements visibility constraints already, so allow 610164032Srwatson * jailed root to override uid/gid-based constraints. 611164032Srwatson */ 612164032Srwatson case PRIV_SEEOTHERGIDS: 613164032Srwatson case PRIV_SEEOTHERUIDS: 614164032Srwatson 615164032Srwatson /* 616164032Srwatson * Jail implements inter-process debugging limits already, so 617164032Srwatson * allow jailed root various debugging privileges. 618164032Srwatson */ 619164032Srwatson case PRIV_DEBUG_DIFFCRED: 620164032Srwatson case PRIV_DEBUG_SUGID: 621164032Srwatson case PRIV_DEBUG_UNPRIV: 622164032Srwatson 623164032Srwatson /* 624164032Srwatson * Allow jail to set various resource limits and login 625164032Srwatson * properties, and for now, exceed process resource limits. 626164032Srwatson */ 627164032Srwatson case PRIV_PROC_LIMIT: 628164032Srwatson case PRIV_PROC_SETLOGIN: 629164032Srwatson case PRIV_PROC_SETRLIMIT: 630164032Srwatson 631164032Srwatson /* 632164032Srwatson * System V and POSIX IPC privileges are granted in jail. 633164032Srwatson */ 634164032Srwatson case PRIV_IPC_READ: 635164032Srwatson case PRIV_IPC_WRITE: 636164032Srwatson case PRIV_IPC_ADMIN: 637164032Srwatson case PRIV_IPC_MSGSIZE: 638164032Srwatson case PRIV_MQ_ADMIN: 639164032Srwatson 640164032Srwatson /* 641164032Srwatson * Jail implements its own inter-process limits, so allow 642164032Srwatson * root processes in jail to change scheduling on other 643164032Srwatson * processes in the same jail. Likewise for signalling. 644164032Srwatson */ 645164032Srwatson case PRIV_SCHED_DIFFCRED: 646164032Srwatson case PRIV_SIGNAL_DIFFCRED: 647164032Srwatson case PRIV_SIGNAL_SUGID: 648164032Srwatson 649164032Srwatson /* 650164032Srwatson * Allow jailed processes to write to sysctls marked as jail 651164032Srwatson * writable. 652164032Srwatson */ 653164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 654164032Srwatson 655164032Srwatson /* 656164032Srwatson * Allow root in jail to manage a variety of quota 657166831Srwatson * properties. These should likely be conditional on a 658166831Srwatson * configuration option. 659164032Srwatson */ 660166832Srwatson case PRIV_VFS_GETQUOTA: 661166832Srwatson case PRIV_VFS_SETQUOTA: 662164032Srwatson 663164032Srwatson /* 664164032Srwatson * Since Jail relies on chroot() to implement file system 665164032Srwatson * protections, grant many VFS privileges to root in jail. 666164032Srwatson * Be careful to exclude mount-related and NFS-related 667164032Srwatson * privileges. 668164032Srwatson */ 669164032Srwatson case PRIV_VFS_READ: 670164032Srwatson case PRIV_VFS_WRITE: 671164032Srwatson case PRIV_VFS_ADMIN: 672164032Srwatson case PRIV_VFS_EXEC: 673164032Srwatson case PRIV_VFS_LOOKUP: 674164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 675164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 676164032Srwatson case PRIV_VFS_CHOWN: 677164032Srwatson case PRIV_VFS_CHROOT: 678167152Spjd case PRIV_VFS_RETAINSUGID: 679164032Srwatson case PRIV_VFS_FCHROOT: 680164032Srwatson case PRIV_VFS_LINK: 681164032Srwatson case PRIV_VFS_SETGID: 682164032Srwatson case PRIV_VFS_STICKYFILE: 683164032Srwatson return (0); 684164032Srwatson 685164032Srwatson /* 686164032Srwatson * Depending on the global setting, allow privilege of 687164032Srwatson * setting system flags. 688164032Srwatson */ 689164032Srwatson case PRIV_VFS_SYSFLAGS: 690164032Srwatson if (jail_chflags_allowed) 691164032Srwatson return (0); 692164032Srwatson else 693164032Srwatson return (EPERM); 694164032Srwatson 695164032Srwatson /* 696168396Spjd * Depending on the global setting, allow privilege of 697168396Spjd * mounting/unmounting file systems. 698168396Spjd */ 699168396Spjd case PRIV_VFS_MOUNT: 700168396Spjd case PRIV_VFS_UNMOUNT: 701168396Spjd case PRIV_VFS_MOUNT_NONUSER: 702168396Spjd if (jail_mount_allowed) 703168396Spjd return (0); 704168396Spjd else 705168396Spjd return (EPERM); 706168396Spjd 707168396Spjd /* 708164032Srwatson * Allow jailed root to bind reserved ports. 709164032Srwatson */ 710164032Srwatson case PRIV_NETINET_RESERVEDPORT: 711164032Srwatson return (0); 712164032Srwatson 713164032Srwatson /* 714164032Srwatson * Conditionally allow creating raw sockets in jail. 715164032Srwatson */ 716164032Srwatson case PRIV_NETINET_RAW: 717164032Srwatson if (jail_allow_raw_sockets) 718164032Srwatson return (0); 719164032Srwatson else 720164032Srwatson return (EPERM); 721164032Srwatson 722164032Srwatson /* 723164032Srwatson * Since jail implements its own visibility limits on netstat 724164032Srwatson * sysctls, allow getcred. This allows identd to work in 725164032Srwatson * jail. 726164032Srwatson */ 727164032Srwatson case PRIV_NETINET_GETCRED: 728164032Srwatson return (0); 729164032Srwatson 730164032Srwatson default: 731164032Srwatson /* 732164032Srwatson * In all remaining cases, deny the privilege request. This 733164032Srwatson * includes almost all network privileges, many system 734164032Srwatson * configuration privileges. 735164032Srwatson */ 736164032Srwatson return (EPERM); 737164032Srwatson } 738164032Srwatson} 739164032Srwatson 740168401Spjd/* 741168401Spjd * Register jail service. Provides 'create' and 'destroy' methods. 742168401Spjd * 'create' method will be called for every existing jail and all 743168401Spjd * jails in the future as they beeing created. 744168401Spjd * 'destroy' method will be called for every jail going away and 745168401Spjd * for all existing jails at the time of service deregistration. 746168401Spjd */ 747168401Spjdstruct prison_service * 748168401Spjdprison_service_register(const char *name, prison_create_t create, 749168401Spjd prison_destroy_t destroy) 750168401Spjd{ 751168401Spjd struct prison_service *psrv, *psrv2; 752168401Spjd struct prison *pr; 753168401Spjd int reallocate = 1, slotno = 0; 754168401Spjd void **slots, **oldslots; 755168401Spjd 756168401Spjd psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 757168401Spjd M_WAITOK | M_ZERO); 758168401Spjd psrv->ps_create = create; 759168401Spjd psrv->ps_destroy = destroy; 760168401Spjd strcpy(psrv->ps_name, name); 761168401Spjd /* 762168401Spjd * Grab the allprison_lock here, so we won't miss any jail 763168401Spjd * creation/destruction. 764168401Spjd */ 765168401Spjd sx_xlock(&allprison_lock); 766168401Spjd#ifdef INVARIANTS 767168401Spjd /* 768168401Spjd * Verify if service is not already registered. 769168401Spjd */ 770168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 771168401Spjd KASSERT(strcmp(psrv2->ps_name, name) != 0, 772168401Spjd ("jail service %s already registered", name)); 773168401Spjd } 774168401Spjd#endif 775168401Spjd /* 776168401Spjd * Find free slot. When there is no existing free slot available, 777168401Spjd * allocate one at the end. 778168401Spjd */ 779168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 780168401Spjd if (psrv2->ps_slotno != slotno) { 781168401Spjd KASSERT(slotno < psrv2->ps_slotno, 782168401Spjd ("Invalid slotno (slotno=%d >= ps_slotno=%d", 783168401Spjd slotno, psrv2->ps_slotno)); 784168401Spjd /* We found free slot. */ 785168401Spjd reallocate = 0; 786168401Spjd break; 787168401Spjd } 788168401Spjd slotno++; 789168401Spjd } 790168401Spjd psrv->ps_slotno = slotno; 791168401Spjd /* 792168401Spjd * Keep the list sorted by slot number. 793168401Spjd */ 794168401Spjd if (psrv2 != NULL) { 795168401Spjd KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 796168401Spjd TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 797168401Spjd } else { 798168401Spjd KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 799168401Spjd TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 800168401Spjd } 801168401Spjd prison_service_slots++; 802168401Spjd sx_downgrade(&allprison_lock); 803168401Spjd /* 804168401Spjd * Allocate memory for new slot if we didn't found empty one. 805168401Spjd * Do not use realloc(9), because pr_slots is protected with a mutex, 806168401Spjd * so we can't sleep. 807168401Spjd */ 808168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 809168401Spjd if (reallocate) { 810168401Spjd /* First allocate memory with M_WAITOK. */ 811168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 812168401Spjd M_PRISON, M_WAITOK); 813168401Spjd /* Now grab the mutex and replace pr_slots. */ 814168401Spjd mtx_lock(&pr->pr_mtx); 815168401Spjd oldslots = pr->pr_slots; 816168401Spjd if (psrv->ps_slotno > 0) { 817168401Spjd bcopy(oldslots, slots, 818168401Spjd sizeof(*slots) * (prison_service_slots - 1)); 819168401Spjd } 820168401Spjd slots[psrv->ps_slotno] = NULL; 821168401Spjd pr->pr_slots = slots; 822168401Spjd mtx_unlock(&pr->pr_mtx); 823168401Spjd if (oldslots != NULL) 824168401Spjd free(oldslots, M_PRISON); 825168401Spjd } 826168401Spjd /* 827168401Spjd * Call 'create' method for each existing jail. 828168401Spjd */ 829168401Spjd psrv->ps_create(psrv, pr); 830168401Spjd } 831168401Spjd sx_sunlock(&allprison_lock); 832168401Spjd 833168401Spjd return (psrv); 834168401Spjd} 835168401Spjd 836168401Spjdvoid 837168401Spjdprison_service_deregister(struct prison_service *psrv) 838168401Spjd{ 839168401Spjd struct prison *pr; 840168401Spjd void **slots, **oldslots; 841168401Spjd int last = 0; 842168401Spjd 843168401Spjd sx_xlock(&allprison_lock); 844168401Spjd if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 845168401Spjd last = 1; 846168401Spjd TAILQ_REMOVE(&prison_services, psrv, ps_next); 847168401Spjd prison_service_slots--; 848168401Spjd sx_downgrade(&allprison_lock); 849168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 850168401Spjd /* 851168401Spjd * Call 'destroy' method for every currently existing jail. 852168401Spjd */ 853168401Spjd psrv->ps_destroy(psrv, pr); 854168401Spjd /* 855168401Spjd * If this is the last slot, free the memory allocated for it. 856168401Spjd */ 857168401Spjd if (last) { 858168401Spjd if (prison_service_slots == 0) 859168401Spjd slots = NULL; 860168401Spjd else { 861168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 862168401Spjd M_PRISON, M_WAITOK); 863168401Spjd } 864168401Spjd mtx_lock(&pr->pr_mtx); 865168401Spjd oldslots = pr->pr_slots; 866168401Spjd /* 867168401Spjd * We require setting slot to NULL after freeing it, 868168401Spjd * this way we can check for memory leaks here. 869168401Spjd */ 870168401Spjd KASSERT(oldslots[psrv->ps_slotno] == NULL, 871168401Spjd ("Slot %d (service %s, jailid=%d) still contains data?", 872168401Spjd psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 873168401Spjd if (psrv->ps_slotno > 0) { 874168401Spjd bcopy(oldslots, slots, 875168401Spjd sizeof(*slots) * prison_service_slots); 876168401Spjd } 877168401Spjd pr->pr_slots = slots; 878168401Spjd mtx_unlock(&pr->pr_mtx); 879168401Spjd KASSERT(oldslots != NULL, ("oldslots == NULL")); 880168401Spjd free(oldslots, M_PRISON); 881168401Spjd } 882168401Spjd } 883168401Spjd sx_sunlock(&allprison_lock); 884168401Spjd free(psrv, M_PRISON); 885168401Spjd} 886168401Spjd 887168401Spjd/* 888168401Spjd * Function sets data for the given jail in slot assigned for the given 889168401Spjd * jail service. 890168401Spjd */ 891168401Spjdvoid 892168401Spjdprison_service_data_set(struct prison_service *psrv, struct prison *pr, 893168401Spjd void *data) 894168401Spjd{ 895168401Spjd 896168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 897168401Spjd pr->pr_slots[psrv->ps_slotno] = data; 898168401Spjd} 899168401Spjd 900168401Spjd/* 901168401Spjd * Function clears slots assigned for the given jail service in the given 902168401Spjd * prison structure and returns current slot data. 903168401Spjd */ 904168401Spjdvoid * 905168401Spjdprison_service_data_del(struct prison_service *psrv, struct prison *pr) 906168401Spjd{ 907168401Spjd void *data; 908168401Spjd 909168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 910168401Spjd data = pr->pr_slots[psrv->ps_slotno]; 911168401Spjd pr->pr_slots[psrv->ps_slotno] = NULL; 912168401Spjd return (data); 913168401Spjd} 914168401Spjd 915168401Spjd/* 916168401Spjd * Function returns current data from the slot assigned to the given jail 917168401Spjd * service for the given jail. 918168401Spjd */ 919168401Spjdvoid * 920168401Spjdprison_service_data_get(struct prison_service *psrv, struct prison *pr) 921168401Spjd{ 922168401Spjd 923168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 924168401Spjd return (pr->pr_slots[psrv->ps_slotno]); 925168401Spjd} 926168401Spjd 927113275Smikestatic int 928113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 929113275Smike{ 930113275Smike struct xprison *xp, *sxp; 931113275Smike struct prison *pr; 932113275Smike int count, error; 933113275Smike 934127020Spjd if (jailed(req->td->td_ucred)) 935125806Srwatson return (0); 936113275Smike 937168401Spjd sx_slock(&allprison_lock); 938168401Spjd if ((count = prisoncount) == 0) { 939168401Spjd sx_sunlock(&allprison_lock); 940113275Smike return (0); 941168401Spjd } 942113275Smike 943113275Smike sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 944167309Spjd 945113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 946113275Smike mtx_lock(&pr->pr_mtx); 947113275Smike xp->pr_version = XPRISON_VERSION; 948113275Smike xp->pr_id = pr->pr_id; 949113275Smike strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 950113275Smike strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 951113275Smike xp->pr_ip = pr->pr_ip; 952113275Smike mtx_unlock(&pr->pr_mtx); 953113275Smike xp++; 954113275Smike } 955168401Spjd sx_sunlock(&allprison_lock); 956113275Smike 957113275Smike error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 958113275Smike free(sxp, M_TEMP); 959167354Spjd return (error); 960113275Smike} 961113275Smike 962113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 963113275Smike NULL, 0, sysctl_jail_list, "S", "List of active jails"); 964126004Spjd 965126004Spjdstatic int 966126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 967126004Spjd{ 968126004Spjd int error, injail; 969126004Spjd 970126004Spjd injail = jailed(req->td->td_ucred); 971126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 972126004Spjd 973126004Spjd return (error); 974126004Spjd} 975126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 976126004Spjd NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 977