kern_jail.c revision 175630
1139804Simp/*- 246197Sphk * ---------------------------------------------------------------------------- 346197Sphk * "THE BEER-WARE LICENSE" (Revision 42): 446197Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 546197Sphk * can do whatever you want with this stuff. If we meet some day, and you think 646197Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 746197Sphk * ---------------------------------------------------------------------------- 846197Sphk */ 946155Sphk 10116182Sobrien#include <sys/cdefs.h> 11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 175630 2008-01-24 08:25:59Z bz $"); 12116182Sobrien 13131177Spjd#include "opt_mac.h" 14131177Spjd 1546155Sphk#include <sys/param.h> 1646155Sphk#include <sys/types.h> 1746155Sphk#include <sys/kernel.h> 1846155Sphk#include <sys/systm.h> 1946155Sphk#include <sys/errno.h> 2046155Sphk#include <sys/sysproto.h> 2146155Sphk#include <sys/malloc.h> 22164032Srwatson#include <sys/priv.h> 2346155Sphk#include <sys/proc.h> 24124882Srwatson#include <sys/taskqueue.h> 2546155Sphk#include <sys/jail.h> 2687275Srwatson#include <sys/lock.h> 2787275Srwatson#include <sys/mutex.h> 28168401Spjd#include <sys/sx.h> 29113275Smike#include <sys/namei.h> 30147185Spjd#include <sys/mount.h> 31113275Smike#include <sys/queue.h> 3246155Sphk#include <sys/socket.h> 33113275Smike#include <sys/syscallsubr.h> 3457163Srwatson#include <sys/sysctl.h> 35113275Smike#include <sys/vnode.h> 3646155Sphk#include <net/if.h> 3746155Sphk#include <netinet/in.h> 3846155Sphk 39163606Srwatson#include <security/mac/mac_framework.h> 40163606Srwatson 4146155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 4246155Sphk 4389414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4457163Srwatson "Jail rules"); 4557163Srwatson 4657163Srwatsonint jail_set_hostname_allowed = 1; 4789414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 4857163Srwatson &jail_set_hostname_allowed, 0, 4957163Srwatson "Processes in jail can set their hostnames"); 5057163Srwatson 5161235Srwatsonint jail_socket_unixiproute_only = 1; 5289414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 5361235Srwatson &jail_socket_unixiproute_only, 0, 5461235Srwatson "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 5561235Srwatson 5668024Srwatsonint jail_sysvipc_allowed = 0; 5789414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 5868024Srwatson &jail_sysvipc_allowed, 0, 5968024Srwatson "Processes in jail can use System V IPC primitives"); 6068024Srwatson 61147185Spjdstatic int jail_enforce_statfs = 2; 62147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 63147185Spjd &jail_enforce_statfs, 0, 64147185Spjd "Processes in jail cannot see all mounted file systems"); 65125804Srwatson 66128664Sbmilekicint jail_allow_raw_sockets = 0; 67128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 68128664Sbmilekic &jail_allow_raw_sockets, 0, 69128664Sbmilekic "Prison root can create raw sockets"); 70128664Sbmilekic 71141543Scpercivaint jail_chflags_allowed = 0; 72141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 73141543Scperciva &jail_chflags_allowed, 0, 74141543Scperciva "Processes in jail can alter system file flags"); 75141543Scperciva 76168396Spjdint jail_mount_allowed = 0; 77168396SpjdSYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 78168396Spjd &jail_mount_allowed, 0, 79168396Spjd "Processes in jail can mount/unmount jail-friendly file systems"); 80168396Spjd 81168401Spjd/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 82113275Smikestruct prisonlist allprison; 83168401Spjdstruct sx allprison_lock; 84113275Smikeint lastprid = 0; 85113275Smikeint prisoncount = 0; 86113275Smike 87168401Spjd/* 88168401Spjd * List of jail services. Protected by allprison_lock. 89168401Spjd */ 90168401SpjdTAILQ_HEAD(prison_services_head, prison_service); 91168401Spjdstatic struct prison_services_head prison_services = 92168401Spjd TAILQ_HEAD_INITIALIZER(prison_services); 93168401Spjdstatic int prison_service_slots = 0; 94168401Spjd 95168401Spjdstruct prison_service { 96168401Spjd prison_create_t ps_create; 97168401Spjd prison_destroy_t ps_destroy; 98168401Spjd int ps_slotno; 99168401Spjd TAILQ_ENTRY(prison_service) ps_next; 100168401Spjd char ps_name[0]; 101168401Spjd}; 102168401Spjd 103113275Smikestatic void init_prison(void *); 104124882Srwatsonstatic void prison_complete(void *context, int pending); 105113275Smikestatic int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 106113275Smike 107113275Smikestatic void 108113275Smikeinit_prison(void *data __unused) 109113275Smike{ 110113275Smike 111168401Spjd sx_init(&allprison_lock, "allprison"); 112113275Smike LIST_INIT(&allprison); 113113275Smike} 114113275Smike 115113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 116113275Smike 11782710Sdillon/* 118114168Smike * struct jail_args { 119114168Smike * struct jail *jail; 120114168Smike * }; 12182710Sdillon */ 12246155Sphkint 123114168Smikejail(struct thread *td, struct jail_args *uap) 12446155Sphk{ 125113275Smike struct nameidata nd; 126113275Smike struct prison *pr, *tpr; 127168401Spjd struct prison_service *psrv; 12846155Sphk struct jail j; 129113275Smike struct jail_attach_args jaa; 130150652Scsjp int vfslocked, error, tryprid; 13146155Sphk 132114168Smike error = copyin(uap->jail, &j, sizeof(j)); 13346155Sphk if (error) 13484828Sjhb return (error); 13584828Sjhb if (j.version != 0) 13684828Sjhb return (EINVAL); 13784828Sjhb 138114168Smike MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 13993818Sjhb mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 140113275Smike pr->pr_ref = 1; 141114168Smike error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 142113275Smike if (error) 143113275Smike goto e_killmtx; 144150652Scsjp NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 145150652Scsjp pr->pr_path, td); 146113275Smike error = namei(&nd); 147150652Scsjp if (error) 148113275Smike goto e_killmtx; 149150652Scsjp vfslocked = NDHASGIANT(&nd); 150113275Smike pr->pr_root = nd.ni_vp; 151175294Sattilio VOP_UNLOCK(nd.ni_vp, 0); 152113275Smike NDFREE(&nd, NDF_ONLY_PNBUF); 153150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 154114168Smike error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 15584828Sjhb if (error) 156113275Smike goto e_dropvnref; 157113275Smike pr->pr_ip = j.ip_number; 158113275Smike pr->pr_linux = NULL; 159113275Smike pr->pr_securelevel = securelevel; 160168401Spjd if (prison_service_slots == 0) 161168401Spjd pr->pr_slots = NULL; 162168401Spjd else { 163168401Spjd pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 164168401Spjd M_PRISON, M_ZERO | M_WAITOK); 165168401Spjd } 166113275Smike 167113275Smike /* Determine next pr_id and add prison to allprison list. */ 168168401Spjd sx_xlock(&allprison_lock); 169113275Smike tryprid = lastprid + 1; 170113275Smike if (tryprid == JAIL_MAX) 171113275Smike tryprid = 1; 172113275Smikenext: 173113275Smike LIST_FOREACH(tpr, &allprison, pr_list) { 174113275Smike if (tpr->pr_id == tryprid) { 175113275Smike tryprid++; 176113275Smike if (tryprid == JAIL_MAX) { 177168401Spjd sx_xunlock(&allprison_lock); 178113275Smike error = EAGAIN; 179113275Smike goto e_dropvnref; 180113275Smike } 181113275Smike goto next; 182113275Smike } 183113275Smike } 184113275Smike pr->pr_id = jaa.jid = lastprid = tryprid; 185113275Smike LIST_INSERT_HEAD(&allprison, pr, pr_list); 186113275Smike prisoncount++; 187168401Spjd sx_downgrade(&allprison_lock); 188168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 189168401Spjd psrv->ps_create(psrv, pr); 190168401Spjd } 191168401Spjd sx_sunlock(&allprison_lock); 192113275Smike 193113275Smike error = jail_attach(td, &jaa); 194113275Smike if (error) 195113275Smike goto e_dropprref; 196113275Smike mtx_lock(&pr->pr_mtx); 197113275Smike pr->pr_ref--; 198113275Smike mtx_unlock(&pr->pr_mtx); 199113275Smike td->td_retval[0] = jaa.jid; 200113275Smike return (0); 201113275Smikee_dropprref: 202168401Spjd sx_xlock(&allprison_lock); 203113275Smike LIST_REMOVE(pr, pr_list); 204113275Smike prisoncount--; 205168401Spjd sx_downgrade(&allprison_lock); 206168401Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 207168401Spjd psrv->ps_destroy(psrv, pr); 208168401Spjd } 209168401Spjd sx_sunlock(&allprison_lock); 210113275Smikee_dropvnref: 211150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 212113275Smike vrele(pr->pr_root); 213150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 214113275Smikee_killmtx: 215113275Smike mtx_destroy(&pr->pr_mtx); 216113275Smike FREE(pr, M_PRISON); 217113275Smike return (error); 218113275Smike} 219113275Smike 220113275Smike/* 221114168Smike * struct jail_attach_args { 222114168Smike * int jid; 223114168Smike * }; 224113275Smike */ 225113275Smikeint 226114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap) 227113275Smike{ 228113275Smike struct proc *p; 229113275Smike struct ucred *newcred, *oldcred; 230113275Smike struct prison *pr; 231150652Scsjp int vfslocked, error; 232167309Spjd 233126023Snectar /* 234126023Snectar * XXX: Note that there is a slight race here if two threads 235126023Snectar * in the same privileged process attempt to attach to two 236126023Snectar * different jails at the same time. It is important for 237126023Snectar * user processes not to do this, or they might end up with 238126023Snectar * a process root from one prison, but attached to the jail 239126023Snectar * of another. 240126023Snectar */ 241164032Srwatson error = priv_check(td, PRIV_JAIL_ATTACH); 242126023Snectar if (error) 243126023Snectar return (error); 244126023Snectar 245113275Smike p = td->td_proc; 246168401Spjd sx_slock(&allprison_lock); 247113275Smike pr = prison_find(uap->jid); 248113275Smike if (pr == NULL) { 249168401Spjd sx_sunlock(&allprison_lock); 250113275Smike return (EINVAL); 251113275Smike } 252113275Smike pr->pr_ref++; 253113275Smike mtx_unlock(&pr->pr_mtx); 254168401Spjd sx_sunlock(&allprison_lock); 255113275Smike 256150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 257175202Sattilio vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 258113275Smike if ((error = change_dir(pr->pr_root, td)) != 0) 259113275Smike goto e_unlock; 260113275Smike#ifdef MAC 261172930Srwatson if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 262113275Smike goto e_unlock; 263113275Smike#endif 264175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 265113275Smike change_root(pr->pr_root, td); 266150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 267113275Smike 26884828Sjhb newcred = crget(); 26984828Sjhb PROC_LOCK(p); 27084828Sjhb oldcred = p->p_ucred; 271113275Smike setsugid(p); 27284828Sjhb crcopy(newcred, oldcred); 273113630Sjhb newcred->cr_prison = pr; 27484828Sjhb p->p_ucred = newcred; 27584828Sjhb PROC_UNLOCK(p); 27684828Sjhb crfree(oldcred); 27746155Sphk return (0); 278113275Smikee_unlock: 279175294Sattilio VOP_UNLOCK(pr->pr_root, 0); 280150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 281113275Smike mtx_lock(&pr->pr_mtx); 282113275Smike pr->pr_ref--; 283113275Smike mtx_unlock(&pr->pr_mtx); 28446155Sphk return (error); 28546155Sphk} 28646155Sphk 287113275Smike/* 288113275Smike * Returns a locked prison instance, or NULL on failure. 289113275Smike */ 290168399Spjdstruct prison * 291113275Smikeprison_find(int prid) 292113275Smike{ 293113275Smike struct prison *pr; 294113275Smike 295168401Spjd sx_assert(&allprison_lock, SX_LOCKED); 296113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 297113275Smike if (pr->pr_id == prid) { 298113275Smike mtx_lock(&pr->pr_mtx); 299168489Spjd if (pr->pr_ref == 0) { 300168489Spjd mtx_unlock(&pr->pr_mtx); 301168489Spjd break; 302168489Spjd } 303113275Smike return (pr); 304113275Smike } 305113275Smike } 306113275Smike return (NULL); 307113275Smike} 308113275Smike 30972786Srwatsonvoid 31072786Srwatsonprison_free(struct prison *pr) 31172786Srwatson{ 31272786Srwatson 31387275Srwatson mtx_lock(&pr->pr_mtx); 31472786Srwatson pr->pr_ref--; 31572786Srwatson if (pr->pr_ref == 0) { 316168483Spjd mtx_unlock(&pr->pr_mtx); 317124882Srwatson TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 318144660Sjeff taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 31987275Srwatson return; 32072786Srwatson } 32187275Srwatson mtx_unlock(&pr->pr_mtx); 32272786Srwatson} 32372786Srwatson 324124882Srwatsonstatic void 325124882Srwatsonprison_complete(void *context, int pending) 326124882Srwatson{ 327168489Spjd struct prison_service *psrv; 328124882Srwatson struct prison *pr; 329150652Scsjp int vfslocked; 330124882Srwatson 331124882Srwatson pr = (struct prison *)context; 332124882Srwatson 333168489Spjd sx_xlock(&allprison_lock); 334168489Spjd LIST_REMOVE(pr, pr_list); 335168489Spjd prisoncount--; 336168489Spjd sx_downgrade(&allprison_lock); 337168489Spjd TAILQ_FOREACH(psrv, &prison_services, ps_next) { 338168489Spjd psrv->ps_destroy(psrv, pr); 339168489Spjd } 340168489Spjd sx_sunlock(&allprison_lock); 341168489Spjd 342150652Scsjp vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 343124882Srwatson vrele(pr->pr_root); 344150652Scsjp VFS_UNLOCK_GIANT(vfslocked); 345124882Srwatson 346124882Srwatson mtx_destroy(&pr->pr_mtx); 347124882Srwatson if (pr->pr_linux != NULL) 348124882Srwatson FREE(pr->pr_linux, M_PRISON); 349124882Srwatson FREE(pr, M_PRISON); 350124882Srwatson} 351124882Srwatson 35272786Srwatsonvoid 35372786Srwatsonprison_hold(struct prison *pr) 35472786Srwatson{ 35572786Srwatson 35687275Srwatson mtx_lock(&pr->pr_mtx); 357168489Spjd KASSERT(pr->pr_ref > 0, 358168489Spjd ("Trying to hold dead prison (id=%d).", pr->pr_id)); 35972786Srwatson pr->pr_ref++; 36087275Srwatson mtx_unlock(&pr->pr_mtx); 36172786Srwatson} 36272786Srwatson 36387275Srwatsonu_int32_t 36487275Srwatsonprison_getip(struct ucred *cred) 36587275Srwatson{ 36687275Srwatson 36787275Srwatson return (cred->cr_prison->pr_ip); 36887275Srwatson} 36987275Srwatson 37046155Sphkint 37172786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip) 37246155Sphk{ 37346155Sphk u_int32_t tmp; 37446155Sphk 37572786Srwatson if (!jailed(cred)) 37646155Sphk return (0); 377167309Spjd if (flag) 37846155Sphk tmp = *ip; 37946155Sphk else 38046155Sphk tmp = ntohl(*ip); 38146155Sphk if (tmp == INADDR_ANY) { 382167309Spjd if (flag) 38372786Srwatson *ip = cred->cr_prison->pr_ip; 38446155Sphk else 38572786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 38646155Sphk return (0); 38746155Sphk } 38881114Srwatson if (tmp == INADDR_LOOPBACK) { 38981114Srwatson if (flag) 39081114Srwatson *ip = cred->cr_prison->pr_ip; 39181114Srwatson else 39281114Srwatson *ip = htonl(cred->cr_prison->pr_ip); 39381114Srwatson return (0); 39481114Srwatson } 39572786Srwatson if (cred->cr_prison->pr_ip != tmp) 39646155Sphk return (1); 39746155Sphk return (0); 39846155Sphk} 39946155Sphk 40046155Sphkvoid 40172786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 40246155Sphk{ 40346155Sphk u_int32_t tmp; 40446155Sphk 40572786Srwatson if (!jailed(cred)) 40646155Sphk return; 40746155Sphk if (flag) 40846155Sphk tmp = *ip; 40946155Sphk else 41046155Sphk tmp = ntohl(*ip); 41181114Srwatson if (tmp == INADDR_LOOPBACK) { 41246155Sphk if (flag) 41372786Srwatson *ip = cred->cr_prison->pr_ip; 41446155Sphk else 41572786Srwatson *ip = htonl(cred->cr_prison->pr_ip); 41646155Sphk return; 41746155Sphk } 41846155Sphk return; 41946155Sphk} 42046155Sphk 42146155Sphkint 42272786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa) 42346155Sphk{ 424114168Smike struct sockaddr_in *sai; 42546155Sphk int ok; 42646155Sphk 427114168Smike sai = (struct sockaddr_in *)sa; 42861235Srwatson if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 42961235Srwatson ok = 1; 43061235Srwatson else if (sai->sin_family != AF_INET) 43146155Sphk ok = 0; 43272786Srwatson else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 43346155Sphk ok = 1; 43446155Sphk else 43546155Sphk ok = 0; 43646155Sphk return (ok); 43746155Sphk} 43872786Srwatson 43972786Srwatson/* 44072786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 44172786Srwatson */ 44272786Srwatsonint 443114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2) 44472786Srwatson{ 44572786Srwatson 44672786Srwatson if (jailed(cred1)) { 44772786Srwatson if (!jailed(cred2)) 44872786Srwatson return (ESRCH); 44972786Srwatson if (cred2->cr_prison != cred1->cr_prison) 45072786Srwatson return (ESRCH); 45172786Srwatson } 45272786Srwatson 45372786Srwatson return (0); 45472786Srwatson} 45572786Srwatson 45672786Srwatson/* 45772786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0. 45872786Srwatson */ 45972786Srwatsonint 460114168Smikejailed(struct ucred *cred) 46172786Srwatson{ 46272786Srwatson 46372786Srwatson return (cred->cr_prison != NULL); 46472786Srwatson} 46591384Srobert 46691384Srobert/* 46791384Srobert * Return the correct hostname for the passed credential. 46891384Srobert */ 46991391Srobertvoid 470114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size) 47191384Srobert{ 47291384Srobert 47391391Srobert if (jailed(cred)) { 47491391Srobert mtx_lock(&cred->cr_prison->pr_mtx); 475105354Srobert strlcpy(buf, cred->cr_prison->pr_host, size); 47691391Srobert mtx_unlock(&cred->cr_prison->pr_mtx); 477114168Smike } else 478105354Srobert strlcpy(buf, hostname, size); 47991384Srobert} 480113275Smike 481125804Srwatson/* 482147185Spjd * Determine whether the subject represented by cred can "see" 483147185Spjd * status of a mount point. 484147185Spjd * Returns: 0 for permitted, ENOENT otherwise. 485147185Spjd * XXX: This function should be called cr_canseemount() and should be 486147185Spjd * placed in kern_prot.c. 487125804Srwatson */ 488125804Srwatsonint 489147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp) 490125804Srwatson{ 491147185Spjd struct prison *pr; 492147185Spjd struct statfs *sp; 493147185Spjd size_t len; 494125804Srwatson 495147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 496147185Spjd return (0); 497147185Spjd pr = cred->cr_prison; 498147185Spjd if (pr->pr_root->v_mount == mp) 499147185Spjd return (0); 500147185Spjd if (jail_enforce_statfs == 2) 501147185Spjd return (ENOENT); 502147185Spjd /* 503147185Spjd * If jail's chroot directory is set to "/" we should be able to see 504147185Spjd * all mount-points from inside a jail. 505147185Spjd * This is ugly check, but this is the only situation when jail's 506147185Spjd * directory ends with '/'. 507147185Spjd */ 508147185Spjd if (strcmp(pr->pr_path, "/") == 0) 509147185Spjd return (0); 510147185Spjd len = strlen(pr->pr_path); 511147185Spjd sp = &mp->mnt_stat; 512147185Spjd if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 513147185Spjd return (ENOENT); 514147185Spjd /* 515147185Spjd * Be sure that we don't have situation where jail's root directory 516147185Spjd * is "/some/path" and mount point is "/some/pathpath". 517147185Spjd */ 518147185Spjd if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 519147185Spjd return (ENOENT); 520147185Spjd return (0); 521147185Spjd} 522147185Spjd 523147185Spjdvoid 524147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 525147185Spjd{ 526147185Spjd char jpath[MAXPATHLEN]; 527147185Spjd struct prison *pr; 528147185Spjd size_t len; 529147185Spjd 530147185Spjd if (!jailed(cred) || jail_enforce_statfs == 0) 531147185Spjd return; 532147185Spjd pr = cred->cr_prison; 533147185Spjd if (prison_canseemount(cred, mp) != 0) { 534147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 535147185Spjd strlcpy(sp->f_mntonname, "[restricted]", 536147185Spjd sizeof(sp->f_mntonname)); 537147185Spjd return; 538125804Srwatson } 539147185Spjd if (pr->pr_root->v_mount == mp) { 540147185Spjd /* 541147185Spjd * Clear current buffer data, so we are sure nothing from 542147185Spjd * the valid path left there. 543147185Spjd */ 544147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 545147185Spjd *sp->f_mntonname = '/'; 546147185Spjd return; 547147185Spjd } 548147185Spjd /* 549147185Spjd * If jail's chroot directory is set to "/" we should be able to see 550147185Spjd * all mount-points from inside a jail. 551147185Spjd */ 552147185Spjd if (strcmp(pr->pr_path, "/") == 0) 553147185Spjd return; 554147185Spjd len = strlen(pr->pr_path); 555147185Spjd strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 556147185Spjd /* 557147185Spjd * Clear current buffer data, so we are sure nothing from 558147185Spjd * the valid path left there. 559147185Spjd */ 560147185Spjd bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 561147185Spjd if (*jpath == '\0') { 562147185Spjd /* Should never happen. */ 563147185Spjd *sp->f_mntonname = '/'; 564147185Spjd } else { 565147185Spjd strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 566147185Spjd } 567125804Srwatson} 568125804Srwatson 569164032Srwatson/* 570164032Srwatson * Check with permission for a specific privilege is granted within jail. We 571164032Srwatson * have a specific list of accepted privileges; the rest are denied. 572164032Srwatson */ 573164032Srwatsonint 574164032Srwatsonprison_priv_check(struct ucred *cred, int priv) 575164032Srwatson{ 576164032Srwatson 577164032Srwatson if (!jailed(cred)) 578164032Srwatson return (0); 579164032Srwatson 580164032Srwatson switch (priv) { 581164032Srwatson 582164032Srwatson /* 583164032Srwatson * Allow ktrace privileges for root in jail. 584164032Srwatson */ 585164032Srwatson case PRIV_KTRACE: 586164032Srwatson 587166827Srwatson#if 0 588164032Srwatson /* 589164032Srwatson * Allow jailed processes to configure audit identity and 590164032Srwatson * submit audit records (login, etc). In the future we may 591164032Srwatson * want to further refine the relationship between audit and 592164032Srwatson * jail. 593164032Srwatson */ 594164032Srwatson case PRIV_AUDIT_GETAUDIT: 595164032Srwatson case PRIV_AUDIT_SETAUDIT: 596164032Srwatson case PRIV_AUDIT_SUBMIT: 597166827Srwatson#endif 598164032Srwatson 599164032Srwatson /* 600164032Srwatson * Allow jailed processes to manipulate process UNIX 601164032Srwatson * credentials in any way they see fit. 602164032Srwatson */ 603164032Srwatson case PRIV_CRED_SETUID: 604164032Srwatson case PRIV_CRED_SETEUID: 605164032Srwatson case PRIV_CRED_SETGID: 606164032Srwatson case PRIV_CRED_SETEGID: 607164032Srwatson case PRIV_CRED_SETGROUPS: 608164032Srwatson case PRIV_CRED_SETREUID: 609164032Srwatson case PRIV_CRED_SETREGID: 610164032Srwatson case PRIV_CRED_SETRESUID: 611164032Srwatson case PRIV_CRED_SETRESGID: 612164032Srwatson 613164032Srwatson /* 614164032Srwatson * Jail implements visibility constraints already, so allow 615164032Srwatson * jailed root to override uid/gid-based constraints. 616164032Srwatson */ 617164032Srwatson case PRIV_SEEOTHERGIDS: 618164032Srwatson case PRIV_SEEOTHERUIDS: 619164032Srwatson 620164032Srwatson /* 621164032Srwatson * Jail implements inter-process debugging limits already, so 622164032Srwatson * allow jailed root various debugging privileges. 623164032Srwatson */ 624164032Srwatson case PRIV_DEBUG_DIFFCRED: 625164032Srwatson case PRIV_DEBUG_SUGID: 626164032Srwatson case PRIV_DEBUG_UNPRIV: 627164032Srwatson 628164032Srwatson /* 629164032Srwatson * Allow jail to set various resource limits and login 630164032Srwatson * properties, and for now, exceed process resource limits. 631164032Srwatson */ 632164032Srwatson case PRIV_PROC_LIMIT: 633164032Srwatson case PRIV_PROC_SETLOGIN: 634164032Srwatson case PRIV_PROC_SETRLIMIT: 635164032Srwatson 636164032Srwatson /* 637164032Srwatson * System V and POSIX IPC privileges are granted in jail. 638164032Srwatson */ 639164032Srwatson case PRIV_IPC_READ: 640164032Srwatson case PRIV_IPC_WRITE: 641164032Srwatson case PRIV_IPC_ADMIN: 642164032Srwatson case PRIV_IPC_MSGSIZE: 643164032Srwatson case PRIV_MQ_ADMIN: 644164032Srwatson 645164032Srwatson /* 646164032Srwatson * Jail implements its own inter-process limits, so allow 647164032Srwatson * root processes in jail to change scheduling on other 648164032Srwatson * processes in the same jail. Likewise for signalling. 649164032Srwatson */ 650164032Srwatson case PRIV_SCHED_DIFFCRED: 651164032Srwatson case PRIV_SIGNAL_DIFFCRED: 652164032Srwatson case PRIV_SIGNAL_SUGID: 653164032Srwatson 654164032Srwatson /* 655164032Srwatson * Allow jailed processes to write to sysctls marked as jail 656164032Srwatson * writable. 657164032Srwatson */ 658164032Srwatson case PRIV_SYSCTL_WRITEJAIL: 659164032Srwatson 660164032Srwatson /* 661164032Srwatson * Allow root in jail to manage a variety of quota 662166831Srwatson * properties. These should likely be conditional on a 663166831Srwatson * configuration option. 664164032Srwatson */ 665166832Srwatson case PRIV_VFS_GETQUOTA: 666166832Srwatson case PRIV_VFS_SETQUOTA: 667164032Srwatson 668164032Srwatson /* 669164032Srwatson * Since Jail relies on chroot() to implement file system 670164032Srwatson * protections, grant many VFS privileges to root in jail. 671164032Srwatson * Be careful to exclude mount-related and NFS-related 672164032Srwatson * privileges. 673164032Srwatson */ 674164032Srwatson case PRIV_VFS_READ: 675164032Srwatson case PRIV_VFS_WRITE: 676164032Srwatson case PRIV_VFS_ADMIN: 677164032Srwatson case PRIV_VFS_EXEC: 678164032Srwatson case PRIV_VFS_LOOKUP: 679164032Srwatson case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 680164032Srwatson case PRIV_VFS_CHFLAGS_DEV: 681164032Srwatson case PRIV_VFS_CHOWN: 682164032Srwatson case PRIV_VFS_CHROOT: 683167152Spjd case PRIV_VFS_RETAINSUGID: 684164032Srwatson case PRIV_VFS_FCHROOT: 685164032Srwatson case PRIV_VFS_LINK: 686164032Srwatson case PRIV_VFS_SETGID: 687172860Srwatson case PRIV_VFS_STAT: 688164032Srwatson case PRIV_VFS_STICKYFILE: 689164032Srwatson return (0); 690164032Srwatson 691164032Srwatson /* 692164032Srwatson * Depending on the global setting, allow privilege of 693164032Srwatson * setting system flags. 694164032Srwatson */ 695164032Srwatson case PRIV_VFS_SYSFLAGS: 696164032Srwatson if (jail_chflags_allowed) 697164032Srwatson return (0); 698164032Srwatson else 699164032Srwatson return (EPERM); 700164032Srwatson 701164032Srwatson /* 702168396Spjd * Depending on the global setting, allow privilege of 703168396Spjd * mounting/unmounting file systems. 704168396Spjd */ 705168396Spjd case PRIV_VFS_MOUNT: 706168396Spjd case PRIV_VFS_UNMOUNT: 707168396Spjd case PRIV_VFS_MOUNT_NONUSER: 708168699Spjd case PRIV_VFS_MOUNT_OWNER: 709168396Spjd if (jail_mount_allowed) 710168396Spjd return (0); 711168396Spjd else 712168396Spjd return (EPERM); 713168396Spjd 714168396Spjd /* 715168591Srwatson * Allow jailed root to bind reserved ports and reuse in-use 716168591Srwatson * ports. 717164032Srwatson */ 718164032Srwatson case PRIV_NETINET_RESERVEDPORT: 719168591Srwatson case PRIV_NETINET_REUSEPORT: 720164032Srwatson return (0); 721164032Srwatson 722164032Srwatson /* 723175630Sbz * Allow jailed root to set certian IPv4/6 (option) headers. 724175630Sbz */ 725175630Sbz case PRIV_NETINET_SETHDROPTS: 726175630Sbz return (0); 727175630Sbz 728175630Sbz /* 729164032Srwatson * Conditionally allow creating raw sockets in jail. 730164032Srwatson */ 731164032Srwatson case PRIV_NETINET_RAW: 732164032Srwatson if (jail_allow_raw_sockets) 733164032Srwatson return (0); 734164032Srwatson else 735164032Srwatson return (EPERM); 736164032Srwatson 737164032Srwatson /* 738164032Srwatson * Since jail implements its own visibility limits on netstat 739164032Srwatson * sysctls, allow getcred. This allows identd to work in 740164032Srwatson * jail. 741164032Srwatson */ 742164032Srwatson case PRIV_NETINET_GETCRED: 743164032Srwatson return (0); 744164032Srwatson 745164032Srwatson default: 746164032Srwatson /* 747164032Srwatson * In all remaining cases, deny the privilege request. This 748164032Srwatson * includes almost all network privileges, many system 749164032Srwatson * configuration privileges. 750164032Srwatson */ 751164032Srwatson return (EPERM); 752164032Srwatson } 753164032Srwatson} 754164032Srwatson 755168401Spjd/* 756168401Spjd * Register jail service. Provides 'create' and 'destroy' methods. 757168401Spjd * 'create' method will be called for every existing jail and all 758168401Spjd * jails in the future as they beeing created. 759168401Spjd * 'destroy' method will be called for every jail going away and 760168401Spjd * for all existing jails at the time of service deregistration. 761168401Spjd */ 762168401Spjdstruct prison_service * 763168401Spjdprison_service_register(const char *name, prison_create_t create, 764168401Spjd prison_destroy_t destroy) 765168401Spjd{ 766168401Spjd struct prison_service *psrv, *psrv2; 767168401Spjd struct prison *pr; 768168401Spjd int reallocate = 1, slotno = 0; 769168401Spjd void **slots, **oldslots; 770168401Spjd 771168401Spjd psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 772168401Spjd M_WAITOK | M_ZERO); 773168401Spjd psrv->ps_create = create; 774168401Spjd psrv->ps_destroy = destroy; 775168401Spjd strcpy(psrv->ps_name, name); 776168401Spjd /* 777168401Spjd * Grab the allprison_lock here, so we won't miss any jail 778168401Spjd * creation/destruction. 779168401Spjd */ 780168401Spjd sx_xlock(&allprison_lock); 781168401Spjd#ifdef INVARIANTS 782168401Spjd /* 783168401Spjd * Verify if service is not already registered. 784168401Spjd */ 785168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 786168401Spjd KASSERT(strcmp(psrv2->ps_name, name) != 0, 787168401Spjd ("jail service %s already registered", name)); 788168401Spjd } 789168401Spjd#endif 790168401Spjd /* 791168401Spjd * Find free slot. When there is no existing free slot available, 792168401Spjd * allocate one at the end. 793168401Spjd */ 794168401Spjd TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 795168401Spjd if (psrv2->ps_slotno != slotno) { 796168401Spjd KASSERT(slotno < psrv2->ps_slotno, 797168401Spjd ("Invalid slotno (slotno=%d >= ps_slotno=%d", 798168401Spjd slotno, psrv2->ps_slotno)); 799168401Spjd /* We found free slot. */ 800168401Spjd reallocate = 0; 801168401Spjd break; 802168401Spjd } 803168401Spjd slotno++; 804168401Spjd } 805168401Spjd psrv->ps_slotno = slotno; 806168401Spjd /* 807168401Spjd * Keep the list sorted by slot number. 808168401Spjd */ 809168401Spjd if (psrv2 != NULL) { 810168401Spjd KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 811168401Spjd TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 812168401Spjd } else { 813168401Spjd KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 814168401Spjd TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 815168401Spjd } 816168401Spjd prison_service_slots++; 817168401Spjd sx_downgrade(&allprison_lock); 818168401Spjd /* 819168401Spjd * Allocate memory for new slot if we didn't found empty one. 820168401Spjd * Do not use realloc(9), because pr_slots is protected with a mutex, 821168401Spjd * so we can't sleep. 822168401Spjd */ 823168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 824168401Spjd if (reallocate) { 825168401Spjd /* First allocate memory with M_WAITOK. */ 826168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 827168401Spjd M_PRISON, M_WAITOK); 828168401Spjd /* Now grab the mutex and replace pr_slots. */ 829168401Spjd mtx_lock(&pr->pr_mtx); 830168401Spjd oldslots = pr->pr_slots; 831168401Spjd if (psrv->ps_slotno > 0) { 832168401Spjd bcopy(oldslots, slots, 833168401Spjd sizeof(*slots) * (prison_service_slots - 1)); 834168401Spjd } 835168401Spjd slots[psrv->ps_slotno] = NULL; 836168401Spjd pr->pr_slots = slots; 837168401Spjd mtx_unlock(&pr->pr_mtx); 838168401Spjd if (oldslots != NULL) 839168401Spjd free(oldslots, M_PRISON); 840168401Spjd } 841168401Spjd /* 842168401Spjd * Call 'create' method for each existing jail. 843168401Spjd */ 844168401Spjd psrv->ps_create(psrv, pr); 845168401Spjd } 846168401Spjd sx_sunlock(&allprison_lock); 847168401Spjd 848168401Spjd return (psrv); 849168401Spjd} 850168401Spjd 851168401Spjdvoid 852168401Spjdprison_service_deregister(struct prison_service *psrv) 853168401Spjd{ 854168401Spjd struct prison *pr; 855168401Spjd void **slots, **oldslots; 856168401Spjd int last = 0; 857168401Spjd 858168401Spjd sx_xlock(&allprison_lock); 859168401Spjd if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 860168401Spjd last = 1; 861168401Spjd TAILQ_REMOVE(&prison_services, psrv, ps_next); 862168401Spjd prison_service_slots--; 863168401Spjd sx_downgrade(&allprison_lock); 864168401Spjd LIST_FOREACH(pr, &allprison, pr_list) { 865168401Spjd /* 866168401Spjd * Call 'destroy' method for every currently existing jail. 867168401Spjd */ 868168401Spjd psrv->ps_destroy(psrv, pr); 869168401Spjd /* 870168401Spjd * If this is the last slot, free the memory allocated for it. 871168401Spjd */ 872168401Spjd if (last) { 873168401Spjd if (prison_service_slots == 0) 874168401Spjd slots = NULL; 875168401Spjd else { 876168401Spjd slots = malloc(sizeof(*slots) * prison_service_slots, 877168401Spjd M_PRISON, M_WAITOK); 878168401Spjd } 879168401Spjd mtx_lock(&pr->pr_mtx); 880168401Spjd oldslots = pr->pr_slots; 881168401Spjd /* 882168401Spjd * We require setting slot to NULL after freeing it, 883168401Spjd * this way we can check for memory leaks here. 884168401Spjd */ 885168401Spjd KASSERT(oldslots[psrv->ps_slotno] == NULL, 886168401Spjd ("Slot %d (service %s, jailid=%d) still contains data?", 887168401Spjd psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 888168401Spjd if (psrv->ps_slotno > 0) { 889168401Spjd bcopy(oldslots, slots, 890168401Spjd sizeof(*slots) * prison_service_slots); 891168401Spjd } 892168401Spjd pr->pr_slots = slots; 893168401Spjd mtx_unlock(&pr->pr_mtx); 894168401Spjd KASSERT(oldslots != NULL, ("oldslots == NULL")); 895168401Spjd free(oldslots, M_PRISON); 896168401Spjd } 897168401Spjd } 898168401Spjd sx_sunlock(&allprison_lock); 899168401Spjd free(psrv, M_PRISON); 900168401Spjd} 901168401Spjd 902168401Spjd/* 903168401Spjd * Function sets data for the given jail in slot assigned for the given 904168401Spjd * jail service. 905168401Spjd */ 906168401Spjdvoid 907168401Spjdprison_service_data_set(struct prison_service *psrv, struct prison *pr, 908168401Spjd void *data) 909168401Spjd{ 910168401Spjd 911168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 912168401Spjd pr->pr_slots[psrv->ps_slotno] = data; 913168401Spjd} 914168401Spjd 915168401Spjd/* 916168401Spjd * Function clears slots assigned for the given jail service in the given 917168401Spjd * prison structure and returns current slot data. 918168401Spjd */ 919168401Spjdvoid * 920168401Spjdprison_service_data_del(struct prison_service *psrv, struct prison *pr) 921168401Spjd{ 922168401Spjd void *data; 923168401Spjd 924168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 925168401Spjd data = pr->pr_slots[psrv->ps_slotno]; 926168401Spjd pr->pr_slots[psrv->ps_slotno] = NULL; 927168401Spjd return (data); 928168401Spjd} 929168401Spjd 930168401Spjd/* 931168401Spjd * Function returns current data from the slot assigned to the given jail 932168401Spjd * service for the given jail. 933168401Spjd */ 934168401Spjdvoid * 935168401Spjdprison_service_data_get(struct prison_service *psrv, struct prison *pr) 936168401Spjd{ 937168401Spjd 938168401Spjd mtx_assert(&pr->pr_mtx, MA_OWNED); 939168401Spjd return (pr->pr_slots[psrv->ps_slotno]); 940168401Spjd} 941168401Spjd 942113275Smikestatic int 943113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS) 944113275Smike{ 945113275Smike struct xprison *xp, *sxp; 946113275Smike struct prison *pr; 947113275Smike int count, error; 948113275Smike 949127020Spjd if (jailed(req->td->td_ucred)) 950125806Srwatson return (0); 951113275Smike 952168401Spjd sx_slock(&allprison_lock); 953168401Spjd if ((count = prisoncount) == 0) { 954168401Spjd sx_sunlock(&allprison_lock); 955113275Smike return (0); 956168401Spjd } 957113275Smike 958113275Smike sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 959167309Spjd 960113275Smike LIST_FOREACH(pr, &allprison, pr_list) { 961113275Smike xp->pr_version = XPRISON_VERSION; 962113275Smike xp->pr_id = pr->pr_id; 963168487Spjd xp->pr_ip = pr->pr_ip; 964113275Smike strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 965168487Spjd mtx_lock(&pr->pr_mtx); 966113275Smike strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 967113275Smike mtx_unlock(&pr->pr_mtx); 968113275Smike xp++; 969113275Smike } 970168401Spjd sx_sunlock(&allprison_lock); 971113275Smike 972113275Smike error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 973113275Smike free(sxp, M_TEMP); 974167354Spjd return (error); 975113275Smike} 976113275Smike 977113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 978113275Smike NULL, 0, sysctl_jail_list, "S", "List of active jails"); 979126004Spjd 980126004Spjdstatic int 981126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 982126004Spjd{ 983126004Spjd int error, injail; 984126004Spjd 985126004Spjd injail = jailed(req->td->td_ucred); 986126004Spjd error = SYSCTL_OUT(req, &injail, sizeof(injail)); 987126004Spjd 988126004Spjd return (error); 989126004Spjd} 990126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 991126004Spjd NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 992