kern_jail.c revision 191896
/*-
 * Copyright (c) 1999 Poul-Henning Kamp.
 * Copyright (c) 2008 Bjoern A. Zeeb.
 * Copyright (c) 2009 James Gritton.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 191896 2009-05-07 18:36:47Z jamie $"); 31 32#include "opt_ddb.h" 33#include "opt_inet.h" 34#include "opt_inet6.h" 35#include "opt_mac.h" 36 37#include <sys/param.h> 38#include <sys/types.h> 39#include <sys/kernel.h> 40#include <sys/systm.h> 41#include <sys/errno.h> 42#include <sys/sysproto.h> 43#include <sys/malloc.h> 44#include <sys/priv.h> 45#include <sys/proc.h> 46#include <sys/taskqueue.h> 47#include <sys/fcntl.h> 48#include <sys/jail.h> 49#include <sys/lock.h> 50#include <sys/mutex.h> 51#include <sys/osd.h> 52#include <sys/sx.h> 53#include <sys/namei.h> 54#include <sys/mount.h> 55#include <sys/queue.h> 56#include <sys/socket.h> 57#include <sys/syscallsubr.h> 58#include <sys/sysctl.h> 59#include <sys/vnode.h> 60#include <sys/vimage.h> 61#include <net/if.h> 62#include <netinet/in.h> 63#ifdef DDB 64#include <ddb/ddb.h> 65#ifdef INET6 66#include <netinet6/in6_var.h> 67#endif /* INET6 */ 68#endif /* DDB */ 69 70#include <security/mac/mac_framework.h> 71 72MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 73 74SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 75 "Jail rules"); 76 77int jail_set_hostname_allowed = 1; 78SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 79 &jail_set_hostname_allowed, 0, 80 "Processes in jail can set their hostnames"); 81 82int jail_socket_unixiproute_only = 1; 83SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 84 &jail_socket_unixiproute_only, 0, 85 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 86 87int jail_sysvipc_allowed = 0; 88SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 89 &jail_sysvipc_allowed, 0, 90 "Processes in jail can use System V IPC primitives"); 91 92static int jail_enforce_statfs = 2; 93SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 94 &jail_enforce_statfs, 0, 95 "Processes in jail cannot see all mounted file systems"); 
96 97int jail_allow_raw_sockets = 0; 98SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 99 &jail_allow_raw_sockets, 0, 100 "Prison root can create raw sockets"); 101 102int jail_chflags_allowed = 0; 103SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 104 &jail_chflags_allowed, 0, 105 "Processes in jail can alter system file flags"); 106 107int jail_mount_allowed = 0; 108SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 109 &jail_mount_allowed, 0, 110 "Processes in jail can mount/unmount jail-friendly file systems"); 111 112int jail_max_af_ips = 255; 113SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 114 &jail_max_af_ips, 0, 115 "Number of IP addresses a jail may have at most per address family"); 116 117/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 118struct sx allprison_lock; 119SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); 120struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); 121int lastprid = 0; 122int prisoncount = 0; 123 124static int do_jail_attach(struct thread *td, struct prison *pr); 125static void prison_complete(void *context, int pending); 126static void prison_deref(struct prison *pr, int flags); 127#ifdef INET 128static int _prison_check_ip4(struct prison *pr, struct in_addr *ia); 129#endif 130#ifdef INET6 131static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); 132#endif 133static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 134 135/* Flags for prison_deref */ 136#define PD_DEREF 0x01 137#define PD_DEUREF 0x02 138#define PD_LOCKED 0x04 139#define PD_LIST_SLOCKED 0x08 140#define PD_LIST_XLOCKED 0x10 141 142#ifdef INET 143static int 144qcmp_v4(const void *ip1, const void *ip2) 145{ 146 in_addr_t iaa, iab; 147 148 /* 149 * We need to compare in HBO here to get the list sorted as expected 150 * by the result of the code. Sorting NBO addresses gives you 151 * interesting results. If you do not understand, do not try. 
152 */ 153 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 154 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 155 156 /* 157 * Do not simply return the difference of the two numbers, the int is 158 * not wide enough. 159 */ 160 if (iaa > iab) 161 return (1); 162 else if (iaa < iab) 163 return (-1); 164 else 165 return (0); 166} 167#endif 168 169#ifdef INET6 170static int 171qcmp_v6(const void *ip1, const void *ip2) 172{ 173 const struct in6_addr *ia6a, *ia6b; 174 int i, rc; 175 176 ia6a = (const struct in6_addr *)ip1; 177 ia6b = (const struct in6_addr *)ip2; 178 179 rc = 0; 180 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 181 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 182 rc = 1; 183 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 184 rc = -1; 185 } 186 return (rc); 187} 188#endif 189 190/* 191 * struct jail_args { 192 * struct jail *jail; 193 * }; 194 */ 195int 196jail(struct thread *td, struct jail_args *uap) 197{ 198 struct iovec optiov[10]; 199 struct uio opt; 200 char *u_path, *u_hostname, *u_name; 201#ifdef INET 202 struct in_addr *u_ip4; 203#endif 204#ifdef INET6 205 struct in6_addr *u_ip6; 206#endif 207 uint32_t version; 208 int error; 209 210 error = copyin(uap->jail, &version, sizeof(uint32_t)); 211 if (error) 212 return (error); 213 214 switch (version) { 215 case 0: 216 { 217 /* FreeBSD single IPv4 jails. 
*/ 218 struct jail_v0 j0; 219 220 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 221 if (error) 222 return (error); 223 u_path = malloc(MAXPATHLEN + MAXHOSTNAMELEN, M_TEMP, M_WAITOK); 224 u_hostname = u_path + MAXPATHLEN; 225 opt.uio_iov = optiov; 226 opt.uio_iovcnt = 4; 227 opt.uio_offset = -1; 228 opt.uio_resid = -1; 229 opt.uio_segflg = UIO_SYSSPACE; 230 opt.uio_rw = UIO_READ; 231 opt.uio_td = td; 232 optiov[0].iov_base = "path"; 233 optiov[0].iov_len = sizeof("path"); 234 optiov[1].iov_base = u_path; 235 error = 236 copyinstr(j0.path, u_path, MAXPATHLEN, &optiov[1].iov_len); 237 if (error) { 238 free(u_path, M_TEMP); 239 return (error); 240 } 241 optiov[2].iov_base = "host.hostname"; 242 optiov[2].iov_len = sizeof("host.hostname"); 243 optiov[3].iov_base = u_hostname; 244 error = copyinstr(j0.hostname, u_hostname, MAXHOSTNAMELEN, 245 &optiov[3].iov_len); 246 if (error) { 247 free(u_path, M_TEMP); 248 return (error); 249 } 250#ifdef INET 251 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 252 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 253 opt.uio_iovcnt++; 254 optiov[opt.uio_iovcnt].iov_base = &j0.ip_number; 255 j0.ip_number = htonl(j0.ip_number); 256 optiov[opt.uio_iovcnt].iov_len = sizeof(j0.ip_number); 257 opt.uio_iovcnt++; 258#endif 259 break; 260 } 261 262 case 1: 263 /* 264 * Version 1 was used by multi-IPv4 jail implementations 265 * that never made it into the official kernel. 266 */ 267 return (EINVAL); 268 269 case 2: /* JAIL_API_VERSION */ 270 { 271 /* FreeBSD multi-IPv4/IPv6,noIP jails. 
*/ 272 struct jail j; 273 size_t tmplen; 274 275 error = copyin(uap->jail, &j, sizeof(struct jail)); 276 if (error) 277 return (error); 278 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 279#ifdef INET 280 if (j.ip4s > jail_max_af_ips) 281 return (EINVAL); 282 tmplen += j.ip4s * sizeof(struct in_addr); 283#else 284 if (j.ip4s > 0) 285 return (EINVAL); 286#endif 287#ifdef INET6 288 if (j.ip6s > jail_max_af_ips) 289 return (EINVAL); 290 tmplen += j.ip6s * sizeof(struct in6_addr); 291#else 292 if (j.ip6s > 0) 293 return (EINVAL); 294#endif 295 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 296 u_hostname = u_path + MAXPATHLEN; 297 u_name = u_hostname + MAXHOSTNAMELEN; 298#ifdef INET 299 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 300#endif 301#ifdef INET6 302#ifdef INET 303 u_ip6 = (struct in6_addr *)(u_ip4 + j.ip4s); 304#else 305 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 306#endif 307#endif 308 opt.uio_iov = optiov; 309 opt.uio_iovcnt = 4; 310 opt.uio_offset = -1; 311 opt.uio_resid = -1; 312 opt.uio_segflg = UIO_SYSSPACE; 313 opt.uio_rw = UIO_READ; 314 opt.uio_td = td; 315 optiov[0].iov_base = "path"; 316 optiov[0].iov_len = sizeof("path"); 317 optiov[1].iov_base = u_path; 318 error = 319 copyinstr(j.path, u_path, MAXPATHLEN, &optiov[1].iov_len); 320 if (error) { 321 free(u_path, M_TEMP); 322 return (error); 323 } 324 optiov[2].iov_base = "host.hostname"; 325 optiov[2].iov_len = sizeof("host.hostname"); 326 optiov[3].iov_base = u_hostname; 327 error = copyinstr(j.hostname, u_hostname, MAXHOSTNAMELEN, 328 &optiov[3].iov_len); 329 if (error) { 330 free(u_path, M_TEMP); 331 return (error); 332 } 333 if (j.jailname != NULL) { 334 optiov[opt.uio_iovcnt].iov_base = "name"; 335 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 336 opt.uio_iovcnt++; 337 optiov[opt.uio_iovcnt].iov_base = u_name; 338 error = copyinstr(j.jailname, u_name, MAXHOSTNAMELEN, 339 &optiov[opt.uio_iovcnt].iov_len); 340 if (error) { 341 free(u_path, M_TEMP); 342 return (error); 
343 } 344 opt.uio_iovcnt++; 345 } 346#ifdef INET 347 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 348 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 349 opt.uio_iovcnt++; 350 optiov[opt.uio_iovcnt].iov_base = u_ip4; 351 optiov[opt.uio_iovcnt].iov_len = 352 j.ip4s * sizeof(struct in_addr); 353 error = copyin(j.ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 354 if (error) { 355 free(u_path, M_TEMP); 356 return (error); 357 } 358 opt.uio_iovcnt++; 359#endif 360#ifdef INET6 361 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 362 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 363 opt.uio_iovcnt++; 364 optiov[opt.uio_iovcnt].iov_base = u_ip6; 365 optiov[opt.uio_iovcnt].iov_len = 366 j.ip6s * sizeof(struct in6_addr); 367 error = copyin(j.ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 368 if (error) { 369 free(u_path, M_TEMP); 370 return (error); 371 } 372 opt.uio_iovcnt++; 373#endif 374 break; 375 } 376 377 default: 378 /* Sci-Fi jails are not supported, sorry. */ 379 return (EINVAL); 380 } 381 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 382 free(u_path, M_TEMP); 383 return (error); 384} 385 386/* 387 * struct jail_set_args { 388 * struct iovec *iovp; 389 * unsigned int iovcnt; 390 * int flags; 391 * }; 392 */ 393int 394jail_set(struct thread *td, struct jail_set_args *uap) 395{ 396 struct uio *auio; 397 int error; 398 399 /* Check that we have an even number of iovecs. 
*/ 400 if (uap->iovcnt & 1) 401 return (EINVAL); 402 403 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 404 if (error) 405 return (error); 406 error = kern_jail_set(td, auio, uap->flags); 407 free(auio, M_IOV); 408 return (error); 409} 410 411int 412kern_jail_set(struct thread *td, struct uio *optuio, int flags) 413{ 414 struct nameidata nd; 415#ifdef INET 416 struct in_addr *ip4; 417#endif 418#ifdef INET6 419 struct in6_addr *ip6; 420#endif 421 struct vfsopt *opt; 422 struct vfsoptlist *opts; 423 struct prison *pr, *deadpr, *tpr; 424 struct vnode *root; 425 char *errmsg, *host, *name, *p, *path; 426 void *op; 427 int created, cuflags, error, errmsg_len, errmsg_pos; 428 int gotslevel, jid, len; 429 int slevel, vfslocked; 430#if defined(INET) || defined(INET6) 431 int ii; 432#endif 433#ifdef INET 434 int ip4s; 435#endif 436#ifdef INET6 437 int ip6s; 438#endif 439 unsigned pr_flags, ch_flags; 440 char numbuf[12]; 441 442 error = priv_check(td, PRIV_JAIL_SET); 443 if (!error && (flags & JAIL_ATTACH)) 444 error = priv_check(td, PRIV_JAIL_ATTACH); 445 if (error) 446 return (error); 447 if (flags & ~JAIL_SET_MASK) 448 return (EINVAL); 449 450 /* 451 * Check all the parameters before committing to anything. Not all 452 * errors can be caught early, but we may as well try. Also, this 453 * takes care of some expensive stuff (path lookup) before getting 454 * the allprison lock. 455 * 456 * XXX Jails are not filesystems, and jail parameters are not mount 457 * options. But it makes more sense to re-use the vfsopt code 458 * than duplicate it under a different name. 
459 */ 460 error = vfs_buildopts(optuio, &opts); 461 if (error) 462 return (error); 463#ifdef INET 464 ip4 = NULL; 465#endif 466#ifdef INET6 467 ip6 = NULL; 468#endif 469 470 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 471 if (error == ENOENT) 472 jid = 0; 473 else if (error != 0) 474 goto done_free; 475 476 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 477 if (error == ENOENT) 478 gotslevel = 0; 479 else if (error != 0) 480 goto done_free; 481 else 482 gotslevel = 1; 483 484 pr_flags = ch_flags = 0; 485 vfs_flagopt(opts, "persist", &pr_flags, PR_PERSIST); 486 vfs_flagopt(opts, "nopersist", &ch_flags, PR_PERSIST); 487 ch_flags |= pr_flags; 488 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 489 && !(pr_flags & PR_PERSIST)) { 490 error = EINVAL; 491 vfs_opterror(opts, "new jail must persist or attach"); 492 goto done_errmsg; 493 } 494 495 error = vfs_getopt(opts, "name", (void **)&name, &len); 496 if (error == ENOENT) 497 name = NULL; 498 else if (error != 0) 499 goto done_free; 500 else { 501 if (len == 0 || name[len - 1] != '\0') { 502 error = EINVAL; 503 goto done_free; 504 } 505 if (len > MAXHOSTNAMELEN) { 506 error = ENAMETOOLONG; 507 goto done_free; 508 } 509 } 510 511 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 512 if (error == ENOENT) 513 host = NULL; 514 else if (error != 0) 515 goto done_free; 516 else { 517 if (len == 0 || host[len - 1] != '\0') { 518 error = EINVAL; 519 goto done_free; 520 } 521 if (len > MAXHOSTNAMELEN) { 522 error = ENAMETOOLONG; 523 goto done_free; 524 } 525 } 526 527#ifdef INET 528 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 529 if (error == ENOENT) 530 ip4s = -1; 531 else if (error != 0) 532 goto done_free; 533 else if (ip4s & (sizeof(*ip4) - 1)) { 534 error = EINVAL; 535 goto done_free; 536 } else if (ip4s > 0) { 537 ip4s /= sizeof(*ip4); 538 if (ip4s > jail_max_af_ips) { 539 error = EINVAL; 540 vfs_opterror(opts, "too many IPv4 addresses"); 541 goto 
done_errmsg; 542 } 543 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 544 bcopy(op, ip4, ip4s * sizeof(*ip4)); 545 /* 546 * IP addresses are all sorted but ip[0] to preserve the 547 * primary IP address as given from userland. This special IP 548 * is used for unbound outgoing connections as well for 549 * "loopback" traffic. 550 */ 551 if (ip4s > 1) 552 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 553 /* 554 * Check for duplicate addresses and do some simple zero and 555 * broadcast checks. If users give other bogus addresses it is 556 * their problem. 557 * 558 * We do not have to care about byte order for these checks so 559 * we will do them in NBO. 560 */ 561 for (ii = 0; ii < ip4s; ii++) { 562 if (ip4[ii].s_addr == INADDR_ANY || 563 ip4[ii].s_addr == INADDR_BROADCAST) { 564 error = EINVAL; 565 goto done_free; 566 } 567 if ((ii+1) < ip4s && 568 (ip4[0].s_addr == ip4[ii+1].s_addr || 569 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 570 error = EINVAL; 571 goto done_free; 572 } 573 } 574 } 575#endif 576 577#ifdef INET6 578 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 579 if (error == ENOENT) 580 ip6s = -1; 581 else if (error != 0) 582 goto done_free; 583 else if (ip6s & (sizeof(*ip6) - 1)) { 584 error = EINVAL; 585 goto done_free; 586 } else if (ip6s > 0) { 587 ip6s /= sizeof(*ip6); 588 if (ip6s > jail_max_af_ips) { 589 error = EINVAL; 590 vfs_opterror(opts, "too many IPv6 addresses"); 591 goto done_errmsg; 592 } 593 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 594 bcopy(op, ip6, ip6s * sizeof(*ip6)); 595 if (ip6s > 1) 596 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 597 for (ii = 0; ii < ip6s; ii++) { 598 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[0])) { 599 error = EINVAL; 600 goto done_free; 601 } 602 if ((ii+1) < ip6s && 603 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 604 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 605 { 606 error = EINVAL; 607 goto done_free; 608 } 609 } 610 } 611#endif 612 613 root = NULL; 614 error = vfs_getopt(opts, "path", 
(void **)&path, &len); 615 if (error == ENOENT) 616 path = NULL; 617 else if (error != 0) 618 goto done_free; 619 else { 620 if (flags & JAIL_UPDATE) { 621 error = EINVAL; 622 vfs_opterror(opts, 623 "path cannot be changed after creation"); 624 goto done_errmsg; 625 } 626 if (len == 0 || path[len - 1] != '\0') { 627 error = EINVAL; 628 goto done_free; 629 } 630 if (len > MAXPATHLEN) { 631 error = ENAMETOOLONG; 632 goto done_free; 633 } 634 if (len < 2 || (len == 2 && path[0] == '/')) 635 path = NULL; 636 else { 637 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, 638 path, td); 639 error = namei(&nd); 640 if (error) 641 goto done_free; 642 vfslocked = NDHASGIANT(&nd); 643 root = nd.ni_vp; 644 NDFREE(&nd, NDF_ONLY_PNBUF); 645 if (root->v_type != VDIR) { 646 error = ENOTDIR; 647 vrele(root); 648 VFS_UNLOCK_GIANT(vfslocked); 649 goto done_free; 650 } 651 VFS_UNLOCK_GIANT(vfslocked); 652 } 653 } 654 655 /* 656 * Grab the allprison lock before letting modules check their 657 * parameters. Once we have it, do not let go so we'll have a 658 * consistent view of the OSD list. 659 */ 660 sx_xlock(&allprison_lock); 661 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 662 if (error) 663 goto done_unlock_list; 664 665 /* By now, all parameters should have been noted. */ 666 TAILQ_FOREACH(opt, opts, link) { 667 if (!opt->seen && strcmp(opt->name, "errmsg")) { 668 error = EINVAL; 669 vfs_opterror(opts, "unknown parameter: %s", opt->name); 670 goto done_unlock_list; 671 } 672 } 673 674 /* 675 * See if we are creating a new record or updating an existing one. 676 * This abuses the file error codes ENOENT and EEXIST. 677 */ 678 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 679 if (!cuflags) { 680 error = EINVAL; 681 vfs_opterror(opts, "no valid operation (create or update)"); 682 goto done_unlock_list; 683 } 684 pr = NULL; 685 if (jid != 0) { 686 /* See if a requested jid already exists. 
*/ 687 if (jid < 0) { 688 error = EINVAL; 689 vfs_opterror(opts, "negative jid"); 690 goto done_unlock_list; 691 } 692 pr = prison_find(jid); 693 if (pr != NULL) { 694 /* Create: jid must not exist. */ 695 if (cuflags == JAIL_CREATE) { 696 mtx_unlock(&pr->pr_mtx); 697 error = EEXIST; 698 vfs_opterror(opts, "jail %d already exists", 699 jid); 700 goto done_unlock_list; 701 } 702 if (pr->pr_uref == 0) { 703 if (!(flags & JAIL_DYING)) { 704 mtx_unlock(&pr->pr_mtx); 705 error = ENOENT; 706 vfs_opterror(opts, "jail %d is dying", 707 jid); 708 goto done_unlock_list; 709 } else if ((flags & JAIL_ATTACH) || 710 (pr_flags & PR_PERSIST)) { 711 /* 712 * A dying jail might be resurrected 713 * (via attach or persist), but first 714 * it must determine if another jail 715 * has claimed its name. Accomplish 716 * this by implicitly re-setting the 717 * name. 718 */ 719 if (name == NULL) 720 name = pr->pr_name; 721 } 722 } 723 } 724 if (pr == NULL) { 725 /* Update: jid must exist. */ 726 if (cuflags == JAIL_UPDATE) { 727 error = ENOENT; 728 vfs_opterror(opts, "jail %d not found", jid); 729 goto done_unlock_list; 730 } 731 } 732 } 733 /* 734 * If the caller provided a name, look for a jail by that name. 735 * This has different semantics for creates and updates keyed by jid 736 * (where the name must not already exist in a different jail), 737 * and updates keyed by the name itself (where the name must exist 738 * because that is the jail being updated). 739 */ 740 if (name != NULL) { 741 if (name[0] != '\0') { 742 deadpr = NULL; 743 name_again: 744 TAILQ_FOREACH(tpr, &allprison, pr_list) { 745 if (tpr != pr && tpr->pr_ref > 0 && 746 !strcmp(tpr->pr_name, name)) { 747 if (pr == NULL && 748 cuflags != JAIL_CREATE) { 749 mtx_lock(&tpr->pr_mtx); 750 if (tpr->pr_ref > 0) { 751 /* 752 * Use this jail 753 * for updates. 
754 */ 755 if (tpr->pr_uref > 0) { 756 pr = tpr; 757 break; 758 } 759 deadpr = tpr; 760 } 761 mtx_unlock(&tpr->pr_mtx); 762 } else if (tpr->pr_uref > 0) { 763 /* 764 * Create, or update(jid): 765 * name must not exist in an 766 * active jail. 767 */ 768 error = EEXIST; 769 if (pr != NULL) 770 mtx_unlock(&pr->pr_mtx); 771 vfs_opterror(opts, 772 "jail \"%s\" already exists", 773 name); 774 goto done_unlock_list; 775 } 776 } 777 } 778 /* If no active jail is found, use a dying one. */ 779 if (deadpr != NULL && pr == NULL) { 780 if (flags & JAIL_DYING) { 781 mtx_lock(&deadpr->pr_mtx); 782 if (deadpr->pr_ref == 0) { 783 mtx_unlock(&deadpr->pr_mtx); 784 goto name_again; 785 } 786 pr = deadpr; 787 } else if (cuflags == JAIL_UPDATE) { 788 error = ENOENT; 789 vfs_opterror(opts, 790 "jail \"%s\" is dying", name); 791 goto done_unlock_list; 792 } 793 } 794 /* Update: name must exist if no jid. */ 795 else if (cuflags == JAIL_UPDATE && pr == NULL) { 796 error = ENOENT; 797 vfs_opterror(opts, "jail \"%s\" not found", 798 name); 799 goto done_unlock_list; 800 } 801 } 802 } 803 /* Update: must provide a jid or name. */ 804 else if (cuflags == JAIL_UPDATE && pr == NULL) { 805 error = ENOENT; 806 vfs_opterror(opts, "update specified no jail"); 807 goto done_unlock_list; 808 } 809 810 /* If there's no prison to update, create a new one and link it in. */ 811 if (pr == NULL) { 812 created = 1; 813 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 814 if (jid == 0) { 815 /* Find the next free jid. 
*/ 816 jid = lastprid + 1; 817 findnext: 818 if (jid == JAIL_MAX) 819 jid = 1; 820 TAILQ_FOREACH(tpr, &allprison, pr_list) { 821 if (tpr->pr_id < jid) 822 continue; 823 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 824 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 825 break; 826 } 827 if (jid == lastprid) { 828 error = EAGAIN; 829 vfs_opterror(opts, 830 "no available jail IDs"); 831 free(pr, M_PRISON); 832 goto done_unlock_list; 833 } 834 jid++; 835 goto findnext; 836 } 837 lastprid = jid; 838 } else { 839 /* 840 * The jail already has a jid (that did not yet exist), 841 * so just find where to insert it. 842 */ 843 TAILQ_FOREACH(tpr, &allprison, pr_list) 844 if (tpr->pr_id >= jid) { 845 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 846 break; 847 } 848 } 849 if (tpr == NULL) 850 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 851 prisoncount++; 852 853 pr->pr_id = jid; 854 if (name == NULL) 855 name = ""; 856 if (path == NULL) { 857 path = "/"; 858 root = rootvnode; 859 vref(root); 860 } 861 862 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 863 864 /* 865 * Allocate a dedicated cpuset for each jail. 866 * Unlike other initial settings, this may return an erorr. 867 */ 868 error = cpuset_create_root(td, &pr->pr_cpuset); 869 if (error) { 870 prison_deref(pr, PD_LIST_XLOCKED); 871 goto done_releroot; 872 } 873 874 mtx_lock(&pr->pr_mtx); 875 /* 876 * New prisons do not yet have a reference, because we do not 877 * want other to see the incomplete prison once the 878 * allprison_lock is downgraded. 879 */ 880 } else { 881 created = 0; 882 /* 883 * Grab a reference for existing prisons, to ensure they 884 * continue to exist for the duration of the call. 885 */ 886 pr->pr_ref++; 887 } 888 889 /* Do final error checking before setting anything. */ 890 error = 0; 891#if defined(INET) || defined(INET6) 892 if ( 893#ifdef INET 894 ip4s > 0 895#ifdef INET6 896 || 897#endif 898#endif 899#ifdef INET6 900 ip6s > 0 901#endif 902 ) 903 /* 904 * Check for conflicting IP addresses. 
We permit them if there 905 * is no more than 1 IP on each jail. If there is a duplicate 906 * on a jail with more than one IP stop checking and return 907 * error. 908 */ 909 TAILQ_FOREACH(tpr, &allprison, pr_list) { 910 if (tpr == pr || tpr->pr_uref == 0) 911 continue; 912#ifdef INET 913 if ((ip4s > 0 && tpr->pr_ip4s > 1) || 914 (ip4s > 1 && tpr->pr_ip4s > 0)) 915 for (ii = 0; ii < ip4s; ii++) 916 if (_prison_check_ip4(tpr, 917 &ip4[ii]) == 0) { 918 error = EINVAL; 919 vfs_opterror(opts, 920 "IPv4 addresses clash"); 921 goto done_deref_locked; 922 } 923#endif 924#ifdef INET6 925 if ((ip6s > 0 && tpr->pr_ip6s > 1) || 926 (ip6s > 1 && tpr->pr_ip6s > 0)) 927 for (ii = 0; ii < ip6s; ii++) 928 if (_prison_check_ip6(tpr, 929 &ip6[ii]) == 0) { 930 error = EINVAL; 931 vfs_opterror(opts, 932 "IPv6 addresses clash"); 933 goto done_deref_locked; 934 } 935#endif 936 } 937#endif 938 if (error == 0 && name != NULL) { 939 /* Give a default name of the jid. */ 940 if (name[0] == '\0') 941 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 942 else if (strtoul(name, &p, 10) != jid && *p == '\0') { 943 error = EINVAL; 944 vfs_opterror(opts, "name cannot be numeric"); 945 } 946 } 947 if (error) { 948 done_deref_locked: 949 /* 950 * Some parameter had an error so do not set anything. 951 * If this is a new jail, it will go away without ever 952 * having been seen. 953 */ 954 prison_deref(pr, created 955 ? PD_LOCKED | PD_LIST_XLOCKED 956 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 957 goto done_releroot; 958 } 959 960 /* Set the parameters of the prison. 
*/ 961#ifdef INET 962 if (ip4s >= 0) { 963 pr->pr_ip4s = ip4s; 964 free(pr->pr_ip4, M_PRISON); 965 pr->pr_ip4 = ip4; 966 ip4 = NULL; 967 } 968#endif 969#ifdef INET6 970 if (ip6s >= 0) { 971 pr->pr_ip6s = ip6s; 972 free(pr->pr_ip6, M_PRISON); 973 pr->pr_ip6 = ip6; 974 ip6 = NULL; 975 } 976#endif 977 if (gotslevel) 978 pr->pr_securelevel = slevel; 979 if (name != NULL) 980 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 981 if (path != NULL) { 982 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 983 pr->pr_root = root; 984 } 985 if (host != NULL) 986 strlcpy(pr->pr_host, host, sizeof(pr->pr_host)); 987 /* 988 * Persistent prisons get an extra reference, and prisons losing their 989 * persist flag lose that reference. Only do this for existing prisons 990 * for now, so new ones will remain unseen until after the module 991 * handlers have completed. 992 */ 993 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 994 if (pr_flags & PR_PERSIST) { 995 pr->pr_ref++; 996 pr->pr_uref++; 997 } else { 998 pr->pr_ref--; 999 pr->pr_uref--; 1000 } 1001 } 1002 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1003 mtx_unlock(&pr->pr_mtx); 1004 1005 /* Let the modules do their work. */ 1006 sx_downgrade(&allprison_lock); 1007 if (created) { 1008 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1009 if (error) { 1010 prison_deref(pr, PD_LIST_SLOCKED); 1011 goto done_errmsg; 1012 } 1013 } 1014 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1015 if (error) { 1016 prison_deref(pr, created 1017 ? PD_LIST_SLOCKED 1018 : PD_DEREF | PD_LIST_SLOCKED); 1019 goto done_errmsg; 1020 } 1021 1022 /* Attach this process to the prison if requested. 
*/ 1023 if (flags & JAIL_ATTACH) { 1024 mtx_lock(&pr->pr_mtx); 1025 error = do_jail_attach(td, pr); 1026 if (error) { 1027 vfs_opterror(opts, "attach failed"); 1028 if (!created) 1029 prison_deref(pr, PD_DEREF); 1030 goto done_errmsg; 1031 } 1032 } 1033 1034 /* 1035 * Now that it is all there, drop the temporary reference from existing 1036 * prisons. Or add a reference to newly created persistent prisons 1037 * (which was not done earlier so that the prison would not be publicly 1038 * visible). 1039 */ 1040 if (!created) { 1041 prison_deref(pr, (flags & JAIL_ATTACH) 1042 ? PD_DEREF 1043 : PD_DEREF | PD_LIST_SLOCKED); 1044 } else { 1045 if (pr_flags & PR_PERSIST) { 1046 mtx_lock(&pr->pr_mtx); 1047 pr->pr_ref++; 1048 pr->pr_uref++; 1049 mtx_unlock(&pr->pr_mtx); 1050 } 1051 if (!(flags & JAIL_ATTACH)) 1052 sx_sunlock(&allprison_lock); 1053 } 1054 td->td_retval[0] = pr->pr_id; 1055 goto done_errmsg; 1056 1057 done_unlock_list: 1058 sx_xunlock(&allprison_lock); 1059 done_releroot: 1060 if (root != NULL) { 1061 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1062 vrele(root); 1063 VFS_UNLOCK_GIANT(vfslocked); 1064 } 1065 done_errmsg: 1066 if (error) { 1067 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1068 if (errmsg_len > 0) { 1069 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1070 if (errmsg_pos > 0) { 1071 if (optuio->uio_segflg == UIO_SYSSPACE) 1072 bcopy(errmsg, 1073 optuio->uio_iov[errmsg_pos].iov_base, 1074 errmsg_len); 1075 else 1076 copyout(errmsg, 1077 optuio->uio_iov[errmsg_pos].iov_base, 1078 errmsg_len); 1079 } 1080 } 1081 } 1082 done_free: 1083#ifdef INET 1084 free(ip4, M_PRISON); 1085#endif 1086#ifdef INET6 1087 free(ip6, M_PRISON); 1088#endif 1089 vfs_freeopts(opts); 1090 return (error); 1091} 1092 1093/* 1094 * Sysctl nodes to describe jail parameters. Maximum length of string 1095 * parameters is returned in the string itself, and the other parameters 1096 * exist merely to make themselves and their types known. 
1097 */ 1098SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 1099 "Jail parameters"); 1100 1101int 1102sysctl_jail_param(SYSCTL_HANDLER_ARGS) 1103{ 1104 int i; 1105 long l; 1106 size_t s; 1107 char numbuf[12]; 1108 1109 switch (oidp->oid_kind & CTLTYPE) 1110 { 1111 case CTLTYPE_LONG: 1112 case CTLTYPE_ULONG: 1113 l = 0; 1114#ifdef SCTL_MASK32 1115 if (!(req->flags & SCTL_MASK32)) 1116#endif 1117 return (SYSCTL_OUT(req, &l, sizeof(l))); 1118 case CTLTYPE_INT: 1119 case CTLTYPE_UINT: 1120 i = 0; 1121 return (SYSCTL_OUT(req, &i, sizeof(i))); 1122 case CTLTYPE_STRING: 1123 snprintf(numbuf, sizeof(numbuf), "%d", arg2); 1124 return 1125 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 1126 case CTLTYPE_STRUCT: 1127 s = (size_t)arg2; 1128 return (SYSCTL_OUT(req, &s, sizeof(s))); 1129 } 1130 return (0); 1131} 1132 1133SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail ID"); 1134SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 1135SYSCTL_JAIL_PARAM(, cpuset, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 1136SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RD, MAXPATHLEN, "Jail root path"); 1137SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 1138 "I", "Jail secure level"); 1139SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 1140 "B", "Jail persistence"); 1141SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 1142 "B", "Jail is in the process of shutting down"); 1143 1144SYSCTL_JAIL_PARAM_NODE(host, "Jail host info"); 1145SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 1146 "Jail hostname"); 1147 1148#ifdef INET 1149SYSCTL_JAIL_PARAM_NODE(ip4, "Jail IPv4 address virtualization"); 1150SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 1151 "S,in_addr,a", "Jail IPv4 addresses"); 1152#endif 1153#ifdef INET6 1154SYSCTL_JAIL_PARAM_NODE(ip6, "Jail IPv6 address virtualization"); 1155SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 1156 
"S,in6_addr,a", "Jail IPv6 addresses"); 1157#endif 1158 1159 1160/* 1161 * struct jail_get_args { 1162 * struct iovec *iovp; 1163 * unsigned int iovcnt; 1164 * int flags; 1165 * }; 1166 */ 1167int 1168jail_get(struct thread *td, struct jail_get_args *uap) 1169{ 1170 struct uio *auio; 1171 int error; 1172 1173 /* Check that we have an even number of iovecs. */ 1174 if (uap->iovcnt & 1) 1175 return (EINVAL); 1176 1177 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1178 if (error) 1179 return (error); 1180 error = kern_jail_get(td, auio, uap->flags); 1181 if (error == 0) 1182 error = copyout(auio->uio_iov, uap->iovp, 1183 uap->iovcnt * sizeof (struct iovec)); 1184 free(auio, M_IOV); 1185 return (error); 1186} 1187 1188int 1189kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1190{ 1191 struct prison *pr; 1192 struct vfsopt *opt; 1193 struct vfsoptlist *opts; 1194 char *errmsg, *name; 1195 int error, errmsg_len, errmsg_pos, i, jid, len, locked, pos; 1196 1197 if (flags & ~JAIL_GET_MASK) 1198 return (EINVAL); 1199 if (jailed(td->td_ucred)) { 1200 /* 1201 * Don't allow a jailed process to see any jails, 1202 * not even its own. 1203 */ 1204 vfs_opterror(opts, "jail not found"); 1205 return (ENOENT); 1206 } 1207 1208 /* Get the parameter list. */ 1209 error = vfs_buildopts(optuio, &opts); 1210 if (error) 1211 return (error); 1212 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1213 1214 /* 1215 * Find the prison specified by one of: lastjid, jid, name. 
1216 */ 1217 sx_slock(&allprison_lock); 1218 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1219 if (error == 0) { 1220 TAILQ_FOREACH(pr, &allprison, pr_list) { 1221 if (pr->pr_id > jid) { 1222 mtx_lock(&pr->pr_mtx); 1223 if (pr->pr_ref > 0 && 1224 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1225 break; 1226 mtx_unlock(&pr->pr_mtx); 1227 } 1228 } 1229 if (pr != NULL) 1230 goto found_prison; 1231 error = ENOENT; 1232 vfs_opterror(opts, "no jail after %d", jid); 1233 goto done_unlock_list; 1234 } else if (error != ENOENT) 1235 goto done_unlock_list; 1236 1237 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1238 if (error == 0) { 1239 if (jid != 0) { 1240 pr = prison_find(jid); 1241 if (pr != NULL) { 1242 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1243 mtx_unlock(&pr->pr_mtx); 1244 error = ENOENT; 1245 vfs_opterror(opts, "jail %d is dying", 1246 jid); 1247 goto done_unlock_list; 1248 } 1249 goto found_prison; 1250 } 1251 error = ENOENT; 1252 vfs_opterror(opts, "jail %d not found", jid); 1253 goto done_unlock_list; 1254 } 1255 } else if (error != ENOENT) 1256 goto done_unlock_list; 1257 1258 error = vfs_getopt(opts, "name", (void **)&name, &len); 1259 if (error == 0) { 1260 if (len == 0 || name[len - 1] != '\0') { 1261 error = EINVAL; 1262 goto done_unlock_list; 1263 } 1264 pr = prison_find_name(name); 1265 if (pr != NULL) { 1266 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1267 mtx_unlock(&pr->pr_mtx); 1268 error = ENOENT; 1269 vfs_opterror(opts, "jail \"%s\" is dying", 1270 name); 1271 goto done_unlock_list; 1272 } 1273 goto found_prison; 1274 } 1275 error = ENOENT; 1276 vfs_opterror(opts, "jail \"%s\" not found", name); 1277 goto done_unlock_list; 1278 } else if (error != ENOENT) 1279 goto done_unlock_list; 1280 1281 vfs_opterror(opts, "no jail specified"); 1282 error = ENOENT; 1283 goto done_unlock_list; 1284 1285 found_prison: 1286 /* Get the parameters of the prison. 
*/ 1287 pr->pr_ref++; 1288 locked = PD_LOCKED; 1289 td->td_retval[0] = pr->pr_id; 1290 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 1291 if (error != 0 && error != ENOENT) 1292 goto done_deref; 1293 error = vfs_setopts(opts, "name", pr->pr_name); 1294 if (error != 0 && error != ENOENT) 1295 goto done_deref; 1296 error = vfs_setopt(opts, "cpuset", &pr->pr_cpuset->cs_id, 1297 sizeof(pr->pr_cpuset->cs_id)); 1298 if (error != 0 && error != ENOENT) 1299 goto done_deref; 1300 error = vfs_setopts(opts, "path", pr->pr_path); 1301 if (error != 0 && error != ENOENT) 1302 goto done_deref; 1303#ifdef INET 1304 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 1305 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1306 if (error != 0 && error != ENOENT) 1307 goto done_deref; 1308#endif 1309#ifdef INET6 1310 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 1311 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1312 if (error != 0 && error != ENOENT) 1313 goto done_deref; 1314#endif 1315 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 1316 sizeof(pr->pr_securelevel)); 1317 if (error != 0 && error != ENOENT) 1318 goto done_deref; 1319 error = vfs_setopts(opts, "host.hostname", pr->pr_host); 1320 if (error != 0 && error != ENOENT) 1321 goto done_deref; 1322 i = pr->pr_flags & PR_PERSIST ? 1 : 0; 1323 error = vfs_setopt(opts, "persist", &i, sizeof(i)); 1324 if (error != 0 && error != ENOENT) 1325 goto done_deref; 1326 i = !i; 1327 error = vfs_setopt(opts, "nopersist", &i, sizeof(i)); 1328 if (error != 0 && error != ENOENT) 1329 goto done_deref; 1330 i = (pr->pr_uref == 0); 1331 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 1332 if (error != 0 && error != ENOENT) 1333 goto done_deref; 1334 i = !i; 1335 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 1336 if (error != 0 && error != ENOENT) 1337 goto done_deref; 1338 1339 /* Get the module parameters. 
*/ 1340 mtx_unlock(&pr->pr_mtx); 1341 locked = 0; 1342 error = osd_jail_call(pr, PR_METHOD_GET, opts); 1343 if (error) 1344 goto done_deref; 1345 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 1346 1347 /* By now, all parameters should have been noted. */ 1348 TAILQ_FOREACH(opt, opts, link) { 1349 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1350 error = EINVAL; 1351 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1352 goto done_errmsg; 1353 } 1354 } 1355 1356 /* Write the fetched parameters back to userspace. */ 1357 error = 0; 1358 TAILQ_FOREACH(opt, opts, link) { 1359 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 1360 pos = 2 * opt->pos + 1; 1361 optuio->uio_iov[pos].iov_len = opt->len; 1362 if (opt->value != NULL) { 1363 if (optuio->uio_segflg == UIO_SYSSPACE) { 1364 bcopy(opt->value, 1365 optuio->uio_iov[pos].iov_base, 1366 opt->len); 1367 } else { 1368 error = copyout(opt->value, 1369 optuio->uio_iov[pos].iov_base, 1370 opt->len); 1371 if (error) 1372 break; 1373 } 1374 } 1375 } 1376 } 1377 goto done_errmsg; 1378 1379 done_deref: 1380 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 1381 goto done_errmsg; 1382 1383 done_unlock_list: 1384 sx_sunlock(&allprison_lock); 1385 done_errmsg: 1386 if (error && errmsg_pos >= 0) { 1387 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1388 errmsg_pos = 2 * errmsg_pos + 1; 1389 if (errmsg_len > 0) { 1390 if (optuio->uio_segflg == UIO_SYSSPACE) 1391 bcopy(errmsg, 1392 optuio->uio_iov[errmsg_pos].iov_base, 1393 errmsg_len); 1394 else 1395 copyout(errmsg, 1396 optuio->uio_iov[errmsg_pos].iov_base, 1397 errmsg_len); 1398 } 1399 } 1400 vfs_freeopts(opts); 1401 return (error); 1402} 1403 1404/* 1405 * struct jail_remove_args { 1406 * int jid; 1407 * }; 1408 */ 1409int 1410jail_remove(struct thread *td, struct jail_remove_args *uap) 1411{ 1412 struct prison *pr; 1413 struct proc *p; 1414 int deuref, error; 1415 1416 error = priv_check(td, PRIV_JAIL_REMOVE); 1417 if (error) 1418 return (error); 1419 
1420 sx_xlock(&allprison_lock); 1421 pr = prison_find(uap->jid); 1422 if (pr == NULL) { 1423 sx_xunlock(&allprison_lock); 1424 return (EINVAL); 1425 } 1426 1427 /* If the prison was persistent, it is not anymore. */ 1428 deuref = 0; 1429 if (pr->pr_flags & PR_PERSIST) { 1430 pr->pr_ref--; 1431 deuref = PD_DEUREF; 1432 pr->pr_flags &= ~PR_PERSIST; 1433 } 1434 1435 /* If there are no references left, remove the prison now. */ 1436 if (pr->pr_ref == 0) { 1437 prison_deref(pr, 1438 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1439 return (0); 1440 } 1441 1442 /* 1443 * Keep a temporary reference to make sure this prison sticks around. 1444 */ 1445 pr->pr_ref++; 1446 mtx_unlock(&pr->pr_mtx); 1447 sx_xunlock(&allprison_lock); 1448 /* 1449 * Kill all processes unfortunate enough to be attached to this prison. 1450 */ 1451 sx_slock(&allproc_lock); 1452 LIST_FOREACH(p, &allproc, p_list) { 1453 PROC_LOCK(p); 1454 if (p->p_state != PRS_NEW && p->p_ucred && 1455 p->p_ucred->cr_prison == pr) 1456 psignal(p, SIGKILL); 1457 PROC_UNLOCK(p); 1458 } 1459 sx_sunlock(&allproc_lock); 1460 /* Remove the temporary reference. */ 1461 prison_deref(pr, deuref | PD_DEREF); 1462 return (0); 1463} 1464 1465 1466/* 1467 * struct jail_attach_args { 1468 * int jid; 1469 * }; 1470 */ 1471int 1472jail_attach(struct thread *td, struct jail_attach_args *uap) 1473{ 1474 struct prison *pr; 1475 int error; 1476 1477 error = priv_check(td, PRIV_JAIL_ATTACH); 1478 if (error) 1479 return (error); 1480 1481 sx_slock(&allprison_lock); 1482 pr = prison_find(uap->jid); 1483 if (pr == NULL) { 1484 sx_sunlock(&allprison_lock); 1485 return (EINVAL); 1486 } 1487 1488 /* 1489 * Do not allow a process to attach to a prison that is not 1490 * considered to be "alive". 
1491 */ 1492 if (pr->pr_uref == 0) { 1493 mtx_unlock(&pr->pr_mtx); 1494 sx_sunlock(&allprison_lock); 1495 return (EINVAL); 1496 } 1497 1498 return (do_jail_attach(td, pr)); 1499} 1500 1501static int 1502do_jail_attach(struct thread *td, struct prison *pr) 1503{ 1504 struct proc *p; 1505 struct ucred *newcred, *oldcred; 1506 int vfslocked, error; 1507 1508 /* 1509 * XXX: Note that there is a slight race here if two threads 1510 * in the same privileged process attempt to attach to two 1511 * different jails at the same time. It is important for 1512 * user processes not to do this, or they might end up with 1513 * a process root from one prison, but attached to the jail 1514 * of another. 1515 */ 1516 pr->pr_ref++; 1517 pr->pr_uref++; 1518 mtx_unlock(&pr->pr_mtx); 1519 1520 /* Let modules do whatever they need to prepare for attaching. */ 1521 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 1522 if (error) { 1523 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 1524 return (error); 1525 } 1526 sx_sunlock(&allprison_lock); 1527 1528 /* 1529 * Reparent the newly attached process to this jail. 
1530 */ 1531 p = td->td_proc; 1532 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 1533 if (error) 1534 goto e_revert_osd; 1535 1536 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 1537 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 1538 if ((error = change_dir(pr->pr_root, td)) != 0) 1539 goto e_unlock; 1540#ifdef MAC 1541 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 1542 goto e_unlock; 1543#endif 1544 VOP_UNLOCK(pr->pr_root, 0); 1545 if ((error = change_root(pr->pr_root, td))) 1546 goto e_unlock_giant; 1547 VFS_UNLOCK_GIANT(vfslocked); 1548 1549 newcred = crget(); 1550 PROC_LOCK(p); 1551 oldcred = p->p_ucred; 1552 setsugid(p); 1553 crcopy(newcred, oldcred); 1554 newcred->cr_prison = pr; 1555 p->p_ucred = newcred; 1556 PROC_UNLOCK(p); 1557 crfree(oldcred); 1558 return (0); 1559 e_unlock: 1560 VOP_UNLOCK(pr->pr_root, 0); 1561 e_unlock_giant: 1562 VFS_UNLOCK_GIANT(vfslocked); 1563 e_revert_osd: 1564 /* Tell modules this thread is still in its old jail after all. */ 1565 (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td); 1566 prison_deref(pr, PD_DEREF | PD_DEUREF); 1567 return (error); 1568} 1569 1570/* 1571 * Returns a locked prison instance, or NULL on failure. 1572 */ 1573struct prison * 1574prison_find(int prid) 1575{ 1576 struct prison *pr; 1577 1578 sx_assert(&allprison_lock, SX_LOCKED); 1579 TAILQ_FOREACH(pr, &allprison, pr_list) { 1580 if (pr->pr_id == prid) { 1581 mtx_lock(&pr->pr_mtx); 1582 if (pr->pr_ref > 0) 1583 return (pr); 1584 mtx_unlock(&pr->pr_mtx); 1585 } 1586 } 1587 return (NULL); 1588} 1589 1590/* 1591 * Look for the named prison. Returns a locked prison or NULL. 
1592 */ 1593struct prison * 1594prison_find_name(const char *name) 1595{ 1596 struct prison *pr, *deadpr; 1597 1598 sx_assert(&allprison_lock, SX_LOCKED); 1599 again: 1600 deadpr = NULL; 1601 TAILQ_FOREACH(pr, &allprison, pr_list) { 1602 if (!strcmp(pr->pr_name, name)) { 1603 mtx_lock(&pr->pr_mtx); 1604 if (pr->pr_ref > 0) { 1605 if (pr->pr_uref > 0) 1606 return (pr); 1607 deadpr = pr; 1608 } 1609 mtx_unlock(&pr->pr_mtx); 1610 } 1611 } 1612 /* There was no valid prison - perhaps there was a dying one */ 1613 if (deadpr != NULL) { 1614 mtx_lock(&deadpr->pr_mtx); 1615 if (deadpr->pr_ref == 0) { 1616 mtx_unlock(&deadpr->pr_mtx); 1617 goto again; 1618 } 1619 } 1620 return (deadpr); 1621} 1622 1623/* 1624 * Remove a prison reference. If that was the last reference, remove the 1625 * prison itself - but not in this context in case there are locks held. 1626 */ 1627void 1628prison_free_locked(struct prison *pr) 1629{ 1630 1631 mtx_assert(&pr->pr_mtx, MA_OWNED); 1632 pr->pr_ref--; 1633 if (pr->pr_ref == 0) { 1634 mtx_unlock(&pr->pr_mtx); 1635 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 1636 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 1637 return; 1638 } 1639 mtx_unlock(&pr->pr_mtx); 1640} 1641 1642void 1643prison_free(struct prison *pr) 1644{ 1645 1646 mtx_lock(&pr->pr_mtx); 1647 prison_free_locked(pr); 1648} 1649 1650static void 1651prison_complete(void *context, int pending) 1652{ 1653 1654 prison_deref((struct prison *)context, 0); 1655} 1656 1657/* 1658 * Remove a prison reference (usually). This internal version assumes no 1659 * mutexes are held, except perhaps the prison itself. If there are no more 1660 * references, release and delist the prison. On completion, the prison lock 1661 * and the allprison lock are both unlocked. 
1662 */ 1663static void 1664prison_deref(struct prison *pr, int flags) 1665{ 1666 int vfslocked; 1667 1668 if (!(flags & PD_LOCKED)) 1669 mtx_lock(&pr->pr_mtx); 1670 if (flags & PD_DEUREF) { 1671 pr->pr_uref--; 1672 /* Done if there were only user references to remove. */ 1673 if (!(flags & PD_DEREF)) { 1674 mtx_unlock(&pr->pr_mtx); 1675 if (flags & PD_LIST_SLOCKED) 1676 sx_sunlock(&allprison_lock); 1677 else if (flags & PD_LIST_XLOCKED) 1678 sx_xunlock(&allprison_lock); 1679 return; 1680 } 1681 } 1682 if (flags & PD_DEREF) 1683 pr->pr_ref--; 1684 /* If the prison still has references, nothing else to do. */ 1685 if (pr->pr_ref > 0) { 1686 mtx_unlock(&pr->pr_mtx); 1687 if (flags & PD_LIST_SLOCKED) 1688 sx_sunlock(&allprison_lock); 1689 else if (flags & PD_LIST_XLOCKED) 1690 sx_xunlock(&allprison_lock); 1691 return; 1692 } 1693 1694 KASSERT(pr->pr_uref == 0, 1695 ("%s: Trying to remove an active prison (jid=%d).", __func__, 1696 pr->pr_id)); 1697 mtx_unlock(&pr->pr_mtx); 1698 if (flags & PD_LIST_SLOCKED) { 1699 if (!sx_try_upgrade(&allprison_lock)) { 1700 sx_sunlock(&allprison_lock); 1701 sx_xlock(&allprison_lock); 1702 } 1703 } else if (!(flags & PD_LIST_XLOCKED)) 1704 sx_xlock(&allprison_lock); 1705 1706 TAILQ_REMOVE(&allprison, pr, pr_list); 1707 prisoncount--; 1708 sx_xunlock(&allprison_lock); 1709 1710 if (pr->pr_root != NULL) { 1711 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 1712 vrele(pr->pr_root); 1713 VFS_UNLOCK_GIANT(vfslocked); 1714 } 1715 mtx_destroy(&pr->pr_mtx); 1716#ifdef INET 1717 free(pr->pr_ip4, M_PRISON); 1718#endif 1719#ifdef INET6 1720 free(pr->pr_ip6, M_PRISON); 1721#endif 1722 if (pr->pr_cpuset != NULL) 1723 cpuset_rel(pr->pr_cpuset); 1724 osd_jail_exit(pr); 1725 free(pr, M_PRISON); 1726} 1727 1728void 1729prison_hold_locked(struct prison *pr) 1730{ 1731 1732 mtx_assert(&pr->pr_mtx, MA_OWNED); 1733 KASSERT(pr->pr_ref > 0, 1734 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 1735 pr->pr_ref++; 1736} 1737 1738void 
1739prison_hold(struct prison *pr) 1740{ 1741 1742 mtx_lock(&pr->pr_mtx); 1743 prison_hold_locked(pr); 1744 mtx_unlock(&pr->pr_mtx); 1745} 1746 1747void 1748prison_proc_hold(struct prison *pr) 1749{ 1750 1751 mtx_lock(&pr->pr_mtx); 1752 KASSERT(pr->pr_uref > 0, 1753 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 1754 pr->pr_uref++; 1755 mtx_unlock(&pr->pr_mtx); 1756} 1757 1758void 1759prison_proc_free(struct prison *pr) 1760{ 1761 1762 mtx_lock(&pr->pr_mtx); 1763 KASSERT(pr->pr_uref > 0, 1764 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 1765 prison_deref(pr, PD_DEUREF | PD_LOCKED); 1766} 1767 1768 1769#ifdef INET 1770/* 1771 * Pass back primary IPv4 address of this jail. 1772 * 1773 * If not jailed return success but do not alter the address. Caller has to 1774 * make sure to initialize it correctly (e.g. INADDR_ANY). 1775 * 1776 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 1777 * Address returned in NBO. 1778 */ 1779int 1780prison_get_ip4(struct ucred *cred, struct in_addr *ia) 1781{ 1782 struct prison *pr; 1783 1784 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1785 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 1786 1787 if (!jailed(cred)) 1788 return (0); 1789 pr = cred->cr_prison; 1790 mtx_lock(&pr->pr_mtx); 1791 if (pr->pr_ip4 == NULL) { 1792 mtx_unlock(&pr->pr_mtx); 1793 return (EAFNOSUPPORT); 1794 } 1795 1796 ia->s_addr = pr->pr_ip4[0].s_addr; 1797 mtx_unlock(&pr->pr_mtx); 1798 return (0); 1799} 1800 1801/* 1802 * Make sure our (source) address is set to something meaningful to this 1803 * jail. 1804 * 1805 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if 1806 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4. 1807 * Address passed in in NBO and returned in NBO. 
1808 */ 1809int 1810prison_local_ip4(struct ucred *cred, struct in_addr *ia) 1811{ 1812 struct prison *pr; 1813 struct in_addr ia0; 1814 int error; 1815 1816 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1817 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 1818 1819 if (!jailed(cred)) 1820 return (0); 1821 pr = cred->cr_prison; 1822 mtx_lock(&pr->pr_mtx); 1823 if (pr->pr_ip4 == NULL) { 1824 mtx_unlock(&pr->pr_mtx); 1825 return (EAFNOSUPPORT); 1826 } 1827 1828 ia0.s_addr = ntohl(ia->s_addr); 1829 if (ia0.s_addr == INADDR_LOOPBACK) { 1830 ia->s_addr = pr->pr_ip4[0].s_addr; 1831 mtx_unlock(&pr->pr_mtx); 1832 return (0); 1833 } 1834 1835 if (ia0.s_addr == INADDR_ANY) { 1836 /* 1837 * In case there is only 1 IPv4 address, bind directly. 1838 */ 1839 if (pr->pr_ip4s == 1) 1840 ia->s_addr = pr->pr_ip4[0].s_addr; 1841 mtx_unlock(&pr->pr_mtx); 1842 return (0); 1843 } 1844 1845 error = _prison_check_ip4(pr, ia); 1846 mtx_unlock(&pr->pr_mtx); 1847 return (error); 1848} 1849 1850/* 1851 * Rewrite destination address in case we will connect to loopback address. 1852 * 1853 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 1854 * Address passed in in NBO and returned in NBO. 1855 */ 1856int 1857prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 1858{ 1859 struct prison *pr; 1860 1861 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1862 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 1863 1864 if (!jailed(cred)) 1865 return (0); 1866 pr = cred->cr_prison; 1867 mtx_lock(&pr->pr_mtx); 1868 if (pr->pr_ip4 == NULL) { 1869 mtx_unlock(&pr->pr_mtx); 1870 return (EAFNOSUPPORT); 1871 } 1872 1873 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 1874 ia->s_addr = pr->pr_ip4[0].s_addr; 1875 mtx_unlock(&pr->pr_mtx); 1876 return (0); 1877 } 1878 1879 /* 1880 * Return success because nothing had to be changed. 
1881 */ 1882 mtx_unlock(&pr->pr_mtx); 1883 return (0); 1884} 1885 1886/* 1887 * Check if given address belongs to the jail referenced by cred/prison. 1888 * 1889 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if 1890 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4. 1891 * Address passed in in NBO. 1892 */ 1893static int 1894_prison_check_ip4(struct prison *pr, struct in_addr *ia) 1895{ 1896 int i, a, z, d; 1897 1898 /* 1899 * Check the primary IP. 1900 */ 1901 if (pr->pr_ip4[0].s_addr == ia->s_addr) 1902 return (0); 1903 1904 /* 1905 * All the other IPs are sorted so we can do a binary search. 1906 */ 1907 a = 0; 1908 z = pr->pr_ip4s - 2; 1909 while (a <= z) { 1910 i = (a + z) / 2; 1911 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 1912 if (d > 0) 1913 z = i - 1; 1914 else if (d < 0) 1915 a = i + 1; 1916 else 1917 return (0); 1918 } 1919 1920 return (EADDRNOTAVAIL); 1921} 1922 1923int 1924prison_check_ip4(struct ucred *cred, struct in_addr *ia) 1925{ 1926 struct prison *pr; 1927 int error; 1928 1929 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1930 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 1931 1932 if (!jailed(cred)) 1933 return (0); 1934 pr = cred->cr_prison; 1935 mtx_lock(&pr->pr_mtx); 1936 if (pr->pr_ip4 == NULL) { 1937 mtx_unlock(&pr->pr_mtx); 1938 return (EAFNOSUPPORT); 1939 } 1940 1941 error = _prison_check_ip4(pr, ia); 1942 mtx_unlock(&pr->pr_mtx); 1943 return (error); 1944} 1945#endif 1946 1947#ifdef INET6 1948/* 1949 * Pass back primary IPv6 address for this jail. 1950 * 1951 * If not jailed return success but do not alter the address. Caller has to 1952 * make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 1953 * 1954 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
1955 */ 1956int 1957prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 1958{ 1959 struct prison *pr; 1960 1961 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1962 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 1963 1964 if (!jailed(cred)) 1965 return (0); 1966 pr = cred->cr_prison; 1967 mtx_lock(&pr->pr_mtx); 1968 if (pr->pr_ip6 == NULL) { 1969 mtx_unlock(&pr->pr_mtx); 1970 return (EAFNOSUPPORT); 1971 } 1972 1973 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 1974 mtx_unlock(&pr->pr_mtx); 1975 return (0); 1976} 1977 1978/* 1979 * Make sure our (source) address is set to something meaningful to this jail. 1980 * 1981 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 1982 * when needed while binding. 1983 * 1984 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if 1985 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6. 1986 */ 1987int 1988prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 1989{ 1990 struct prison *pr; 1991 int error; 1992 1993 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 1994 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 1995 1996 if (!jailed(cred)) 1997 return (0); 1998 pr = cred->cr_prison; 1999 mtx_lock(&pr->pr_mtx); 2000 if (pr->pr_ip6 == NULL) { 2001 mtx_unlock(&pr->pr_mtx); 2002 return (EAFNOSUPPORT); 2003 } 2004 2005 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2006 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2007 mtx_unlock(&pr->pr_mtx); 2008 return (0); 2009 } 2010 2011 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 2012 /* 2013 * In case there is only 1 IPv6 address, and v6only is true, 2014 * then bind directly. 
2015 */ 2016 if (v6only != 0 && pr->pr_ip6s == 1) 2017 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2018 mtx_unlock(&pr->pr_mtx); 2019 return (0); 2020 } 2021 2022 error = _prison_check_ip6(pr, ia6); 2023 mtx_unlock(&pr->pr_mtx); 2024 return (error); 2025} 2026 2027/* 2028 * Rewrite destination address in case we will connect to loopback address. 2029 * 2030 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2031 */ 2032int 2033prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 2034{ 2035 struct prison *pr; 2036 2037 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2038 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2039 2040 if (!jailed(cred)) 2041 return (0); 2042 pr = cred->cr_prison; 2043 mtx_lock(&pr->pr_mtx); 2044 if (pr->pr_ip6 == NULL) { 2045 mtx_unlock(&pr->pr_mtx); 2046 return (EAFNOSUPPORT); 2047 } 2048 2049 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2050 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2051 mtx_unlock(&pr->pr_mtx); 2052 return (0); 2053 } 2054 2055 /* 2056 * Return success because nothing had to be changed. 2057 */ 2058 mtx_unlock(&pr->pr_mtx); 2059 return (0); 2060} 2061 2062/* 2063 * Check if given address belongs to the jail referenced by cred/prison. 2064 * 2065 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if 2066 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6. 2067 */ 2068static int 2069_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 2070{ 2071 int i, a, z, d; 2072 2073 /* 2074 * Check the primary IP. 2075 */ 2076 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 2077 return (0); 2078 2079 /* 2080 * All the other IPs are sorted so we can do a binary search. 
2081 */ 2082 a = 0; 2083 z = pr->pr_ip6s - 2; 2084 while (a <= z) { 2085 i = (a + z) / 2; 2086 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 2087 if (d > 0) 2088 z = i - 1; 2089 else if (d < 0) 2090 a = i + 1; 2091 else 2092 return (0); 2093 } 2094 2095 return (EADDRNOTAVAIL); 2096} 2097 2098int 2099prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 2100{ 2101 struct prison *pr; 2102 int error; 2103 2104 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2105 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2106 2107 if (!jailed(cred)) 2108 return (0); 2109 pr = cred->cr_prison; 2110 mtx_lock(&pr->pr_mtx); 2111 if (pr->pr_ip6 == NULL) { 2112 mtx_unlock(&pr->pr_mtx); 2113 return (EAFNOSUPPORT); 2114 } 2115 2116 error = _prison_check_ip6(pr, ia6); 2117 mtx_unlock(&pr->pr_mtx); 2118 return (error); 2119} 2120#endif 2121 2122/* 2123 * Check if a jail supports the given address family. 2124 * 2125 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 2126 * if not. 2127 */ 2128int 2129prison_check_af(struct ucred *cred, int af) 2130{ 2131 int error; 2132 2133 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2134 2135 2136 if (!jailed(cred)) 2137 return (0); 2138 2139 error = 0; 2140 switch (af) 2141 { 2142#ifdef INET 2143 case AF_INET: 2144 if (cred->cr_prison->pr_ip4 == NULL) 2145 error = EAFNOSUPPORT; 2146 break; 2147#endif 2148#ifdef INET6 2149 case AF_INET6: 2150 if (cred->cr_prison->pr_ip6 == NULL) 2151 error = EAFNOSUPPORT; 2152 break; 2153#endif 2154 case AF_LOCAL: 2155 case AF_ROUTE: 2156 break; 2157 default: 2158 if (jail_socket_unixiproute_only) 2159 error = EAFNOSUPPORT; 2160 } 2161 return (error); 2162} 2163 2164/* 2165 * Check if given address belongs to the jail referenced by cred (wrapper to 2166 * prison_check_ip[46]). 2167 * 2168 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if 2169 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow 2170 * the address family. 
IPv4 Address passed in in NBO. 2171 */ 2172int 2173prison_if(struct ucred *cred, struct sockaddr *sa) 2174{ 2175#ifdef INET 2176 struct sockaddr_in *sai; 2177#endif 2178#ifdef INET6 2179 struct sockaddr_in6 *sai6; 2180#endif 2181 int error; 2182 2183 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2184 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 2185 2186 error = 0; 2187 switch (sa->sa_family) 2188 { 2189#ifdef INET 2190 case AF_INET: 2191 sai = (struct sockaddr_in *)sa; 2192 error = prison_check_ip4(cred, &sai->sin_addr); 2193 break; 2194#endif 2195#ifdef INET6 2196 case AF_INET6: 2197 sai6 = (struct sockaddr_in6 *)sa; 2198 error = prison_check_ip6(cred, &sai6->sin6_addr); 2199 break; 2200#endif 2201 default: 2202 if (jailed(cred) && jail_socket_unixiproute_only) 2203 error = EAFNOSUPPORT; 2204 } 2205 return (error); 2206} 2207 2208/* 2209 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 2210 */ 2211int 2212prison_check(struct ucred *cred1, struct ucred *cred2) 2213{ 2214 2215 if (jailed(cred1)) { 2216 if (!jailed(cred2)) 2217 return (ESRCH); 2218 if (cred2->cr_prison != cred1->cr_prison) 2219 return (ESRCH); 2220 } 2221 2222 return (0); 2223} 2224 2225/* 2226 * Return 1 if the passed credential is in a jail, otherwise 0. 2227 */ 2228int 2229jailed(struct ucred *cred) 2230{ 2231 2232 return (cred->cr_prison != NULL); 2233} 2234 2235/* 2236 * Return the correct hostname for the passed credential. 2237 */ 2238void 2239getcredhostname(struct ucred *cred, char *buf, size_t size) 2240{ 2241 INIT_VPROCG(cred->cr_vimage->v_procg); 2242 2243 if (jailed(cred)) { 2244 mtx_lock(&cred->cr_prison->pr_mtx); 2245 strlcpy(buf, cred->cr_prison->pr_host, size); 2246 mtx_unlock(&cred->cr_prison->pr_mtx); 2247 } else { 2248 mtx_lock(&hostname_mtx); 2249 strlcpy(buf, V_hostname, size); 2250 mtx_unlock(&hostname_mtx); 2251 } 2252} 2253 2254/* 2255 * Determine whether the subject represented by cred can "see" 2256 * status of a mount point. 
2257 * Returns: 0 for permitted, ENOENT otherwise. 2258 * XXX: This function should be called cr_canseemount() and should be 2259 * placed in kern_prot.c. 2260 */ 2261int 2262prison_canseemount(struct ucred *cred, struct mount *mp) 2263{ 2264 struct prison *pr; 2265 struct statfs *sp; 2266 size_t len; 2267 2268 if (!jailed(cred) || jail_enforce_statfs == 0) 2269 return (0); 2270 pr = cred->cr_prison; 2271 if (pr->pr_root->v_mount == mp) 2272 return (0); 2273 if (jail_enforce_statfs == 2) 2274 return (ENOENT); 2275 /* 2276 * If jail's chroot directory is set to "/" we should be able to see 2277 * all mount-points from inside a jail. 2278 * This is ugly check, but this is the only situation when jail's 2279 * directory ends with '/'. 2280 */ 2281 if (strcmp(pr->pr_path, "/") == 0) 2282 return (0); 2283 len = strlen(pr->pr_path); 2284 sp = &mp->mnt_stat; 2285 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 2286 return (ENOENT); 2287 /* 2288 * Be sure that we don't have situation where jail's root directory 2289 * is "/some/path" and mount point is "/some/pathpath". 2290 */ 2291 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 2292 return (ENOENT); 2293 return (0); 2294} 2295 2296void 2297prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 2298{ 2299 char jpath[MAXPATHLEN]; 2300 struct prison *pr; 2301 size_t len; 2302 2303 if (!jailed(cred) || jail_enforce_statfs == 0) 2304 return; 2305 pr = cred->cr_prison; 2306 if (prison_canseemount(cred, mp) != 0) { 2307 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 2308 strlcpy(sp->f_mntonname, "[restricted]", 2309 sizeof(sp->f_mntonname)); 2310 return; 2311 } 2312 if (pr->pr_root->v_mount == mp) { 2313 /* 2314 * Clear current buffer data, so we are sure nothing from 2315 * the valid path left there. 
2316 */ 2317 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 2318 *sp->f_mntonname = '/'; 2319 return; 2320 } 2321 /* 2322 * If jail's chroot directory is set to "/" we should be able to see 2323 * all mount-points from inside a jail. 2324 */ 2325 if (strcmp(pr->pr_path, "/") == 0) 2326 return; 2327 len = strlen(pr->pr_path); 2328 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 2329 /* 2330 * Clear current buffer data, so we are sure nothing from 2331 * the valid path left there. 2332 */ 2333 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 2334 if (*jpath == '\0') { 2335 /* Should never happen. */ 2336 *sp->f_mntonname = '/'; 2337 } else { 2338 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 2339 } 2340} 2341 2342/* 2343 * Check with permission for a specific privilege is granted within jail. We 2344 * have a specific list of accepted privileges; the rest are denied. 2345 */ 2346int 2347prison_priv_check(struct ucred *cred, int priv) 2348{ 2349 2350 if (!jailed(cred)) 2351 return (0); 2352 2353 switch (priv) { 2354 2355 /* 2356 * Allow ktrace privileges for root in jail. 2357 */ 2358 case PRIV_KTRACE: 2359 2360#if 0 2361 /* 2362 * Allow jailed processes to configure audit identity and 2363 * submit audit records (login, etc). In the future we may 2364 * want to further refine the relationship between audit and 2365 * jail. 2366 */ 2367 case PRIV_AUDIT_GETAUDIT: 2368 case PRIV_AUDIT_SETAUDIT: 2369 case PRIV_AUDIT_SUBMIT: 2370#endif 2371 2372 /* 2373 * Allow jailed processes to manipulate process UNIX 2374 * credentials in any way they see fit. 2375 */ 2376 case PRIV_CRED_SETUID: 2377 case PRIV_CRED_SETEUID: 2378 case PRIV_CRED_SETGID: 2379 case PRIV_CRED_SETEGID: 2380 case PRIV_CRED_SETGROUPS: 2381 case PRIV_CRED_SETREUID: 2382 case PRIV_CRED_SETREGID: 2383 case PRIV_CRED_SETRESUID: 2384 case PRIV_CRED_SETRESGID: 2385 2386 /* 2387 * Jail implements visibility constraints already, so allow 2388 * jailed root to override uid/gid-based constraints. 
2389 */ 2390 case PRIV_SEEOTHERGIDS: 2391 case PRIV_SEEOTHERUIDS: 2392 2393 /* 2394 * Jail implements inter-process debugging limits already, so 2395 * allow jailed root various debugging privileges. 2396 */ 2397 case PRIV_DEBUG_DIFFCRED: 2398 case PRIV_DEBUG_SUGID: 2399 case PRIV_DEBUG_UNPRIV: 2400 2401 /* 2402 * Allow jail to set various resource limits and login 2403 * properties, and for now, exceed process resource limits. 2404 */ 2405 case PRIV_PROC_LIMIT: 2406 case PRIV_PROC_SETLOGIN: 2407 case PRIV_PROC_SETRLIMIT: 2408 2409 /* 2410 * System V and POSIX IPC privileges are granted in jail. 2411 */ 2412 case PRIV_IPC_READ: 2413 case PRIV_IPC_WRITE: 2414 case PRIV_IPC_ADMIN: 2415 case PRIV_IPC_MSGSIZE: 2416 case PRIV_MQ_ADMIN: 2417 2418 /* 2419 * Jail implements its own inter-process limits, so allow 2420 * root processes in jail to change scheduling on other 2421 * processes in the same jail. Likewise for signalling. 2422 */ 2423 case PRIV_SCHED_DIFFCRED: 2424 case PRIV_SCHED_CPUSET: 2425 case PRIV_SIGNAL_DIFFCRED: 2426 case PRIV_SIGNAL_SUGID: 2427 2428 /* 2429 * Allow jailed processes to write to sysctls marked as jail 2430 * writable. 2431 */ 2432 case PRIV_SYSCTL_WRITEJAIL: 2433 2434 /* 2435 * Allow root in jail to manage a variety of quota 2436 * properties. These should likely be conditional on a 2437 * configuration option. 2438 */ 2439 case PRIV_VFS_GETQUOTA: 2440 case PRIV_VFS_SETQUOTA: 2441 2442 /* 2443 * Since Jail relies on chroot() to implement file system 2444 * protections, grant many VFS privileges to root in jail. 2445 * Be careful to exclude mount-related and NFS-related 2446 * privileges. 2447 */ 2448 case PRIV_VFS_READ: 2449 case PRIV_VFS_WRITE: 2450 case PRIV_VFS_ADMIN: 2451 case PRIV_VFS_EXEC: 2452 case PRIV_VFS_LOOKUP: 2453 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. 
*/ 2454 case PRIV_VFS_CHFLAGS_DEV: 2455 case PRIV_VFS_CHOWN: 2456 case PRIV_VFS_CHROOT: 2457 case PRIV_VFS_RETAINSUGID: 2458 case PRIV_VFS_FCHROOT: 2459 case PRIV_VFS_LINK: 2460 case PRIV_VFS_SETGID: 2461 case PRIV_VFS_STAT: 2462 case PRIV_VFS_STICKYFILE: 2463 return (0); 2464 2465 /* 2466 * Depending on the global setting, allow privilege of 2467 * setting system flags. 2468 */ 2469 case PRIV_VFS_SYSFLAGS: 2470 if (jail_chflags_allowed) 2471 return (0); 2472 else 2473 return (EPERM); 2474 2475 /* 2476 * Depending on the global setting, allow privilege of 2477 * mounting/unmounting file systems. 2478 */ 2479 case PRIV_VFS_MOUNT: 2480 case PRIV_VFS_UNMOUNT: 2481 case PRIV_VFS_MOUNT_NONUSER: 2482 case PRIV_VFS_MOUNT_OWNER: 2483 if (jail_mount_allowed) 2484 return (0); 2485 else 2486 return (EPERM); 2487 2488 /* 2489 * Allow jailed root to bind reserved ports and reuse in-use 2490 * ports. 2491 */ 2492 case PRIV_NETINET_RESERVEDPORT: 2493 case PRIV_NETINET_REUSEPORT: 2494 return (0); 2495 2496 /* 2497 * Allow jailed root to set certian IPv4/6 (option) headers. 2498 */ 2499 case PRIV_NETINET_SETHDROPTS: 2500 return (0); 2501 2502 /* 2503 * Conditionally allow creating raw sockets in jail. 2504 */ 2505 case PRIV_NETINET_RAW: 2506 if (jail_allow_raw_sockets) 2507 return (0); 2508 else 2509 return (EPERM); 2510 2511 /* 2512 * Since jail implements its own visibility limits on netstat 2513 * sysctls, allow getcred. This allows identd to work in 2514 * jail. 2515 */ 2516 case PRIV_NETINET_GETCRED: 2517 return (0); 2518 2519 default: 2520 /* 2521 * In all remaining cases, deny the privilege request. This 2522 * includes almost all network privileges, many system 2523 * configuration privileges. 
2524 */ 2525 return (EPERM); 2526 } 2527} 2528 2529static int 2530sysctl_jail_list(SYSCTL_HANDLER_ARGS) 2531{ 2532 struct xprison *xp; 2533 struct prison *pr; 2534#ifdef INET 2535 struct in_addr *ip4 = NULL; 2536 int ip4s = 0; 2537#endif 2538#ifdef INET6 2539 struct in_addr *ip6 = NULL; 2540 int ip6s = 0; 2541#endif 2542 int error; 2543 2544 if (jailed(req->td->td_ucred)) 2545 return (0); 2546 2547 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 2548 error = 0; 2549 sx_slock(&allprison_lock); 2550 TAILQ_FOREACH(pr, &allprison, pr_list) { 2551 again: 2552 mtx_lock(&pr->pr_mtx); 2553#ifdef INET 2554 if (pr->pr_ip4s > 0) { 2555 if (ip4s < pr->pr_ip4s) { 2556 ip4s = pr->pr_ip4s; 2557 mtx_unlock(&pr->pr_mtx); 2558 ip4 = realloc(ip4, ip4s * 2559 sizeof(struct in_addr), M_TEMP, M_WAITOK); 2560 goto again; 2561 } 2562 bcopy(pr->pr_ip4, ip4, 2563 pr->pr_ip4s * sizeof(struct in_addr)); 2564 } 2565#endif 2566#ifdef INET6 2567 if (pr->pr_ip6s > 0) { 2568 if (ip6s < pr->pr_ip6s) { 2569 ip6s = pr->pr_ip6s; 2570 mtx_unlock(&pr->pr_mtx); 2571 ip6 = realloc(ip6, ip6s * 2572 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 2573 goto again; 2574 } 2575 bcopy(pr->pr_ip6, ip6, 2576 pr->pr_ip6s * sizeof(struct in6_addr)); 2577 } 2578#endif 2579 if (pr->pr_ref == 0) { 2580 mtx_unlock(&pr->pr_mtx); 2581 continue; 2582 } 2583 bzero(xp, sizeof(*xp)); 2584 xp->pr_version = XPRISON_VERSION; 2585 xp->pr_id = pr->pr_id; 2586 xp->pr_state = pr->pr_uref > 0 2587 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; 2588 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 2589 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 2590 strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name)); 2591#ifdef INET 2592 xp->pr_ip4s = pr->pr_ip4s; 2593#endif 2594#ifdef INET6 2595 xp->pr_ip6s = pr->pr_ip6s; 2596#endif 2597 mtx_unlock(&pr->pr_mtx); 2598 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 2599 if (error) 2600 break; 2601#ifdef INET 2602 if (xp->pr_ip4s > 0) { 2603 error = SYSCTL_OUT(req, ip4, 2604 xp->pr_ip4s * sizeof(struct in_addr)); 2605 if (error) 2606 break; 2607 } 2608#endif 2609#ifdef INET6 2610 if (xp->pr_ip6s > 0) { 2611 error = SYSCTL_OUT(req, ip6, 2612 xp->pr_ip6s * sizeof(struct in6_addr)); 2613 if (error) 2614 break; 2615 } 2616#endif 2617 } 2618 sx_sunlock(&allprison_lock); 2619 free(xp, M_TEMP); 2620#ifdef INET 2621 free(ip4, M_TEMP); 2622#endif 2623#ifdef INET6 2624 free(ip6, M_TEMP); 2625#endif 2626 return (error); 2627} 2628 2629SYSCTL_OID(_security_jail, OID_AUTO, list, 2630 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 2631 sysctl_jail_list, "S", "List of active jails"); 2632 2633static int 2634sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 2635{ 2636 int error, injail; 2637 2638 injail = jailed(req->td->td_ucred); 2639 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 2640 2641 return (error); 2642} 2643SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 2644 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 2645 sysctl_jail_jailed, "I", "Process in jail?"); 2646 2647#ifdef DDB 2648 2649static void 2650db_show_prison(struct prison *pr) 2651{ 2652#if defined(INET) || defined(INET6) 2653 int ii; 2654#endif 2655#ifdef INET6 2656 char ip6buf[INET6_ADDRSTRLEN]; 2657#endif 2658 2659 db_printf("prison %p:\n", pr); 2660 db_printf(" jid = %d\n", pr->pr_id); 2661 db_printf(" name = %s\n", pr->pr_name); 2662 db_printf(" ref = %d\n", pr->pr_ref); 2663 db_printf(" uref = %d\n", pr->pr_uref); 2664 db_printf(" path = %s\n", 
pr->pr_path); 2665 db_printf(" cpuset = %d\n", pr->pr_cpuset 2666 ? pr->pr_cpuset->cs_id : -1); 2667 db_printf(" root = %p\n", pr->pr_root); 2668 db_printf(" securelevel = %d\n", pr->pr_securelevel); 2669 db_printf(" flags = %x", pr->pr_flags); 2670 if (pr->pr_flags & PR_PERSIST) 2671 db_printf(" persist"); 2672 db_printf("\n"); 2673 db_printf(" host.hostname = %s\n", pr->pr_host); 2674#ifdef INET 2675 db_printf(" ip4s = %d\n", pr->pr_ip4s); 2676 for (ii = 0; ii < pr->pr_ip4s; ii++) 2677 db_printf(" %s %s\n", 2678 ii == 0 ? "ip4 =" : " ", 2679 inet_ntoa(pr->pr_ip4[ii])); 2680#endif 2681#ifdef INET6 2682 db_printf(" ip6s = %d\n", pr->pr_ip6s); 2683 for (ii = 0; ii < pr->pr_ip6s; ii++) 2684 db_printf(" %s %s\n", 2685 ii == 0 ? "ip6 =" : " ", 2686 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 2687#endif 2688} 2689 2690DB_SHOW_COMMAND(prison, db_show_prison_command) 2691{ 2692 struct prison *pr; 2693 2694 if (!have_addr) { 2695 /* Show all prisons in the list. */ 2696 TAILQ_FOREACH(pr, &allprison, pr_list) { 2697 db_show_prison(pr); 2698 if (db_pager_quit) 2699 break; 2700 } 2701 return; 2702 } 2703 2704 /* Look for a prison with the ID and with references. */ 2705 TAILQ_FOREACH(pr, &allprison, pr_list) 2706 if (pr->pr_id == addr && pr->pr_ref > 0) 2707 break; 2708 if (pr == NULL) 2709 /* Look again, without requiring a reference. */ 2710 TAILQ_FOREACH(pr, &allprison, pr_list) 2711 if (pr->pr_id == addr) 2712 break; 2713 if (pr == NULL) 2714 /* Assume address points to a valid prison. */ 2715 pr = (struct prison *)addr; 2716 db_show_prison(pr); 2717} 2718 2719#endif /* DDB */ 2720