221}; 222const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); 223 224#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 225#define JAIL_DEFAULT_ENFORCE_STATFS 2 226#define JAIL_DEFAULT_DEVFS_RSNUM 0 227static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 228static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 229static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; 230#if defined(INET) || defined(INET6) 231static unsigned jail_max_af_ips = 255; 232#endif 233 234#ifdef INET 235static int 236qcmp_v4(const void *ip1, const void *ip2) 237{ 238 in_addr_t iaa, iab; 239 240 /* 241 * We need to compare in HBO here to get the list sorted as expected 242 * by the result of the code. Sorting NBO addresses gives you 243 * interesting results. If you do not understand, do not try. 244 */ 245 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 246 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 247 248 /* 249 * Do not simply return the difference of the two numbers, the int is 250 * not wide enough. 
251 */ 252 if (iaa > iab) 253 return (1); 254 else if (iaa < iab) 255 return (-1); 256 else 257 return (0); 258} 259#endif 260 261#ifdef INET6 262static int 263qcmp_v6(const void *ip1, const void *ip2) 264{ 265 const struct in6_addr *ia6a, *ia6b; 266 int i, rc; 267 268 ia6a = (const struct in6_addr *)ip1; 269 ia6b = (const struct in6_addr *)ip2; 270 271 rc = 0; 272 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 273 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 274 rc = 1; 275 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 276 rc = -1; 277 } 278 return (rc); 279} 280#endif 281 282/* 283 * struct jail_args { 284 * struct jail *jail; 285 * }; 286 */ 287int 288sys_jail(struct thread *td, struct jail_args *uap) 289{ 290 uint32_t version; 291 int error; 292 struct jail j; 293 294 error = copyin(uap->jail, &version, sizeof(uint32_t)); 295 if (error) 296 return (error); 297 298 switch (version) { 299 case 0: 300 { 301 struct jail_v0 j0; 302 303 /* FreeBSD single IPv4 jails. */ 304 bzero(&j, sizeof(struct jail)); 305 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 306 if (error) 307 return (error); 308 j.version = j0.version; 309 j.path = j0.path; 310 j.hostname = j0.hostname; 311 j.ip4s = j0.ip_number; 312 break; 313 } 314 315 case 1: 316 /* 317 * Version 1 was used by multi-IPv4 jail implementations 318 * that never made it into the official kernel. 319 */ 320 return (EINVAL); 321 322 case 2: /* JAIL_API_VERSION */ 323 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 324 error = copyin(uap->jail, &j, sizeof(struct jail)); 325 if (error) 326 return (error); 327 break; 328 329 default: 330 /* Sci-Fi jails are not supported, sorry. 
*/ 331 return (EINVAL); 332 } 333 return (kern_jail(td, &j)); 334} 335 336int 337kern_jail(struct thread *td, struct jail *j) 338{ 339 struct iovec optiov[2 * (4 340 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 341#ifdef INET 342 + 1 343#endif 344#ifdef INET6 345 + 1 346#endif 347 )]; 348 struct uio opt; 349 char *u_path, *u_hostname, *u_name; 350#ifdef INET 351 uint32_t ip4s; 352 struct in_addr *u_ip4; 353#endif 354#ifdef INET6 355 struct in6_addr *u_ip6; 356#endif 357 size_t tmplen; 358 int error, enforce_statfs, fi; 359 360 bzero(&optiov, sizeof(optiov)); 361 opt.uio_iov = optiov; 362 opt.uio_iovcnt = 0; 363 opt.uio_offset = -1; 364 opt.uio_resid = -1; 365 opt.uio_segflg = UIO_SYSSPACE; 366 opt.uio_rw = UIO_READ; 367 opt.uio_td = td; 368 369 /* Set permissions for top-level jails from sysctls. */ 370 if (!jailed(td->td_ucred)) { 371 for (fi = 0; fi < sizeof(pr_allow_names) / 372 sizeof(pr_allow_names[0]); fi++) { 373 optiov[opt.uio_iovcnt].iov_base = 374 (jail_default_allow & (1 << fi)) 375 ? pr_allow_names[fi] : pr_allow_nonames[fi]; 376 optiov[opt.uio_iovcnt].iov_len = 377 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 378 opt.uio_iovcnt += 2; 379 } 380 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 381 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 382 opt.uio_iovcnt++; 383 enforce_statfs = jail_default_enforce_statfs; 384 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 385 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 386 opt.uio_iovcnt++; 387 } 388 389 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 390#ifdef INET 391 ip4s = (j->version == 0) ? 
1 : j->ip4s; 392 if (ip4s > jail_max_af_ips) 393 return (EINVAL); 394 tmplen += ip4s * sizeof(struct in_addr); 395#else 396 if (j->ip4s > 0) 397 return (EINVAL); 398#endif 399#ifdef INET6 400 if (j->ip6s > jail_max_af_ips) 401 return (EINVAL); 402 tmplen += j->ip6s * sizeof(struct in6_addr); 403#else 404 if (j->ip6s > 0) 405 return (EINVAL); 406#endif 407 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 408 u_hostname = u_path + MAXPATHLEN; 409 u_name = u_hostname + MAXHOSTNAMELEN; 410#ifdef INET 411 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 412#endif 413#ifdef INET6 414#ifdef INET 415 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 416#else 417 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 418#endif 419#endif 420 optiov[opt.uio_iovcnt].iov_base = "path"; 421 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 422 opt.uio_iovcnt++; 423 optiov[opt.uio_iovcnt].iov_base = u_path; 424 error = copyinstr(j->path, u_path, MAXPATHLEN, 425 &optiov[opt.uio_iovcnt].iov_len); 426 if (error) { 427 free(u_path, M_TEMP); 428 return (error); 429 } 430 opt.uio_iovcnt++; 431 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 432 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 433 opt.uio_iovcnt++; 434 optiov[opt.uio_iovcnt].iov_base = u_hostname; 435 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 436 &optiov[opt.uio_iovcnt].iov_len); 437 if (error) { 438 free(u_path, M_TEMP); 439 return (error); 440 } 441 opt.uio_iovcnt++; 442 if (j->jailname != NULL) { 443 optiov[opt.uio_iovcnt].iov_base = "name"; 444 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 445 opt.uio_iovcnt++; 446 optiov[opt.uio_iovcnt].iov_base = u_name; 447 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 448 &optiov[opt.uio_iovcnt].iov_len); 449 if (error) { 450 free(u_path, M_TEMP); 451 return (error); 452 } 453 opt.uio_iovcnt++; 454 } 455#ifdef INET 456 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 457 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 458 opt.uio_iovcnt++; 459 
optiov[opt.uio_iovcnt].iov_base = u_ip4; 460 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 461 if (j->version == 0) 462 u_ip4->s_addr = j->ip4s; 463 else { 464 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 465 if (error) { 466 free(u_path, M_TEMP); 467 return (error); 468 } 469 } 470 opt.uio_iovcnt++; 471#endif 472#ifdef INET6 473 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 474 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 475 opt.uio_iovcnt++; 476 optiov[opt.uio_iovcnt].iov_base = u_ip6; 477 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 478 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 479 if (error) { 480 free(u_path, M_TEMP); 481 return (error); 482 } 483 opt.uio_iovcnt++; 484#endif 485 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 486 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 487 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 488 free(u_path, M_TEMP); 489 return (error); 490} 491 492 493/* 494 * struct jail_set_args { 495 * struct iovec *iovp; 496 * unsigned int iovcnt; 497 * int flags; 498 * }; 499 */ 500int 501sys_jail_set(struct thread *td, struct jail_set_args *uap) 502{ 503 struct uio *auio; 504 int error; 505 506 /* Check that we have an even number of iovecs. 
*/ 507 if (uap->iovcnt & 1) 508 return (EINVAL); 509 510 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 511 if (error) 512 return (error); 513 error = kern_jail_set(td, auio, uap->flags); 514 free(auio, M_IOV); 515 return (error); 516} 517 518int 519kern_jail_set(struct thread *td, struct uio *optuio, int flags) 520{ 521 struct nameidata nd; 522#ifdef INET 523 struct in_addr *ip4; 524#endif 525#ifdef INET6 526 struct in6_addr *ip6; 527#endif 528 struct vfsopt *opt; 529 struct vfsoptlist *opts; 530 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 531 struct vnode *root; 532 char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; 533 char *g_path; 534#if defined(INET) || defined(INET6) 535 struct prison *tppr; 536 void *op; 537#endif 538 unsigned long hid; 539 size_t namelen, onamelen; 540 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 541 int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; 542 int fi, jid, jsys, len, level; 543 int childmax, rsnum, slevel, vfslocked; 544 int fullpath_disabled; 545#if defined(INET) || defined(INET6) 546 int ii, ij; 547#endif 548#ifdef INET 549 int ip4s, redo_ip4; 550#endif 551#ifdef INET6 552 int ip6s, redo_ip6; 553#endif 554 uint64_t pr_allow, ch_allow, pr_flags, ch_flags; 555 unsigned tallow; 556 char numbuf[12]; 557 558 error = priv_check(td, PRIV_JAIL_SET); 559 if (!error && (flags & JAIL_ATTACH)) 560 error = priv_check(td, PRIV_JAIL_ATTACH); 561 if (error) 562 return (error); 563 mypr = ppr = td->td_ucred->cr_prison; 564 if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) 565 return (EPERM); 566 if (flags & ~JAIL_SET_MASK) 567 return (EINVAL); 568 569 /* 570 * Check all the parameters before committing to anything. Not all 571 * errors can be caught early, but we may as well try. Also, this 572 * takes care of some expensive stuff (path lookup) before getting 573 * the allprison lock. 574 * 575 * XXX Jails are not filesystems, and jail parameters are not mount 576 * options. 
But it makes more sense to re-use the vfsopt code 577 * than duplicate it under a different name. 578 */ 579 error = vfs_buildopts(optuio, &opts); 580 if (error) 581 return (error); 582#ifdef INET 583 ip4 = NULL; 584#endif 585#ifdef INET6 586 ip6 = NULL; 587#endif 588 g_path = NULL; 589 590 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 591 if (error == ENOENT) 592 jid = 0; 593 else if (error != 0) 594 goto done_free; 595 596 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 597 if (error == ENOENT) 598 gotslevel = 0; 599 else if (error != 0) 600 goto done_free; 601 else 602 gotslevel = 1; 603 604 error = 605 vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); 606 if (error == ENOENT) 607 gotchildmax = 0; 608 else if (error != 0) 609 goto done_free; 610 else 611 gotchildmax = 1; 612 613 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 614 if (error == ENOENT) 615 gotenforce = 0; 616 else if (error != 0) 617 goto done_free; 618 else if (enforce < 0 || enforce > 2) { 619 error = EINVAL; 620 goto done_free; 621 } else 622 gotenforce = 1; 623 624 error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); 625 if (error == ENOENT) 626 gotrsnum = 0; 627 else if (error != 0) 628 goto done_free; 629 else 630 gotrsnum = 1; 631 632 pr_flags = ch_flags = 0; 633 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 634 fi++) { 635 if (pr_flag_names[fi] == NULL) 636 continue; 637 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 638 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 639 } 640 ch_flags |= pr_flags; 641 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 642 fi++) { 643 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, 644 sizeof(jsys)); 645 if (error == ENOENT) 646 continue; 647 if (error != 0) 648 goto done_free; 649 switch (jsys) { 650 case JAIL_SYS_DISABLE: 651 if (!pr_flag_jailsys[fi].disable) { 652 error = EINVAL; 653 goto done_free; 
654 } 655 pr_flags |= pr_flag_jailsys[fi].disable; 656 break; 657 case JAIL_SYS_NEW: 658 pr_flags |= pr_flag_jailsys[fi].new; 659 break; 660 case JAIL_SYS_INHERIT: 661 break; 662 default: 663 error = EINVAL; 664 goto done_free; 665 } 666 ch_flags |= 667 pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; 668 } 669 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 670 && !(pr_flags & PR_PERSIST)) { 671 error = EINVAL; 672 vfs_opterror(opts, "new jail must persist or attach"); 673 goto done_errmsg; 674 } 675#ifdef VIMAGE 676 if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { 677 error = EINVAL; 678 vfs_opterror(opts, "vnet cannot be changed after creation"); 679 goto done_errmsg; 680 } 681#endif 682#ifdef INET 683 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { 684 error = EINVAL; 685 vfs_opterror(opts, "ip4 cannot be changed after creation"); 686 goto done_errmsg; 687 } 688#endif 689#ifdef INET6 690 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { 691 error = EINVAL; 692 vfs_opterror(opts, "ip6 cannot be changed after creation"); 693 goto done_errmsg; 694 } 695#endif 696 697 pr_allow = ch_allow = 0; 698 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 699 fi++) { 700 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 701 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 702 } 703 ch_allow |= pr_allow; 704 705 error = vfs_getopt(opts, "name", (void **)&name, &len); 706 if (error == ENOENT) 707 name = NULL; 708 else if (error != 0) 709 goto done_free; 710 else { 711 if (len == 0 || name[len - 1] != '\0') { 712 error = EINVAL; 713 goto done_free; 714 } 715 if (len > MAXHOSTNAMELEN) { 716 error = ENAMETOOLONG; 717 goto done_free; 718 } 719 } 720 721 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 722 if (error == ENOENT) 723 host = NULL; 724 else if (error != 0) 725 goto done_free; 726 else { 727 ch_flags |= PR_HOST; 728 pr_flags |= PR_HOST; 729 if (len == 0 || host[len - 
1] != '\0') { 730 error = EINVAL; 731 goto done_free; 732 } 733 if (len > MAXHOSTNAMELEN) { 734 error = ENAMETOOLONG; 735 goto done_free; 736 } 737 } 738 739 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 740 if (error == ENOENT) 741 domain = NULL; 742 else if (error != 0) 743 goto done_free; 744 else { 745 ch_flags |= PR_HOST; 746 pr_flags |= PR_HOST; 747 if (len == 0 || domain[len - 1] != '\0') { 748 error = EINVAL; 749 goto done_free; 750 } 751 if (len > MAXHOSTNAMELEN) { 752 error = ENAMETOOLONG; 753 goto done_free; 754 } 755 } 756 757 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 758 if (error == ENOENT) 759 uuid = NULL; 760 else if (error != 0) 761 goto done_free; 762 else { 763 ch_flags |= PR_HOST; 764 pr_flags |= PR_HOST; 765 if (len == 0 || uuid[len - 1] != '\0') { 766 error = EINVAL; 767 goto done_free; 768 } 769 if (len > HOSTUUIDLEN) { 770 error = ENAMETOOLONG; 771 goto done_free; 772 } 773 } 774 775#ifdef COMPAT_FREEBSD32 776 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 777 uint32_t hid32; 778 779 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 780 hid = hid32; 781 } else 782#endif 783 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 784 if (error == ENOENT) 785 gothid = 0; 786 else if (error != 0) 787 goto done_free; 788 else { 789 gothid = 1; 790 ch_flags |= PR_HOST; 791 pr_flags |= PR_HOST; 792 } 793 794#ifdef INET 795 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 796 if (error == ENOENT) 797 ip4s = (pr_flags & PR_IP4_DISABLE) ? 
0 : -1; 798 else if (error != 0) 799 goto done_free; 800 else if (ip4s & (sizeof(*ip4) - 1)) { 801 error = EINVAL; 802 goto done_free; 803 } else { 804 ch_flags |= PR_IP4_USER | PR_IP4_DISABLE; 805 if (ip4s == 0) 806 pr_flags |= PR_IP4_USER | PR_IP4_DISABLE; 807 else { 808 pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER; 809 ip4s /= sizeof(*ip4); 810 if (ip4s > jail_max_af_ips) { 811 error = EINVAL; 812 vfs_opterror(opts, "too many IPv4 addresses"); 813 goto done_errmsg; 814 } 815 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 816 bcopy(op, ip4, ip4s * sizeof(*ip4)); 817 /* 818 * IP addresses are all sorted but ip[0] to preserve 819 * the primary IP address as given from userland. 820 * This special IP is used for unbound outgoing 821 * connections as well for "loopback" traffic in case 822 * source address selection cannot find any more fitting 823 * address to connect from. 824 */ 825 if (ip4s > 1) 826 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 827 /* 828 * Check for duplicate addresses and do some simple 829 * zero and broadcast checks. If users give other bogus 830 * addresses it is their problem. 831 * 832 * We do not have to care about byte order for these 833 * checks so we will do them in NBO. 834 */ 835 for (ii = 0; ii < ip4s; ii++) { 836 if (ip4[ii].s_addr == INADDR_ANY || 837 ip4[ii].s_addr == INADDR_BROADCAST) { 838 error = EINVAL; 839 goto done_free; 840 } 841 if ((ii+1) < ip4s && 842 (ip4[0].s_addr == ip4[ii+1].s_addr || 843 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 844 error = EINVAL; 845 goto done_free; 846 } 847 } 848 } 849 } 850#endif 851 852#ifdef INET6 853 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 854 if (error == ENOENT) 855 ip6s = (pr_flags & PR_IP6_DISABLE) ? 
0 : -1; 856 else if (error != 0) 857 goto done_free; 858 else if (ip6s & (sizeof(*ip6) - 1)) { 859 error = EINVAL; 860 goto done_free; 861 } else { 862 ch_flags |= PR_IP6_USER | PR_IP6_DISABLE; 863 if (ip6s == 0) 864 pr_flags |= PR_IP6_USER | PR_IP6_DISABLE; 865 else { 866 pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER; 867 ip6s /= sizeof(*ip6); 868 if (ip6s > jail_max_af_ips) { 869 error = EINVAL; 870 vfs_opterror(opts, "too many IPv6 addresses"); 871 goto done_errmsg; 872 } 873 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 874 bcopy(op, ip6, ip6s * sizeof(*ip6)); 875 if (ip6s > 1) 876 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 877 for (ii = 0; ii < ip6s; ii++) { 878 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 879 error = EINVAL; 880 goto done_free; 881 } 882 if ((ii+1) < ip6s && 883 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 884 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 885 { 886 error = EINVAL; 887 goto done_free; 888 } 889 } 890 } 891 } 892#endif 893 894#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 895 if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 896 error = EINVAL; 897 vfs_opterror(opts, 898 "vnet jails cannot have IP address restrictions"); 899 goto done_errmsg; 900 } 901#endif 902 903 fullpath_disabled = 0; 904 root = NULL; 905 error = vfs_getopt(opts, "path", (void **)&path, &len); 906 if (error == ENOENT) 907 path = NULL; 908 else if (error != 0) 909 goto done_free; 910 else { 911 if (flags & JAIL_UPDATE) { 912 error = EINVAL; 913 vfs_opterror(opts, 914 "path cannot be changed after creation"); 915 goto done_errmsg; 916 } 917 if (len == 0 || path[len - 1] != '\0') { 918 error = EINVAL; 919 goto done_free; 920 } 921 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_SYSSPACE, 922 path, td); 923 error = namei(&nd); 924 if (error) 925 goto done_free; 926 vfslocked = NDHASGIANT(&nd); 927 root = nd.ni_vp; 928 NDFREE(&nd, NDF_ONLY_PNBUF); 929 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 930 
strlcpy(g_path, path, MAXPATHLEN); 931 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); 932 if (error == 0) 933 path = g_path; 934 else if (error == ENODEV) { 935 /* proceed if sysctl debug.disablefullpath == 1 */ 936 fullpath_disabled = 1; 937 if (len < 2 || (len == 2 && path[0] == '/')) 938 path = NULL; 939 } else { 940 /* exit on other errors */ 941 VFS_UNLOCK_GIANT(vfslocked); 942 goto done_free; 943 } 944 if (root->v_type != VDIR) { 945 error = ENOTDIR; 946 vput(root); 947 VFS_UNLOCK_GIANT(vfslocked); 948 goto done_free; 949 } 950 VOP_UNLOCK(root, 0); 951 VFS_UNLOCK_GIANT(vfslocked); 952 if (fullpath_disabled) { 953 /* Leave room for a real-root full pathname. */ 954 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 955 ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 956 error = ENAMETOOLONG; 957 goto done_free; 958 } 959 } 960 } 961 962 /* 963 * Grab the allprison lock before letting modules check their 964 * parameters. Once we have it, do not let go so we'll have a 965 * consistent view of the OSD list. 966 */ 967 sx_xlock(&allprison_lock); 968 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 969 if (error) 970 goto done_unlock_list; 971 972 /* By now, all parameters should have been noted. */ 973 TAILQ_FOREACH(opt, opts, link) { 974 if (!opt->seen && strcmp(opt->name, "errmsg")) { 975 error = EINVAL; 976 vfs_opterror(opts, "unknown parameter: %s", opt->name); 977 goto done_unlock_list; 978 } 979 } 980 981 /* 982 * See if we are creating a new record or updating an existing one. 983 * This abuses the file error codes ENOENT and EEXIST. 984 */ 985 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 986 if (!cuflags) { 987 error = EINVAL; 988 vfs_opterror(opts, "no valid operation (create or update)"); 989 goto done_unlock_list; 990 } 991 pr = NULL; 992 namelc = NULL; 993 if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { 994 namelc = strrchr(name, '.'); 995 jid = strtoul(namelc != NULL ? 
namelc + 1 : name, &p, 10); 996 if (*p != '\0') 997 jid = 0; 998 } 999 if (jid != 0) { 1000 /* 1001 * See if a requested jid already exists. There is an 1002 * information leak here if the jid exists but is not within 1003 * the caller's jail hierarchy. Jail creators will get EEXIST 1004 * even though they cannot see the jail, and CREATE | UPDATE 1005 * will return ENOENT which is not normally a valid error. 1006 */ 1007 if (jid < 0) { 1008 error = EINVAL; 1009 vfs_opterror(opts, "negative jid"); 1010 goto done_unlock_list; 1011 } 1012 pr = prison_find(jid); 1013 if (pr != NULL) { 1014 ppr = pr->pr_parent; 1015 /* Create: jid must not exist. */ 1016 if (cuflags == JAIL_CREATE) { 1017 mtx_unlock(&pr->pr_mtx); 1018 error = EEXIST; 1019 vfs_opterror(opts, "jail %d already exists", 1020 jid); 1021 goto done_unlock_list; 1022 } 1023 if (!prison_ischild(mypr, pr)) { 1024 mtx_unlock(&pr->pr_mtx); 1025 pr = NULL; 1026 } else if (pr->pr_uref == 0) { 1027 if (!(flags & JAIL_DYING)) { 1028 mtx_unlock(&pr->pr_mtx); 1029 error = ENOENT; 1030 vfs_opterror(opts, "jail %d is dying", 1031 jid); 1032 goto done_unlock_list; 1033 } else if ((flags & JAIL_ATTACH) || 1034 (pr_flags & PR_PERSIST)) { 1035 /* 1036 * A dying jail might be resurrected 1037 * (via attach or persist), but first 1038 * it must determine if another jail 1039 * has claimed its name. Accomplish 1040 * this by implicitly re-setting the 1041 * name. 1042 */ 1043 if (name == NULL) 1044 name = prison_name(mypr, pr); 1045 } 1046 } 1047 } 1048 if (pr == NULL) { 1049 /* Update: jid must exist. */ 1050 if (cuflags == JAIL_UPDATE) { 1051 error = ENOENT; 1052 vfs_opterror(opts, "jail %d not found", jid); 1053 goto done_unlock_list; 1054 } 1055 } 1056 } 1057 /* 1058 * If the caller provided a name, look for a jail by that name. 
1059 * This has different semantics for creates and updates keyed by jid 1060 * (where the name must not already exist in a different jail), 1061 * and updates keyed by the name itself (where the name must exist 1062 * because that is the jail being updated). 1063 */ 1064 if (name != NULL) { 1065 namelc = strrchr(name, '.'); 1066 if (namelc == NULL) 1067 namelc = name; 1068 else { 1069 /* 1070 * This is a hierarchical name. Split it into the 1071 * parent and child names, and make sure the parent 1072 * exists or matches an already found jail. 1073 */ 1074 *namelc = '\0'; 1075 if (pr != NULL) { 1076 if (strncmp(name, ppr->pr_name, namelc - name) 1077 || ppr->pr_name[namelc - name] != '\0') { 1078 mtx_unlock(&pr->pr_mtx); 1079 error = EINVAL; 1080 vfs_opterror(opts, 1081 "cannot change jail's parent"); 1082 goto done_unlock_list; 1083 } 1084 } else { 1085 ppr = prison_find_name(mypr, name); 1086 if (ppr == NULL) { 1087 error = ENOENT; 1088 vfs_opterror(opts, 1089 "jail \"%s\" not found", name); 1090 goto done_unlock_list; 1091 } 1092 mtx_unlock(&ppr->pr_mtx); 1093 } 1094 name = ++namelc; 1095 } 1096 if (name[0] != '\0') { 1097 namelen = 1098 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 1099 name_again: 1100 deadpr = NULL; 1101 FOREACH_PRISON_CHILD(ppr, tpr) { 1102 if (tpr != pr && tpr->pr_ref > 0 && 1103 !strcmp(tpr->pr_name + namelen, name)) { 1104 if (pr == NULL && 1105 cuflags != JAIL_CREATE) { 1106 mtx_lock(&tpr->pr_mtx); 1107 if (tpr->pr_ref > 0) { 1108 /* 1109 * Use this jail 1110 * for updates. 1111 */ 1112 if (tpr->pr_uref > 0) { 1113 pr = tpr; 1114 break; 1115 } 1116 deadpr = tpr; 1117 } 1118 mtx_unlock(&tpr->pr_mtx); 1119 } else if (tpr->pr_uref > 0) { 1120 /* 1121 * Create, or update(jid): 1122 * name must not exist in an 1123 * active sibling jail. 
1124 */ 1125 error = EEXIST; 1126 if (pr != NULL) 1127 mtx_unlock(&pr->pr_mtx); 1128 vfs_opterror(opts, 1129 "jail \"%s\" already exists", 1130 name); 1131 goto done_unlock_list; 1132 } 1133 } 1134 } 1135 /* If no active jail is found, use a dying one. */ 1136 if (deadpr != NULL && pr == NULL) { 1137 if (flags & JAIL_DYING) { 1138 mtx_lock(&deadpr->pr_mtx); 1139 if (deadpr->pr_ref == 0) { 1140 mtx_unlock(&deadpr->pr_mtx); 1141 goto name_again; 1142 } 1143 pr = deadpr; 1144 } else if (cuflags == JAIL_UPDATE) { 1145 error = ENOENT; 1146 vfs_opterror(opts, 1147 "jail \"%s\" is dying", name); 1148 goto done_unlock_list; 1149 } 1150 } 1151 /* Update: name must exist if no jid. */ 1152 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1153 error = ENOENT; 1154 vfs_opterror(opts, "jail \"%s\" not found", 1155 name); 1156 goto done_unlock_list; 1157 } 1158 } 1159 } 1160 /* Update: must provide a jid or name. */ 1161 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1162 error = ENOENT; 1163 vfs_opterror(opts, "update specified no jail"); 1164 goto done_unlock_list; 1165 } 1166 1167 /* If there's no prison to update, create a new one and link it in. */ 1168 if (pr == NULL) { 1169 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) 1170 if (tpr->pr_childcount >= tpr->pr_childmax) { 1171 error = EPERM; 1172 vfs_opterror(opts, "prison limit exceeded"); 1173 goto done_unlock_list; 1174 } 1175 created = 1; 1176 mtx_lock(&ppr->pr_mtx); 1177 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1178 mtx_unlock(&ppr->pr_mtx); 1179 error = ENOENT; 1180 vfs_opterror(opts, "parent jail went away!"); 1181 goto done_unlock_list; 1182 } 1183 ppr->pr_ref++; 1184 ppr->pr_uref++; 1185 mtx_unlock(&ppr->pr_mtx); 1186 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1187 if (jid == 0) { 1188 /* Find the next free jid. 
*/ 1189 jid = lastprid + 1; 1190 findnext: 1191 if (jid == JAIL_MAX) 1192 jid = 1; 1193 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1194 if (tpr->pr_id < jid) 1195 continue; 1196 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1197 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1198 break; 1199 } 1200 if (jid == lastprid) { 1201 error = EAGAIN; 1202 vfs_opterror(opts, 1203 "no available jail IDs"); 1204 free(pr, M_PRISON); 1205 prison_deref(ppr, PD_DEREF | 1206 PD_DEUREF | PD_LIST_XLOCKED); 1207 goto done_releroot; 1208 } 1209 jid++; 1210 goto findnext; 1211 } 1212 lastprid = jid; 1213 } else { 1214 /* 1215 * The jail already has a jid (that did not yet exist), 1216 * so just find where to insert it. 1217 */ 1218 TAILQ_FOREACH(tpr, &allprison, pr_list) 1219 if (tpr->pr_id >= jid) { 1220 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1221 break; 1222 } 1223 } 1224 if (tpr == NULL) 1225 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1226 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1227 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1228 tpr->pr_childcount++; 1229 1230 pr->pr_parent = ppr; 1231 pr->pr_id = jid; 1232 1233 /* Set some default values, and inherit some from the parent. 
*/ 1234 if (name == NULL) 1235 name = ""; 1236 if (path == NULL) { 1237 path = "/"; 1238 root = mypr->pr_root; 1239 vref(root); 1240 } 1241 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); 1242 pr->pr_flags |= PR_HOST; 1243#if defined(INET) || defined(INET6) 1244#ifdef VIMAGE 1245 if (!(pr_flags & PR_VNET)) 1246#endif 1247 { 1248#ifdef INET 1249 if (!(ch_flags & PR_IP4_USER)) 1250 pr->pr_flags |= 1251 PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE; 1252 else if (!(pr_flags & PR_IP4_USER)) { 1253 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1254 if (ppr->pr_ip4 != NULL) { 1255 pr->pr_ip4s = ppr->pr_ip4s; 1256 pr->pr_ip4 = malloc(pr->pr_ip4s * 1257 sizeof(struct in_addr), M_PRISON, 1258 M_WAITOK); 1259 bcopy(ppr->pr_ip4, pr->pr_ip4, 1260 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1261 } 1262 } 1263#endif 1264#ifdef INET6 1265 if (!(ch_flags & PR_IP6_USER)) 1266 pr->pr_flags |= 1267 PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE; 1268 else if (!(pr_flags & PR_IP6_USER)) { 1269 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1270 if (ppr->pr_ip6 != NULL) { 1271 pr->pr_ip6s = ppr->pr_ip6s; 1272 pr->pr_ip6 = malloc(pr->pr_ip6s * 1273 sizeof(struct in6_addr), M_PRISON, 1274 M_WAITOK); 1275 bcopy(ppr->pr_ip6, pr->pr_ip6, 1276 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1277 } 1278 } 1279#endif 1280 } 1281#endif 1282 /* Source address selection is always on by default. */ 1283 pr->pr_flags |= _PR_IP_SADDRSEL; 1284 1285 pr->pr_securelevel = ppr->pr_securelevel; 1286 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1287 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 1288 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; 1289 1290 LIST_INIT(&pr->pr_children); 1291 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1292 1293#ifdef VIMAGE 1294 /* Allocate a new vnet if specified. */ 1295 pr->pr_vnet = (pr_flags & PR_VNET) 1296 ? vnet_alloc() : ppr->pr_vnet; 1297#endif 1298 /* 1299 * Allocate a dedicated cpuset for each jail. 1300 * Unlike other initial settings, this may return an erorr. 
1301 */ 1302 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1303 if (error) { 1304 prison_deref(pr, PD_LIST_XLOCKED); 1305 goto done_releroot; 1306 } 1307 1308 mtx_lock(&pr->pr_mtx); 1309 /* 1310 * New prisons do not yet have a reference, because we do not 1311 * want other to see the incomplete prison once the 1312 * allprison_lock is downgraded. 1313 */ 1314 } else { 1315 created = 0; 1316 /* 1317 * Grab a reference for existing prisons, to ensure they 1318 * continue to exist for the duration of the call. 1319 */ 1320 pr->pr_ref++; 1321#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 1322 if ((pr->pr_flags & PR_VNET) && 1323 (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 1324 error = EINVAL; 1325 vfs_opterror(opts, 1326 "vnet jails cannot have IP address restrictions"); 1327 goto done_deref_locked; 1328 } 1329#endif 1330#ifdef INET 1331 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1332 error = EINVAL; 1333 vfs_opterror(opts, 1334 "ip4 cannot be changed after creation"); 1335 goto done_deref_locked; 1336 } 1337#endif 1338#ifdef INET6 1339 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1340 error = EINVAL; 1341 vfs_opterror(opts, 1342 "ip6 cannot be changed after creation"); 1343 goto done_deref_locked; 1344 } 1345#endif 1346 } 1347 1348 /* Do final error checking before setting anything. 
*/ 1349 if (gotslevel) { 1350 if (slevel < ppr->pr_securelevel) { 1351 error = EPERM; 1352 goto done_deref_locked; 1353 } 1354 } 1355 if (gotchildmax) { 1356 if (childmax >= ppr->pr_childmax) { 1357 error = EPERM; 1358 goto done_deref_locked; 1359 } 1360 } 1361 if (gotenforce) { 1362 if (enforce < ppr->pr_enforce_statfs) { 1363 error = EPERM; 1364 goto done_deref_locked; 1365 } 1366 } 1367 if (gotrsnum) { 1368 /* 1369 * devfs_rsnum is a uint16_t 1370 */ 1371 if (rsnum < 0 || rsnum > 65535) { 1372 error = EINVAL; 1373 goto done_deref_locked; 1374 } 1375 /* 1376 * Nested jails always inherit parent's devfs ruleset 1377 */ 1378 if (jailed(td->td_ucred)) { 1379 if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { 1380 error = EPERM; 1381 goto done_deref_locked; 1382 } else 1383 rsnum = ppr->pr_devfs_rsnum; 1384 } 1385 } 1386#ifdef INET 1387 if (ip4s > 0) { 1388 if (ppr->pr_flags & PR_IP4) { 1389 /* 1390 * Make sure the new set of IP addresses is a 1391 * subset of the parent's list. Don't worry 1392 * about the parent being unlocked, as any 1393 * setting is done with allprison_lock held. 1394 */ 1395 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1396 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 1397 break; 1398 if (ij == ppr->pr_ip4s) { 1399 error = EPERM; 1400 goto done_deref_locked; 1401 } 1402 if (ip4s > 1) { 1403 for (ii = ij = 1; ii < ip4s; ii++) { 1404 if (ip4[ii].s_addr == 1405 ppr->pr_ip4[0].s_addr) 1406 continue; 1407 for (; ij < ppr->pr_ip4s; ij++) 1408 if (ip4[ii].s_addr == 1409 ppr->pr_ip4[ij].s_addr) 1410 break; 1411 if (ij == ppr->pr_ip4s) 1412 break; 1413 } 1414 if (ij == ppr->pr_ip4s) { 1415 error = EPERM; 1416 goto done_deref_locked; 1417 } 1418 } 1419 } 1420 /* 1421 * Check for conflicting IP addresses. We permit them 1422 * if there is no more than one IP on each jail. If 1423 * there is a duplicate on a jail with more than one 1424 * IP stop checking and return error. 
1425 */ 1426 tppr = ppr; 1427#ifdef VIMAGE 1428 for (; tppr != &prison0; tppr = tppr->pr_parent) 1429 if (tppr->pr_flags & PR_VNET) 1430 break; 1431#endif 1432 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1433 if (tpr == pr || 1434#ifdef VIMAGE 1435 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1436#endif 1437 tpr->pr_uref == 0) { 1438 descend = 0; 1439 continue; 1440 } 1441 if (!(tpr->pr_flags & PR_IP4_USER)) 1442 continue; 1443 descend = 0; 1444 if (tpr->pr_ip4 == NULL || 1445 (ip4s == 1 && tpr->pr_ip4s == 1)) 1446 continue; 1447 for (ii = 0; ii < ip4s; ii++) { 1448 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { 1449 error = EADDRINUSE; 1450 vfs_opterror(opts, 1451 "IPv4 addresses clash"); 1452 goto done_deref_locked; 1453 } 1454 } 1455 } 1456 } 1457#endif 1458#ifdef INET6 1459 if (ip6s > 0) { 1460 if (ppr->pr_flags & PR_IP6) { 1461 /* 1462 * Make sure the new set of IP addresses is a 1463 * subset of the parent's list. 1464 */ 1465 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1466 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1467 &ppr->pr_ip6[ij])) 1468 break; 1469 if (ij == ppr->pr_ip6s) { 1470 error = EPERM; 1471 goto done_deref_locked; 1472 } 1473 if (ip6s > 1) { 1474 for (ii = ij = 1; ii < ip6s; ii++) { 1475 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1476 &ppr->pr_ip6[0])) 1477 continue; 1478 for (; ij < ppr->pr_ip6s; ij++) 1479 if (IN6_ARE_ADDR_EQUAL( 1480 &ip6[ii], &ppr->pr_ip6[ij])) 1481 break; 1482 if (ij == ppr->pr_ip6s) 1483 break; 1484 } 1485 if (ij == ppr->pr_ip6s) { 1486 error = EPERM; 1487 goto done_deref_locked; 1488 } 1489 } 1490 } 1491 /* Check for conflicting IP addresses. 
*/ 1492 tppr = ppr; 1493#ifdef VIMAGE 1494 for (; tppr != &prison0; tppr = tppr->pr_parent) 1495 if (tppr->pr_flags & PR_VNET) 1496 break; 1497#endif 1498 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1499 if (tpr == pr || 1500#ifdef VIMAGE 1501 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1502#endif 1503 tpr->pr_uref == 0) { 1504 descend = 0; 1505 continue; 1506 } 1507 if (!(tpr->pr_flags & PR_IP6_USER)) 1508 continue; 1509 descend = 0; 1510 if (tpr->pr_ip6 == NULL || 1511 (ip6s == 1 && tpr->pr_ip6s == 1)) 1512 continue; 1513 for (ii = 0; ii < ip6s; ii++) { 1514 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { 1515 error = EADDRINUSE; 1516 vfs_opterror(opts, 1517 "IPv6 addresses clash"); 1518 goto done_deref_locked; 1519 } 1520 } 1521 } 1522 } 1523#endif 1524 onamelen = namelen = 0; 1525 if (name != NULL) { 1526 /* Give a default name of the jid. */ 1527 if (name[0] == '\0') 1528 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1529 else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && 1530 *p == '\0')) { 1531 error = EINVAL; 1532 vfs_opterror(opts, 1533 "name cannot be numeric (unless it is the jid)"); 1534 goto done_deref_locked; 1535 } 1536 /* 1537 * Make sure the name isn't too long for the prison or its 1538 * children. 1539 */ 1540 onamelen = strlen(pr->pr_name); 1541 namelen = strlen(name); 1542 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1543 error = ENAMETOOLONG; 1544 goto done_deref_locked; 1545 } 1546 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1547 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1548 sizeof(pr->pr_name)) { 1549 error = ENAMETOOLONG; 1550 goto done_deref_locked; 1551 } 1552 } 1553 } 1554 if (pr_allow & ~ppr->pr_allow) { 1555 error = EPERM; 1556 goto done_deref_locked; 1557 } 1558 1559 /* Set the parameters of the prison. 
*/ 1560#ifdef INET 1561 redo_ip4 = 0; 1562 if (pr_flags & PR_IP4_USER) { 1563 pr->pr_flags |= PR_IP4; 1564 free(pr->pr_ip4, M_PRISON); 1565 pr->pr_ip4s = ip4s; 1566 pr->pr_ip4 = ip4; 1567 ip4 = NULL; 1568 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1569#ifdef VIMAGE 1570 if (tpr->pr_flags & PR_VNET) { 1571 descend = 0; 1572 continue; 1573 } 1574#endif 1575 if (prison_restrict_ip4(tpr, NULL)) { 1576 redo_ip4 = 1; 1577 descend = 0; 1578 } 1579 } 1580 } 1581#endif 1582#ifdef INET6 1583 redo_ip6 = 0; 1584 if (pr_flags & PR_IP6_USER) { 1585 pr->pr_flags |= PR_IP6; 1586 free(pr->pr_ip6, M_PRISON); 1587 pr->pr_ip6s = ip6s; 1588 pr->pr_ip6 = ip6; 1589 ip6 = NULL; 1590 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1591#ifdef VIMAGE 1592 if (tpr->pr_flags & PR_VNET) { 1593 descend = 0; 1594 continue; 1595 } 1596#endif 1597 if (prison_restrict_ip6(tpr, NULL)) { 1598 redo_ip6 = 1; 1599 descend = 0; 1600 } 1601 } 1602 } 1603#endif 1604 if (gotslevel) { 1605 pr->pr_securelevel = slevel; 1606 /* Set all child jails to be at least this level. */ 1607 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1608 if (tpr->pr_securelevel < slevel) 1609 tpr->pr_securelevel = slevel; 1610 } 1611 if (gotchildmax) { 1612 pr->pr_childmax = childmax; 1613 /* Set all child jails to under this limit. */ 1614 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) 1615 if (tpr->pr_childmax > childmax - level) 1616 tpr->pr_childmax = childmax > level 1617 ? childmax - level : 0; 1618 } 1619 if (gotenforce) { 1620 pr->pr_enforce_statfs = enforce; 1621 /* Pass this restriction on to the children. */ 1622 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1623 if (tpr->pr_enforce_statfs < enforce) 1624 tpr->pr_enforce_statfs = enforce; 1625 } 1626 if (gotrsnum) { 1627 pr->pr_devfs_rsnum = rsnum; 1628 /* Pass this restriction on to the children. 
*/ 1629 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1630 tpr->pr_devfs_rsnum = rsnum; 1631 } 1632 if (name != NULL) { 1633 if (ppr == &prison0) 1634 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1635 else 1636 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1637 ppr->pr_name, name); 1638 /* Change this component of child names. */ 1639 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1640 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1641 strlen(tpr->pr_name + onamelen) + 1); 1642 bcopy(pr->pr_name, tpr->pr_name, namelen); 1643 } 1644 } 1645 if (path != NULL) { 1646 /* Try to keep a real-rooted full pathname. */ 1647 if (fullpath_disabled && path[0] == '/' && 1648 strcmp(mypr->pr_path, "/")) 1649 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1650 mypr->pr_path, path); 1651 else 1652 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1653 pr->pr_root = root; 1654 } 1655 if (PR_HOST & ch_flags & ~pr_flags) { 1656 if (pr->pr_flags & PR_HOST) { 1657 /* 1658 * Copy the parent's host info. As with pr_ip4 above, 1659 * the lack of a lock on the parent is not a problem; 1660 * it is always set with allprison_lock at least 1661 * shared, and is held exclusively here. 1662 */ 1663 strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, 1664 sizeof(pr->pr_hostname)); 1665 strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, 1666 sizeof(pr->pr_domainname)); 1667 strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, 1668 sizeof(pr->pr_hostuuid)); 1669 pr->pr_hostid = pr->pr_parent->pr_hostid; 1670 } 1671 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1672 /* Set this prison, and any descendants without PR_HOST. 
*/ 1673 if (host != NULL) 1674 strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); 1675 if (domain != NULL) 1676 strlcpy(pr->pr_domainname, domain, 1677 sizeof(pr->pr_domainname)); 1678 if (uuid != NULL) 1679 strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); 1680 if (gothid) 1681 pr->pr_hostid = hid; 1682 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1683 if (tpr->pr_flags & PR_HOST) 1684 descend = 0; 1685 else { 1686 if (host != NULL) 1687 strlcpy(tpr->pr_hostname, 1688 pr->pr_hostname, 1689 sizeof(tpr->pr_hostname)); 1690 if (domain != NULL) 1691 strlcpy(tpr->pr_domainname, 1692 pr->pr_domainname, 1693 sizeof(tpr->pr_domainname)); 1694 if (uuid != NULL) 1695 strlcpy(tpr->pr_hostuuid, 1696 pr->pr_hostuuid, 1697 sizeof(tpr->pr_hostuuid)); 1698 if (gothid) 1699 tpr->pr_hostid = hid; 1700 } 1701 } 1702 } 1703 if ((tallow = ch_allow & ~pr_allow)) { 1704 /* Clear allow bits in all children. */ 1705 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1706 tpr->pr_allow &= ~tallow; 1707 } 1708 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1709 /* 1710 * Persistent prisons get an extra reference, and prisons losing their 1711 * persist flag lose that reference. Only do this for existing prisons 1712 * for now, so new ones will remain unseen until after the module 1713 * handlers have completed. 1714 */ 1715 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1716 if (pr_flags & PR_PERSIST) { 1717 pr->pr_ref++; 1718 pr->pr_uref++; 1719 } else { 1720 pr->pr_ref--; 1721 pr->pr_uref--; 1722 } 1723 } 1724 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1725 mtx_unlock(&pr->pr_mtx); 1726 1727#ifdef RACCT 1728 if (created) 1729 prison_racct_attach(pr); 1730#endif 1731 1732 /* Locks may have prevented a complete restriction of child IP 1733 * addresses. If so, allocate some more memory and try again. 
1734 */ 1735#ifdef INET 1736 while (redo_ip4) { 1737 ip4s = pr->pr_ip4s; 1738 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1739 mtx_lock(&pr->pr_mtx); 1740 redo_ip4 = 0; 1741 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1742#ifdef VIMAGE 1743 if (tpr->pr_flags & PR_VNET) { 1744 descend = 0; 1745 continue; 1746 } 1747#endif 1748 if (prison_restrict_ip4(tpr, ip4)) { 1749 if (ip4 != NULL) 1750 ip4 = NULL; 1751 else 1752 redo_ip4 = 1; 1753 } 1754 } 1755 mtx_unlock(&pr->pr_mtx); 1756 } 1757#endif 1758#ifdef INET6 1759 while (redo_ip6) { 1760 ip6s = pr->pr_ip6s; 1761 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1762 mtx_lock(&pr->pr_mtx); 1763 redo_ip6 = 0; 1764 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1765#ifdef VIMAGE 1766 if (tpr->pr_flags & PR_VNET) { 1767 descend = 0; 1768 continue; 1769 } 1770#endif 1771 if (prison_restrict_ip6(tpr, ip6)) { 1772 if (ip6 != NULL) 1773 ip6 = NULL; 1774 else 1775 redo_ip6 = 1; 1776 } 1777 } 1778 mtx_unlock(&pr->pr_mtx); 1779 } 1780#endif 1781 1782 /* Let the modules do their work. */ 1783 sx_downgrade(&allprison_lock); 1784 if (created) { 1785 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1786 if (error) { 1787 prison_deref(pr, PD_LIST_SLOCKED); 1788 goto done_errmsg; 1789 } 1790 } 1791 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1792 if (error) { 1793 prison_deref(pr, created 1794 ? PD_LIST_SLOCKED 1795 : PD_DEREF | PD_LIST_SLOCKED); 1796 goto done_errmsg; 1797 } 1798 1799 /* Attach this process to the prison if requested. */ 1800 if (flags & JAIL_ATTACH) { 1801 mtx_lock(&pr->pr_mtx); 1802 error = do_jail_attach(td, pr); 1803 if (error) { 1804 vfs_opterror(opts, "attach failed"); 1805 if (!created) 1806 prison_deref(pr, PD_DEREF); 1807 goto done_errmsg; 1808 } 1809 } 1810 1811 /* 1812 * Now that it is all there, drop the temporary reference from existing 1813 * prisons. 
Or add a reference to newly created persistent prisons 1814 * (which was not done earlier so that the prison would not be publicly 1815 * visible). 1816 */ 1817 if (!created) { 1818 prison_deref(pr, (flags & JAIL_ATTACH) 1819 ? PD_DEREF 1820 : PD_DEREF | PD_LIST_SLOCKED); 1821 } else { 1822 if (pr_flags & PR_PERSIST) { 1823 mtx_lock(&pr->pr_mtx); 1824 pr->pr_ref++; 1825 pr->pr_uref++; 1826 mtx_unlock(&pr->pr_mtx); 1827 } 1828 if (!(flags & JAIL_ATTACH)) 1829 sx_sunlock(&allprison_lock); 1830 } 1831 td->td_retval[0] = pr->pr_id; 1832 goto done_errmsg; 1833 1834 done_deref_locked: 1835 prison_deref(pr, created 1836 ? PD_LOCKED | PD_LIST_XLOCKED 1837 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1838 goto done_releroot; 1839 done_unlock_list: 1840 sx_xunlock(&allprison_lock); 1841 done_releroot: 1842 if (root != NULL) { 1843 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1844 vrele(root); 1845 VFS_UNLOCK_GIANT(vfslocked); 1846 } 1847 done_errmsg: 1848 if (error) { 1849 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1850 if (errmsg_len > 0) { 1851 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1852 if (errmsg_pos > 0) { 1853 if (optuio->uio_segflg == UIO_SYSSPACE) 1854 bcopy(errmsg, 1855 optuio->uio_iov[errmsg_pos].iov_base, 1856 errmsg_len); 1857 else 1858 copyout(errmsg, 1859 optuio->uio_iov[errmsg_pos].iov_base, 1860 errmsg_len); 1861 } 1862 } 1863 } 1864 done_free: 1865#ifdef INET 1866 free(ip4, M_PRISON); 1867#endif 1868#ifdef INET6 1869 free(ip6, M_PRISON); 1870#endif 1871 if (g_path != NULL) 1872 free(g_path, M_TEMP); 1873 vfs_freeopts(opts); 1874 return (error); 1875} 1876 1877 1878/* 1879 * struct jail_get_args { 1880 * struct iovec *iovp; 1881 * unsigned int iovcnt; 1882 * int flags; 1883 * }; 1884 */ 1885int 1886sys_jail_get(struct thread *td, struct jail_get_args *uap) 1887{ 1888 struct uio *auio; 1889 int error; 1890 1891 /* Check that we have an even number of iovecs. 
*/ 1892 if (uap->iovcnt & 1) 1893 return (EINVAL); 1894 1895 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1896 if (error) 1897 return (error); 1898 error = kern_jail_get(td, auio, uap->flags); 1899 if (error == 0) 1900 error = copyout(auio->uio_iov, uap->iovp, 1901 uap->iovcnt * sizeof (struct iovec)); 1902 free(auio, M_IOV); 1903 return (error); 1904} 1905 1906int 1907kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1908{ 1909 struct prison *pr, *mypr; 1910 struct vfsopt *opt; 1911 struct vfsoptlist *opts; 1912 char *errmsg, *name; 1913 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1914 1915 if (flags & ~JAIL_GET_MASK) 1916 return (EINVAL); 1917 1918 /* Get the parameter list. */ 1919 error = vfs_buildopts(optuio, &opts); 1920 if (error) 1921 return (error); 1922 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1923 mypr = td->td_ucred->cr_prison; 1924 1925 /* 1926 * Find the prison specified by one of: lastjid, jid, name. 1927 */ 1928 sx_slock(&allprison_lock); 1929 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1930 if (error == 0) { 1931 TAILQ_FOREACH(pr, &allprison, pr_list) { 1932 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1933 mtx_lock(&pr->pr_mtx); 1934 if (pr->pr_ref > 0 && 1935 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1936 break; 1937 mtx_unlock(&pr->pr_mtx); 1938 } 1939 } 1940 if (pr != NULL) 1941 goto found_prison; 1942 error = ENOENT; 1943 vfs_opterror(opts, "no jail after %d", jid); 1944 goto done_unlock_list; 1945 } else if (error != ENOENT) 1946 goto done_unlock_list; 1947 1948 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1949 if (error == 0) { 1950 if (jid != 0) { 1951 pr = prison_find_child(mypr, jid); 1952 if (pr != NULL) { 1953 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1954 mtx_unlock(&pr->pr_mtx); 1955 error = ENOENT; 1956 vfs_opterror(opts, "jail %d is dying", 1957 jid); 1958 goto done_unlock_list; 1959 } 1960 goto found_prison; 1961 } 1962 error = ENOENT; 1963 
vfs_opterror(opts, "jail %d not found", jid); 1964 goto done_unlock_list; 1965 } 1966 } else if (error != ENOENT) 1967 goto done_unlock_list; 1968 1969 error = vfs_getopt(opts, "name", (void **)&name, &len); 1970 if (error == 0) { 1971 if (len == 0 || name[len - 1] != '\0') { 1972 error = EINVAL; 1973 goto done_unlock_list; 1974 } 1975 pr = prison_find_name(mypr, name); 1976 if (pr != NULL) { 1977 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1978 mtx_unlock(&pr->pr_mtx); 1979 error = ENOENT; 1980 vfs_opterror(opts, "jail \"%s\" is dying", 1981 name); 1982 goto done_unlock_list; 1983 } 1984 goto found_prison; 1985 } 1986 error = ENOENT; 1987 vfs_opterror(opts, "jail \"%s\" not found", name); 1988 goto done_unlock_list; 1989 } else if (error != ENOENT) 1990 goto done_unlock_list; 1991 1992 vfs_opterror(opts, "no jail specified"); 1993 error = ENOENT; 1994 goto done_unlock_list; 1995 1996 found_prison: 1997 /* Get the parameters of the prison. */ 1998 pr->pr_ref++; 1999 locked = PD_LOCKED; 2000 td->td_retval[0] = pr->pr_id; 2001 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 2002 if (error != 0 && error != ENOENT) 2003 goto done_deref; 2004 i = (pr->pr_parent == mypr) ? 
0 : pr->pr_parent->pr_id; 2005 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 2006 if (error != 0 && error != ENOENT) 2007 goto done_deref; 2008 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 2009 if (error != 0 && error != ENOENT) 2010 goto done_deref; 2011 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 2012 sizeof(pr->pr_cpuset->cs_id)); 2013 if (error != 0 && error != ENOENT) 2014 goto done_deref; 2015 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 2016 if (error != 0 && error != ENOENT) 2017 goto done_deref; 2018#ifdef INET 2019 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 2020 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2021 if (error != 0 && error != ENOENT) 2022 goto done_deref; 2023#endif 2024#ifdef INET6 2025 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 2026 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2027 if (error != 0 && error != ENOENT) 2028 goto done_deref; 2029#endif 2030 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 2031 sizeof(pr->pr_securelevel)); 2032 if (error != 0 && error != ENOENT) 2033 goto done_deref; 2034 error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, 2035 sizeof(pr->pr_childcount)); 2036 if (error != 0 && error != ENOENT) 2037 goto done_deref; 2038 error = vfs_setopt(opts, "children.max", &pr->pr_childmax, 2039 sizeof(pr->pr_childmax)); 2040 if (error != 0 && error != ENOENT) 2041 goto done_deref; 2042 error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); 2043 if (error != 0 && error != ENOENT) 2044 goto done_deref; 2045 error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); 2046 if (error != 0 && error != ENOENT) 2047 goto done_deref; 2048 error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); 2049 if (error != 0 && error != ENOENT) 2050 goto done_deref; 2051#ifdef COMPAT_FREEBSD32 2052 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 2053 uint32_t hid32 = pr->pr_hostid; 2054 2055 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 2056 } 
else 2057#endif 2058 error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, 2059 sizeof(pr->pr_hostid)); 2060 if (error != 0 && error != ENOENT) 2061 goto done_deref; 2062 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 2063 sizeof(pr->pr_enforce_statfs)); 2064 if (error != 0 && error != ENOENT) 2065 goto done_deref; 2066 error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, 2067 sizeof(pr->pr_devfs_rsnum)); 2068 if (error != 0 && error != ENOENT) 2069 goto done_deref; 2070 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 2071 fi++) { 2072 if (pr_flag_names[fi] == NULL) 2073 continue; 2074 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 2075 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 2076 if (error != 0 && error != ENOENT) 2077 goto done_deref; 2078 i = !i; 2079 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 2080 if (error != 0 && error != ENOENT) 2081 goto done_deref; 2082 } 2083 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 2084 fi++) { 2085 i = pr->pr_flags & 2086 (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 2087 i = pr_flag_jailsys[fi].disable && 2088 (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE 2089 : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW 2090 : JAIL_SYS_INHERIT; 2091 error = 2092 vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); 2093 if (error != 0 && error != ENOENT) 2094 goto done_deref; 2095 } 2096 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 2097 fi++) { 2098 if (pr_allow_names[fi] == NULL) 2099 continue; 2100 i = (pr->pr_allow & (1 << fi)) ? 
1 : 0; 2101 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 2102 if (error != 0 && error != ENOENT) 2103 goto done_deref; 2104 i = !i; 2105 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 2106 if (error != 0 && error != ENOENT) 2107 goto done_deref; 2108 } 2109 i = (pr->pr_uref == 0); 2110 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 2111 if (error != 0 && error != ENOENT) 2112 goto done_deref; 2113 i = !i; 2114 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 2115 if (error != 0 && error != ENOENT) 2116 goto done_deref; 2117 2118 /* Get the module parameters. */ 2119 mtx_unlock(&pr->pr_mtx); 2120 locked = 0; 2121 error = osd_jail_call(pr, PR_METHOD_GET, opts); 2122 if (error) 2123 goto done_deref; 2124 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 2125 2126 /* By now, all parameters should have been noted. */ 2127 TAILQ_FOREACH(opt, opts, link) { 2128 if (!opt->seen && strcmp(opt->name, "errmsg")) { 2129 error = EINVAL; 2130 vfs_opterror(opts, "unknown parameter: %s", opt->name); 2131 goto done_errmsg; 2132 } 2133 } 2134 2135 /* Write the fetched parameters back to userspace. 
*/ 2136 error = 0; 2137 TAILQ_FOREACH(opt, opts, link) { 2138 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 2139 pos = 2 * opt->pos + 1; 2140 optuio->uio_iov[pos].iov_len = opt->len; 2141 if (opt->value != NULL) { 2142 if (optuio->uio_segflg == UIO_SYSSPACE) { 2143 bcopy(opt->value, 2144 optuio->uio_iov[pos].iov_base, 2145 opt->len); 2146 } else { 2147 error = copyout(opt->value, 2148 optuio->uio_iov[pos].iov_base, 2149 opt->len); 2150 if (error) 2151 break; 2152 } 2153 } 2154 } 2155 } 2156 goto done_errmsg; 2157 2158 done_deref: 2159 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 2160 goto done_errmsg; 2161 2162 done_unlock_list: 2163 sx_sunlock(&allprison_lock); 2164 done_errmsg: 2165 if (error && errmsg_pos >= 0) { 2166 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 2167 errmsg_pos = 2 * errmsg_pos + 1; 2168 if (errmsg_len > 0) { 2169 if (optuio->uio_segflg == UIO_SYSSPACE) 2170 bcopy(errmsg, 2171 optuio->uio_iov[errmsg_pos].iov_base, 2172 errmsg_len); 2173 else 2174 copyout(errmsg, 2175 optuio->uio_iov[errmsg_pos].iov_base, 2176 errmsg_len); 2177 } 2178 } 2179 vfs_freeopts(opts); 2180 return (error); 2181} 2182 2183 2184/* 2185 * struct jail_remove_args { 2186 * int jid; 2187 * }; 2188 */ 2189int 2190sys_jail_remove(struct thread *td, struct jail_remove_args *uap) 2191{ 2192 struct prison *pr, *cpr, *lpr, *tpr; 2193 int descend, error; 2194 2195 error = priv_check(td, PRIV_JAIL_REMOVE); 2196 if (error) 2197 return (error); 2198 2199 sx_xlock(&allprison_lock); 2200 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2201 if (pr == NULL) { 2202 sx_xunlock(&allprison_lock); 2203 return (EINVAL); 2204 } 2205 2206 /* Remove all descendants of this prison, then remove this prison. 
*/ 2207 pr->pr_ref++; 2208 pr->pr_flags |= PR_REMOVE; 2209 if (!LIST_EMPTY(&pr->pr_children)) { 2210 mtx_unlock(&pr->pr_mtx); 2211 lpr = NULL; 2212 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2213 mtx_lock(&cpr->pr_mtx); 2214 if (cpr->pr_ref > 0) { 2215 tpr = cpr; 2216 cpr->pr_ref++; 2217 cpr->pr_flags |= PR_REMOVE; 2218 } else { 2219 /* Already removed - do not do it again. */ 2220 tpr = NULL; 2221 } 2222 mtx_unlock(&cpr->pr_mtx); 2223 if (lpr != NULL) { 2224 mtx_lock(&lpr->pr_mtx); 2225 prison_remove_one(lpr); 2226 sx_xlock(&allprison_lock); 2227 } 2228 lpr = tpr; 2229 } 2230 if (lpr != NULL) { 2231 mtx_lock(&lpr->pr_mtx); 2232 prison_remove_one(lpr); 2233 sx_xlock(&allprison_lock); 2234 } 2235 mtx_lock(&pr->pr_mtx); 2236 } 2237 prison_remove_one(pr); 2238 return (0); 2239} 2240 2241static void 2242prison_remove_one(struct prison *pr) 2243{ 2244 struct proc *p; 2245 int deuref; 2246 2247 /* If the prison was persistent, it is not anymore. */ 2248 deuref = 0; 2249 if (pr->pr_flags & PR_PERSIST) { 2250 pr->pr_ref--; 2251 deuref = PD_DEUREF; 2252 pr->pr_flags &= ~PR_PERSIST; 2253 } 2254 2255 /* 2256 * jail_remove added a reference. If that's the only one, remove 2257 * the prison now. 2258 */ 2259 KASSERT(pr->pr_ref > 0, 2260 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2261 if (pr->pr_ref == 1) { 2262 prison_deref(pr, 2263 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2264 return; 2265 } 2266 2267 mtx_unlock(&pr->pr_mtx); 2268 sx_xunlock(&allprison_lock); 2269 /* 2270 * Kill all processes unfortunate enough to be attached to this prison. 2271 */ 2272 sx_slock(&allproc_lock); 2273 LIST_FOREACH(p, &allproc, p_list) { 2274 PROC_LOCK(p); 2275 if (p->p_state != PRS_NEW && p->p_ucred && 2276 p->p_ucred->cr_prison == pr) 2277 kern_psignal(p, SIGKILL); 2278 PROC_UNLOCK(p); 2279 } 2280 sx_sunlock(&allproc_lock); 2281 /* Remove the temporary reference added by jail_remove. 
*/ 2282 prison_deref(pr, deuref | PD_DEREF); 2283} 2284 2285 2286/* 2287 * struct jail_attach_args { 2288 * int jid; 2289 * }; 2290 */ 2291int 2292sys_jail_attach(struct thread *td, struct jail_attach_args *uap) 2293{ 2294 struct prison *pr; 2295 int error; 2296 2297 error = priv_check(td, PRIV_JAIL_ATTACH); 2298 if (error) 2299 return (error); 2300 2301 sx_slock(&allprison_lock); 2302 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2303 if (pr == NULL) { 2304 sx_sunlock(&allprison_lock); 2305 return (EINVAL); 2306 } 2307 2308 /* 2309 * Do not allow a process to attach to a prison that is not 2310 * considered to be "alive". 2311 */ 2312 if (pr->pr_uref == 0) { 2313 mtx_unlock(&pr->pr_mtx); 2314 sx_sunlock(&allprison_lock); 2315 return (EINVAL); 2316 } 2317 2318 return (do_jail_attach(td, pr)); 2319} 2320 2321static int 2322do_jail_attach(struct thread *td, struct prison *pr) 2323{ 2324 struct prison *ppr; 2325 struct proc *p; 2326 struct ucred *newcred, *oldcred; 2327 int vfslocked, error; 2328 2329 /* 2330 * XXX: Note that there is a slight race here if two threads 2331 * in the same privileged process attempt to attach to two 2332 * different jails at the same time. It is important for 2333 * user processes not to do this, or they might end up with 2334 * a process root from one prison, but attached to the jail 2335 * of another. 2336 */ 2337 pr->pr_ref++; 2338 pr->pr_uref++; 2339 mtx_unlock(&pr->pr_mtx); 2340 2341 /* Let modules do whatever they need to prepare for attaching. */ 2342 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2343 if (error) { 2344 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2345 return (error); 2346 } 2347 sx_sunlock(&allprison_lock); 2348 2349 /* 2350 * Reparent the newly attached process to this jail. 
2351 */ 2352 ppr = td->td_ucred->cr_prison; 2353 p = td->td_proc; 2354 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2355 if (error) 2356 goto e_revert_osd; 2357 2358 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2359 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2360 if ((error = change_dir(pr->pr_root, td)) != 0) 2361 goto e_unlock; 2362#ifdef MAC 2363 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2364 goto e_unlock; 2365#endif 2366 VOP_UNLOCK(pr->pr_root, 0); 2367 if ((error = change_root(pr->pr_root, td))) 2368 goto e_unlock_giant; 2369 VFS_UNLOCK_GIANT(vfslocked); 2370 2371 newcred = crget(); 2372 PROC_LOCK(p); 2373 oldcred = p->p_ucred; 2374 setsugid(p); 2375 crcopy(newcred, oldcred); 2376 newcred->cr_prison = pr; 2377 p->p_ucred = newcred; 2378 PROC_UNLOCK(p); 2379#ifdef RACCT 2380 racct_proc_ucred_changed(p, oldcred, newcred); 2381#endif 2382 crfree(oldcred); 2383 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2384 return (0); 2385 e_unlock: 2386 VOP_UNLOCK(pr->pr_root, 0); 2387 e_unlock_giant: 2388 VFS_UNLOCK_GIANT(vfslocked); 2389 e_revert_osd: 2390 /* Tell modules this thread is still in its old jail after all. */ 2391 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2392 prison_deref(pr, PD_DEREF | PD_DEUREF); 2393 return (error); 2394} 2395 2396 2397/* 2398 * Returns a locked prison instance, or NULL on failure. 2399 */ 2400struct prison * 2401prison_find(int prid) 2402{ 2403 struct prison *pr; 2404 2405 sx_assert(&allprison_lock, SX_LOCKED); 2406 TAILQ_FOREACH(pr, &allprison, pr_list) { 2407 if (pr->pr_id == prid) { 2408 mtx_lock(&pr->pr_mtx); 2409 if (pr->pr_ref > 0) 2410 return (pr); 2411 mtx_unlock(&pr->pr_mtx); 2412 } 2413 } 2414 return (NULL); 2415} 2416 2417/* 2418 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
2419 */ 2420struct prison * 2421prison_find_child(struct prison *mypr, int prid) 2422{ 2423 struct prison *pr; 2424 int descend; 2425 2426 sx_assert(&allprison_lock, SX_LOCKED); 2427 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2428 if (pr->pr_id == prid) { 2429 mtx_lock(&pr->pr_mtx); 2430 if (pr->pr_ref > 0) 2431 return (pr); 2432 mtx_unlock(&pr->pr_mtx); 2433 } 2434 } 2435 return (NULL); 2436} 2437 2438/* 2439 * Look for the name relative to mypr. Returns a locked prison or NULL. 2440 */ 2441struct prison * 2442prison_find_name(struct prison *mypr, const char *name) 2443{ 2444 struct prison *pr, *deadpr; 2445 size_t mylen; 2446 int descend; 2447 2448 sx_assert(&allprison_lock, SX_LOCKED); 2449 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2450 again: 2451 deadpr = NULL; 2452 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2453 if (!strcmp(pr->pr_name + mylen, name)) { 2454 mtx_lock(&pr->pr_mtx); 2455 if (pr->pr_ref > 0) { 2456 if (pr->pr_uref > 0) 2457 return (pr); 2458 deadpr = pr; 2459 } 2460 mtx_unlock(&pr->pr_mtx); 2461 } 2462 } 2463 /* There was no valid prison - perhaps there was a dying one. */ 2464 if (deadpr != NULL) { 2465 mtx_lock(&deadpr->pr_mtx); 2466 if (deadpr->pr_ref == 0) { 2467 mtx_unlock(&deadpr->pr_mtx); 2468 goto again; 2469 } 2470 } 2471 return (deadpr); 2472} 2473 2474/* 2475 * See if a prison has the specific flag set. 2476 */ 2477int 2478prison_flag(struct ucred *cred, unsigned flag) 2479{ 2480 2481 /* This is an atomic read, so no locking is necessary. */ 2482 return (cred->cr_prison->pr_flags & flag); 2483} 2484 2485int 2486prison_allow(struct ucred *cred, unsigned flag) 2487{ 2488 2489 /* This is an atomic read, so no locking is necessary. */ 2490 return (cred->cr_prison->pr_allow & flag); 2491} 2492 2493/* 2494 * Remove a prison reference. If that was the last reference, remove the 2495 * prison itself - but not in this context in case there are locks held. 
2496 */ 2497void 2498prison_free_locked(struct prison *pr) 2499{ 2500 2501 mtx_assert(&pr->pr_mtx, MA_OWNED); 2502 pr->pr_ref--; 2503 if (pr->pr_ref == 0) { 2504 mtx_unlock(&pr->pr_mtx); 2505 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2506 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2507 return; 2508 } 2509 mtx_unlock(&pr->pr_mtx); 2510} 2511 2512void 2513prison_free(struct prison *pr) 2514{ 2515 2516 mtx_lock(&pr->pr_mtx); 2517 prison_free_locked(pr); 2518} 2519 2520static void 2521prison_complete(void *context, int pending) 2522{ 2523 2524 prison_deref((struct prison *)context, 0); 2525} 2526 2527/* 2528 * Remove a prison reference (usually). This internal version assumes no 2529 * mutexes are held, except perhaps the prison itself. If there are no more 2530 * references, release and delist the prison. On completion, the prison lock 2531 * and the allprison lock are both unlocked. 2532 */ 2533static void 2534prison_deref(struct prison *pr, int flags) 2535{ 2536 struct prison *ppr, *tpr; 2537 int vfslocked; 2538 2539 if (!(flags & PD_LOCKED)) 2540 mtx_lock(&pr->pr_mtx); 2541 for (;;) { 2542 if (flags & PD_DEUREF) { 2543 pr->pr_uref--; 2544 KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); 2545 } 2546 if (flags & PD_DEREF) 2547 pr->pr_ref--; 2548 /* If the prison still has references, nothing else to do. 
*/ 2549 if (pr->pr_ref > 0) { 2550 mtx_unlock(&pr->pr_mtx); 2551 if (flags & PD_LIST_SLOCKED) 2552 sx_sunlock(&allprison_lock); 2553 else if (flags & PD_LIST_XLOCKED) 2554 sx_xunlock(&allprison_lock); 2555 return; 2556 } 2557 2558 mtx_unlock(&pr->pr_mtx); 2559 if (flags & PD_LIST_SLOCKED) { 2560 if (!sx_try_upgrade(&allprison_lock)) { 2561 sx_sunlock(&allprison_lock); 2562 sx_xlock(&allprison_lock); 2563 } 2564 } else if (!(flags & PD_LIST_XLOCKED)) 2565 sx_xlock(&allprison_lock); 2566 2567 TAILQ_REMOVE(&allprison, pr, pr_list); 2568 LIST_REMOVE(pr, pr_sibling); 2569 ppr = pr->pr_parent; 2570 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2571 tpr->pr_childcount--; 2572 sx_xunlock(&allprison_lock); 2573 2574#ifdef VIMAGE 2575 if (pr->pr_vnet != ppr->pr_vnet) 2576 vnet_destroy(pr->pr_vnet); 2577#endif 2578 if (pr->pr_root != NULL) { 2579 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2580 vrele(pr->pr_root); 2581 VFS_UNLOCK_GIANT(vfslocked); 2582 } 2583 mtx_destroy(&pr->pr_mtx); 2584#ifdef INET 2585 free(pr->pr_ip4, M_PRISON); 2586#endif 2587#ifdef INET6 2588 free(pr->pr_ip6, M_PRISON); 2589#endif 2590 if (pr->pr_cpuset != NULL) 2591 cpuset_rel(pr->pr_cpuset); 2592 osd_jail_exit(pr); 2593#ifdef RACCT 2594 prison_racct_detach(pr); 2595#endif 2596 free(pr, M_PRISON); 2597 2598 /* Removing a prison frees a reference on its parent. 
*/ 2599 pr = ppr; 2600 mtx_lock(&pr->pr_mtx); 2601 flags = PD_DEREF | PD_DEUREF; 2602 } 2603} 2604 2605void 2606prison_hold_locked(struct prison *pr) 2607{ 2608 2609 mtx_assert(&pr->pr_mtx, MA_OWNED); 2610 KASSERT(pr->pr_ref > 0, 2611 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2612 pr->pr_ref++; 2613} 2614 2615void 2616prison_hold(struct prison *pr) 2617{ 2618 2619 mtx_lock(&pr->pr_mtx); 2620 prison_hold_locked(pr); 2621 mtx_unlock(&pr->pr_mtx); 2622} 2623 2624void 2625prison_proc_hold(struct prison *pr) 2626{ 2627 2628 mtx_lock(&pr->pr_mtx); 2629 KASSERT(pr->pr_uref > 0, 2630 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2631 pr->pr_uref++; 2632 mtx_unlock(&pr->pr_mtx); 2633} 2634 2635void 2636prison_proc_free(struct prison *pr) 2637{ 2638 2639 mtx_lock(&pr->pr_mtx); 2640 KASSERT(pr->pr_uref > 0, 2641 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2642 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2643} 2644 2645 2646#ifdef INET 2647/* 2648 * Restrict a prison's IP address list with its parent's, possibly replacing 2649 * it. Return true if the replacement buffer was used (or would have been). 2650 */ 2651static int 2652prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2653{ 2654 int ii, ij, used; 2655 struct prison *ppr; 2656 2657 ppr = pr->pr_parent; 2658 if (!(pr->pr_flags & PR_IP4_USER)) { 2659 /* This has no user settings, so just copy the parent's list. */ 2660 if (pr->pr_ip4s < ppr->pr_ip4s) { 2661 /* 2662 * There's no room for the parent's list. Use the 2663 * new list buffer, which is assumed to be big enough 2664 * (if it was passed). If there's no buffer, try to 2665 * allocate one. 
2666 */ 2667 used = 1; 2668 if (newip4 == NULL) { 2669 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2670 M_PRISON, M_NOWAIT); 2671 if (newip4 != NULL) 2672 used = 0; 2673 } 2674 if (newip4 != NULL) { 2675 bcopy(ppr->pr_ip4, newip4, 2676 ppr->pr_ip4s * sizeof(*newip4)); 2677 free(pr->pr_ip4, M_PRISON); 2678 pr->pr_ip4 = newip4; 2679 pr->pr_ip4s = ppr->pr_ip4s; 2680 } 2681 return (used); 2682 } 2683 pr->pr_ip4s = ppr->pr_ip4s; 2684 if (pr->pr_ip4s > 0) 2685 bcopy(ppr->pr_ip4, pr->pr_ip4, 2686 pr->pr_ip4s * sizeof(*newip4)); 2687 else if (pr->pr_ip4 != NULL) { 2688 free(pr->pr_ip4, M_PRISON); 2689 pr->pr_ip4 = NULL; 2690 } 2691 } else if (pr->pr_ip4s > 0) { 2692 /* Remove addresses that aren't in the parent. */ 2693 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2694 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2695 break; 2696 if (ij < ppr->pr_ip4s) 2697 ii = 1; 2698 else { 2699 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2700 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2701 ii = 0; 2702 } 2703 for (ij = 1; ii < pr->pr_ip4s; ) { 2704 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2705 ii++; 2706 continue; 2707 } 2708 switch (ij >= ppr->pr_ip4s ? -1 : 2709 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2710 case -1: 2711 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2712 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2713 break; 2714 case 0: 2715 ii++; 2716 ij++; 2717 break; 2718 case 1: 2719 ij++; 2720 break; 2721 } 2722 } 2723 if (pr->pr_ip4s == 0) { 2724 pr->pr_flags |= PR_IP4_DISABLE; 2725 free(pr->pr_ip4, M_PRISON); 2726 pr->pr_ip4 = NULL; 2727 } 2728 } 2729 return (0); 2730} 2731 2732/* 2733 * Pass back primary IPv4 address of this jail. 2734 * 2735 * If not restricted return success but do not alter the address. Caller has 2736 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2737 * 2738 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2739 * Address returned in NBO. 
2740 */ 2741int 2742prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2743{ 2744 struct prison *pr; 2745 2746 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2747 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2748 2749 pr = cred->cr_prison; 2750 if (!(pr->pr_flags & PR_IP4)) 2751 return (0); 2752 mtx_lock(&pr->pr_mtx); 2753 if (!(pr->pr_flags & PR_IP4)) { 2754 mtx_unlock(&pr->pr_mtx); 2755 return (0); 2756 } 2757 if (pr->pr_ip4 == NULL) { 2758 mtx_unlock(&pr->pr_mtx); 2759 return (EAFNOSUPPORT); 2760 } 2761 2762 ia->s_addr = pr->pr_ip4[0].s_addr; 2763 mtx_unlock(&pr->pr_mtx); 2764 return (0); 2765} 2766 2767/* 2768 * Return 1 if we should do proper source address selection or are not jailed. 2769 * We will return 0 if we should bypass source address selection in favour 2770 * of the primary jail IPv4 address. Only in this case *ia will be updated and 2771 * returned in NBO. 2772 * Return EAFNOSUPPORT, in case this jail does not allow IPv4. 2773 */ 2774int 2775prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 2776{ 2777 struct prison *pr; 2778 struct in_addr lia; 2779 int error; 2780 2781 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2782 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2783 2784 if (!jailed(cred)) 2785 return (1); 2786 2787 pr = cred->cr_prison; 2788 if (pr->pr_flags & PR_IP4_SADDRSEL) 2789 return (1); 2790 2791 lia.s_addr = INADDR_ANY; 2792 error = prison_get_ip4(cred, &lia); 2793 if (error) 2794 return (error); 2795 if (lia.s_addr == INADDR_ANY) 2796 return (1); 2797 2798 ia->s_addr = lia.s_addr; 2799 return (0); 2800} 2801 2802/* 2803 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2804 */ 2805int 2806prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2807{ 2808 2809 if (pr1 == pr2) 2810 return (1); 2811 2812 /* 2813 * No need to lock since the PR_IP4_USER flag can't be altered for 2814 * existing prisons. 
2815 */ 2816 while (pr1 != &prison0 && 2817#ifdef VIMAGE 2818 !(pr1->pr_flags & PR_VNET) && 2819#endif 2820 !(pr1->pr_flags & PR_IP4_USER)) 2821 pr1 = pr1->pr_parent; 2822 while (pr2 != &prison0 && 2823#ifdef VIMAGE 2824 !(pr2->pr_flags & PR_VNET) && 2825#endif 2826 !(pr2->pr_flags & PR_IP4_USER)) 2827 pr2 = pr2->pr_parent; 2828 return (pr1 == pr2); 2829} 2830 2831/* 2832 * Make sure our (source) address is set to something meaningful to this 2833 * jail. 2834 * 2835 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2836 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2837 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2838 */ 2839int 2840prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2841{ 2842 struct prison *pr; 2843 struct in_addr ia0; 2844 int error; 2845 2846 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2847 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2848 2849 pr = cred->cr_prison; 2850 if (!(pr->pr_flags & PR_IP4)) 2851 return (0); 2852 mtx_lock(&pr->pr_mtx); 2853 if (!(pr->pr_flags & PR_IP4)) { 2854 mtx_unlock(&pr->pr_mtx); 2855 return (0); 2856 } 2857 if (pr->pr_ip4 == NULL) { 2858 mtx_unlock(&pr->pr_mtx); 2859 return (EAFNOSUPPORT); 2860 } 2861 2862 ia0.s_addr = ntohl(ia->s_addr); 2863 if (ia0.s_addr == INADDR_LOOPBACK) { 2864 ia->s_addr = pr->pr_ip4[0].s_addr; 2865 mtx_unlock(&pr->pr_mtx); 2866 return (0); 2867 } 2868 2869 if (ia0.s_addr == INADDR_ANY) { 2870 /* 2871 * In case there is only 1 IPv4 address, bind directly. 2872 */ 2873 if (pr->pr_ip4s == 1) 2874 ia->s_addr = pr->pr_ip4[0].s_addr; 2875 mtx_unlock(&pr->pr_mtx); 2876 return (0); 2877 } 2878 2879 error = _prison_check_ip4(pr, ia); 2880 mtx_unlock(&pr->pr_mtx); 2881 return (error); 2882} 2883 2884/* 2885 * Rewrite destination address in case we will connect to loopback address. 2886 * 2887 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
2888 * Address passed in in NBO and returned in NBO. 2889 */ 2890int 2891prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2892{ 2893 struct prison *pr; 2894 2895 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2896 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2897 2898 pr = cred->cr_prison; 2899 if (!(pr->pr_flags & PR_IP4)) 2900 return (0); 2901 mtx_lock(&pr->pr_mtx); 2902 if (!(pr->pr_flags & PR_IP4)) { 2903 mtx_unlock(&pr->pr_mtx); 2904 return (0); 2905 } 2906 if (pr->pr_ip4 == NULL) { 2907 mtx_unlock(&pr->pr_mtx); 2908 return (EAFNOSUPPORT); 2909 } 2910 2911 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2912 ia->s_addr = pr->pr_ip4[0].s_addr; 2913 mtx_unlock(&pr->pr_mtx); 2914 return (0); 2915 } 2916 2917 /* 2918 * Return success because nothing had to be changed. 2919 */ 2920 mtx_unlock(&pr->pr_mtx); 2921 return (0); 2922} 2923 2924/* 2925 * Check if given address belongs to the jail referenced by cred/prison. 2926 * 2927 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2928 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2929 * doesn't allow IPv4. Address passed in in NBO. 2930 */ 2931static int 2932_prison_check_ip4(struct prison *pr, struct in_addr *ia) 2933{ 2934 int i, a, z, d; 2935 2936 /* 2937 * Check the primary IP. 2938 */ 2939 if (pr->pr_ip4[0].s_addr == ia->s_addr) 2940 return (0); 2941 2942 /* 2943 * All the other IPs are sorted so we can do a binary search. 
2944 */ 2945 a = 0; 2946 z = pr->pr_ip4s - 2; 2947 while (a <= z) { 2948 i = (a + z) / 2; 2949 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2950 if (d > 0) 2951 z = i - 1; 2952 else if (d < 0) 2953 a = i + 1; 2954 else 2955 return (0); 2956 } 2957 2958 return (EADDRNOTAVAIL); 2959} 2960 2961int 2962prison_check_ip4(struct ucred *cred, struct in_addr *ia) 2963{ 2964 struct prison *pr; 2965 int error; 2966 2967 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2968 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2969 2970 pr = cred->cr_prison; 2971 if (!(pr->pr_flags & PR_IP4)) 2972 return (0); 2973 mtx_lock(&pr->pr_mtx); 2974 if (!(pr->pr_flags & PR_IP4)) { 2975 mtx_unlock(&pr->pr_mtx); 2976 return (0); 2977 } 2978 if (pr->pr_ip4 == NULL) { 2979 mtx_unlock(&pr->pr_mtx); 2980 return (EAFNOSUPPORT); 2981 } 2982 2983 error = _prison_check_ip4(pr, ia); 2984 mtx_unlock(&pr->pr_mtx); 2985 return (error); 2986} 2987#endif 2988 2989#ifdef INET6 2990static int 2991prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2992{ 2993 int ii, ij, used; 2994 struct prison *ppr; 2995 2996 ppr = pr->pr_parent; 2997 if (!(pr->pr_flags & PR_IP6_USER)) { 2998 /* This has no user settings, so just copy the parent's list. */ 2999 if (pr->pr_ip6s < ppr->pr_ip6s) { 3000 /* 3001 * There's no room for the parent's list. Use the 3002 * new list buffer, which is assumed to be big enough 3003 * (if it was passed). If there's no buffer, try to 3004 * allocate one. 
3005 */ 3006 used = 1; 3007 if (newip6 == NULL) { 3008 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 3009 M_PRISON, M_NOWAIT); 3010 if (newip6 != NULL) 3011 used = 0; 3012 } 3013 if (newip6 != NULL) { 3014 bcopy(ppr->pr_ip6, newip6, 3015 ppr->pr_ip6s * sizeof(*newip6)); 3016 free(pr->pr_ip6, M_PRISON); 3017 pr->pr_ip6 = newip6; 3018 pr->pr_ip6s = ppr->pr_ip6s; 3019 } 3020 return (used); 3021 } 3022 pr->pr_ip6s = ppr->pr_ip6s; 3023 if (pr->pr_ip6s > 0) 3024 bcopy(ppr->pr_ip6, pr->pr_ip6, 3025 pr->pr_ip6s * sizeof(*newip6)); 3026 else if (pr->pr_ip6 != NULL) { 3027 free(pr->pr_ip6, M_PRISON); 3028 pr->pr_ip6 = NULL; 3029 } 3030 } else if (pr->pr_ip6s > 0) { 3031 /* Remove addresses that aren't in the parent. */ 3032 for (ij = 0; ij < ppr->pr_ip6s; ij++) 3033 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 3034 &ppr->pr_ip6[ij])) 3035 break; 3036 if (ij < ppr->pr_ip6s) 3037 ii = 1; 3038 else { 3039 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 3040 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 3041 ii = 0; 3042 } 3043 for (ij = 1; ii < pr->pr_ip6s; ) { 3044 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 3045 &ppr->pr_ip6[0])) { 3046 ii++; 3047 continue; 3048 } 3049 switch (ij >= ppr->pr_ip4s ? -1 : 3050 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 3051 case -1: 3052 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 3053 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 3054 break; 3055 case 0: 3056 ii++; 3057 ij++; 3058 break; 3059 case 1: 3060 ij++; 3061 break; 3062 } 3063 } 3064 if (pr->pr_ip6s == 0) { 3065 pr->pr_flags |= PR_IP6_DISABLE; 3066 free(pr->pr_ip6, M_PRISON); 3067 pr->pr_ip6 = NULL; 3068 } 3069 } 3070 return 0; 3071} 3072 3073/* 3074 * Pass back primary IPv6 address for this jail. 3075 * 3076 * If not restricted return success but do not alter the address. Caller has 3077 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 3078 * 3079 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
3080 */ 3081int 3082prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 3083{ 3084 struct prison *pr; 3085 3086 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3087 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3088 3089 pr = cred->cr_prison; 3090 if (!(pr->pr_flags & PR_IP6)) 3091 return (0); 3092 mtx_lock(&pr->pr_mtx); 3093 if (!(pr->pr_flags & PR_IP6)) { 3094 mtx_unlock(&pr->pr_mtx); 3095 return (0); 3096 } 3097 if (pr->pr_ip6 == NULL) { 3098 mtx_unlock(&pr->pr_mtx); 3099 return (EAFNOSUPPORT); 3100 } 3101 3102 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3103 mtx_unlock(&pr->pr_mtx); 3104 return (0); 3105} 3106 3107/* 3108 * Return 1 if we should do proper source address selection or are not jailed. 3109 * We will return 0 if we should bypass source address selection in favour 3110 * of the primary jail IPv6 address. Only in this case *ia will be updated and 3111 * returned in NBO. 3112 * Return EAFNOSUPPORT, in case this jail does not allow IPv6. 3113 */ 3114int 3115prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) 3116{ 3117 struct prison *pr; 3118 struct in6_addr lia6; 3119 int error; 3120 3121 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3122 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3123 3124 if (!jailed(cred)) 3125 return (1); 3126 3127 pr = cred->cr_prison; 3128 if (pr->pr_flags & PR_IP6_SADDRSEL) 3129 return (1); 3130 3131 lia6 = in6addr_any; 3132 error = prison_get_ip6(cred, &lia6); 3133 if (error) 3134 return (error); 3135 if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) 3136 return (1); 3137 3138 bcopy(&lia6, ia6, sizeof(struct in6_addr)); 3139 return (0); 3140} 3141 3142/* 3143 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 
3144 */ 3145int 3146prison_equal_ip6(struct prison *pr1, struct prison *pr2) 3147{ 3148 3149 if (pr1 == pr2) 3150 return (1); 3151 3152 while (pr1 != &prison0 && 3153#ifdef VIMAGE 3154 !(pr1->pr_flags & PR_VNET) && 3155#endif 3156 !(pr1->pr_flags & PR_IP6_USER)) 3157 pr1 = pr1->pr_parent; 3158 while (pr2 != &prison0 && 3159#ifdef VIMAGE 3160 !(pr2->pr_flags & PR_VNET) && 3161#endif 3162 !(pr2->pr_flags & PR_IP6_USER)) 3163 pr2 = pr2->pr_parent; 3164 return (pr1 == pr2); 3165} 3166 3167/* 3168 * Make sure our (source) address is set to something meaningful to this jail. 3169 * 3170 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 3171 * when needed while binding. 3172 * 3173 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3174 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3175 * doesn't allow IPv6. 3176 */ 3177int 3178prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 3179{ 3180 struct prison *pr; 3181 int error; 3182 3183 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3184 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3185 3186 pr = cred->cr_prison; 3187 if (!(pr->pr_flags & PR_IP6)) 3188 return (0); 3189 mtx_lock(&pr->pr_mtx); 3190 if (!(pr->pr_flags & PR_IP6)) { 3191 mtx_unlock(&pr->pr_mtx); 3192 return (0); 3193 } 3194 if (pr->pr_ip6 == NULL) { 3195 mtx_unlock(&pr->pr_mtx); 3196 return (EAFNOSUPPORT); 3197 } 3198 3199 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3200 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3201 mtx_unlock(&pr->pr_mtx); 3202 return (0); 3203 } 3204 3205 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 3206 /* 3207 * In case there is only 1 IPv6 address, and v6only is true, 3208 * then bind directly. 
3209 */ 3210 if (v6only != 0 && pr->pr_ip6s == 1) 3211 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3212 mtx_unlock(&pr->pr_mtx); 3213 return (0); 3214 } 3215 3216 error = _prison_check_ip6(pr, ia6); 3217 mtx_unlock(&pr->pr_mtx); 3218 return (error); 3219} 3220 3221/* 3222 * Rewrite destination address in case we will connect to loopback address. 3223 * 3224 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 3225 */ 3226int 3227prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 3228{ 3229 struct prison *pr; 3230 3231 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3232 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3233 3234 pr = cred->cr_prison; 3235 if (!(pr->pr_flags & PR_IP6)) 3236 return (0); 3237 mtx_lock(&pr->pr_mtx); 3238 if (!(pr->pr_flags & PR_IP6)) { 3239 mtx_unlock(&pr->pr_mtx); 3240 return (0); 3241 } 3242 if (pr->pr_ip6 == NULL) { 3243 mtx_unlock(&pr->pr_mtx); 3244 return (EAFNOSUPPORT); 3245 } 3246 3247 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3248 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3249 mtx_unlock(&pr->pr_mtx); 3250 return (0); 3251 } 3252 3253 /* 3254 * Return success because nothing had to be changed. 3255 */ 3256 mtx_unlock(&pr->pr_mtx); 3257 return (0); 3258} 3259 3260/* 3261 * Check if given address belongs to the jail referenced by cred/prison. 3262 * 3263 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3264 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3265 * doesn't allow IPv6. 3266 */ 3267static int 3268_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3269{ 3270 int i, a, z, d; 3271 3272 /* 3273 * Check the primary IP. 3274 */ 3275 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3276 return (0); 3277 3278 /* 3279 * All the other IPs are sorted so we can do a binary search. 
3280 */ 3281 a = 0; 3282 z = pr->pr_ip6s - 2; 3283 while (a <= z) { 3284 i = (a + z) / 2; 3285 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3286 if (d > 0) 3287 z = i - 1; 3288 else if (d < 0) 3289 a = i + 1; 3290 else 3291 return (0); 3292 } 3293 3294 return (EADDRNOTAVAIL); 3295} 3296 3297int 3298prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3299{ 3300 struct prison *pr; 3301 int error; 3302 3303 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3304 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3305 3306 pr = cred->cr_prison; 3307 if (!(pr->pr_flags & PR_IP6)) 3308 return (0); 3309 mtx_lock(&pr->pr_mtx); 3310 if (!(pr->pr_flags & PR_IP6)) { 3311 mtx_unlock(&pr->pr_mtx); 3312 return (0); 3313 } 3314 if (pr->pr_ip6 == NULL) { 3315 mtx_unlock(&pr->pr_mtx); 3316 return (EAFNOSUPPORT); 3317 } 3318 3319 error = _prison_check_ip6(pr, ia6); 3320 mtx_unlock(&pr->pr_mtx); 3321 return (error); 3322} 3323#endif 3324 3325/* 3326 * Check if a jail supports the given address family. 3327 * 3328 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3329 * if not. 3330 */ 3331int 3332prison_check_af(struct ucred *cred, int af) 3333{ 3334 struct prison *pr; 3335 int error; 3336 3337 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3338 3339 pr = cred->cr_prison; 3340#ifdef VIMAGE 3341 /* Prisons with their own network stack are not limited. 
*/ 3342 if (prison_owns_vnet(cred)) 3343 return (0); 3344#endif 3345 3346 error = 0; 3347 switch (af) 3348 { 3349#ifdef INET 3350 case AF_INET: 3351 if (pr->pr_flags & PR_IP4) 3352 { 3353 mtx_lock(&pr->pr_mtx); 3354 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3355 error = EAFNOSUPPORT; 3356 mtx_unlock(&pr->pr_mtx); 3357 } 3358 break; 3359#endif 3360#ifdef INET6 3361 case AF_INET6: 3362 if (pr->pr_flags & PR_IP6) 3363 { 3364 mtx_lock(&pr->pr_mtx); 3365 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3366 error = EAFNOSUPPORT; 3367 mtx_unlock(&pr->pr_mtx); 3368 } 3369 break; 3370#endif 3371 case AF_LOCAL: 3372 case AF_ROUTE: 3373 break; 3374 default: 3375 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3376 error = EAFNOSUPPORT; 3377 } 3378 return (error); 3379} 3380 3381/* 3382 * Check if given address belongs to the jail referenced by cred (wrapper to 3383 * prison_check_ip[46]). 3384 * 3385 * Returns 0 if jail doesn't restrict the address family or if address belongs 3386 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3387 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3388 */ 3389int 3390prison_if(struct ucred *cred, struct sockaddr *sa) 3391{ 3392#ifdef INET 3393 struct sockaddr_in *sai; 3394#endif 3395#ifdef INET6 3396 struct sockaddr_in6 *sai6; 3397#endif 3398 int error; 3399 3400 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3401 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3402 3403#ifdef VIMAGE 3404 if (prison_owns_vnet(cred)) 3405 return (0); 3406#endif 3407 3408 error = 0; 3409 switch (sa->sa_family) 3410 { 3411#ifdef INET 3412 case AF_INET: 3413 sai = (struct sockaddr_in *)sa; 3414 error = prison_check_ip4(cred, &sai->sin_addr); 3415 break; 3416#endif 3417#ifdef INET6 3418 case AF_INET6: 3419 sai6 = (struct sockaddr_in6 *)sa; 3420 error = prison_check_ip6(cred, &sai6->sin6_addr); 3421 break; 3422#endif 3423 default: 3424 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3425 error = EAFNOSUPPORT; 3426 } 3427 return (error); 3428} 3429 3430/* 3431 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3432 */ 3433int 3434prison_check(struct ucred *cred1, struct ucred *cred2) 3435{ 3436 3437 return ((cred1->cr_prison == cred2->cr_prison || 3438 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3439} 3440 3441/* 3442 * Return 1 if p2 is a child of p1, otherwise 0. 3443 */ 3444int 3445prison_ischild(struct prison *pr1, struct prison *pr2) 3446{ 3447 3448 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3449 if (pr1 == pr2) 3450 return (1); 3451 return (0); 3452} 3453 3454/* 3455 * Return 1 if the passed credential is in a jail, otherwise 0. 3456 */ 3457int 3458jailed(struct ucred *cred) 3459{ 3460 3461 return (cred->cr_prison != &prison0); 3462} 3463 3464/* 3465 * Return 1 if the passed credential is in a jail and that jail does not 3466 * have its own virtual network stack, otherwise 0. 
3467 */ 3468int 3469jailed_without_vnet(struct ucred *cred) 3470{ 3471 3472 if (!jailed(cred)) 3473 return (0); 3474#ifdef VIMAGE 3475 if (prison_owns_vnet(cred)) 3476 return (0); 3477#endif 3478 3479 return (1); 3480} 3481 3482/* 3483 * Return the correct hostname (domainname, et al) for the passed credential. 3484 */ 3485void 3486getcredhostname(struct ucred *cred, char *buf, size_t size) 3487{ 3488 struct prison *pr; 3489 3490 /* 3491 * A NULL credential can be used to shortcut to the physical 3492 * system's hostname. 3493 */ 3494 pr = (cred != NULL) ? cred->cr_prison : &prison0; 3495 mtx_lock(&pr->pr_mtx); 3496 strlcpy(buf, pr->pr_hostname, size); 3497 mtx_unlock(&pr->pr_mtx); 3498} 3499 3500void 3501getcreddomainname(struct ucred *cred, char *buf, size_t size) 3502{ 3503 3504 mtx_lock(&cred->cr_prison->pr_mtx); 3505 strlcpy(buf, cred->cr_prison->pr_domainname, size); 3506 mtx_unlock(&cred->cr_prison->pr_mtx); 3507} 3508 3509void 3510getcredhostuuid(struct ucred *cred, char *buf, size_t size) 3511{ 3512 3513 mtx_lock(&cred->cr_prison->pr_mtx); 3514 strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 3515 mtx_unlock(&cred->cr_prison->pr_mtx); 3516} 3517 3518void 3519getcredhostid(struct ucred *cred, unsigned long *hostid) 3520{ 3521 3522 mtx_lock(&cred->cr_prison->pr_mtx); 3523 *hostid = cred->cr_prison->pr_hostid; 3524 mtx_unlock(&cred->cr_prison->pr_mtx); 3525} 3526 3527#ifdef VIMAGE 3528/* 3529 * Determine whether the prison represented by cred owns 3530 * its vnet rather than having it inherited. 3531 * 3532 * Returns 1 in case the prison owns the vnet, 0 otherwise. 3533 */ 3534int 3535prison_owns_vnet(struct ucred *cred) 3536{ 3537 3538 /* 3539 * vnets cannot be added/removed after jail creation, 3540 * so no need to lock here. 3541 */ 3542 return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); 3543} 3544#endif 3545 3546/* 3547 * Determine whether the subject represented by cred can "see" 3548 * status of a mount point. 
3549 * Returns: 0 for permitted, ENOENT otherwise. 3550 * XXX: This function should be called cr_canseemount() and should be 3551 * placed in kern_prot.c. 3552 */ 3553int 3554prison_canseemount(struct ucred *cred, struct mount *mp) 3555{ 3556 struct prison *pr; 3557 struct statfs *sp; 3558 size_t len; 3559 3560 pr = cred->cr_prison; 3561 if (pr->pr_enforce_statfs == 0) 3562 return (0); 3563 if (pr->pr_root->v_mount == mp) 3564 return (0); 3565 if (pr->pr_enforce_statfs == 2) 3566 return (ENOENT); 3567 /* 3568 * If jail's chroot directory is set to "/" we should be able to see 3569 * all mount-points from inside a jail. 3570 * This is ugly check, but this is the only situation when jail's 3571 * directory ends with '/'. 3572 */ 3573 if (strcmp(pr->pr_path, "/") == 0) 3574 return (0); 3575 len = strlen(pr->pr_path); 3576 sp = &mp->mnt_stat; 3577 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3578 return (ENOENT); 3579 /* 3580 * Be sure that we don't have situation where jail's root directory 3581 * is "/some/path" and mount point is "/some/pathpath". 3582 */ 3583 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3584 return (ENOENT); 3585 return (0); 3586} 3587 3588void 3589prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3590{ 3591 char jpath[MAXPATHLEN]; 3592 struct prison *pr; 3593 size_t len; 3594 3595 pr = cred->cr_prison; 3596 if (pr->pr_enforce_statfs == 0) 3597 return; 3598 if (prison_canseemount(cred, mp) != 0) { 3599 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3600 strlcpy(sp->f_mntonname, "[restricted]", 3601 sizeof(sp->f_mntonname)); 3602 return; 3603 } 3604 if (pr->pr_root->v_mount == mp) { 3605 /* 3606 * Clear current buffer data, so we are sure nothing from 3607 * the valid path left there. 
3608 */ 3609 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3610 *sp->f_mntonname = '/'; 3611 return; 3612 } 3613 /* 3614 * If jail's chroot directory is set to "/" we should be able to see 3615 * all mount-points from inside a jail. 3616 */ 3617 if (strcmp(pr->pr_path, "/") == 0) 3618 return; 3619 len = strlen(pr->pr_path); 3620 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3621 /* 3622 * Clear current buffer data, so we are sure nothing from 3623 * the valid path left there. 3624 */ 3625 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3626 if (*jpath == '\0') { 3627 /* Should never happen. */ 3628 *sp->f_mntonname = '/'; 3629 } else { 3630 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3631 } 3632} 3633 3634/* 3635 * Check with permission for a specific privilege is granted within jail. We 3636 * have a specific list of accepted privileges; the rest are denied. 3637 */ 3638int 3639prison_priv_check(struct ucred *cred, int priv) 3640{ 3641 3642 if (!jailed(cred)) 3643 return (0); 3644 3645#ifdef VIMAGE 3646 /* 3647 * Privileges specific to prisons with a virtual network stack. 3648 * There might be a duplicate entry here in case the privilege 3649 * is only granted conditionally in the legacy jail case. 3650 */ 3651 switch (priv) { 3652#ifdef notyet 3653 /* 3654 * NFS-specific privileges. 3655 */ 3656 case PRIV_NFS_DAEMON: 3657 case PRIV_NFS_LOCKD: 3658#endif 3659 /* 3660 * Network stack privileges. 3661 */ 3662 case PRIV_NET_BRIDGE: 3663 case PRIV_NET_GRE: 3664 case PRIV_NET_BPF: 3665 case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. 
*/ 3666 case PRIV_NET_ROUTE: 3667 case PRIV_NET_TAP: 3668 case PRIV_NET_SETIFMTU: 3669 case PRIV_NET_SETIFFLAGS: 3670 case PRIV_NET_SETIFCAP: 3671 case PRIV_NET_SETIFDESCR: 3672 case PRIV_NET_SETIFNAME : 3673 case PRIV_NET_SETIFMETRIC: 3674 case PRIV_NET_SETIFPHYS: 3675 case PRIV_NET_SETIFMAC: 3676 case PRIV_NET_ADDMULTI: 3677 case PRIV_NET_DELMULTI: 3678 case PRIV_NET_HWIOCTL: 3679 case PRIV_NET_SETLLADDR: 3680 case PRIV_NET_ADDIFGROUP: 3681 case PRIV_NET_DELIFGROUP: 3682 case PRIV_NET_IFCREATE: 3683 case PRIV_NET_IFDESTROY: 3684 case PRIV_NET_ADDIFADDR: 3685 case PRIV_NET_DELIFADDR: 3686 case PRIV_NET_LAGG: 3687 case PRIV_NET_GIF: 3688 case PRIV_NET_SETIFVNET: 3689 case PRIV_NET_SETIFFIB: 3690 3691 /* 3692 * 802.11-related privileges. 3693 */ 3694 case PRIV_NET80211_GETKEY: 3695#ifdef notyet 3696 case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ 3697#endif 3698 3699#ifdef notyet 3700 /* 3701 * AppleTalk privileges. 3702 */ 3703 case PRIV_NETATALK_RESERVEDPORT: 3704 3705 /* 3706 * ATM privileges. 3707 */ 3708 case PRIV_NETATM_CFG: 3709 case PRIV_NETATM_ADD: 3710 case PRIV_NETATM_DEL: 3711 case PRIV_NETATM_SET: 3712 3713 /* 3714 * Bluetooth privileges. 3715 */ 3716 case PRIV_NETBLUETOOTH_RAW: 3717#endif 3718 3719 /* 3720 * Netgraph and netgraph module privileges. 3721 */ 3722 case PRIV_NETGRAPH_CONTROL: 3723#ifdef notyet 3724 case PRIV_NETGRAPH_TTY: 3725#endif 3726 3727 /* 3728 * IPv4 and IPv6 privileges. 3729 */ 3730 case PRIV_NETINET_IPFW: 3731 case PRIV_NETINET_DIVERT: 3732 case PRIV_NETINET_PF: 3733 case PRIV_NETINET_DUMMYNET: 3734 case PRIV_NETINET_CARP: 3735 case PRIV_NETINET_MROUTE: 3736 case PRIV_NETINET_RAW: 3737 case PRIV_NETINET_ADDRCTRL6: 3738 case PRIV_NETINET_ND6: 3739 case PRIV_NETINET_SCOPE6: 3740 case PRIV_NETINET_ALIFETIME6: 3741 case PRIV_NETINET_IPSEC: 3742 case PRIV_NETINET_BINDANY: 3743 3744#ifdef notyet 3745 /* 3746 * IPX/SPX privileges. 
3747 */ 3748 case PRIV_NETIPX_RESERVEDPORT: 3749 case PRIV_NETIPX_RAW: 3750 3751 /* 3752 * NCP privileges. 3753 */ 3754 case PRIV_NETNCP: 3755 3756 /* 3757 * SMB privileges. 3758 */ 3759 case PRIV_NETSMB: 3760#endif 3761 3762 /* 3763 * No default: or deny here. 3764 * In case of no permit fall through to next switch(). 3765 */ 3766 if (cred->cr_prison->pr_flags & PR_VNET) 3767 return (0); 3768 } 3769#endif /* VIMAGE */ 3770 3771 switch (priv) { 3772 3773 /* 3774 * Allow ktrace privileges for root in jail. 3775 */ 3776 case PRIV_KTRACE: 3777 3778#if 0 3779 /* 3780 * Allow jailed processes to configure audit identity and 3781 * submit audit records (login, etc). In the future we may 3782 * want to further refine the relationship between audit and 3783 * jail. 3784 */ 3785 case PRIV_AUDIT_GETAUDIT: 3786 case PRIV_AUDIT_SETAUDIT: 3787 case PRIV_AUDIT_SUBMIT: 3788#endif 3789 3790 /* 3791 * Allow jailed processes to manipulate process UNIX 3792 * credentials in any way they see fit. 3793 */ 3794 case PRIV_CRED_SETUID: 3795 case PRIV_CRED_SETEUID: 3796 case PRIV_CRED_SETGID: 3797 case PRIV_CRED_SETEGID: 3798 case PRIV_CRED_SETGROUPS: 3799 case PRIV_CRED_SETREUID: 3800 case PRIV_CRED_SETREGID: 3801 case PRIV_CRED_SETRESUID: 3802 case PRIV_CRED_SETRESGID: 3803 3804 /* 3805 * Jail implements visibility constraints already, so allow 3806 * jailed root to override uid/gid-based constraints. 3807 */ 3808 case PRIV_SEEOTHERGIDS: 3809 case PRIV_SEEOTHERUIDS: 3810 3811 /* 3812 * Jail implements inter-process debugging limits already, so 3813 * allow jailed root various debugging privileges. 3814 */ 3815 case PRIV_DEBUG_DIFFCRED: 3816 case PRIV_DEBUG_SUGID: 3817 case PRIV_DEBUG_UNPRIV: 3818 3819 /* 3820 * Allow jail to set various resource limits and login 3821 * properties, and for now, exceed process resource limits. 
3822 */ 3823 case PRIV_PROC_LIMIT: 3824 case PRIV_PROC_SETLOGIN: 3825 case PRIV_PROC_SETRLIMIT: 3826 3827 /* 3828 * System V and POSIX IPC privileges are granted in jail. 3829 */ 3830 case PRIV_IPC_READ: 3831 case PRIV_IPC_WRITE: 3832 case PRIV_IPC_ADMIN: 3833 case PRIV_IPC_MSGSIZE: 3834 case PRIV_MQ_ADMIN: 3835 3836 /* 3837 * Jail operations within a jail work on child jails. 3838 */ 3839 case PRIV_JAIL_ATTACH: 3840 case PRIV_JAIL_SET: 3841 case PRIV_JAIL_REMOVE: 3842 3843 /* 3844 * Jail implements its own inter-process limits, so allow 3845 * root processes in jail to change scheduling on other 3846 * processes in the same jail. Likewise for signalling. 3847 */ 3848 case PRIV_SCHED_DIFFCRED: 3849 case PRIV_SCHED_CPUSET: 3850 case PRIV_SIGNAL_DIFFCRED: 3851 case PRIV_SIGNAL_SUGID: 3852 3853 /* 3854 * Allow jailed processes to write to sysctls marked as jail 3855 * writable. 3856 */ 3857 case PRIV_SYSCTL_WRITEJAIL: 3858 3859 /* 3860 * Allow root in jail to manage a variety of quota 3861 * properties. These should likely be conditional on a 3862 * configuration option. 3863 */ 3864 case PRIV_VFS_GETQUOTA: 3865 case PRIV_VFS_SETQUOTA: 3866 3867 /* 3868 * Since Jail relies on chroot() to implement file system 3869 * protections, grant many VFS privileges to root in jail. 3870 * Be careful to exclude mount-related and NFS-related 3871 * privileges. 3872 */ 3873 case PRIV_VFS_READ: 3874 case PRIV_VFS_WRITE: 3875 case PRIV_VFS_ADMIN: 3876 case PRIV_VFS_EXEC: 3877 case PRIV_VFS_LOOKUP: 3878 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3879 case PRIV_VFS_CHFLAGS_DEV: 3880 case PRIV_VFS_CHOWN: 3881 case PRIV_VFS_CHROOT: 3882 case PRIV_VFS_RETAINSUGID: 3883 case PRIV_VFS_FCHROOT: 3884 case PRIV_VFS_LINK: 3885 case PRIV_VFS_SETGID: 3886 case PRIV_VFS_STAT: 3887 case PRIV_VFS_STICKYFILE: 3888 return (0); 3889 3890 /* 3891 * Depending on the global setting, allow privilege of 3892 * setting system flags. 
3893 */ 3894 case PRIV_VFS_SYSFLAGS: 3895 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3896 return (0); 3897 else 3898 return (EPERM); 3899 3900 /* 3901 * Depending on the global setting, allow privilege of 3902 * mounting/unmounting file systems. 3903 */ 3904 case PRIV_VFS_MOUNT: 3905 case PRIV_VFS_UNMOUNT: 3906 case PRIV_VFS_MOUNT_NONUSER: 3907 case PRIV_VFS_MOUNT_OWNER: 3908 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && 3909 cred->cr_prison->pr_enforce_statfs < 2) 3910 return (0); 3911 else 3912 return (EPERM); 3913 3914 /* 3915 * Allow jailed root to bind reserved ports and reuse in-use 3916 * ports. 3917 */ 3918 case PRIV_NETINET_RESERVEDPORT: 3919 case PRIV_NETINET_REUSEPORT: 3920 return (0); 3921 3922 /* 3923 * Allow jailed root to set certian IPv4/6 (option) headers. 3924 */ 3925 case PRIV_NETINET_SETHDROPTS: 3926 return (0); 3927 3928 /* 3929 * Conditionally allow creating raw sockets in jail. 3930 */ 3931 case PRIV_NETINET_RAW: 3932 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3933 return (0); 3934 else 3935 return (EPERM); 3936 3937 /* 3938 * Since jail implements its own visibility limits on netstat 3939 * sysctls, allow getcred. This allows identd to work in 3940 * jail. 3941 */ 3942 case PRIV_NETINET_GETCRED: 3943 return (0); 3944 3945 /* 3946 * Allow jailed root to set loginclass. 3947 */ 3948 case PRIV_PROC_SETLOGINCLASS: 3949 return (0); 3950 3951 default: 3952 /* 3953 * In all remaining cases, deny the privilege request. This 3954 * includes almost all network privileges, many system 3955 * configuration privileges. 3956 */ 3957 return (EPERM); 3958 } 3959} 3960 3961/* 3962 * Return the part of pr2's name that is relative to pr1, or the whole name 3963 * if it does not directly follow. 3964 */ 3965 3966char * 3967prison_name(struct prison *pr1, struct prison *pr2) 3968{ 3969 char *name; 3970 3971 /* Jails see themselves as "0" (if they see themselves at all). 
*/ 3972 if (pr1 == pr2) 3973 return "0"; 3974 name = pr2->pr_name; 3975 if (prison_ischild(pr1, pr2)) { 3976 /* 3977 * pr1 isn't locked (and allprison_lock may not be either) 3978 * so its length can't be counted on. But the number of dots 3979 * can be counted on - and counted. 3980 */ 3981 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3982 name = strchr(name, '.') + 1; 3983 } 3984 return (name); 3985} 3986 3987/* 3988 * Return the part of pr2's path that is relative to pr1, or the whole path 3989 * if it does not directly follow. 3990 */ 3991static char * 3992prison_path(struct prison *pr1, struct prison *pr2) 3993{ 3994 char *path1, *path2; 3995 int len1; 3996 3997 path1 = pr1->pr_path; 3998 path2 = pr2->pr_path; 3999 if (!strcmp(path1, "/")) 4000 return (path2); 4001 len1 = strlen(path1); 4002 if (strncmp(path1, path2, len1)) 4003 return (path2); 4004 if (path2[len1] == '\0') 4005 return "/"; 4006 if (path2[len1] == '/') 4007 return (path2 + len1); 4008 return (path2); 4009} 4010 4011 4012/* 4013 * Jail-related sysctls. 
/*
 * Jail-related sysctls.
 */
static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jails");

/*
 * Handler for security.jail.list.
 *
 * Walks every descendant of the calling thread's prison and, for each one
 * that still has references, writes a struct xprison record to the
 * request, followed by that prison's IPv4 addresses (INET) and then its
 * IPv6 addresses (INET6).  Paths and names are reported relative to the
 * caller's prison via prison_path() and prison_name().
 *
 * Returns 0, or the first error reported by SYSCTL_OUT().
 */
static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)
{
	struct xprison *xp;
	struct prison *pr, *cpr;
#ifdef INET
	/* Scratch copy of one prison's IPv4 list; ip4s is its capacity. */
	struct in_addr *ip4 = NULL;
	int ip4s = 0;
#endif
#ifdef INET6
	/* Scratch copy of one prison's IPv6 list; ip6s is its capacity. */
	struct in6_addr *ip6 = NULL;
	int ip6s = 0;
#endif
	int descend, error;

	xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
	pr = req->td->td_ucred->cr_prison;
	error = 0;
	/* The shared allprison lock is held for the whole walk. */
	sx_slock(&allprison_lock);
	FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
#if defined(INET) || defined(INET6)
 again:
#endif
		mtx_lock(&cpr->pr_mtx);
#ifdef INET
		if (cpr->pr_ip4s > 0) {
			/*
			 * The scratch buffer is too small: drop the prison
			 * mutex before the M_WAITOK reallocation, then start
			 * over for this prison since its address count may
			 * have changed while it was unlocked.
			 */
			if (ip4s < cpr->pr_ip4s) {
				ip4s = cpr->pr_ip4s;
				mtx_unlock(&cpr->pr_mtx);
				ip4 = realloc(ip4, ip4s *
				    sizeof(struct in_addr), M_TEMP, M_WAITOK);
				goto again;
			}
			bcopy(cpr->pr_ip4, ip4,
			    cpr->pr_ip4s * sizeof(struct in_addr));
		}
#endif
#ifdef INET6
		if (cpr->pr_ip6s > 0) {
			/* Same grow-and-retry scheme as for IPv4 above. */
			if (ip6s < cpr->pr_ip6s) {
				ip6s = cpr->pr_ip6s;
				mtx_unlock(&cpr->pr_mtx);
				ip6 = realloc(ip6, ip6s *
				    sizeof(struct in6_addr), M_TEMP, M_WAITOK);
				goto again;
			}
			bcopy(cpr->pr_ip6, ip6,
			    cpr->pr_ip6s * sizeof(struct in6_addr));
		}
#endif
		/* Prisons without any reference are not reported. */
		if (cpr->pr_ref == 0) {
			mtx_unlock(&cpr->pr_mtx);
			continue;
		}
		/* Fill the fixed-size record while the prison is locked. */
		bzero(xp, sizeof(*xp));
		xp->pr_version = XPRISON_VERSION;
		xp->pr_id = cpr->pr_id;
		/* A prison with no user references left is reported as dying. */
		xp->pr_state = cpr->pr_uref > 0
		    ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
		strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
		strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
		strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
#ifdef INET
		xp->pr_ip4s = cpr->pr_ip4s;
#endif
#ifdef INET6
		xp->pr_ip6s = cpr->pr_ip6s;
#endif
		mtx_unlock(&cpr->pr_mtx);
		/*
		 * Copy out from the private snapshot (xp, ip4, ip6) after the
		 * prison mutex has been released.
		 */
		error = SYSCTL_OUT(req, xp, sizeof(*xp));
		if (error)
			break;
#ifdef INET
		if (xp->pr_ip4s > 0) {
			error = SYSCTL_OUT(req, ip4,
			    xp->pr_ip4s * sizeof(struct in_addr));
			if (error)
				break;
		}
#endif
#ifdef INET6
		if (xp->pr_ip6s > 0) {
			error = SYSCTL_OUT(req, ip6,
			    xp->pr_ip6s * sizeof(struct in6_addr));
			if (error)
				break;
		}
#endif
	}
	sx_sunlock(&allprison_lock);
	free(xp, M_TEMP);
#ifdef INET
	free(ip4, M_TEMP);
#endif
#ifdef INET6
	free(ip6, M_TEMP);
#endif
	return (error);
}

SYSCTL_OID(_security_jail, OID_AUTO, list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_list, "S", "List of active jails");

/*
 * Handler for security.jail.jailed: reports the result of jailed() for
 * the credentials of the requesting thread as an int.
 */
static int
sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
{
	int error, injail;

	injail = jailed(req->td->td_ucred);
	error = SYSCTL_OUT(req, &injail, sizeof(injail));

	return (error);
}

SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_jailed, "I", "Process in jail?");

#if defined(INET) || defined(INET6)
/* Writable limit on the per-address-family size of a jail's IP list. */
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
    &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family");
#endif
Prisons 4146 * just see their own parameters, and can't change them. 4147 */ 4148static int 4149sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 4150{ 4151 struct prison *pr; 4152 int allow, error, i; 4153 4154 pr = req->td->td_ucred->cr_prison; 4155 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 4156 4157 /* Get the current flag value, and convert it to a boolean. */ 4158 i = (allow & arg2) ? 1 : 0; 4159 if (arg1 != NULL) 4160 i = !i; 4161 error = sysctl_handle_int(oidp, &i, 0, req); 4162 if (error || !req->newptr) 4163 return (error); 4164 i = i ? arg2 : 0; 4165 if (arg1 != NULL) 4166 i ^= arg2; 4167 /* 4168 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 4169 * for writing. 4170 */ 4171 mtx_lock(&prison0.pr_mtx); 4172 jail_default_allow = (jail_default_allow & ~arg2) | i; 4173 mtx_unlock(&prison0.pr_mtx); 4174 return (0); 4175} 4176 4177SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 4178 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4179 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 4180 "Processes in jail can set their hostnames"); 4181SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 4182 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4183 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 4184 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 4185SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 4186 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4187 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 4188 "Processes in jail can use System V IPC primitives"); 4189SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 4190 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4191 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 4192 "Prison root can create raw sockets"); 4193SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 4194 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4195 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 4196 "Processes in jail 
can alter system file flags"); 4197SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 4198 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4199 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 4200 "Processes in jail can mount/unmount jail-friendly file systems"); 4201SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, 4202 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4203 NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", 4204 "Processes in jail can mount the devfs file system"); 4205SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, 4206 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4207 NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", 4208 "Processes in jail can mount the nullfs file system");
| 223}; 224const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); 225 226#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 227#define JAIL_DEFAULT_ENFORCE_STATFS 2 228#define JAIL_DEFAULT_DEVFS_RSNUM 0 229static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 230static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 231static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; 232#if defined(INET) || defined(INET6) 233static unsigned jail_max_af_ips = 255; 234#endif 235 236#ifdef INET 237static int 238qcmp_v4(const void *ip1, const void *ip2) 239{ 240 in_addr_t iaa, iab; 241 242 /* 243 * We need to compare in HBO here to get the list sorted as expected 244 * by the result of the code. Sorting NBO addresses gives you 245 * interesting results. If you do not understand, do not try. 246 */ 247 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 248 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 249 250 /* 251 * Do not simply return the difference of the two numbers, the int is 252 * not wide enough. 
253 */ 254 if (iaa > iab) 255 return (1); 256 else if (iaa < iab) 257 return (-1); 258 else 259 return (0); 260} 261#endif 262 263#ifdef INET6 264static int 265qcmp_v6(const void *ip1, const void *ip2) 266{ 267 const struct in6_addr *ia6a, *ia6b; 268 int i, rc; 269 270 ia6a = (const struct in6_addr *)ip1; 271 ia6b = (const struct in6_addr *)ip2; 272 273 rc = 0; 274 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 275 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 276 rc = 1; 277 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 278 rc = -1; 279 } 280 return (rc); 281} 282#endif 283 284/* 285 * struct jail_args { 286 * struct jail *jail; 287 * }; 288 */ 289int 290sys_jail(struct thread *td, struct jail_args *uap) 291{ 292 uint32_t version; 293 int error; 294 struct jail j; 295 296 error = copyin(uap->jail, &version, sizeof(uint32_t)); 297 if (error) 298 return (error); 299 300 switch (version) { 301 case 0: 302 { 303 struct jail_v0 j0; 304 305 /* FreeBSD single IPv4 jails. */ 306 bzero(&j, sizeof(struct jail)); 307 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 308 if (error) 309 return (error); 310 j.version = j0.version; 311 j.path = j0.path; 312 j.hostname = j0.hostname; 313 j.ip4s = j0.ip_number; 314 break; 315 } 316 317 case 1: 318 /* 319 * Version 1 was used by multi-IPv4 jail implementations 320 * that never made it into the official kernel. 321 */ 322 return (EINVAL); 323 324 case 2: /* JAIL_API_VERSION */ 325 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 326 error = copyin(uap->jail, &j, sizeof(struct jail)); 327 if (error) 328 return (error); 329 break; 330 331 default: 332 /* Sci-Fi jails are not supported, sorry. 
*/ 333 return (EINVAL); 334 } 335 return (kern_jail(td, &j)); 336} 337 338int 339kern_jail(struct thread *td, struct jail *j) 340{ 341 struct iovec optiov[2 * (4 342 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 343#ifdef INET 344 + 1 345#endif 346#ifdef INET6 347 + 1 348#endif 349 )]; 350 struct uio opt; 351 char *u_path, *u_hostname, *u_name; 352#ifdef INET 353 uint32_t ip4s; 354 struct in_addr *u_ip4; 355#endif 356#ifdef INET6 357 struct in6_addr *u_ip6; 358#endif 359 size_t tmplen; 360 int error, enforce_statfs, fi; 361 362 bzero(&optiov, sizeof(optiov)); 363 opt.uio_iov = optiov; 364 opt.uio_iovcnt = 0; 365 opt.uio_offset = -1; 366 opt.uio_resid = -1; 367 opt.uio_segflg = UIO_SYSSPACE; 368 opt.uio_rw = UIO_READ; 369 opt.uio_td = td; 370 371 /* Set permissions for top-level jails from sysctls. */ 372 if (!jailed(td->td_ucred)) { 373 for (fi = 0; fi < sizeof(pr_allow_names) / 374 sizeof(pr_allow_names[0]); fi++) { 375 optiov[opt.uio_iovcnt].iov_base = 376 (jail_default_allow & (1 << fi)) 377 ? pr_allow_names[fi] : pr_allow_nonames[fi]; 378 optiov[opt.uio_iovcnt].iov_len = 379 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 380 opt.uio_iovcnt += 2; 381 } 382 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 383 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 384 opt.uio_iovcnt++; 385 enforce_statfs = jail_default_enforce_statfs; 386 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 387 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 388 opt.uio_iovcnt++; 389 } 390 391 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 392#ifdef INET 393 ip4s = (j->version == 0) ? 
1 : j->ip4s; 394 if (ip4s > jail_max_af_ips) 395 return (EINVAL); 396 tmplen += ip4s * sizeof(struct in_addr); 397#else 398 if (j->ip4s > 0) 399 return (EINVAL); 400#endif 401#ifdef INET6 402 if (j->ip6s > jail_max_af_ips) 403 return (EINVAL); 404 tmplen += j->ip6s * sizeof(struct in6_addr); 405#else 406 if (j->ip6s > 0) 407 return (EINVAL); 408#endif 409 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 410 u_hostname = u_path + MAXPATHLEN; 411 u_name = u_hostname + MAXHOSTNAMELEN; 412#ifdef INET 413 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 414#endif 415#ifdef INET6 416#ifdef INET 417 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 418#else 419 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 420#endif 421#endif 422 optiov[opt.uio_iovcnt].iov_base = "path"; 423 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 424 opt.uio_iovcnt++; 425 optiov[opt.uio_iovcnt].iov_base = u_path; 426 error = copyinstr(j->path, u_path, MAXPATHLEN, 427 &optiov[opt.uio_iovcnt].iov_len); 428 if (error) { 429 free(u_path, M_TEMP); 430 return (error); 431 } 432 opt.uio_iovcnt++; 433 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 434 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 435 opt.uio_iovcnt++; 436 optiov[opt.uio_iovcnt].iov_base = u_hostname; 437 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 438 &optiov[opt.uio_iovcnt].iov_len); 439 if (error) { 440 free(u_path, M_TEMP); 441 return (error); 442 } 443 opt.uio_iovcnt++; 444 if (j->jailname != NULL) { 445 optiov[opt.uio_iovcnt].iov_base = "name"; 446 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 447 opt.uio_iovcnt++; 448 optiov[opt.uio_iovcnt].iov_base = u_name; 449 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 450 &optiov[opt.uio_iovcnt].iov_len); 451 if (error) { 452 free(u_path, M_TEMP); 453 return (error); 454 } 455 opt.uio_iovcnt++; 456 } 457#ifdef INET 458 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 459 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 460 opt.uio_iovcnt++; 461 
optiov[opt.uio_iovcnt].iov_base = u_ip4; 462 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 463 if (j->version == 0) 464 u_ip4->s_addr = j->ip4s; 465 else { 466 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 467 if (error) { 468 free(u_path, M_TEMP); 469 return (error); 470 } 471 } 472 opt.uio_iovcnt++; 473#endif 474#ifdef INET6 475 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 476 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 477 opt.uio_iovcnt++; 478 optiov[opt.uio_iovcnt].iov_base = u_ip6; 479 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 480 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 481 if (error) { 482 free(u_path, M_TEMP); 483 return (error); 484 } 485 opt.uio_iovcnt++; 486#endif 487 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 488 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 489 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 490 free(u_path, M_TEMP); 491 return (error); 492} 493 494 495/* 496 * struct jail_set_args { 497 * struct iovec *iovp; 498 * unsigned int iovcnt; 499 * int flags; 500 * }; 501 */ 502int 503sys_jail_set(struct thread *td, struct jail_set_args *uap) 504{ 505 struct uio *auio; 506 int error; 507 508 /* Check that we have an even number of iovecs. 
*/ 509 if (uap->iovcnt & 1) 510 return (EINVAL); 511 512 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 513 if (error) 514 return (error); 515 error = kern_jail_set(td, auio, uap->flags); 516 free(auio, M_IOV); 517 return (error); 518} 519 520int 521kern_jail_set(struct thread *td, struct uio *optuio, int flags) 522{ 523 struct nameidata nd; 524#ifdef INET 525 struct in_addr *ip4; 526#endif 527#ifdef INET6 528 struct in6_addr *ip6; 529#endif 530 struct vfsopt *opt; 531 struct vfsoptlist *opts; 532 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 533 struct vnode *root; 534 char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; 535 char *g_path; 536#if defined(INET) || defined(INET6) 537 struct prison *tppr; 538 void *op; 539#endif 540 unsigned long hid; 541 size_t namelen, onamelen; 542 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 543 int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; 544 int fi, jid, jsys, len, level; 545 int childmax, rsnum, slevel, vfslocked; 546 int fullpath_disabled; 547#if defined(INET) || defined(INET6) 548 int ii, ij; 549#endif 550#ifdef INET 551 int ip4s, redo_ip4; 552#endif 553#ifdef INET6 554 int ip6s, redo_ip6; 555#endif 556 uint64_t pr_allow, ch_allow, pr_flags, ch_flags; 557 unsigned tallow; 558 char numbuf[12]; 559 560 error = priv_check(td, PRIV_JAIL_SET); 561 if (!error && (flags & JAIL_ATTACH)) 562 error = priv_check(td, PRIV_JAIL_ATTACH); 563 if (error) 564 return (error); 565 mypr = ppr = td->td_ucred->cr_prison; 566 if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) 567 return (EPERM); 568 if (flags & ~JAIL_SET_MASK) 569 return (EINVAL); 570 571 /* 572 * Check all the parameters before committing to anything. Not all 573 * errors can be caught early, but we may as well try. Also, this 574 * takes care of some expensive stuff (path lookup) before getting 575 * the allprison lock. 576 * 577 * XXX Jails are not filesystems, and jail parameters are not mount 578 * options. 
But it makes more sense to re-use the vfsopt code 579 * than duplicate it under a different name. 580 */ 581 error = vfs_buildopts(optuio, &opts); 582 if (error) 583 return (error); 584#ifdef INET 585 ip4 = NULL; 586#endif 587#ifdef INET6 588 ip6 = NULL; 589#endif 590 g_path = NULL; 591 592 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 593 if (error == ENOENT) 594 jid = 0; 595 else if (error != 0) 596 goto done_free; 597 598 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 599 if (error == ENOENT) 600 gotslevel = 0; 601 else if (error != 0) 602 goto done_free; 603 else 604 gotslevel = 1; 605 606 error = 607 vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); 608 if (error == ENOENT) 609 gotchildmax = 0; 610 else if (error != 0) 611 goto done_free; 612 else 613 gotchildmax = 1; 614 615 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 616 if (error == ENOENT) 617 gotenforce = 0; 618 else if (error != 0) 619 goto done_free; 620 else if (enforce < 0 || enforce > 2) { 621 error = EINVAL; 622 goto done_free; 623 } else 624 gotenforce = 1; 625 626 error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); 627 if (error == ENOENT) 628 gotrsnum = 0; 629 else if (error != 0) 630 goto done_free; 631 else 632 gotrsnum = 1; 633 634 pr_flags = ch_flags = 0; 635 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 636 fi++) { 637 if (pr_flag_names[fi] == NULL) 638 continue; 639 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 640 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 641 } 642 ch_flags |= pr_flags; 643 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 644 fi++) { 645 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, 646 sizeof(jsys)); 647 if (error == ENOENT) 648 continue; 649 if (error != 0) 650 goto done_free; 651 switch (jsys) { 652 case JAIL_SYS_DISABLE: 653 if (!pr_flag_jailsys[fi].disable) { 654 error = EINVAL; 655 goto done_free; 
656 } 657 pr_flags |= pr_flag_jailsys[fi].disable; 658 break; 659 case JAIL_SYS_NEW: 660 pr_flags |= pr_flag_jailsys[fi].new; 661 break; 662 case JAIL_SYS_INHERIT: 663 break; 664 default: 665 error = EINVAL; 666 goto done_free; 667 } 668 ch_flags |= 669 pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; 670 } 671 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 672 && !(pr_flags & PR_PERSIST)) { 673 error = EINVAL; 674 vfs_opterror(opts, "new jail must persist or attach"); 675 goto done_errmsg; 676 } 677#ifdef VIMAGE 678 if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { 679 error = EINVAL; 680 vfs_opterror(opts, "vnet cannot be changed after creation"); 681 goto done_errmsg; 682 } 683#endif 684#ifdef INET 685 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { 686 error = EINVAL; 687 vfs_opterror(opts, "ip4 cannot be changed after creation"); 688 goto done_errmsg; 689 } 690#endif 691#ifdef INET6 692 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { 693 error = EINVAL; 694 vfs_opterror(opts, "ip6 cannot be changed after creation"); 695 goto done_errmsg; 696 } 697#endif 698 699 pr_allow = ch_allow = 0; 700 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 701 fi++) { 702 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 703 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 704 } 705 ch_allow |= pr_allow; 706 707 error = vfs_getopt(opts, "name", (void **)&name, &len); 708 if (error == ENOENT) 709 name = NULL; 710 else if (error != 0) 711 goto done_free; 712 else { 713 if (len == 0 || name[len - 1] != '\0') { 714 error = EINVAL; 715 goto done_free; 716 } 717 if (len > MAXHOSTNAMELEN) { 718 error = ENAMETOOLONG; 719 goto done_free; 720 } 721 } 722 723 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 724 if (error == ENOENT) 725 host = NULL; 726 else if (error != 0) 727 goto done_free; 728 else { 729 ch_flags |= PR_HOST; 730 pr_flags |= PR_HOST; 731 if (len == 0 || host[len - 
1] != '\0') { 732 error = EINVAL; 733 goto done_free; 734 } 735 if (len > MAXHOSTNAMELEN) { 736 error = ENAMETOOLONG; 737 goto done_free; 738 } 739 } 740 741 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 742 if (error == ENOENT) 743 domain = NULL; 744 else if (error != 0) 745 goto done_free; 746 else { 747 ch_flags |= PR_HOST; 748 pr_flags |= PR_HOST; 749 if (len == 0 || domain[len - 1] != '\0') { 750 error = EINVAL; 751 goto done_free; 752 } 753 if (len > MAXHOSTNAMELEN) { 754 error = ENAMETOOLONG; 755 goto done_free; 756 } 757 } 758 759 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 760 if (error == ENOENT) 761 uuid = NULL; 762 else if (error != 0) 763 goto done_free; 764 else { 765 ch_flags |= PR_HOST; 766 pr_flags |= PR_HOST; 767 if (len == 0 || uuid[len - 1] != '\0') { 768 error = EINVAL; 769 goto done_free; 770 } 771 if (len > HOSTUUIDLEN) { 772 error = ENAMETOOLONG; 773 goto done_free; 774 } 775 } 776 777#ifdef COMPAT_FREEBSD32 778 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 779 uint32_t hid32; 780 781 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 782 hid = hid32; 783 } else 784#endif 785 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 786 if (error == ENOENT) 787 gothid = 0; 788 else if (error != 0) 789 goto done_free; 790 else { 791 gothid = 1; 792 ch_flags |= PR_HOST; 793 pr_flags |= PR_HOST; 794 } 795 796#ifdef INET 797 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 798 if (error == ENOENT) 799 ip4s = (pr_flags & PR_IP4_DISABLE) ? 
0 : -1; 800 else if (error != 0) 801 goto done_free; 802 else if (ip4s & (sizeof(*ip4) - 1)) { 803 error = EINVAL; 804 goto done_free; 805 } else { 806 ch_flags |= PR_IP4_USER | PR_IP4_DISABLE; 807 if (ip4s == 0) 808 pr_flags |= PR_IP4_USER | PR_IP4_DISABLE; 809 else { 810 pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER; 811 ip4s /= sizeof(*ip4); 812 if (ip4s > jail_max_af_ips) { 813 error = EINVAL; 814 vfs_opterror(opts, "too many IPv4 addresses"); 815 goto done_errmsg; 816 } 817 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 818 bcopy(op, ip4, ip4s * sizeof(*ip4)); 819 /* 820 * IP addresses are all sorted but ip[0] to preserve 821 * the primary IP address as given from userland. 822 * This special IP is used for unbound outgoing 823 * connections as well for "loopback" traffic in case 824 * source address selection cannot find any more fitting 825 * address to connect from. 826 */ 827 if (ip4s > 1) 828 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 829 /* 830 * Check for duplicate addresses and do some simple 831 * zero and broadcast checks. If users give other bogus 832 * addresses it is their problem. 833 * 834 * We do not have to care about byte order for these 835 * checks so we will do them in NBO. 836 */ 837 for (ii = 0; ii < ip4s; ii++) { 838 if (ip4[ii].s_addr == INADDR_ANY || 839 ip4[ii].s_addr == INADDR_BROADCAST) { 840 error = EINVAL; 841 goto done_free; 842 } 843 if ((ii+1) < ip4s && 844 (ip4[0].s_addr == ip4[ii+1].s_addr || 845 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 846 error = EINVAL; 847 goto done_free; 848 } 849 } 850 } 851 } 852#endif 853 854#ifdef INET6 855 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 856 if (error == ENOENT) 857 ip6s = (pr_flags & PR_IP6_DISABLE) ? 
0 : -1; 858 else if (error != 0) 859 goto done_free; 860 else if (ip6s & (sizeof(*ip6) - 1)) { 861 error = EINVAL; 862 goto done_free; 863 } else { 864 ch_flags |= PR_IP6_USER | PR_IP6_DISABLE; 865 if (ip6s == 0) 866 pr_flags |= PR_IP6_USER | PR_IP6_DISABLE; 867 else { 868 pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER; 869 ip6s /= sizeof(*ip6); 870 if (ip6s > jail_max_af_ips) { 871 error = EINVAL; 872 vfs_opterror(opts, "too many IPv6 addresses"); 873 goto done_errmsg; 874 } 875 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 876 bcopy(op, ip6, ip6s * sizeof(*ip6)); 877 if (ip6s > 1) 878 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 879 for (ii = 0; ii < ip6s; ii++) { 880 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 881 error = EINVAL; 882 goto done_free; 883 } 884 if ((ii+1) < ip6s && 885 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 886 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 887 { 888 error = EINVAL; 889 goto done_free; 890 } 891 } 892 } 893 } 894#endif 895 896#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 897 if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 898 error = EINVAL; 899 vfs_opterror(opts, 900 "vnet jails cannot have IP address restrictions"); 901 goto done_errmsg; 902 } 903#endif 904 905 fullpath_disabled = 0; 906 root = NULL; 907 error = vfs_getopt(opts, "path", (void **)&path, &len); 908 if (error == ENOENT) 909 path = NULL; 910 else if (error != 0) 911 goto done_free; 912 else { 913 if (flags & JAIL_UPDATE) { 914 error = EINVAL; 915 vfs_opterror(opts, 916 "path cannot be changed after creation"); 917 goto done_errmsg; 918 } 919 if (len == 0 || path[len - 1] != '\0') { 920 error = EINVAL; 921 goto done_free; 922 } 923 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_SYSSPACE, 924 path, td); 925 error = namei(&nd); 926 if (error) 927 goto done_free; 928 vfslocked = NDHASGIANT(&nd); 929 root = nd.ni_vp; 930 NDFREE(&nd, NDF_ONLY_PNBUF); 931 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 932 
strlcpy(g_path, path, MAXPATHLEN); 933 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); 934 if (error == 0) 935 path = g_path; 936 else if (error == ENODEV) { 937 /* proceed if sysctl debug.disablefullpath == 1 */ 938 fullpath_disabled = 1; 939 if (len < 2 || (len == 2 && path[0] == '/')) 940 path = NULL; 941 } else { 942 /* exit on other errors */ 943 VFS_UNLOCK_GIANT(vfslocked); 944 goto done_free; 945 } 946 if (root->v_type != VDIR) { 947 error = ENOTDIR; 948 vput(root); 949 VFS_UNLOCK_GIANT(vfslocked); 950 goto done_free; 951 } 952 VOP_UNLOCK(root, 0); 953 VFS_UNLOCK_GIANT(vfslocked); 954 if (fullpath_disabled) { 955 /* Leave room for a real-root full pathname. */ 956 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 957 ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 958 error = ENAMETOOLONG; 959 goto done_free; 960 } 961 } 962 } 963 964 /* 965 * Grab the allprison lock before letting modules check their 966 * parameters. Once we have it, do not let go so we'll have a 967 * consistent view of the OSD list. 968 */ 969 sx_xlock(&allprison_lock); 970 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 971 if (error) 972 goto done_unlock_list; 973 974 /* By now, all parameters should have been noted. */ 975 TAILQ_FOREACH(opt, opts, link) { 976 if (!opt->seen && strcmp(opt->name, "errmsg")) { 977 error = EINVAL; 978 vfs_opterror(opts, "unknown parameter: %s", opt->name); 979 goto done_unlock_list; 980 } 981 } 982 983 /* 984 * See if we are creating a new record or updating an existing one. 985 * This abuses the file error codes ENOENT and EEXIST. 986 */ 987 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 988 if (!cuflags) { 989 error = EINVAL; 990 vfs_opterror(opts, "no valid operation (create or update)"); 991 goto done_unlock_list; 992 } 993 pr = NULL; 994 namelc = NULL; 995 if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { 996 namelc = strrchr(name, '.'); 997 jid = strtoul(namelc != NULL ? 
namelc + 1 : name, &p, 10); 998 if (*p != '\0') 999 jid = 0; 1000 } 1001 if (jid != 0) { 1002 /* 1003 * See if a requested jid already exists. There is an 1004 * information leak here if the jid exists but is not within 1005 * the caller's jail hierarchy. Jail creators will get EEXIST 1006 * even though they cannot see the jail, and CREATE | UPDATE 1007 * will return ENOENT which is not normally a valid error. 1008 */ 1009 if (jid < 0) { 1010 error = EINVAL; 1011 vfs_opterror(opts, "negative jid"); 1012 goto done_unlock_list; 1013 } 1014 pr = prison_find(jid); 1015 if (pr != NULL) { 1016 ppr = pr->pr_parent; 1017 /* Create: jid must not exist. */ 1018 if (cuflags == JAIL_CREATE) { 1019 mtx_unlock(&pr->pr_mtx); 1020 error = EEXIST; 1021 vfs_opterror(opts, "jail %d already exists", 1022 jid); 1023 goto done_unlock_list; 1024 } 1025 if (!prison_ischild(mypr, pr)) { 1026 mtx_unlock(&pr->pr_mtx); 1027 pr = NULL; 1028 } else if (pr->pr_uref == 0) { 1029 if (!(flags & JAIL_DYING)) { 1030 mtx_unlock(&pr->pr_mtx); 1031 error = ENOENT; 1032 vfs_opterror(opts, "jail %d is dying", 1033 jid); 1034 goto done_unlock_list; 1035 } else if ((flags & JAIL_ATTACH) || 1036 (pr_flags & PR_PERSIST)) { 1037 /* 1038 * A dying jail might be resurrected 1039 * (via attach or persist), but first 1040 * it must determine if another jail 1041 * has claimed its name. Accomplish 1042 * this by implicitly re-setting the 1043 * name. 1044 */ 1045 if (name == NULL) 1046 name = prison_name(mypr, pr); 1047 } 1048 } 1049 } 1050 if (pr == NULL) { 1051 /* Update: jid must exist. */ 1052 if (cuflags == JAIL_UPDATE) { 1053 error = ENOENT; 1054 vfs_opterror(opts, "jail %d not found", jid); 1055 goto done_unlock_list; 1056 } 1057 } 1058 } 1059 /* 1060 * If the caller provided a name, look for a jail by that name. 
1061 * This has different semantics for creates and updates keyed by jid 1062 * (where the name must not already exist in a different jail), 1063 * and updates keyed by the name itself (where the name must exist 1064 * because that is the jail being updated). 1065 */ 1066 if (name != NULL) { 1067 namelc = strrchr(name, '.'); 1068 if (namelc == NULL) 1069 namelc = name; 1070 else { 1071 /* 1072 * This is a hierarchical name. Split it into the 1073 * parent and child names, and make sure the parent 1074 * exists or matches an already found jail. 1075 */ 1076 *namelc = '\0'; 1077 if (pr != NULL) { 1078 if (strncmp(name, ppr->pr_name, namelc - name) 1079 || ppr->pr_name[namelc - name] != '\0') { 1080 mtx_unlock(&pr->pr_mtx); 1081 error = EINVAL; 1082 vfs_opterror(opts, 1083 "cannot change jail's parent"); 1084 goto done_unlock_list; 1085 } 1086 } else { 1087 ppr = prison_find_name(mypr, name); 1088 if (ppr == NULL) { 1089 error = ENOENT; 1090 vfs_opterror(opts, 1091 "jail \"%s\" not found", name); 1092 goto done_unlock_list; 1093 } 1094 mtx_unlock(&ppr->pr_mtx); 1095 } 1096 name = ++namelc; 1097 } 1098 if (name[0] != '\0') { 1099 namelen = 1100 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 1101 name_again: 1102 deadpr = NULL; 1103 FOREACH_PRISON_CHILD(ppr, tpr) { 1104 if (tpr != pr && tpr->pr_ref > 0 && 1105 !strcmp(tpr->pr_name + namelen, name)) { 1106 if (pr == NULL && 1107 cuflags != JAIL_CREATE) { 1108 mtx_lock(&tpr->pr_mtx); 1109 if (tpr->pr_ref > 0) { 1110 /* 1111 * Use this jail 1112 * for updates. 1113 */ 1114 if (tpr->pr_uref > 0) { 1115 pr = tpr; 1116 break; 1117 } 1118 deadpr = tpr; 1119 } 1120 mtx_unlock(&tpr->pr_mtx); 1121 } else if (tpr->pr_uref > 0) { 1122 /* 1123 * Create, or update(jid): 1124 * name must not exist in an 1125 * active sibling jail. 
1126 */ 1127 error = EEXIST; 1128 if (pr != NULL) 1129 mtx_unlock(&pr->pr_mtx); 1130 vfs_opterror(opts, 1131 "jail \"%s\" already exists", 1132 name); 1133 goto done_unlock_list; 1134 } 1135 } 1136 } 1137 /* If no active jail is found, use a dying one. */ 1138 if (deadpr != NULL && pr == NULL) { 1139 if (flags & JAIL_DYING) { 1140 mtx_lock(&deadpr->pr_mtx); 1141 if (deadpr->pr_ref == 0) { 1142 mtx_unlock(&deadpr->pr_mtx); 1143 goto name_again; 1144 } 1145 pr = deadpr; 1146 } else if (cuflags == JAIL_UPDATE) { 1147 error = ENOENT; 1148 vfs_opterror(opts, 1149 "jail \"%s\" is dying", name); 1150 goto done_unlock_list; 1151 } 1152 } 1153 /* Update: name must exist if no jid. */ 1154 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1155 error = ENOENT; 1156 vfs_opterror(opts, "jail \"%s\" not found", 1157 name); 1158 goto done_unlock_list; 1159 } 1160 } 1161 } 1162 /* Update: must provide a jid or name. */ 1163 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1164 error = ENOENT; 1165 vfs_opterror(opts, "update specified no jail"); 1166 goto done_unlock_list; 1167 } 1168 1169 /* If there's no prison to update, create a new one and link it in. */ 1170 if (pr == NULL) { 1171 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) 1172 if (tpr->pr_childcount >= tpr->pr_childmax) { 1173 error = EPERM; 1174 vfs_opterror(opts, "prison limit exceeded"); 1175 goto done_unlock_list; 1176 } 1177 created = 1; 1178 mtx_lock(&ppr->pr_mtx); 1179 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1180 mtx_unlock(&ppr->pr_mtx); 1181 error = ENOENT; 1182 vfs_opterror(opts, "parent jail went away!"); 1183 goto done_unlock_list; 1184 } 1185 ppr->pr_ref++; 1186 ppr->pr_uref++; 1187 mtx_unlock(&ppr->pr_mtx); 1188 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1189 if (jid == 0) { 1190 /* Find the next free jid. 
*/ 1191 jid = lastprid + 1; 1192 findnext: 1193 if (jid == JAIL_MAX) 1194 jid = 1; 1195 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1196 if (tpr->pr_id < jid) 1197 continue; 1198 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1199 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1200 break; 1201 } 1202 if (jid == lastprid) { 1203 error = EAGAIN; 1204 vfs_opterror(opts, 1205 "no available jail IDs"); 1206 free(pr, M_PRISON); 1207 prison_deref(ppr, PD_DEREF | 1208 PD_DEUREF | PD_LIST_XLOCKED); 1209 goto done_releroot; 1210 } 1211 jid++; 1212 goto findnext; 1213 } 1214 lastprid = jid; 1215 } else { 1216 /* 1217 * The jail already has a jid (that did not yet exist), 1218 * so just find where to insert it. 1219 */ 1220 TAILQ_FOREACH(tpr, &allprison, pr_list) 1221 if (tpr->pr_id >= jid) { 1222 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1223 break; 1224 } 1225 } 1226 if (tpr == NULL) 1227 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1228 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1229 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1230 tpr->pr_childcount++; 1231 1232 pr->pr_parent = ppr; 1233 pr->pr_id = jid; 1234 1235 /* Set some default values, and inherit some from the parent. 
*/ 1236 if (name == NULL) 1237 name = ""; 1238 if (path == NULL) { 1239 path = "/"; 1240 root = mypr->pr_root; 1241 vref(root); 1242 } 1243 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); 1244 pr->pr_flags |= PR_HOST; 1245#if defined(INET) || defined(INET6) 1246#ifdef VIMAGE 1247 if (!(pr_flags & PR_VNET)) 1248#endif 1249 { 1250#ifdef INET 1251 if (!(ch_flags & PR_IP4_USER)) 1252 pr->pr_flags |= 1253 PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE; 1254 else if (!(pr_flags & PR_IP4_USER)) { 1255 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1256 if (ppr->pr_ip4 != NULL) { 1257 pr->pr_ip4s = ppr->pr_ip4s; 1258 pr->pr_ip4 = malloc(pr->pr_ip4s * 1259 sizeof(struct in_addr), M_PRISON, 1260 M_WAITOK); 1261 bcopy(ppr->pr_ip4, pr->pr_ip4, 1262 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1263 } 1264 } 1265#endif 1266#ifdef INET6 1267 if (!(ch_flags & PR_IP6_USER)) 1268 pr->pr_flags |= 1269 PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE; 1270 else if (!(pr_flags & PR_IP6_USER)) { 1271 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1272 if (ppr->pr_ip6 != NULL) { 1273 pr->pr_ip6s = ppr->pr_ip6s; 1274 pr->pr_ip6 = malloc(pr->pr_ip6s * 1275 sizeof(struct in6_addr), M_PRISON, 1276 M_WAITOK); 1277 bcopy(ppr->pr_ip6, pr->pr_ip6, 1278 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1279 } 1280 } 1281#endif 1282 } 1283#endif 1284 /* Source address selection is always on by default. */ 1285 pr->pr_flags |= _PR_IP_SADDRSEL; 1286 1287 pr->pr_securelevel = ppr->pr_securelevel; 1288 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1289 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 1290 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; 1291 1292 LIST_INIT(&pr->pr_children); 1293 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1294 1295#ifdef VIMAGE 1296 /* Allocate a new vnet if specified. */ 1297 pr->pr_vnet = (pr_flags & PR_VNET) 1298 ? vnet_alloc() : ppr->pr_vnet; 1299#endif 1300 /* 1301 * Allocate a dedicated cpuset for each jail. 1302 * Unlike other initial settings, this may return an erorr. 
1303 */ 1304 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1305 if (error) { 1306 prison_deref(pr, PD_LIST_XLOCKED); 1307 goto done_releroot; 1308 } 1309 1310 mtx_lock(&pr->pr_mtx); 1311 /* 1312 * New prisons do not yet have a reference, because we do not 1313 * want other to see the incomplete prison once the 1314 * allprison_lock is downgraded. 1315 */ 1316 } else { 1317 created = 0; 1318 /* 1319 * Grab a reference for existing prisons, to ensure they 1320 * continue to exist for the duration of the call. 1321 */ 1322 pr->pr_ref++; 1323#if defined(VIMAGE) && (defined(INET) || defined(INET6)) 1324 if ((pr->pr_flags & PR_VNET) && 1325 (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 1326 error = EINVAL; 1327 vfs_opterror(opts, 1328 "vnet jails cannot have IP address restrictions"); 1329 goto done_deref_locked; 1330 } 1331#endif 1332#ifdef INET 1333 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1334 error = EINVAL; 1335 vfs_opterror(opts, 1336 "ip4 cannot be changed after creation"); 1337 goto done_deref_locked; 1338 } 1339#endif 1340#ifdef INET6 1341 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1342 error = EINVAL; 1343 vfs_opterror(opts, 1344 "ip6 cannot be changed after creation"); 1345 goto done_deref_locked; 1346 } 1347#endif 1348 } 1349 1350 /* Do final error checking before setting anything. 
*/ 1351 if (gotslevel) { 1352 if (slevel < ppr->pr_securelevel) { 1353 error = EPERM; 1354 goto done_deref_locked; 1355 } 1356 } 1357 if (gotchildmax) { 1358 if (childmax >= ppr->pr_childmax) { 1359 error = EPERM; 1360 goto done_deref_locked; 1361 } 1362 } 1363 if (gotenforce) { 1364 if (enforce < ppr->pr_enforce_statfs) { 1365 error = EPERM; 1366 goto done_deref_locked; 1367 } 1368 } 1369 if (gotrsnum) { 1370 /* 1371 * devfs_rsnum is a uint16_t 1372 */ 1373 if (rsnum < 0 || rsnum > 65535) { 1374 error = EINVAL; 1375 goto done_deref_locked; 1376 } 1377 /* 1378 * Nested jails always inherit parent's devfs ruleset 1379 */ 1380 if (jailed(td->td_ucred)) { 1381 if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { 1382 error = EPERM; 1383 goto done_deref_locked; 1384 } else 1385 rsnum = ppr->pr_devfs_rsnum; 1386 } 1387 } 1388#ifdef INET 1389 if (ip4s > 0) { 1390 if (ppr->pr_flags & PR_IP4) { 1391 /* 1392 * Make sure the new set of IP addresses is a 1393 * subset of the parent's list. Don't worry 1394 * about the parent being unlocked, as any 1395 * setting is done with allprison_lock held. 1396 */ 1397 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1398 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 1399 break; 1400 if (ij == ppr->pr_ip4s) { 1401 error = EPERM; 1402 goto done_deref_locked; 1403 } 1404 if (ip4s > 1) { 1405 for (ii = ij = 1; ii < ip4s; ii++) { 1406 if (ip4[ii].s_addr == 1407 ppr->pr_ip4[0].s_addr) 1408 continue; 1409 for (; ij < ppr->pr_ip4s; ij++) 1410 if (ip4[ii].s_addr == 1411 ppr->pr_ip4[ij].s_addr) 1412 break; 1413 if (ij == ppr->pr_ip4s) 1414 break; 1415 } 1416 if (ij == ppr->pr_ip4s) { 1417 error = EPERM; 1418 goto done_deref_locked; 1419 } 1420 } 1421 } 1422 /* 1423 * Check for conflicting IP addresses. We permit them 1424 * if there is no more than one IP on each jail. If 1425 * there is a duplicate on a jail with more than one 1426 * IP stop checking and return error. 
1427 */ 1428 tppr = ppr; 1429#ifdef VIMAGE 1430 for (; tppr != &prison0; tppr = tppr->pr_parent) 1431 if (tppr->pr_flags & PR_VNET) 1432 break; 1433#endif 1434 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1435 if (tpr == pr || 1436#ifdef VIMAGE 1437 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1438#endif 1439 tpr->pr_uref == 0) { 1440 descend = 0; 1441 continue; 1442 } 1443 if (!(tpr->pr_flags & PR_IP4_USER)) 1444 continue; 1445 descend = 0; 1446 if (tpr->pr_ip4 == NULL || 1447 (ip4s == 1 && tpr->pr_ip4s == 1)) 1448 continue; 1449 for (ii = 0; ii < ip4s; ii++) { 1450 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { 1451 error = EADDRINUSE; 1452 vfs_opterror(opts, 1453 "IPv4 addresses clash"); 1454 goto done_deref_locked; 1455 } 1456 } 1457 } 1458 } 1459#endif 1460#ifdef INET6 1461 if (ip6s > 0) { 1462 if (ppr->pr_flags & PR_IP6) { 1463 /* 1464 * Make sure the new set of IP addresses is a 1465 * subset of the parent's list. 1466 */ 1467 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1468 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1469 &ppr->pr_ip6[ij])) 1470 break; 1471 if (ij == ppr->pr_ip6s) { 1472 error = EPERM; 1473 goto done_deref_locked; 1474 } 1475 if (ip6s > 1) { 1476 for (ii = ij = 1; ii < ip6s; ii++) { 1477 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1478 &ppr->pr_ip6[0])) 1479 continue; 1480 for (; ij < ppr->pr_ip6s; ij++) 1481 if (IN6_ARE_ADDR_EQUAL( 1482 &ip6[ii], &ppr->pr_ip6[ij])) 1483 break; 1484 if (ij == ppr->pr_ip6s) 1485 break; 1486 } 1487 if (ij == ppr->pr_ip6s) { 1488 error = EPERM; 1489 goto done_deref_locked; 1490 } 1491 } 1492 } 1493 /* Check for conflicting IP addresses. 
*/ 1494 tppr = ppr; 1495#ifdef VIMAGE 1496 for (; tppr != &prison0; tppr = tppr->pr_parent) 1497 if (tppr->pr_flags & PR_VNET) 1498 break; 1499#endif 1500 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1501 if (tpr == pr || 1502#ifdef VIMAGE 1503 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1504#endif 1505 tpr->pr_uref == 0) { 1506 descend = 0; 1507 continue; 1508 } 1509 if (!(tpr->pr_flags & PR_IP6_USER)) 1510 continue; 1511 descend = 0; 1512 if (tpr->pr_ip6 == NULL || 1513 (ip6s == 1 && tpr->pr_ip6s == 1)) 1514 continue; 1515 for (ii = 0; ii < ip6s; ii++) { 1516 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { 1517 error = EADDRINUSE; 1518 vfs_opterror(opts, 1519 "IPv6 addresses clash"); 1520 goto done_deref_locked; 1521 } 1522 } 1523 } 1524 } 1525#endif 1526 onamelen = namelen = 0; 1527 if (name != NULL) { 1528 /* Give a default name of the jid. */ 1529 if (name[0] == '\0') 1530 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1531 else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && 1532 *p == '\0')) { 1533 error = EINVAL; 1534 vfs_opterror(opts, 1535 "name cannot be numeric (unless it is the jid)"); 1536 goto done_deref_locked; 1537 } 1538 /* 1539 * Make sure the name isn't too long for the prison or its 1540 * children. 1541 */ 1542 onamelen = strlen(pr->pr_name); 1543 namelen = strlen(name); 1544 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1545 error = ENAMETOOLONG; 1546 goto done_deref_locked; 1547 } 1548 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1549 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1550 sizeof(pr->pr_name)) { 1551 error = ENAMETOOLONG; 1552 goto done_deref_locked; 1553 } 1554 } 1555 } 1556 if (pr_allow & ~ppr->pr_allow) { 1557 error = EPERM; 1558 goto done_deref_locked; 1559 } 1560 1561 /* Set the parameters of the prison. 
*/ 1562#ifdef INET 1563 redo_ip4 = 0; 1564 if (pr_flags & PR_IP4_USER) { 1565 pr->pr_flags |= PR_IP4; 1566 free(pr->pr_ip4, M_PRISON); 1567 pr->pr_ip4s = ip4s; 1568 pr->pr_ip4 = ip4; 1569 ip4 = NULL; 1570 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1571#ifdef VIMAGE 1572 if (tpr->pr_flags & PR_VNET) { 1573 descend = 0; 1574 continue; 1575 } 1576#endif 1577 if (prison_restrict_ip4(tpr, NULL)) { 1578 redo_ip4 = 1; 1579 descend = 0; 1580 } 1581 } 1582 } 1583#endif 1584#ifdef INET6 1585 redo_ip6 = 0; 1586 if (pr_flags & PR_IP6_USER) { 1587 pr->pr_flags |= PR_IP6; 1588 free(pr->pr_ip6, M_PRISON); 1589 pr->pr_ip6s = ip6s; 1590 pr->pr_ip6 = ip6; 1591 ip6 = NULL; 1592 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1593#ifdef VIMAGE 1594 if (tpr->pr_flags & PR_VNET) { 1595 descend = 0; 1596 continue; 1597 } 1598#endif 1599 if (prison_restrict_ip6(tpr, NULL)) { 1600 redo_ip6 = 1; 1601 descend = 0; 1602 } 1603 } 1604 } 1605#endif 1606 if (gotslevel) { 1607 pr->pr_securelevel = slevel; 1608 /* Set all child jails to be at least this level. */ 1609 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1610 if (tpr->pr_securelevel < slevel) 1611 tpr->pr_securelevel = slevel; 1612 } 1613 if (gotchildmax) { 1614 pr->pr_childmax = childmax; 1615 /* Set all child jails to under this limit. */ 1616 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) 1617 if (tpr->pr_childmax > childmax - level) 1618 tpr->pr_childmax = childmax > level 1619 ? childmax - level : 0; 1620 } 1621 if (gotenforce) { 1622 pr->pr_enforce_statfs = enforce; 1623 /* Pass this restriction on to the children. */ 1624 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1625 if (tpr->pr_enforce_statfs < enforce) 1626 tpr->pr_enforce_statfs = enforce; 1627 } 1628 if (gotrsnum) { 1629 pr->pr_devfs_rsnum = rsnum; 1630 /* Pass this restriction on to the children. 
*/ 1631 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1632 tpr->pr_devfs_rsnum = rsnum; 1633 } 1634 if (name != NULL) { 1635 if (ppr == &prison0) 1636 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1637 else 1638 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1639 ppr->pr_name, name); 1640 /* Change this component of child names. */ 1641 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1642 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1643 strlen(tpr->pr_name + onamelen) + 1); 1644 bcopy(pr->pr_name, tpr->pr_name, namelen); 1645 } 1646 } 1647 if (path != NULL) { 1648 /* Try to keep a real-rooted full pathname. */ 1649 if (fullpath_disabled && path[0] == '/' && 1650 strcmp(mypr->pr_path, "/")) 1651 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1652 mypr->pr_path, path); 1653 else 1654 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1655 pr->pr_root = root; 1656 } 1657 if (PR_HOST & ch_flags & ~pr_flags) { 1658 if (pr->pr_flags & PR_HOST) { 1659 /* 1660 * Copy the parent's host info. As with pr_ip4 above, 1661 * the lack of a lock on the parent is not a problem; 1662 * it is always set with allprison_lock at least 1663 * shared, and is held exclusively here. 1664 */ 1665 strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, 1666 sizeof(pr->pr_hostname)); 1667 strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, 1668 sizeof(pr->pr_domainname)); 1669 strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, 1670 sizeof(pr->pr_hostuuid)); 1671 pr->pr_hostid = pr->pr_parent->pr_hostid; 1672 } 1673 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1674 /* Set this prison, and any descendants without PR_HOST. 
*/ 1675 if (host != NULL) 1676 strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); 1677 if (domain != NULL) 1678 strlcpy(pr->pr_domainname, domain, 1679 sizeof(pr->pr_domainname)); 1680 if (uuid != NULL) 1681 strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); 1682 if (gothid) 1683 pr->pr_hostid = hid; 1684 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1685 if (tpr->pr_flags & PR_HOST) 1686 descend = 0; 1687 else { 1688 if (host != NULL) 1689 strlcpy(tpr->pr_hostname, 1690 pr->pr_hostname, 1691 sizeof(tpr->pr_hostname)); 1692 if (domain != NULL) 1693 strlcpy(tpr->pr_domainname, 1694 pr->pr_domainname, 1695 sizeof(tpr->pr_domainname)); 1696 if (uuid != NULL) 1697 strlcpy(tpr->pr_hostuuid, 1698 pr->pr_hostuuid, 1699 sizeof(tpr->pr_hostuuid)); 1700 if (gothid) 1701 tpr->pr_hostid = hid; 1702 } 1703 } 1704 } 1705 if ((tallow = ch_allow & ~pr_allow)) { 1706 /* Clear allow bits in all children. */ 1707 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1708 tpr->pr_allow &= ~tallow; 1709 } 1710 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1711 /* 1712 * Persistent prisons get an extra reference, and prisons losing their 1713 * persist flag lose that reference. Only do this for existing prisons 1714 * for now, so new ones will remain unseen until after the module 1715 * handlers have completed. 1716 */ 1717 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1718 if (pr_flags & PR_PERSIST) { 1719 pr->pr_ref++; 1720 pr->pr_uref++; 1721 } else { 1722 pr->pr_ref--; 1723 pr->pr_uref--; 1724 } 1725 } 1726 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1727 mtx_unlock(&pr->pr_mtx); 1728 1729#ifdef RACCT 1730 if (created) 1731 prison_racct_attach(pr); 1732#endif 1733 1734 /* Locks may have prevented a complete restriction of child IP 1735 * addresses. If so, allocate some more memory and try again. 
1736 */ 1737#ifdef INET 1738 while (redo_ip4) { 1739 ip4s = pr->pr_ip4s; 1740 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1741 mtx_lock(&pr->pr_mtx); 1742 redo_ip4 = 0; 1743 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1744#ifdef VIMAGE 1745 if (tpr->pr_flags & PR_VNET) { 1746 descend = 0; 1747 continue; 1748 } 1749#endif 1750 if (prison_restrict_ip4(tpr, ip4)) { 1751 if (ip4 != NULL) 1752 ip4 = NULL; 1753 else 1754 redo_ip4 = 1; 1755 } 1756 } 1757 mtx_unlock(&pr->pr_mtx); 1758 } 1759#endif 1760#ifdef INET6 1761 while (redo_ip6) { 1762 ip6s = pr->pr_ip6s; 1763 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1764 mtx_lock(&pr->pr_mtx); 1765 redo_ip6 = 0; 1766 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1767#ifdef VIMAGE 1768 if (tpr->pr_flags & PR_VNET) { 1769 descend = 0; 1770 continue; 1771 } 1772#endif 1773 if (prison_restrict_ip6(tpr, ip6)) { 1774 if (ip6 != NULL) 1775 ip6 = NULL; 1776 else 1777 redo_ip6 = 1; 1778 } 1779 } 1780 mtx_unlock(&pr->pr_mtx); 1781 } 1782#endif 1783 1784 /* Let the modules do their work. */ 1785 sx_downgrade(&allprison_lock); 1786 if (created) { 1787 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1788 if (error) { 1789 prison_deref(pr, PD_LIST_SLOCKED); 1790 goto done_errmsg; 1791 } 1792 } 1793 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1794 if (error) { 1795 prison_deref(pr, created 1796 ? PD_LIST_SLOCKED 1797 : PD_DEREF | PD_LIST_SLOCKED); 1798 goto done_errmsg; 1799 } 1800 1801 /* Attach this process to the prison if requested. */ 1802 if (flags & JAIL_ATTACH) { 1803 mtx_lock(&pr->pr_mtx); 1804 error = do_jail_attach(td, pr); 1805 if (error) { 1806 vfs_opterror(opts, "attach failed"); 1807 if (!created) 1808 prison_deref(pr, PD_DEREF); 1809 goto done_errmsg; 1810 } 1811 } 1812 1813 /* 1814 * Now that it is all there, drop the temporary reference from existing 1815 * prisons. 
Or add a reference to newly created persistent prisons 1816 * (which was not done earlier so that the prison would not be publicly 1817 * visible). 1818 */ 1819 if (!created) { 1820 prison_deref(pr, (flags & JAIL_ATTACH) 1821 ? PD_DEREF 1822 : PD_DEREF | PD_LIST_SLOCKED); 1823 } else { 1824 if (pr_flags & PR_PERSIST) { 1825 mtx_lock(&pr->pr_mtx); 1826 pr->pr_ref++; 1827 pr->pr_uref++; 1828 mtx_unlock(&pr->pr_mtx); 1829 } 1830 if (!(flags & JAIL_ATTACH)) 1831 sx_sunlock(&allprison_lock); 1832 } 1833 td->td_retval[0] = pr->pr_id; 1834 goto done_errmsg; 1835 1836 done_deref_locked: 1837 prison_deref(pr, created 1838 ? PD_LOCKED | PD_LIST_XLOCKED 1839 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1840 goto done_releroot; 1841 done_unlock_list: 1842 sx_xunlock(&allprison_lock); 1843 done_releroot: 1844 if (root != NULL) { 1845 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1846 vrele(root); 1847 VFS_UNLOCK_GIANT(vfslocked); 1848 } 1849 done_errmsg: 1850 if (error) { 1851 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1852 if (errmsg_len > 0) { 1853 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1854 if (errmsg_pos > 0) { 1855 if (optuio->uio_segflg == UIO_SYSSPACE) 1856 bcopy(errmsg, 1857 optuio->uio_iov[errmsg_pos].iov_base, 1858 errmsg_len); 1859 else 1860 copyout(errmsg, 1861 optuio->uio_iov[errmsg_pos].iov_base, 1862 errmsg_len); 1863 } 1864 } 1865 } 1866 done_free: 1867#ifdef INET 1868 free(ip4, M_PRISON); 1869#endif 1870#ifdef INET6 1871 free(ip6, M_PRISON); 1872#endif 1873 if (g_path != NULL) 1874 free(g_path, M_TEMP); 1875 vfs_freeopts(opts); 1876 return (error); 1877} 1878 1879 1880/* 1881 * struct jail_get_args { 1882 * struct iovec *iovp; 1883 * unsigned int iovcnt; 1884 * int flags; 1885 * }; 1886 */ 1887int 1888sys_jail_get(struct thread *td, struct jail_get_args *uap) 1889{ 1890 struct uio *auio; 1891 int error; 1892 1893 /* Check that we have an even number of iovecs. 
*/ 1894 if (uap->iovcnt & 1) 1895 return (EINVAL); 1896 1897 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1898 if (error) 1899 return (error); 1900 error = kern_jail_get(td, auio, uap->flags); 1901 if (error == 0) 1902 error = copyout(auio->uio_iov, uap->iovp, 1903 uap->iovcnt * sizeof (struct iovec)); 1904 free(auio, M_IOV); 1905 return (error); 1906} 1907 1908int 1909kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1910{ 1911 struct prison *pr, *mypr; 1912 struct vfsopt *opt; 1913 struct vfsoptlist *opts; 1914 char *errmsg, *name; 1915 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1916 1917 if (flags & ~JAIL_GET_MASK) 1918 return (EINVAL); 1919 1920 /* Get the parameter list. */ 1921 error = vfs_buildopts(optuio, &opts); 1922 if (error) 1923 return (error); 1924 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1925 mypr = td->td_ucred->cr_prison; 1926 1927 /* 1928 * Find the prison specified by one of: lastjid, jid, name. 1929 */ 1930 sx_slock(&allprison_lock); 1931 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1932 if (error == 0) { 1933 TAILQ_FOREACH(pr, &allprison, pr_list) { 1934 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1935 mtx_lock(&pr->pr_mtx); 1936 if (pr->pr_ref > 0 && 1937 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1938 break; 1939 mtx_unlock(&pr->pr_mtx); 1940 } 1941 } 1942 if (pr != NULL) 1943 goto found_prison; 1944 error = ENOENT; 1945 vfs_opterror(opts, "no jail after %d", jid); 1946 goto done_unlock_list; 1947 } else if (error != ENOENT) 1948 goto done_unlock_list; 1949 1950 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1951 if (error == 0) { 1952 if (jid != 0) { 1953 pr = prison_find_child(mypr, jid); 1954 if (pr != NULL) { 1955 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1956 mtx_unlock(&pr->pr_mtx); 1957 error = ENOENT; 1958 vfs_opterror(opts, "jail %d is dying", 1959 jid); 1960 goto done_unlock_list; 1961 } 1962 goto found_prison; 1963 } 1964 error = ENOENT; 1965 
vfs_opterror(opts, "jail %d not found", jid); 1966 goto done_unlock_list; 1967 } 1968 } else if (error != ENOENT) 1969 goto done_unlock_list; 1970 1971 error = vfs_getopt(opts, "name", (void **)&name, &len); 1972 if (error == 0) { 1973 if (len == 0 || name[len - 1] != '\0') { 1974 error = EINVAL; 1975 goto done_unlock_list; 1976 } 1977 pr = prison_find_name(mypr, name); 1978 if (pr != NULL) { 1979 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1980 mtx_unlock(&pr->pr_mtx); 1981 error = ENOENT; 1982 vfs_opterror(opts, "jail \"%s\" is dying", 1983 name); 1984 goto done_unlock_list; 1985 } 1986 goto found_prison; 1987 } 1988 error = ENOENT; 1989 vfs_opterror(opts, "jail \"%s\" not found", name); 1990 goto done_unlock_list; 1991 } else if (error != ENOENT) 1992 goto done_unlock_list; 1993 1994 vfs_opterror(opts, "no jail specified"); 1995 error = ENOENT; 1996 goto done_unlock_list; 1997 1998 found_prison: 1999 /* Get the parameters of the prison. */ 2000 pr->pr_ref++; 2001 locked = PD_LOCKED; 2002 td->td_retval[0] = pr->pr_id; 2003 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 2004 if (error != 0 && error != ENOENT) 2005 goto done_deref; 2006 i = (pr->pr_parent == mypr) ? 
0 : pr->pr_parent->pr_id; 2007 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 2008 if (error != 0 && error != ENOENT) 2009 goto done_deref; 2010 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 2011 if (error != 0 && error != ENOENT) 2012 goto done_deref; 2013 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 2014 sizeof(pr->pr_cpuset->cs_id)); 2015 if (error != 0 && error != ENOENT) 2016 goto done_deref; 2017 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 2018 if (error != 0 && error != ENOENT) 2019 goto done_deref; 2020#ifdef INET 2021 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 2022 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2023 if (error != 0 && error != ENOENT) 2024 goto done_deref; 2025#endif 2026#ifdef INET6 2027 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 2028 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2029 if (error != 0 && error != ENOENT) 2030 goto done_deref; 2031#endif 2032 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 2033 sizeof(pr->pr_securelevel)); 2034 if (error != 0 && error != ENOENT) 2035 goto done_deref; 2036 error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, 2037 sizeof(pr->pr_childcount)); 2038 if (error != 0 && error != ENOENT) 2039 goto done_deref; 2040 error = vfs_setopt(opts, "children.max", &pr->pr_childmax, 2041 sizeof(pr->pr_childmax)); 2042 if (error != 0 && error != ENOENT) 2043 goto done_deref; 2044 error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); 2045 if (error != 0 && error != ENOENT) 2046 goto done_deref; 2047 error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); 2048 if (error != 0 && error != ENOENT) 2049 goto done_deref; 2050 error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); 2051 if (error != 0 && error != ENOENT) 2052 goto done_deref; 2053#ifdef COMPAT_FREEBSD32 2054 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 2055 uint32_t hid32 = pr->pr_hostid; 2056 2057 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 2058 } 
else 2059#endif 2060 error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, 2061 sizeof(pr->pr_hostid)); 2062 if (error != 0 && error != ENOENT) 2063 goto done_deref; 2064 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 2065 sizeof(pr->pr_enforce_statfs)); 2066 if (error != 0 && error != ENOENT) 2067 goto done_deref; 2068 error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, 2069 sizeof(pr->pr_devfs_rsnum)); 2070 if (error != 0 && error != ENOENT) 2071 goto done_deref; 2072 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 2073 fi++) { 2074 if (pr_flag_names[fi] == NULL) 2075 continue; 2076 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 2077 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 2078 if (error != 0 && error != ENOENT) 2079 goto done_deref; 2080 i = !i; 2081 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 2082 if (error != 0 && error != ENOENT) 2083 goto done_deref; 2084 } 2085 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 2086 fi++) { 2087 i = pr->pr_flags & 2088 (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 2089 i = pr_flag_jailsys[fi].disable && 2090 (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE 2091 : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW 2092 : JAIL_SYS_INHERIT; 2093 error = 2094 vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); 2095 if (error != 0 && error != ENOENT) 2096 goto done_deref; 2097 } 2098 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 2099 fi++) { 2100 if (pr_allow_names[fi] == NULL) 2101 continue; 2102 i = (pr->pr_allow & (1 << fi)) ? 
1 : 0; 2103 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 2104 if (error != 0 && error != ENOENT) 2105 goto done_deref; 2106 i = !i; 2107 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 2108 if (error != 0 && error != ENOENT) 2109 goto done_deref; 2110 } 2111 i = (pr->pr_uref == 0); 2112 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 2113 if (error != 0 && error != ENOENT) 2114 goto done_deref; 2115 i = !i; 2116 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 2117 if (error != 0 && error != ENOENT) 2118 goto done_deref; 2119 2120 /* Get the module parameters. */ 2121 mtx_unlock(&pr->pr_mtx); 2122 locked = 0; 2123 error = osd_jail_call(pr, PR_METHOD_GET, opts); 2124 if (error) 2125 goto done_deref; 2126 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 2127 2128 /* By now, all parameters should have been noted. */ 2129 TAILQ_FOREACH(opt, opts, link) { 2130 if (!opt->seen && strcmp(opt->name, "errmsg")) { 2131 error = EINVAL; 2132 vfs_opterror(opts, "unknown parameter: %s", opt->name); 2133 goto done_errmsg; 2134 } 2135 } 2136 2137 /* Write the fetched parameters back to userspace. 
*/ 2138 error = 0; 2139 TAILQ_FOREACH(opt, opts, link) { 2140 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 2141 pos = 2 * opt->pos + 1; 2142 optuio->uio_iov[pos].iov_len = opt->len; 2143 if (opt->value != NULL) { 2144 if (optuio->uio_segflg == UIO_SYSSPACE) { 2145 bcopy(opt->value, 2146 optuio->uio_iov[pos].iov_base, 2147 opt->len); 2148 } else { 2149 error = copyout(opt->value, 2150 optuio->uio_iov[pos].iov_base, 2151 opt->len); 2152 if (error) 2153 break; 2154 } 2155 } 2156 } 2157 } 2158 goto done_errmsg; 2159 2160 done_deref: 2161 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 2162 goto done_errmsg; 2163 2164 done_unlock_list: 2165 sx_sunlock(&allprison_lock); 2166 done_errmsg: 2167 if (error && errmsg_pos >= 0) { 2168 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 2169 errmsg_pos = 2 * errmsg_pos + 1; 2170 if (errmsg_len > 0) { 2171 if (optuio->uio_segflg == UIO_SYSSPACE) 2172 bcopy(errmsg, 2173 optuio->uio_iov[errmsg_pos].iov_base, 2174 errmsg_len); 2175 else 2176 copyout(errmsg, 2177 optuio->uio_iov[errmsg_pos].iov_base, 2178 errmsg_len); 2179 } 2180 } 2181 vfs_freeopts(opts); 2182 return (error); 2183} 2184 2185 2186/* 2187 * struct jail_remove_args { 2188 * int jid; 2189 * }; 2190 */ 2191int 2192sys_jail_remove(struct thread *td, struct jail_remove_args *uap) 2193{ 2194 struct prison *pr, *cpr, *lpr, *tpr; 2195 int descend, error; 2196 2197 error = priv_check(td, PRIV_JAIL_REMOVE); 2198 if (error) 2199 return (error); 2200 2201 sx_xlock(&allprison_lock); 2202 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2203 if (pr == NULL) { 2204 sx_xunlock(&allprison_lock); 2205 return (EINVAL); 2206 } 2207 2208 /* Remove all descendants of this prison, then remove this prison. 
*/ 2209 pr->pr_ref++; 2210 pr->pr_flags |= PR_REMOVE; 2211 if (!LIST_EMPTY(&pr->pr_children)) { 2212 mtx_unlock(&pr->pr_mtx); 2213 lpr = NULL; 2214 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2215 mtx_lock(&cpr->pr_mtx); 2216 if (cpr->pr_ref > 0) { 2217 tpr = cpr; 2218 cpr->pr_ref++; 2219 cpr->pr_flags |= PR_REMOVE; 2220 } else { 2221 /* Already removed - do not do it again. */ 2222 tpr = NULL; 2223 } 2224 mtx_unlock(&cpr->pr_mtx); 2225 if (lpr != NULL) { 2226 mtx_lock(&lpr->pr_mtx); 2227 prison_remove_one(lpr); 2228 sx_xlock(&allprison_lock); 2229 } 2230 lpr = tpr; 2231 } 2232 if (lpr != NULL) { 2233 mtx_lock(&lpr->pr_mtx); 2234 prison_remove_one(lpr); 2235 sx_xlock(&allprison_lock); 2236 } 2237 mtx_lock(&pr->pr_mtx); 2238 } 2239 prison_remove_one(pr); 2240 return (0); 2241} 2242 2243static void 2244prison_remove_one(struct prison *pr) 2245{ 2246 struct proc *p; 2247 int deuref; 2248 2249 /* If the prison was persistent, it is not anymore. */ 2250 deuref = 0; 2251 if (pr->pr_flags & PR_PERSIST) { 2252 pr->pr_ref--; 2253 deuref = PD_DEUREF; 2254 pr->pr_flags &= ~PR_PERSIST; 2255 } 2256 2257 /* 2258 * jail_remove added a reference. If that's the only one, remove 2259 * the prison now. 2260 */ 2261 KASSERT(pr->pr_ref > 0, 2262 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2263 if (pr->pr_ref == 1) { 2264 prison_deref(pr, 2265 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2266 return; 2267 } 2268 2269 mtx_unlock(&pr->pr_mtx); 2270 sx_xunlock(&allprison_lock); 2271 /* 2272 * Kill all processes unfortunate enough to be attached to this prison. 2273 */ 2274 sx_slock(&allproc_lock); 2275 LIST_FOREACH(p, &allproc, p_list) { 2276 PROC_LOCK(p); 2277 if (p->p_state != PRS_NEW && p->p_ucred && 2278 p->p_ucred->cr_prison == pr) 2279 kern_psignal(p, SIGKILL); 2280 PROC_UNLOCK(p); 2281 } 2282 sx_sunlock(&allproc_lock); 2283 /* Remove the temporary reference added by jail_remove. 
*/ 2284 prison_deref(pr, deuref | PD_DEREF); 2285} 2286 2287 2288/* 2289 * struct jail_attach_args { 2290 * int jid; 2291 * }; 2292 */ 2293int 2294sys_jail_attach(struct thread *td, struct jail_attach_args *uap) 2295{ 2296 struct prison *pr; 2297 int error; 2298 2299 error = priv_check(td, PRIV_JAIL_ATTACH); 2300 if (error) 2301 return (error); 2302 2303 sx_slock(&allprison_lock); 2304 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2305 if (pr == NULL) { 2306 sx_sunlock(&allprison_lock); 2307 return (EINVAL); 2308 } 2309 2310 /* 2311 * Do not allow a process to attach to a prison that is not 2312 * considered to be "alive". 2313 */ 2314 if (pr->pr_uref == 0) { 2315 mtx_unlock(&pr->pr_mtx); 2316 sx_sunlock(&allprison_lock); 2317 return (EINVAL); 2318 } 2319 2320 return (do_jail_attach(td, pr)); 2321} 2322 2323static int 2324do_jail_attach(struct thread *td, struct prison *pr) 2325{ 2326 struct prison *ppr; 2327 struct proc *p; 2328 struct ucred *newcred, *oldcred; 2329 int vfslocked, error; 2330 2331 /* 2332 * XXX: Note that there is a slight race here if two threads 2333 * in the same privileged process attempt to attach to two 2334 * different jails at the same time. It is important for 2335 * user processes not to do this, or they might end up with 2336 * a process root from one prison, but attached to the jail 2337 * of another. 2338 */ 2339 pr->pr_ref++; 2340 pr->pr_uref++; 2341 mtx_unlock(&pr->pr_mtx); 2342 2343 /* Let modules do whatever they need to prepare for attaching. */ 2344 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2345 if (error) { 2346 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2347 return (error); 2348 } 2349 sx_sunlock(&allprison_lock); 2350 2351 /* 2352 * Reparent the newly attached process to this jail. 
2353 */ 2354 ppr = td->td_ucred->cr_prison; 2355 p = td->td_proc; 2356 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2357 if (error) 2358 goto e_revert_osd; 2359 2360 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2361 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2362 if ((error = change_dir(pr->pr_root, td)) != 0) 2363 goto e_unlock; 2364#ifdef MAC 2365 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2366 goto e_unlock; 2367#endif 2368 VOP_UNLOCK(pr->pr_root, 0); 2369 if ((error = change_root(pr->pr_root, td))) 2370 goto e_unlock_giant; 2371 VFS_UNLOCK_GIANT(vfslocked); 2372 2373 newcred = crget(); 2374 PROC_LOCK(p); 2375 oldcred = p->p_ucred; 2376 setsugid(p); 2377 crcopy(newcred, oldcred); 2378 newcred->cr_prison = pr; 2379 p->p_ucred = newcred; 2380 PROC_UNLOCK(p); 2381#ifdef RACCT 2382 racct_proc_ucred_changed(p, oldcred, newcred); 2383#endif 2384 crfree(oldcred); 2385 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2386 return (0); 2387 e_unlock: 2388 VOP_UNLOCK(pr->pr_root, 0); 2389 e_unlock_giant: 2390 VFS_UNLOCK_GIANT(vfslocked); 2391 e_revert_osd: 2392 /* Tell modules this thread is still in its old jail after all. */ 2393 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2394 prison_deref(pr, PD_DEREF | PD_DEUREF); 2395 return (error); 2396} 2397 2398 2399/* 2400 * Returns a locked prison instance, or NULL on failure. 2401 */ 2402struct prison * 2403prison_find(int prid) 2404{ 2405 struct prison *pr; 2406 2407 sx_assert(&allprison_lock, SX_LOCKED); 2408 TAILQ_FOREACH(pr, &allprison, pr_list) { 2409 if (pr->pr_id == prid) { 2410 mtx_lock(&pr->pr_mtx); 2411 if (pr->pr_ref > 0) 2412 return (pr); 2413 mtx_unlock(&pr->pr_mtx); 2414 } 2415 } 2416 return (NULL); 2417} 2418 2419/* 2420 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
2421 */ 2422struct prison * 2423prison_find_child(struct prison *mypr, int prid) 2424{ 2425 struct prison *pr; 2426 int descend; 2427 2428 sx_assert(&allprison_lock, SX_LOCKED); 2429 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2430 if (pr->pr_id == prid) { 2431 mtx_lock(&pr->pr_mtx); 2432 if (pr->pr_ref > 0) 2433 return (pr); 2434 mtx_unlock(&pr->pr_mtx); 2435 } 2436 } 2437 return (NULL); 2438} 2439 2440/* 2441 * Look for the name relative to mypr. Returns a locked prison or NULL. 2442 */ 2443struct prison * 2444prison_find_name(struct prison *mypr, const char *name) 2445{ 2446 struct prison *pr, *deadpr; 2447 size_t mylen; 2448 int descend; 2449 2450 sx_assert(&allprison_lock, SX_LOCKED); 2451 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2452 again: 2453 deadpr = NULL; 2454 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2455 if (!strcmp(pr->pr_name + mylen, name)) { 2456 mtx_lock(&pr->pr_mtx); 2457 if (pr->pr_ref > 0) { 2458 if (pr->pr_uref > 0) 2459 return (pr); 2460 deadpr = pr; 2461 } 2462 mtx_unlock(&pr->pr_mtx); 2463 } 2464 } 2465 /* There was no valid prison - perhaps there was a dying one. */ 2466 if (deadpr != NULL) { 2467 mtx_lock(&deadpr->pr_mtx); 2468 if (deadpr->pr_ref == 0) { 2469 mtx_unlock(&deadpr->pr_mtx); 2470 goto again; 2471 } 2472 } 2473 return (deadpr); 2474} 2475 2476/* 2477 * See if a prison has the specific flag set. 2478 */ 2479int 2480prison_flag(struct ucred *cred, unsigned flag) 2481{ 2482 2483 /* This is an atomic read, so no locking is necessary. */ 2484 return (cred->cr_prison->pr_flags & flag); 2485} 2486 2487int 2488prison_allow(struct ucred *cred, unsigned flag) 2489{ 2490 2491 /* This is an atomic read, so no locking is necessary. */ 2492 return (cred->cr_prison->pr_allow & flag); 2493} 2494 2495/* 2496 * Remove a prison reference. If that was the last reference, remove the 2497 * prison itself - but not in this context in case there are locks held. 
2498 */ 2499void 2500prison_free_locked(struct prison *pr) 2501{ 2502 2503 mtx_assert(&pr->pr_mtx, MA_OWNED); 2504 pr->pr_ref--; 2505 if (pr->pr_ref == 0) { 2506 mtx_unlock(&pr->pr_mtx); 2507 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2508 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2509 return; 2510 } 2511 mtx_unlock(&pr->pr_mtx); 2512} 2513 2514void 2515prison_free(struct prison *pr) 2516{ 2517 2518 mtx_lock(&pr->pr_mtx); 2519 prison_free_locked(pr); 2520} 2521 2522static void 2523prison_complete(void *context, int pending) 2524{ 2525 2526 prison_deref((struct prison *)context, 0); 2527} 2528 2529/* 2530 * Remove a prison reference (usually). This internal version assumes no 2531 * mutexes are held, except perhaps the prison itself. If there are no more 2532 * references, release and delist the prison. On completion, the prison lock 2533 * and the allprison lock are both unlocked. 2534 */ 2535static void 2536prison_deref(struct prison *pr, int flags) 2537{ 2538 struct prison *ppr, *tpr; 2539 int vfslocked; 2540 2541 if (!(flags & PD_LOCKED)) 2542 mtx_lock(&pr->pr_mtx); 2543 for (;;) { 2544 if (flags & PD_DEUREF) { 2545 pr->pr_uref--; 2546 KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); 2547 } 2548 if (flags & PD_DEREF) 2549 pr->pr_ref--; 2550 /* If the prison still has references, nothing else to do. 
*/ 2551 if (pr->pr_ref > 0) { 2552 mtx_unlock(&pr->pr_mtx); 2553 if (flags & PD_LIST_SLOCKED) 2554 sx_sunlock(&allprison_lock); 2555 else if (flags & PD_LIST_XLOCKED) 2556 sx_xunlock(&allprison_lock); 2557 return; 2558 } 2559 2560 mtx_unlock(&pr->pr_mtx); 2561 if (flags & PD_LIST_SLOCKED) { 2562 if (!sx_try_upgrade(&allprison_lock)) { 2563 sx_sunlock(&allprison_lock); 2564 sx_xlock(&allprison_lock); 2565 } 2566 } else if (!(flags & PD_LIST_XLOCKED)) 2567 sx_xlock(&allprison_lock); 2568 2569 TAILQ_REMOVE(&allprison, pr, pr_list); 2570 LIST_REMOVE(pr, pr_sibling); 2571 ppr = pr->pr_parent; 2572 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2573 tpr->pr_childcount--; 2574 sx_xunlock(&allprison_lock); 2575 2576#ifdef VIMAGE 2577 if (pr->pr_vnet != ppr->pr_vnet) 2578 vnet_destroy(pr->pr_vnet); 2579#endif 2580 if (pr->pr_root != NULL) { 2581 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2582 vrele(pr->pr_root); 2583 VFS_UNLOCK_GIANT(vfslocked); 2584 } 2585 mtx_destroy(&pr->pr_mtx); 2586#ifdef INET 2587 free(pr->pr_ip4, M_PRISON); 2588#endif 2589#ifdef INET6 2590 free(pr->pr_ip6, M_PRISON); 2591#endif 2592 if (pr->pr_cpuset != NULL) 2593 cpuset_rel(pr->pr_cpuset); 2594 osd_jail_exit(pr); 2595#ifdef RACCT 2596 prison_racct_detach(pr); 2597#endif 2598 free(pr, M_PRISON); 2599 2600 /* Removing a prison frees a reference on its parent. 
*/ 2601 pr = ppr; 2602 mtx_lock(&pr->pr_mtx); 2603 flags = PD_DEREF | PD_DEUREF; 2604 } 2605} 2606 2607void 2608prison_hold_locked(struct prison *pr) 2609{ 2610 2611 mtx_assert(&pr->pr_mtx, MA_OWNED); 2612 KASSERT(pr->pr_ref > 0, 2613 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2614 pr->pr_ref++; 2615} 2616 2617void 2618prison_hold(struct prison *pr) 2619{ 2620 2621 mtx_lock(&pr->pr_mtx); 2622 prison_hold_locked(pr); 2623 mtx_unlock(&pr->pr_mtx); 2624} 2625 2626void 2627prison_proc_hold(struct prison *pr) 2628{ 2629 2630 mtx_lock(&pr->pr_mtx); 2631 KASSERT(pr->pr_uref > 0, 2632 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2633 pr->pr_uref++; 2634 mtx_unlock(&pr->pr_mtx); 2635} 2636 2637void 2638prison_proc_free(struct prison *pr) 2639{ 2640 2641 mtx_lock(&pr->pr_mtx); 2642 KASSERT(pr->pr_uref > 0, 2643 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2644 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2645} 2646 2647 2648#ifdef INET 2649/* 2650 * Restrict a prison's IP address list with its parent's, possibly replacing 2651 * it. Return true if the replacement buffer was used (or would have been). 2652 */ 2653static int 2654prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2655{ 2656 int ii, ij, used; 2657 struct prison *ppr; 2658 2659 ppr = pr->pr_parent; 2660 if (!(pr->pr_flags & PR_IP4_USER)) { 2661 /* This has no user settings, so just copy the parent's list. */ 2662 if (pr->pr_ip4s < ppr->pr_ip4s) { 2663 /* 2664 * There's no room for the parent's list. Use the 2665 * new list buffer, which is assumed to be big enough 2666 * (if it was passed). If there's no buffer, try to 2667 * allocate one. 
2668 */ 2669 used = 1; 2670 if (newip4 == NULL) { 2671 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2672 M_PRISON, M_NOWAIT); 2673 if (newip4 != NULL) 2674 used = 0; 2675 } 2676 if (newip4 != NULL) { 2677 bcopy(ppr->pr_ip4, newip4, 2678 ppr->pr_ip4s * sizeof(*newip4)); 2679 free(pr->pr_ip4, M_PRISON); 2680 pr->pr_ip4 = newip4; 2681 pr->pr_ip4s = ppr->pr_ip4s; 2682 } 2683 return (used); 2684 } 2685 pr->pr_ip4s = ppr->pr_ip4s; 2686 if (pr->pr_ip4s > 0) 2687 bcopy(ppr->pr_ip4, pr->pr_ip4, 2688 pr->pr_ip4s * sizeof(*newip4)); 2689 else if (pr->pr_ip4 != NULL) { 2690 free(pr->pr_ip4, M_PRISON); 2691 pr->pr_ip4 = NULL; 2692 } 2693 } else if (pr->pr_ip4s > 0) { 2694 /* Remove addresses that aren't in the parent. */ 2695 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2696 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2697 break; 2698 if (ij < ppr->pr_ip4s) 2699 ii = 1; 2700 else { 2701 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2702 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2703 ii = 0; 2704 } 2705 for (ij = 1; ii < pr->pr_ip4s; ) { 2706 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2707 ii++; 2708 continue; 2709 } 2710 switch (ij >= ppr->pr_ip4s ? -1 : 2711 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2712 case -1: 2713 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2714 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2715 break; 2716 case 0: 2717 ii++; 2718 ij++; 2719 break; 2720 case 1: 2721 ij++; 2722 break; 2723 } 2724 } 2725 if (pr->pr_ip4s == 0) { 2726 pr->pr_flags |= PR_IP4_DISABLE; 2727 free(pr->pr_ip4, M_PRISON); 2728 pr->pr_ip4 = NULL; 2729 } 2730 } 2731 return (0); 2732} 2733 2734/* 2735 * Pass back primary IPv4 address of this jail. 2736 * 2737 * If not restricted return success but do not alter the address. Caller has 2738 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2739 * 2740 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2741 * Address returned in NBO. 
2742 */ 2743int 2744prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2745{ 2746 struct prison *pr; 2747 2748 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2749 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2750 2751 pr = cred->cr_prison; 2752 if (!(pr->pr_flags & PR_IP4)) 2753 return (0); 2754 mtx_lock(&pr->pr_mtx); 2755 if (!(pr->pr_flags & PR_IP4)) { 2756 mtx_unlock(&pr->pr_mtx); 2757 return (0); 2758 } 2759 if (pr->pr_ip4 == NULL) { 2760 mtx_unlock(&pr->pr_mtx); 2761 return (EAFNOSUPPORT); 2762 } 2763 2764 ia->s_addr = pr->pr_ip4[0].s_addr; 2765 mtx_unlock(&pr->pr_mtx); 2766 return (0); 2767} 2768 2769/* 2770 * Return 1 if we should do proper source address selection or are not jailed. 2771 * We will return 0 if we should bypass source address selection in favour 2772 * of the primary jail IPv4 address. Only in this case *ia will be updated and 2773 * returned in NBO. 2774 * Return EAFNOSUPPORT, in case this jail does not allow IPv4. 2775 */ 2776int 2777prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 2778{ 2779 struct prison *pr; 2780 struct in_addr lia; 2781 int error; 2782 2783 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2784 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2785 2786 if (!jailed(cred)) 2787 return (1); 2788 2789 pr = cred->cr_prison; 2790 if (pr->pr_flags & PR_IP4_SADDRSEL) 2791 return (1); 2792 2793 lia.s_addr = INADDR_ANY; 2794 error = prison_get_ip4(cred, &lia); 2795 if (error) 2796 return (error); 2797 if (lia.s_addr == INADDR_ANY) 2798 return (1); 2799 2800 ia->s_addr = lia.s_addr; 2801 return (0); 2802} 2803 2804/* 2805 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2806 */ 2807int 2808prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2809{ 2810 2811 if (pr1 == pr2) 2812 return (1); 2813 2814 /* 2815 * No need to lock since the PR_IP4_USER flag can't be altered for 2816 * existing prisons. 
2817 */ 2818 while (pr1 != &prison0 && 2819#ifdef VIMAGE 2820 !(pr1->pr_flags & PR_VNET) && 2821#endif 2822 !(pr1->pr_flags & PR_IP4_USER)) 2823 pr1 = pr1->pr_parent; 2824 while (pr2 != &prison0 && 2825#ifdef VIMAGE 2826 !(pr2->pr_flags & PR_VNET) && 2827#endif 2828 !(pr2->pr_flags & PR_IP4_USER)) 2829 pr2 = pr2->pr_parent; 2830 return (pr1 == pr2); 2831} 2832 2833/* 2834 * Make sure our (source) address is set to something meaningful to this 2835 * jail. 2836 * 2837 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2838 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2839 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2840 */ 2841int 2842prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2843{ 2844 struct prison *pr; 2845 struct in_addr ia0; 2846 int error; 2847 2848 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2849 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2850 2851 pr = cred->cr_prison; 2852 if (!(pr->pr_flags & PR_IP4)) 2853 return (0); 2854 mtx_lock(&pr->pr_mtx); 2855 if (!(pr->pr_flags & PR_IP4)) { 2856 mtx_unlock(&pr->pr_mtx); 2857 return (0); 2858 } 2859 if (pr->pr_ip4 == NULL) { 2860 mtx_unlock(&pr->pr_mtx); 2861 return (EAFNOSUPPORT); 2862 } 2863 2864 ia0.s_addr = ntohl(ia->s_addr); 2865 if (ia0.s_addr == INADDR_LOOPBACK) { 2866 ia->s_addr = pr->pr_ip4[0].s_addr; 2867 mtx_unlock(&pr->pr_mtx); 2868 return (0); 2869 } 2870 2871 if (ia0.s_addr == INADDR_ANY) { 2872 /* 2873 * In case there is only 1 IPv4 address, bind directly. 2874 */ 2875 if (pr->pr_ip4s == 1) 2876 ia->s_addr = pr->pr_ip4[0].s_addr; 2877 mtx_unlock(&pr->pr_mtx); 2878 return (0); 2879 } 2880 2881 error = _prison_check_ip4(pr, ia); 2882 mtx_unlock(&pr->pr_mtx); 2883 return (error); 2884} 2885 2886/* 2887 * Rewrite destination address in case we will connect to loopback address. 2888 * 2889 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
2890 * Address passed in in NBO and returned in NBO. 2891 */ 2892int 2893prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2894{ 2895 struct prison *pr; 2896 2897 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2898 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2899 2900 pr = cred->cr_prison; 2901 if (!(pr->pr_flags & PR_IP4)) 2902 return (0); 2903 mtx_lock(&pr->pr_mtx); 2904 if (!(pr->pr_flags & PR_IP4)) { 2905 mtx_unlock(&pr->pr_mtx); 2906 return (0); 2907 } 2908 if (pr->pr_ip4 == NULL) { 2909 mtx_unlock(&pr->pr_mtx); 2910 return (EAFNOSUPPORT); 2911 } 2912 2913 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2914 ia->s_addr = pr->pr_ip4[0].s_addr; 2915 mtx_unlock(&pr->pr_mtx); 2916 return (0); 2917 } 2918 2919 /* 2920 * Return success because nothing had to be changed. 2921 */ 2922 mtx_unlock(&pr->pr_mtx); 2923 return (0); 2924} 2925 2926/* 2927 * Check if given address belongs to the jail referenced by cred/prison. 2928 * 2929 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2930 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2931 * doesn't allow IPv4. Address passed in in NBO. 2932 */ 2933static int 2934_prison_check_ip4(struct prison *pr, struct in_addr *ia) 2935{ 2936 int i, a, z, d; 2937 2938 /* 2939 * Check the primary IP. 2940 */ 2941 if (pr->pr_ip4[0].s_addr == ia->s_addr) 2942 return (0); 2943 2944 /* 2945 * All the other IPs are sorted so we can do a binary search. 
2946 */ 2947 a = 0; 2948 z = pr->pr_ip4s - 2; 2949 while (a <= z) { 2950 i = (a + z) / 2; 2951 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2952 if (d > 0) 2953 z = i - 1; 2954 else if (d < 0) 2955 a = i + 1; 2956 else 2957 return (0); 2958 } 2959 2960 return (EADDRNOTAVAIL); 2961} 2962 2963int 2964prison_check_ip4(struct ucred *cred, struct in_addr *ia) 2965{ 2966 struct prison *pr; 2967 int error; 2968 2969 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2970 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2971 2972 pr = cred->cr_prison; 2973 if (!(pr->pr_flags & PR_IP4)) 2974 return (0); 2975 mtx_lock(&pr->pr_mtx); 2976 if (!(pr->pr_flags & PR_IP4)) { 2977 mtx_unlock(&pr->pr_mtx); 2978 return (0); 2979 } 2980 if (pr->pr_ip4 == NULL) { 2981 mtx_unlock(&pr->pr_mtx); 2982 return (EAFNOSUPPORT); 2983 } 2984 2985 error = _prison_check_ip4(pr, ia); 2986 mtx_unlock(&pr->pr_mtx); 2987 return (error); 2988} 2989#endif 2990 2991#ifdef INET6 2992static int 2993prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2994{ 2995 int ii, ij, used; 2996 struct prison *ppr; 2997 2998 ppr = pr->pr_parent; 2999 if (!(pr->pr_flags & PR_IP6_USER)) { 3000 /* This has no user settings, so just copy the parent's list. */ 3001 if (pr->pr_ip6s < ppr->pr_ip6s) { 3002 /* 3003 * There's no room for the parent's list. Use the 3004 * new list buffer, which is assumed to be big enough 3005 * (if it was passed). If there's no buffer, try to 3006 * allocate one. 
3007 */ 3008 used = 1; 3009 if (newip6 == NULL) { 3010 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 3011 M_PRISON, M_NOWAIT); 3012 if (newip6 != NULL) 3013 used = 0; 3014 } 3015 if (newip6 != NULL) { 3016 bcopy(ppr->pr_ip6, newip6, 3017 ppr->pr_ip6s * sizeof(*newip6)); 3018 free(pr->pr_ip6, M_PRISON); 3019 pr->pr_ip6 = newip6; 3020 pr->pr_ip6s = ppr->pr_ip6s; 3021 } 3022 return (used); 3023 } 3024 pr->pr_ip6s = ppr->pr_ip6s; 3025 if (pr->pr_ip6s > 0) 3026 bcopy(ppr->pr_ip6, pr->pr_ip6, 3027 pr->pr_ip6s * sizeof(*newip6)); 3028 else if (pr->pr_ip6 != NULL) { 3029 free(pr->pr_ip6, M_PRISON); 3030 pr->pr_ip6 = NULL; 3031 } 3032 } else if (pr->pr_ip6s > 0) { 3033 /* Remove addresses that aren't in the parent. */ 3034 for (ij = 0; ij < ppr->pr_ip6s; ij++) 3035 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 3036 &ppr->pr_ip6[ij])) 3037 break; 3038 if (ij < ppr->pr_ip6s) 3039 ii = 1; 3040 else { 3041 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 3042 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 3043 ii = 0; 3044 } 3045 for (ij = 1; ii < pr->pr_ip6s; ) { 3046 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 3047 &ppr->pr_ip6[0])) { 3048 ii++; 3049 continue; 3050 } 3051 switch (ij >= ppr->pr_ip4s ? -1 : 3052 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 3053 case -1: 3054 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 3055 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 3056 break; 3057 case 0: 3058 ii++; 3059 ij++; 3060 break; 3061 case 1: 3062 ij++; 3063 break; 3064 } 3065 } 3066 if (pr->pr_ip6s == 0) { 3067 pr->pr_flags |= PR_IP6_DISABLE; 3068 free(pr->pr_ip6, M_PRISON); 3069 pr->pr_ip6 = NULL; 3070 } 3071 } 3072 return 0; 3073} 3074 3075/* 3076 * Pass back primary IPv6 address for this jail. 3077 * 3078 * If not restricted return success but do not alter the address. Caller has 3079 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 3080 * 3081 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
3082 */ 3083int 3084prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 3085{ 3086 struct prison *pr; 3087 3088 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3089 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3090 3091 pr = cred->cr_prison; 3092 if (!(pr->pr_flags & PR_IP6)) 3093 return (0); 3094 mtx_lock(&pr->pr_mtx); 3095 if (!(pr->pr_flags & PR_IP6)) { 3096 mtx_unlock(&pr->pr_mtx); 3097 return (0); 3098 } 3099 if (pr->pr_ip6 == NULL) { 3100 mtx_unlock(&pr->pr_mtx); 3101 return (EAFNOSUPPORT); 3102 } 3103 3104 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3105 mtx_unlock(&pr->pr_mtx); 3106 return (0); 3107} 3108 3109/* 3110 * Return 1 if we should do proper source address selection or are not jailed. 3111 * We will return 0 if we should bypass source address selection in favour 3112 * of the primary jail IPv6 address. Only in this case *ia will be updated and 3113 * returned in NBO. 3114 * Return EAFNOSUPPORT, in case this jail does not allow IPv6. 3115 */ 3116int 3117prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) 3118{ 3119 struct prison *pr; 3120 struct in6_addr lia6; 3121 int error; 3122 3123 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3124 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3125 3126 if (!jailed(cred)) 3127 return (1); 3128 3129 pr = cred->cr_prison; 3130 if (pr->pr_flags & PR_IP6_SADDRSEL) 3131 return (1); 3132 3133 lia6 = in6addr_any; 3134 error = prison_get_ip6(cred, &lia6); 3135 if (error) 3136 return (error); 3137 if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) 3138 return (1); 3139 3140 bcopy(&lia6, ia6, sizeof(struct in6_addr)); 3141 return (0); 3142} 3143 3144/* 3145 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 
3146 */ 3147int 3148prison_equal_ip6(struct prison *pr1, struct prison *pr2) 3149{ 3150 3151 if (pr1 == pr2) 3152 return (1); 3153 3154 while (pr1 != &prison0 && 3155#ifdef VIMAGE 3156 !(pr1->pr_flags & PR_VNET) && 3157#endif 3158 !(pr1->pr_flags & PR_IP6_USER)) 3159 pr1 = pr1->pr_parent; 3160 while (pr2 != &prison0 && 3161#ifdef VIMAGE 3162 !(pr2->pr_flags & PR_VNET) && 3163#endif 3164 !(pr2->pr_flags & PR_IP6_USER)) 3165 pr2 = pr2->pr_parent; 3166 return (pr1 == pr2); 3167} 3168 3169/* 3170 * Make sure our (source) address is set to something meaningful to this jail. 3171 * 3172 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 3173 * when needed while binding. 3174 * 3175 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3176 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3177 * doesn't allow IPv6. 3178 */ 3179int 3180prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 3181{ 3182 struct prison *pr; 3183 int error; 3184 3185 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3186 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3187 3188 pr = cred->cr_prison; 3189 if (!(pr->pr_flags & PR_IP6)) 3190 return (0); 3191 mtx_lock(&pr->pr_mtx); 3192 if (!(pr->pr_flags & PR_IP6)) { 3193 mtx_unlock(&pr->pr_mtx); 3194 return (0); 3195 } 3196 if (pr->pr_ip6 == NULL) { 3197 mtx_unlock(&pr->pr_mtx); 3198 return (EAFNOSUPPORT); 3199 } 3200 3201 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3202 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3203 mtx_unlock(&pr->pr_mtx); 3204 return (0); 3205 } 3206 3207 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 3208 /* 3209 * In case there is only 1 IPv6 address, and v6only is true, 3210 * then bind directly. 
3211 */ 3212 if (v6only != 0 && pr->pr_ip6s == 1) 3213 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3214 mtx_unlock(&pr->pr_mtx); 3215 return (0); 3216 } 3217 3218 error = _prison_check_ip6(pr, ia6); 3219 mtx_unlock(&pr->pr_mtx); 3220 return (error); 3221} 3222 3223/* 3224 * Rewrite destination address in case we will connect to loopback address. 3225 * 3226 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 3227 */ 3228int 3229prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 3230{ 3231 struct prison *pr; 3232 3233 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3234 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3235 3236 pr = cred->cr_prison; 3237 if (!(pr->pr_flags & PR_IP6)) 3238 return (0); 3239 mtx_lock(&pr->pr_mtx); 3240 if (!(pr->pr_flags & PR_IP6)) { 3241 mtx_unlock(&pr->pr_mtx); 3242 return (0); 3243 } 3244 if (pr->pr_ip6 == NULL) { 3245 mtx_unlock(&pr->pr_mtx); 3246 return (EAFNOSUPPORT); 3247 } 3248 3249 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3250 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3251 mtx_unlock(&pr->pr_mtx); 3252 return (0); 3253 } 3254 3255 /* 3256 * Return success because nothing had to be changed. 3257 */ 3258 mtx_unlock(&pr->pr_mtx); 3259 return (0); 3260} 3261 3262/* 3263 * Check if given address belongs to the jail referenced by cred/prison. 3264 * 3265 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3266 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3267 * doesn't allow IPv6. 3268 */ 3269static int 3270_prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3271{ 3272 int i, a, z, d; 3273 3274 /* 3275 * Check the primary IP. 3276 */ 3277 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3278 return (0); 3279 3280 /* 3281 * All the other IPs are sorted so we can do a binary search. 
3282 */ 3283 a = 0; 3284 z = pr->pr_ip6s - 2; 3285 while (a <= z) { 3286 i = (a + z) / 2; 3287 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3288 if (d > 0) 3289 z = i - 1; 3290 else if (d < 0) 3291 a = i + 1; 3292 else 3293 return (0); 3294 } 3295 3296 return (EADDRNOTAVAIL); 3297} 3298 3299int 3300prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3301{ 3302 struct prison *pr; 3303 int error; 3304 3305 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3306 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3307 3308 pr = cred->cr_prison; 3309 if (!(pr->pr_flags & PR_IP6)) 3310 return (0); 3311 mtx_lock(&pr->pr_mtx); 3312 if (!(pr->pr_flags & PR_IP6)) { 3313 mtx_unlock(&pr->pr_mtx); 3314 return (0); 3315 } 3316 if (pr->pr_ip6 == NULL) { 3317 mtx_unlock(&pr->pr_mtx); 3318 return (EAFNOSUPPORT); 3319 } 3320 3321 error = _prison_check_ip6(pr, ia6); 3322 mtx_unlock(&pr->pr_mtx); 3323 return (error); 3324} 3325#endif 3326 3327/* 3328 * Check if a jail supports the given address family. 3329 * 3330 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3331 * if not. 3332 */ 3333int 3334prison_check_af(struct ucred *cred, int af) 3335{ 3336 struct prison *pr; 3337 int error; 3338 3339 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3340 3341 pr = cred->cr_prison; 3342#ifdef VIMAGE 3343 /* Prisons with their own network stack are not limited. 
*/ 3344 if (prison_owns_vnet(cred)) 3345 return (0); 3346#endif 3347 3348 error = 0; 3349 switch (af) 3350 { 3351#ifdef INET 3352 case AF_INET: 3353 if (pr->pr_flags & PR_IP4) 3354 { 3355 mtx_lock(&pr->pr_mtx); 3356 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3357 error = EAFNOSUPPORT; 3358 mtx_unlock(&pr->pr_mtx); 3359 } 3360 break; 3361#endif 3362#ifdef INET6 3363 case AF_INET6: 3364 if (pr->pr_flags & PR_IP6) 3365 { 3366 mtx_lock(&pr->pr_mtx); 3367 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3368 error = EAFNOSUPPORT; 3369 mtx_unlock(&pr->pr_mtx); 3370 } 3371 break; 3372#endif 3373 case AF_LOCAL: 3374 case AF_ROUTE: 3375 break; 3376 default: 3377 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3378 error = EAFNOSUPPORT; 3379 } 3380 return (error); 3381} 3382 3383/* 3384 * Check if given address belongs to the jail referenced by cred (wrapper to 3385 * prison_check_ip[46]). 3386 * 3387 * Returns 0 if jail doesn't restrict the address family or if address belongs 3388 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3389 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3390 */ 3391int 3392prison_if(struct ucred *cred, struct sockaddr *sa) 3393{ 3394#ifdef INET 3395 struct sockaddr_in *sai; 3396#endif 3397#ifdef INET6 3398 struct sockaddr_in6 *sai6; 3399#endif 3400 int error; 3401 3402 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3403 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3404 3405#ifdef VIMAGE 3406 if (prison_owns_vnet(cred)) 3407 return (0); 3408#endif 3409 3410 error = 0; 3411 switch (sa->sa_family) 3412 { 3413#ifdef INET 3414 case AF_INET: 3415 sai = (struct sockaddr_in *)sa; 3416 error = prison_check_ip4(cred, &sai->sin_addr); 3417 break; 3418#endif 3419#ifdef INET6 3420 case AF_INET6: 3421 sai6 = (struct sockaddr_in6 *)sa; 3422 error = prison_check_ip6(cred, &sai6->sin6_addr); 3423 break; 3424#endif 3425 default: 3426 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3427 error = EAFNOSUPPORT; 3428 } 3429 return (error); 3430} 3431 3432/* 3433 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3434 */ 3435int 3436prison_check(struct ucred *cred1, struct ucred *cred2) 3437{ 3438 3439 return ((cred1->cr_prison == cred2->cr_prison || 3440 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3441} 3442 3443/* 3444 * Return 1 if p2 is a child of p1, otherwise 0. 3445 */ 3446int 3447prison_ischild(struct prison *pr1, struct prison *pr2) 3448{ 3449 3450 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3451 if (pr1 == pr2) 3452 return (1); 3453 return (0); 3454} 3455 3456/* 3457 * Return 1 if the passed credential is in a jail, otherwise 0. 3458 */ 3459int 3460jailed(struct ucred *cred) 3461{ 3462 3463 return (cred->cr_prison != &prison0); 3464} 3465 3466/* 3467 * Return 1 if the passed credential is in a jail and that jail does not 3468 * have its own virtual network stack, otherwise 0. 
3469 */ 3470int 3471jailed_without_vnet(struct ucred *cred) 3472{ 3473 3474 if (!jailed(cred)) 3475 return (0); 3476#ifdef VIMAGE 3477 if (prison_owns_vnet(cred)) 3478 return (0); 3479#endif 3480 3481 return (1); 3482} 3483 3484/* 3485 * Return the correct hostname (domainname, et al) for the passed credential. 3486 */ 3487void 3488getcredhostname(struct ucred *cred, char *buf, size_t size) 3489{ 3490 struct prison *pr; 3491 3492 /* 3493 * A NULL credential can be used to shortcut to the physical 3494 * system's hostname. 3495 */ 3496 pr = (cred != NULL) ? cred->cr_prison : &prison0; 3497 mtx_lock(&pr->pr_mtx); 3498 strlcpy(buf, pr->pr_hostname, size); 3499 mtx_unlock(&pr->pr_mtx); 3500} 3501 3502void 3503getcreddomainname(struct ucred *cred, char *buf, size_t size) 3504{ 3505 3506 mtx_lock(&cred->cr_prison->pr_mtx); 3507 strlcpy(buf, cred->cr_prison->pr_domainname, size); 3508 mtx_unlock(&cred->cr_prison->pr_mtx); 3509} 3510 3511void 3512getcredhostuuid(struct ucred *cred, char *buf, size_t size) 3513{ 3514 3515 mtx_lock(&cred->cr_prison->pr_mtx); 3516 strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 3517 mtx_unlock(&cred->cr_prison->pr_mtx); 3518} 3519 3520void 3521getcredhostid(struct ucred *cred, unsigned long *hostid) 3522{ 3523 3524 mtx_lock(&cred->cr_prison->pr_mtx); 3525 *hostid = cred->cr_prison->pr_hostid; 3526 mtx_unlock(&cred->cr_prison->pr_mtx); 3527} 3528 3529#ifdef VIMAGE 3530/* 3531 * Determine whether the prison represented by cred owns 3532 * its vnet rather than having it inherited. 3533 * 3534 * Returns 1 in case the prison owns the vnet, 0 otherwise. 3535 */ 3536int 3537prison_owns_vnet(struct ucred *cred) 3538{ 3539 3540 /* 3541 * vnets cannot be added/removed after jail creation, 3542 * so no need to lock here. 3543 */ 3544 return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); 3545} 3546#endif 3547 3548/* 3549 * Determine whether the subject represented by cred can "see" 3550 * status of a mount point. 
3551 * Returns: 0 for permitted, ENOENT otherwise. 3552 * XXX: This function should be called cr_canseemount() and should be 3553 * placed in kern_prot.c. 3554 */ 3555int 3556prison_canseemount(struct ucred *cred, struct mount *mp) 3557{ 3558 struct prison *pr; 3559 struct statfs *sp; 3560 size_t len; 3561 3562 pr = cred->cr_prison; 3563 if (pr->pr_enforce_statfs == 0) 3564 return (0); 3565 if (pr->pr_root->v_mount == mp) 3566 return (0); 3567 if (pr->pr_enforce_statfs == 2) 3568 return (ENOENT); 3569 /* 3570 * If jail's chroot directory is set to "/" we should be able to see 3571 * all mount-points from inside a jail. 3572 * This is ugly check, but this is the only situation when jail's 3573 * directory ends with '/'. 3574 */ 3575 if (strcmp(pr->pr_path, "/") == 0) 3576 return (0); 3577 len = strlen(pr->pr_path); 3578 sp = &mp->mnt_stat; 3579 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3580 return (ENOENT); 3581 /* 3582 * Be sure that we don't have situation where jail's root directory 3583 * is "/some/path" and mount point is "/some/pathpath". 3584 */ 3585 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3586 return (ENOENT); 3587 return (0); 3588} 3589 3590void 3591prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3592{ 3593 char jpath[MAXPATHLEN]; 3594 struct prison *pr; 3595 size_t len; 3596 3597 pr = cred->cr_prison; 3598 if (pr->pr_enforce_statfs == 0) 3599 return; 3600 if (prison_canseemount(cred, mp) != 0) { 3601 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3602 strlcpy(sp->f_mntonname, "[restricted]", 3603 sizeof(sp->f_mntonname)); 3604 return; 3605 } 3606 if (pr->pr_root->v_mount == mp) { 3607 /* 3608 * Clear current buffer data, so we are sure nothing from 3609 * the valid path left there. 
3610 */ 3611 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3612 *sp->f_mntonname = '/'; 3613 return; 3614 } 3615 /* 3616 * If jail's chroot directory is set to "/" we should be able to see 3617 * all mount-points from inside a jail. 3618 */ 3619 if (strcmp(pr->pr_path, "/") == 0) 3620 return; 3621 len = strlen(pr->pr_path); 3622 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3623 /* 3624 * Clear current buffer data, so we are sure nothing from 3625 * the valid path left there. 3626 */ 3627 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3628 if (*jpath == '\0') { 3629 /* Should never happen. */ 3630 *sp->f_mntonname = '/'; 3631 } else { 3632 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3633 } 3634} 3635 3636/* 3637 * Check with permission for a specific privilege is granted within jail. We 3638 * have a specific list of accepted privileges; the rest are denied. 3639 */ 3640int 3641prison_priv_check(struct ucred *cred, int priv) 3642{ 3643 3644 if (!jailed(cred)) 3645 return (0); 3646 3647#ifdef VIMAGE 3648 /* 3649 * Privileges specific to prisons with a virtual network stack. 3650 * There might be a duplicate entry here in case the privilege 3651 * is only granted conditionally in the legacy jail case. 3652 */ 3653 switch (priv) { 3654#ifdef notyet 3655 /* 3656 * NFS-specific privileges. 3657 */ 3658 case PRIV_NFS_DAEMON: 3659 case PRIV_NFS_LOCKD: 3660#endif 3661 /* 3662 * Network stack privileges. 3663 */ 3664 case PRIV_NET_BRIDGE: 3665 case PRIV_NET_GRE: 3666 case PRIV_NET_BPF: 3667 case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. 
*/ 3668 case PRIV_NET_ROUTE: 3669 case PRIV_NET_TAP: 3670 case PRIV_NET_SETIFMTU: 3671 case PRIV_NET_SETIFFLAGS: 3672 case PRIV_NET_SETIFCAP: 3673 case PRIV_NET_SETIFDESCR: 3674 case PRIV_NET_SETIFNAME : 3675 case PRIV_NET_SETIFMETRIC: 3676 case PRIV_NET_SETIFPHYS: 3677 case PRIV_NET_SETIFMAC: 3678 case PRIV_NET_ADDMULTI: 3679 case PRIV_NET_DELMULTI: 3680 case PRIV_NET_HWIOCTL: 3681 case PRIV_NET_SETLLADDR: 3682 case PRIV_NET_ADDIFGROUP: 3683 case PRIV_NET_DELIFGROUP: 3684 case PRIV_NET_IFCREATE: 3685 case PRIV_NET_IFDESTROY: 3686 case PRIV_NET_ADDIFADDR: 3687 case PRIV_NET_DELIFADDR: 3688 case PRIV_NET_LAGG: 3689 case PRIV_NET_GIF: 3690 case PRIV_NET_SETIFVNET: 3691 case PRIV_NET_SETIFFIB: 3692 3693 /* 3694 * 802.11-related privileges. 3695 */ 3696 case PRIV_NET80211_GETKEY: 3697#ifdef notyet 3698 case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ 3699#endif 3700 3701#ifdef notyet 3702 /* 3703 * AppleTalk privileges. 3704 */ 3705 case PRIV_NETATALK_RESERVEDPORT: 3706 3707 /* 3708 * ATM privileges. 3709 */ 3710 case PRIV_NETATM_CFG: 3711 case PRIV_NETATM_ADD: 3712 case PRIV_NETATM_DEL: 3713 case PRIV_NETATM_SET: 3714 3715 /* 3716 * Bluetooth privileges. 3717 */ 3718 case PRIV_NETBLUETOOTH_RAW: 3719#endif 3720 3721 /* 3722 * Netgraph and netgraph module privileges. 3723 */ 3724 case PRIV_NETGRAPH_CONTROL: 3725#ifdef notyet 3726 case PRIV_NETGRAPH_TTY: 3727#endif 3728 3729 /* 3730 * IPv4 and IPv6 privileges. 3731 */ 3732 case PRIV_NETINET_IPFW: 3733 case PRIV_NETINET_DIVERT: 3734 case PRIV_NETINET_PF: 3735 case PRIV_NETINET_DUMMYNET: 3736 case PRIV_NETINET_CARP: 3737 case PRIV_NETINET_MROUTE: 3738 case PRIV_NETINET_RAW: 3739 case PRIV_NETINET_ADDRCTRL6: 3740 case PRIV_NETINET_ND6: 3741 case PRIV_NETINET_SCOPE6: 3742 case PRIV_NETINET_ALIFETIME6: 3743 case PRIV_NETINET_IPSEC: 3744 case PRIV_NETINET_BINDANY: 3745 3746#ifdef notyet 3747 /* 3748 * IPX/SPX privileges. 
3749 */ 3750 case PRIV_NETIPX_RESERVEDPORT: 3751 case PRIV_NETIPX_RAW: 3752 3753 /* 3754 * NCP privileges. 3755 */ 3756 case PRIV_NETNCP: 3757 3758 /* 3759 * SMB privileges. 3760 */ 3761 case PRIV_NETSMB: 3762#endif 3763 3764 /* 3765 * No default: or deny here. 3766 * In case of no permit fall through to next switch(). 3767 */ 3768 if (cred->cr_prison->pr_flags & PR_VNET) 3769 return (0); 3770 } 3771#endif /* VIMAGE */ 3772 3773 switch (priv) { 3774 3775 /* 3776 * Allow ktrace privileges for root in jail. 3777 */ 3778 case PRIV_KTRACE: 3779 3780#if 0 3781 /* 3782 * Allow jailed processes to configure audit identity and 3783 * submit audit records (login, etc). In the future we may 3784 * want to further refine the relationship between audit and 3785 * jail. 3786 */ 3787 case PRIV_AUDIT_GETAUDIT: 3788 case PRIV_AUDIT_SETAUDIT: 3789 case PRIV_AUDIT_SUBMIT: 3790#endif 3791 3792 /* 3793 * Allow jailed processes to manipulate process UNIX 3794 * credentials in any way they see fit. 3795 */ 3796 case PRIV_CRED_SETUID: 3797 case PRIV_CRED_SETEUID: 3798 case PRIV_CRED_SETGID: 3799 case PRIV_CRED_SETEGID: 3800 case PRIV_CRED_SETGROUPS: 3801 case PRIV_CRED_SETREUID: 3802 case PRIV_CRED_SETREGID: 3803 case PRIV_CRED_SETRESUID: 3804 case PRIV_CRED_SETRESGID: 3805 3806 /* 3807 * Jail implements visibility constraints already, so allow 3808 * jailed root to override uid/gid-based constraints. 3809 */ 3810 case PRIV_SEEOTHERGIDS: 3811 case PRIV_SEEOTHERUIDS: 3812 3813 /* 3814 * Jail implements inter-process debugging limits already, so 3815 * allow jailed root various debugging privileges. 3816 */ 3817 case PRIV_DEBUG_DIFFCRED: 3818 case PRIV_DEBUG_SUGID: 3819 case PRIV_DEBUG_UNPRIV: 3820 3821 /* 3822 * Allow jail to set various resource limits and login 3823 * properties, and for now, exceed process resource limits. 
3824 */ 3825 case PRIV_PROC_LIMIT: 3826 case PRIV_PROC_SETLOGIN: 3827 case PRIV_PROC_SETRLIMIT: 3828 3829 /* 3830 * System V and POSIX IPC privileges are granted in jail. 3831 */ 3832 case PRIV_IPC_READ: 3833 case PRIV_IPC_WRITE: 3834 case PRIV_IPC_ADMIN: 3835 case PRIV_IPC_MSGSIZE: 3836 case PRIV_MQ_ADMIN: 3837 3838 /* 3839 * Jail operations within a jail work on child jails. 3840 */ 3841 case PRIV_JAIL_ATTACH: 3842 case PRIV_JAIL_SET: 3843 case PRIV_JAIL_REMOVE: 3844 3845 /* 3846 * Jail implements its own inter-process limits, so allow 3847 * root processes in jail to change scheduling on other 3848 * processes in the same jail. Likewise for signalling. 3849 */ 3850 case PRIV_SCHED_DIFFCRED: 3851 case PRIV_SCHED_CPUSET: 3852 case PRIV_SIGNAL_DIFFCRED: 3853 case PRIV_SIGNAL_SUGID: 3854 3855 /* 3856 * Allow jailed processes to write to sysctls marked as jail 3857 * writable. 3858 */ 3859 case PRIV_SYSCTL_WRITEJAIL: 3860 3861 /* 3862 * Allow root in jail to manage a variety of quota 3863 * properties. These should likely be conditional on a 3864 * configuration option. 3865 */ 3866 case PRIV_VFS_GETQUOTA: 3867 case PRIV_VFS_SETQUOTA: 3868 3869 /* 3870 * Since Jail relies on chroot() to implement file system 3871 * protections, grant many VFS privileges to root in jail. 3872 * Be careful to exclude mount-related and NFS-related 3873 * privileges. 3874 */ 3875 case PRIV_VFS_READ: 3876 case PRIV_VFS_WRITE: 3877 case PRIV_VFS_ADMIN: 3878 case PRIV_VFS_EXEC: 3879 case PRIV_VFS_LOOKUP: 3880 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3881 case PRIV_VFS_CHFLAGS_DEV: 3882 case PRIV_VFS_CHOWN: 3883 case PRIV_VFS_CHROOT: 3884 case PRIV_VFS_RETAINSUGID: 3885 case PRIV_VFS_FCHROOT: 3886 case PRIV_VFS_LINK: 3887 case PRIV_VFS_SETGID: 3888 case PRIV_VFS_STAT: 3889 case PRIV_VFS_STICKYFILE: 3890 return (0); 3891 3892 /* 3893 * Depending on the global setting, allow privilege of 3894 * setting system flags. 
3895 */ 3896 case PRIV_VFS_SYSFLAGS: 3897 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3898 return (0); 3899 else 3900 return (EPERM); 3901 3902 /* 3903 * Depending on the global setting, allow privilege of 3904 * mounting/unmounting file systems. 3905 */ 3906 case PRIV_VFS_MOUNT: 3907 case PRIV_VFS_UNMOUNT: 3908 case PRIV_VFS_MOUNT_NONUSER: 3909 case PRIV_VFS_MOUNT_OWNER: 3910 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && 3911 cred->cr_prison->pr_enforce_statfs < 2) 3912 return (0); 3913 else 3914 return (EPERM); 3915 3916 /* 3917 * Allow jailed root to bind reserved ports and reuse in-use 3918 * ports. 3919 */ 3920 case PRIV_NETINET_RESERVEDPORT: 3921 case PRIV_NETINET_REUSEPORT: 3922 return (0); 3923 3924 /* 3925 * Allow jailed root to set certian IPv4/6 (option) headers. 3926 */ 3927 case PRIV_NETINET_SETHDROPTS: 3928 return (0); 3929 3930 /* 3931 * Conditionally allow creating raw sockets in jail. 3932 */ 3933 case PRIV_NETINET_RAW: 3934 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3935 return (0); 3936 else 3937 return (EPERM); 3938 3939 /* 3940 * Since jail implements its own visibility limits on netstat 3941 * sysctls, allow getcred. This allows identd to work in 3942 * jail. 3943 */ 3944 case PRIV_NETINET_GETCRED: 3945 return (0); 3946 3947 /* 3948 * Allow jailed root to set loginclass. 3949 */ 3950 case PRIV_PROC_SETLOGINCLASS: 3951 return (0); 3952 3953 default: 3954 /* 3955 * In all remaining cases, deny the privilege request. This 3956 * includes almost all network privileges, many system 3957 * configuration privileges. 3958 */ 3959 return (EPERM); 3960 } 3961} 3962 3963/* 3964 * Return the part of pr2's name that is relative to pr1, or the whole name 3965 * if it does not directly follow. 3966 */ 3967 3968char * 3969prison_name(struct prison *pr1, struct prison *pr2) 3970{ 3971 char *name; 3972 3973 /* Jails see themselves as "0" (if they see themselves at all). 
*/ 3974 if (pr1 == pr2) 3975 return "0"; 3976 name = pr2->pr_name; 3977 if (prison_ischild(pr1, pr2)) { 3978 /* 3979 * pr1 isn't locked (and allprison_lock may not be either) 3980 * so its length can't be counted on. But the number of dots 3981 * can be counted on - and counted. 3982 */ 3983 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3984 name = strchr(name, '.') + 1; 3985 } 3986 return (name); 3987} 3988 3989/* 3990 * Return the part of pr2's path that is relative to pr1, or the whole path 3991 * if it does not directly follow. 3992 */ 3993static char * 3994prison_path(struct prison *pr1, struct prison *pr2) 3995{ 3996 char *path1, *path2; 3997 int len1; 3998 3999 path1 = pr1->pr_path; 4000 path2 = pr2->pr_path; 4001 if (!strcmp(path1, "/")) 4002 return (path2); 4003 len1 = strlen(path1); 4004 if (strncmp(path1, path2, len1)) 4005 return (path2); 4006 if (path2[len1] == '\0') 4007 return "/"; 4008 if (path2[len1] == '/') 4009 return (path2 + len1); 4010 return (path2); 4011} 4012 4013 4014/* 4015 * Jail-related sysctls. 
4016 */ 4017static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4018 "Jails"); 4019 4020static int 4021sysctl_jail_list(SYSCTL_HANDLER_ARGS) 4022{ 4023 struct xprison *xp; 4024 struct prison *pr, *cpr; 4025#ifdef INET 4026 struct in_addr *ip4 = NULL; 4027 int ip4s = 0; 4028#endif 4029#ifdef INET6 4030 struct in6_addr *ip6 = NULL; 4031 int ip6s = 0; 4032#endif 4033 int descend, error; 4034 4035 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 4036 pr = req->td->td_ucred->cr_prison; 4037 error = 0; 4038 sx_slock(&allprison_lock); 4039 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 4040#if defined(INET) || defined(INET6) 4041 again: 4042#endif 4043 mtx_lock(&cpr->pr_mtx); 4044#ifdef INET 4045 if (cpr->pr_ip4s > 0) { 4046 if (ip4s < cpr->pr_ip4s) { 4047 ip4s = cpr->pr_ip4s; 4048 mtx_unlock(&cpr->pr_mtx); 4049 ip4 = realloc(ip4, ip4s * 4050 sizeof(struct in_addr), M_TEMP, M_WAITOK); 4051 goto again; 4052 } 4053 bcopy(cpr->pr_ip4, ip4, 4054 cpr->pr_ip4s * sizeof(struct in_addr)); 4055 } 4056#endif 4057#ifdef INET6 4058 if (cpr->pr_ip6s > 0) { 4059 if (ip6s < cpr->pr_ip6s) { 4060 ip6s = cpr->pr_ip6s; 4061 mtx_unlock(&cpr->pr_mtx); 4062 ip6 = realloc(ip6, ip6s * 4063 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 4064 goto again; 4065 } 4066 bcopy(cpr->pr_ip6, ip6, 4067 cpr->pr_ip6s * sizeof(struct in6_addr)); 4068 } 4069#endif 4070 if (cpr->pr_ref == 0) { 4071 mtx_unlock(&cpr->pr_mtx); 4072 continue; 4073 } 4074 bzero(xp, sizeof(*xp)); 4075 xp->pr_version = XPRISON_VERSION; 4076 xp->pr_id = cpr->pr_id; 4077 xp->pr_state = cpr->pr_uref > 0 4078 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; 4079 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 4080 strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); 4081 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 4082#ifdef INET 4083 xp->pr_ip4s = cpr->pr_ip4s; 4084#endif 4085#ifdef INET6 4086 xp->pr_ip6s = cpr->pr_ip6s; 4087#endif 4088 mtx_unlock(&cpr->pr_mtx); 4089 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 4090 if (error) 4091 break; 4092#ifdef INET 4093 if (xp->pr_ip4s > 0) { 4094 error = SYSCTL_OUT(req, ip4, 4095 xp->pr_ip4s * sizeof(struct in_addr)); 4096 if (error) 4097 break; 4098 } 4099#endif 4100#ifdef INET6 4101 if (xp->pr_ip6s > 0) { 4102 error = SYSCTL_OUT(req, ip6, 4103 xp->pr_ip6s * sizeof(struct in6_addr)); 4104 if (error) 4105 break; 4106 } 4107#endif 4108 } 4109 sx_sunlock(&allprison_lock); 4110 free(xp, M_TEMP); 4111#ifdef INET 4112 free(ip4, M_TEMP); 4113#endif 4114#ifdef INET6 4115 free(ip6, M_TEMP); 4116#endif 4117 return (error); 4118} 4119 4120SYSCTL_OID(_security_jail, OID_AUTO, list, 4121 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4122 sysctl_jail_list, "S", "List of active jails"); 4123 4124static int 4125sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 4126{ 4127 int error, injail; 4128 4129 injail = jailed(req->td->td_ucred); 4130 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 4131 4132 return (error); 4133} 4134 4135SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 4136 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4137 sysctl_jail_jailed, "I", "Process in jail?"); 4138 4139#if defined(INET) || defined(INET6) 4140SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 4141 &jail_max_af_ips, 0, 4142 "Number of IP addresses a jail may have at most per address family"); 4143#endif 4144 4145/* 4146 * Default parameters for jail(2) compatability. For historical reasons, 4147 * the sysctl names have varying similarity to the parameter names. 
Prisons 4148 * just see their own parameters, and can't change them. 4149 */ 4150static int 4151sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 4152{ 4153 struct prison *pr; 4154 int allow, error, i; 4155 4156 pr = req->td->td_ucred->cr_prison; 4157 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 4158 4159 /* Get the current flag value, and convert it to a boolean. */ 4160 i = (allow & arg2) ? 1 : 0; 4161 if (arg1 != NULL) 4162 i = !i; 4163 error = sysctl_handle_int(oidp, &i, 0, req); 4164 if (error || !req->newptr) 4165 return (error); 4166 i = i ? arg2 : 0; 4167 if (arg1 != NULL) 4168 i ^= arg2; 4169 /* 4170 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 4171 * for writing. 4172 */ 4173 mtx_lock(&prison0.pr_mtx); 4174 jail_default_allow = (jail_default_allow & ~arg2) | i; 4175 mtx_unlock(&prison0.pr_mtx); 4176 return (0); 4177} 4178 4179SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 4180 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4181 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 4182 "Processes in jail can set their hostnames"); 4183SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 4184 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4185 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 4186 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 4187SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 4188 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4189 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 4190 "Processes in jail can use System V IPC primitives"); 4191SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 4192 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4193 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 4194 "Prison root can create raw sockets"); 4195SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 4196 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4197 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 4198 "Processes in jail 
can alter system file flags"); 4199SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 4200 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4201 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 4202 "Processes in jail can mount/unmount jail-friendly file systems"); 4203SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, 4204 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4205 NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", 4206 "Processes in jail can mount the devfs file system"); 4207SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, 4208 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4209 NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", 4210 "Processes in jail can mount the nullfs file system");
|