1303863Sstevek/*- 2303863Sstevek * Copyright (c) 1999 Poul-Henning Kamp. 3303863Sstevek * Copyright (c) 2008 Bjoern A. Zeeb. 4303863Sstevek * Copyright (c) 2009 James Gritton. 5303863Sstevek * All rights reserved. 6303863Sstevek * 7303863Sstevek * Redistribution and use in source and binary forms, with or without 8303863Sstevek * modification, are permitted provided that the following conditions 9303863Sstevek * are met: 10303863Sstevek * 1. Redistributions of source code must retain the above copyright 11303863Sstevek * notice, this list of conditions and the following disclaimer. 12303863Sstevek * 2. Redistributions in binary form must reproduce the above copyright 13303863Sstevek * notice, this list of conditions and the following disclaimer in the 14303863Sstevek * documentation and/or other materials provided with the distribution. 15303863Sstevek * 16303863Sstevek * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17303863Sstevek * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18303863Sstevek * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19303863Sstevek * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20303863Sstevek * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21303863Sstevek * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22303863Sstevek * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23303863Sstevek * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24303863Sstevek * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25303863Sstevek * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26303863Sstevek * SUCH DAMAGE. 27303863Sstevek */ 28303863Sstevek 29303863Sstevek#include <sys/cdefs.h> 30303863Sstevek__FBSDID("$FreeBSD: stable/11/sys/netinet/in_jail.c 316944 2017-04-14 22:02:08Z smh $"); 31303863Sstevek 32303863Sstevek#include "opt_compat.h" 33303863Sstevek#include "opt_ddb.h" 34303863Sstevek#include "opt_inet.h" 35303863Sstevek#include "opt_inet6.h" 36303863Sstevek 37303863Sstevek#include <sys/param.h> 38303863Sstevek#include <sys/types.h> 39303863Sstevek#include <sys/kernel.h> 40303863Sstevek#include <sys/systm.h> 41303863Sstevek#include <sys/errno.h> 42303863Sstevek#include <sys/sysproto.h> 43303863Sstevek#include <sys/malloc.h> 44303863Sstevek#include <sys/osd.h> 45303863Sstevek#include <sys/priv.h> 46303863Sstevek#include <sys/proc.h> 47303863Sstevek#include <sys/taskqueue.h> 48303863Sstevek#include <sys/fcntl.h> 49303863Sstevek#include <sys/jail.h> 50303863Sstevek#include <sys/lock.h> 51303863Sstevek#include <sys/mutex.h> 52303863Sstevek#include <sys/racct.h> 53303863Sstevek#include <sys/refcount.h> 54303863Sstevek#include <sys/sx.h> 55303863Sstevek#include <sys/sysent.h> 56303863Sstevek#include <sys/namei.h> 57303863Sstevek#include <sys/mount.h> 58303863Sstevek#include <sys/queue.h> 59303863Sstevek#include <sys/socket.h> 60303863Sstevek#include <sys/syscallsubr.h> 61303863Sstevek#include <sys/sysctl.h> 62303863Sstevek#include <sys/vnode.h> 63303863Sstevek 64303863Sstevek#include <net/if.h> 65303863Sstevek#include <net/vnet.h> 66303863Sstevek 67303863Sstevek#include <netinet/in.h> 68303863Sstevek 69303863Sstevekint 70303863Sstevekprison_qcmp_v4(const void *ip1, const void *ip2) 71303863Sstevek{ 72303863Sstevek in_addr_t iaa, iab; 73303863Sstevek 74303863Sstevek /* 75303863Sstevek * We need to compare in HBO here to get the list sorted as expected 76303863Sstevek * by the result of the code. Sorting NBO addresses gives you 77303863Sstevek * interesting results. If you do not understand, do not try. 78303863Sstevek */ 79303863Sstevek iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 80303863Sstevek iab = ntohl(((const struct in_addr *)ip2)->s_addr); 81303863Sstevek 82303863Sstevek /* 83303863Sstevek * Do not simply return the difference of the two numbers, the int is 84303863Sstevek * not wide enough. 85303863Sstevek */ 86303863Sstevek if (iaa > iab) 87303863Sstevek return (1); 88303863Sstevek else if (iaa < iab) 89303863Sstevek return (-1); 90303863Sstevek else 91303863Sstevek return (0); 92303863Sstevek} 93303863Sstevek 94303863Sstevek/* 95303863Sstevek * Restrict a prison's IP address list with its parent's, possibly replacing 96303863Sstevek * it. Return true if the replacement buffer was used (or would have been). 97303863Sstevek */ 98303863Sstevekint 99303863Sstevekprison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 100303863Sstevek{ 101303863Sstevek int ii, ij, used; 102303863Sstevek struct prison *ppr; 103303863Sstevek 104303863Sstevek ppr = pr->pr_parent; 105303863Sstevek if (!(pr->pr_flags & PR_IP4_USER)) { 106303863Sstevek /* This has no user settings, so just copy the parent's list. */ 107303863Sstevek if (pr->pr_ip4s < ppr->pr_ip4s) { 108303863Sstevek /* 109303863Sstevek * There's no room for the parent's list. Use the 110303863Sstevek * new list buffer, which is assumed to be big enough 111303863Sstevek * (if it was passed). If there's no buffer, try to 112303863Sstevek * allocate one. 113303863Sstevek */ 114303863Sstevek used = 1; 115303863Sstevek if (newip4 == NULL) { 116303863Sstevek newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 117303863Sstevek M_PRISON, M_NOWAIT); 118303863Sstevek if (newip4 != NULL) 119303863Sstevek used = 0; 120303863Sstevek } 121303863Sstevek if (newip4 != NULL) { 122303863Sstevek bcopy(ppr->pr_ip4, newip4, 123303863Sstevek ppr->pr_ip4s * sizeof(*newip4)); 124303863Sstevek free(pr->pr_ip4, M_PRISON); 125303863Sstevek pr->pr_ip4 = newip4; 126303863Sstevek pr->pr_ip4s = ppr->pr_ip4s; 127303863Sstevek } 128303863Sstevek return (used); 129303863Sstevek } 130303863Sstevek pr->pr_ip4s = ppr->pr_ip4s; 131303863Sstevek if (pr->pr_ip4s > 0) 132303863Sstevek bcopy(ppr->pr_ip4, pr->pr_ip4, 133303863Sstevek pr->pr_ip4s * sizeof(*newip4)); 134303863Sstevek else if (pr->pr_ip4 != NULL) { 135303863Sstevek free(pr->pr_ip4, M_PRISON); 136303863Sstevek pr->pr_ip4 = NULL; 137303863Sstevek } 138303863Sstevek } else if (pr->pr_ip4s > 0) { 139303863Sstevek /* Remove addresses that aren't in the parent. */ 140303863Sstevek for (ij = 0; ij < ppr->pr_ip4s; ij++) 141303863Sstevek if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 142303863Sstevek break; 143303863Sstevek if (ij < ppr->pr_ip4s) 144303863Sstevek ii = 1; 145303863Sstevek else { 146303863Sstevek bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 147303863Sstevek --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 148303863Sstevek ii = 0; 149303863Sstevek } 150303863Sstevek for (ij = 1; ii < pr->pr_ip4s; ) { 151303863Sstevek if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 152303863Sstevek ii++; 153303863Sstevek continue; 154303863Sstevek } 155303863Sstevek switch (ij >= ppr->pr_ip4s ? -1 : 156303863Sstevek prison_qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 157303863Sstevek case -1: 158303863Sstevek bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 159303863Sstevek (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 160303863Sstevek break; 161303863Sstevek case 0: 162303863Sstevek ii++; 163303863Sstevek ij++; 164303863Sstevek break; 165303863Sstevek case 1: 166303863Sstevek ij++; 167303863Sstevek break; 168303863Sstevek } 169303863Sstevek } 170303863Sstevek if (pr->pr_ip4s == 0) { 171303863Sstevek free(pr->pr_ip4, M_PRISON); 172303863Sstevek pr->pr_ip4 = NULL; 173303863Sstevek } 174303863Sstevek } 175303863Sstevek return (0); 176303863Sstevek} 177303863Sstevek 178303863Sstevek/* 179303863Sstevek * Pass back primary IPv4 address of this jail. 180303863Sstevek * 181303863Sstevek * If not restricted return success but do not alter the address. Caller has 182303863Sstevek * to make sure to initialize it correctly (e.g. INADDR_ANY). 183303863Sstevek * 184303863Sstevek * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 185303863Sstevek * Address returned in NBO. 186303863Sstevek */ 187303863Sstevekint 188303863Sstevekprison_get_ip4(struct ucred *cred, struct in_addr *ia) 189303863Sstevek{ 190303863Sstevek struct prison *pr; 191303863Sstevek 192303863Sstevek KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 193303863Sstevek KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 194303863Sstevek 195303863Sstevek pr = cred->cr_prison; 196303863Sstevek if (!(pr->pr_flags & PR_IP4)) 197303863Sstevek return (0); 198303863Sstevek mtx_lock(&pr->pr_mtx); 199303863Sstevek if (!(pr->pr_flags & PR_IP4)) { 200303863Sstevek mtx_unlock(&pr->pr_mtx); 201303863Sstevek return (0); 202303863Sstevek } 203303863Sstevek if (pr->pr_ip4 == NULL) { 204303863Sstevek mtx_unlock(&pr->pr_mtx); 205303863Sstevek return (EAFNOSUPPORT); 206303863Sstevek } 207303863Sstevek 208303863Sstevek ia->s_addr = pr->pr_ip4[0].s_addr; 209303863Sstevek mtx_unlock(&pr->pr_mtx); 210303863Sstevek return (0); 211303863Sstevek} 212303863Sstevek 213303863Sstevek/* 214303863Sstevek * Return 1 if we should do proper source address selection or are not jailed. 215303863Sstevek * We will return 0 if we should bypass source address selection in favour 216303863Sstevek * of the primary jail IPv4 address. Only in this case *ia will be updated and 217303863Sstevek * returned in NBO. 218303863Sstevek * Return EAFNOSUPPORT, in case this jail does not allow IPv4. 219303863Sstevek */ 220303863Sstevekint 221303863Sstevekprison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 222303863Sstevek{ 223303863Sstevek struct prison *pr; 224303863Sstevek struct in_addr lia; 225303863Sstevek int error; 226303863Sstevek 227303863Sstevek KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 228303863Sstevek KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 229303863Sstevek 230303863Sstevek if (!jailed(cred)) 231303863Sstevek return (1); 232303863Sstevek 233303863Sstevek pr = cred->cr_prison; 234303863Sstevek if (pr->pr_flags & PR_IP4_SADDRSEL) 235303863Sstevek return (1); 236303863Sstevek 237303863Sstevek lia.s_addr = INADDR_ANY; 238303863Sstevek error = prison_get_ip4(cred, &lia); 239303863Sstevek if (error) 240303863Sstevek return (error); 241303863Sstevek if (lia.s_addr == INADDR_ANY) 242303863Sstevek return (1); 243303863Sstevek 244303863Sstevek ia->s_addr = lia.s_addr; 245303863Sstevek return (0); 246303863Sstevek} 247303863Sstevek 248303863Sstevek/* 249303863Sstevek * Return true if pr1 and pr2 have the same IPv4 address restrictions. 250303863Sstevek */ 251303863Sstevekint 252303863Sstevekprison_equal_ip4(struct prison *pr1, struct prison *pr2) 253303863Sstevek{ 254303863Sstevek 255303863Sstevek if (pr1 == pr2) 256303863Sstevek return (1); 257303863Sstevek 258303863Sstevek /* 259303863Sstevek * No need to lock since the PR_IP4_USER flag can't be altered for 260303863Sstevek * existing prisons. 261303863Sstevek */ 262303863Sstevek while (pr1 != &prison0 && 263303863Sstevek#ifdef VIMAGE 264303863Sstevek !(pr1->pr_flags & PR_VNET) && 265303863Sstevek#endif 266303863Sstevek !(pr1->pr_flags & PR_IP4_USER)) 267303863Sstevek pr1 = pr1->pr_parent; 268303863Sstevek while (pr2 != &prison0 && 269303863Sstevek#ifdef VIMAGE 270303863Sstevek !(pr2->pr_flags & PR_VNET) && 271303863Sstevek#endif 272303863Sstevek !(pr2->pr_flags & PR_IP4_USER)) 273303863Sstevek pr2 = pr2->pr_parent; 274303863Sstevek return (pr1 == pr2); 275303863Sstevek} 276303863Sstevek 277303863Sstevek/* 278303863Sstevek * Make sure our (source) address is set to something meaningful to this 279303863Sstevek * jail. 280303863Sstevek * 281303863Sstevek * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 282303863Sstevek * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 283303863Sstevek * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 284303863Sstevek */ 285303863Sstevekint 286303863Sstevekprison_local_ip4(struct ucred *cred, struct in_addr *ia) 287303863Sstevek{ 288303863Sstevek struct prison *pr; 289303863Sstevek struct in_addr ia0; 290303863Sstevek int error; 291303863Sstevek 292303863Sstevek KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 293303863Sstevek KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 294303863Sstevek 295303863Sstevek pr = cred->cr_prison; 296303863Sstevek if (!(pr->pr_flags & PR_IP4)) 297303863Sstevek return (0); 298303863Sstevek mtx_lock(&pr->pr_mtx); 299303863Sstevek if (!(pr->pr_flags & PR_IP4)) { 300303863Sstevek mtx_unlock(&pr->pr_mtx); 301303863Sstevek return (0); 302303863Sstevek } 303303863Sstevek if (pr->pr_ip4 == NULL) { 304303863Sstevek mtx_unlock(&pr->pr_mtx); 305303863Sstevek return (EAFNOSUPPORT); 306303863Sstevek } 307303863Sstevek 308303863Sstevek ia0.s_addr = ntohl(ia->s_addr); 309303863Sstevek 310303863Sstevek if (ia0.s_addr == INADDR_ANY) { 311303863Sstevek /* 312303863Sstevek * In case there is only 1 IPv4 address, bind directly. 313303863Sstevek */ 314303863Sstevek if (pr->pr_ip4s == 1) 315303863Sstevek ia->s_addr = pr->pr_ip4[0].s_addr; 316303863Sstevek mtx_unlock(&pr->pr_mtx); 317303863Sstevek return (0); 318303863Sstevek } 319303863Sstevek 320303863Sstevek error = prison_check_ip4_locked(pr, ia); 321316944Ssmh if (error == EADDRNOTAVAIL && ia0.s_addr == INADDR_LOOPBACK) { 322316944Ssmh ia->s_addr = pr->pr_ip4[0].s_addr; 323316944Ssmh error = 0; 324316944Ssmh } 325316944Ssmh 326303863Sstevek mtx_unlock(&pr->pr_mtx); 327303863Sstevek return (error); 328303863Sstevek} 329303863Sstevek 330303863Sstevek/* 331303863Sstevek * Rewrite destination address in case we will connect to loopback address. 332303863Sstevek * 333303863Sstevek * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 334303863Sstevek * Address passed in in NBO and returned in NBO. 335303863Sstevek */ 336303863Sstevekint 337303863Sstevekprison_remote_ip4(struct ucred *cred, struct in_addr *ia) 338303863Sstevek{ 339303863Sstevek struct prison *pr; 340303863Sstevek 341303863Sstevek KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 342303863Sstevek KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 343303863Sstevek 344303863Sstevek pr = cred->cr_prison; 345303863Sstevek if (!(pr->pr_flags & PR_IP4)) 346303863Sstevek return (0); 347303863Sstevek mtx_lock(&pr->pr_mtx); 348303863Sstevek if (!(pr->pr_flags & PR_IP4)) { 349303863Sstevek mtx_unlock(&pr->pr_mtx); 350303863Sstevek return (0); 351303863Sstevek } 352303863Sstevek if (pr->pr_ip4 == NULL) { 353303863Sstevek mtx_unlock(&pr->pr_mtx); 354303863Sstevek return (EAFNOSUPPORT); 355303863Sstevek } 356303863Sstevek 357316944Ssmh if (ntohl(ia->s_addr) == INADDR_LOOPBACK && 358316944Ssmh prison_check_ip4_locked(pr, ia) == EADDRNOTAVAIL) { 359303863Sstevek ia->s_addr = pr->pr_ip4[0].s_addr; 360303863Sstevek mtx_unlock(&pr->pr_mtx); 361303863Sstevek return (0); 362303863Sstevek } 363303863Sstevek 364303863Sstevek /* 365303863Sstevek * Return success because nothing had to be changed. 366303863Sstevek */ 367303863Sstevek mtx_unlock(&pr->pr_mtx); 368303863Sstevek return (0); 369303863Sstevek} 370303863Sstevek 371303863Sstevek/* 372303863Sstevek * Check if given address belongs to the jail referenced by cred/prison. 373303863Sstevek * 374316944Ssmh * Returns 0 if address belongs to jail, 375316944Ssmh * EADDRNOTAVAIL if the address doesn't belong to the jail. 376303863Sstevek */ 377303863Sstevekint 378303863Sstevekprison_check_ip4_locked(const struct prison *pr, const struct in_addr *ia) 379303863Sstevek{ 380303863Sstevek int i, a, z, d; 381303863Sstevek 382303863Sstevek /* 383303863Sstevek * Check the primary IP. 384303863Sstevek */ 385303863Sstevek if (pr->pr_ip4[0].s_addr == ia->s_addr) 386303863Sstevek return (0); 387303863Sstevek 388303863Sstevek /* 389303863Sstevek * All the other IPs are sorted so we can do a binary search. 390303863Sstevek */ 391303863Sstevek a = 0; 392303863Sstevek z = pr->pr_ip4s - 2; 393303863Sstevek while (a <= z) { 394303863Sstevek i = (a + z) / 2; 395303863Sstevek d = prison_qcmp_v4(&pr->pr_ip4[i+1], ia); 396303863Sstevek if (d > 0) 397303863Sstevek z = i - 1; 398303863Sstevek else if (d < 0) 399303863Sstevek a = i + 1; 400303863Sstevek else 401303863Sstevek return (0); 402303863Sstevek } 403303863Sstevek 404303863Sstevek return (EADDRNOTAVAIL); 405303863Sstevek} 406303863Sstevek 407303863Sstevekint 408303863Sstevekprison_check_ip4(const struct ucred *cred, const struct in_addr *ia) 409303863Sstevek{ 410303863Sstevek struct prison *pr; 411303863Sstevek int error; 412303863Sstevek 413303863Sstevek KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 414303863Sstevek KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 415303863Sstevek 416303863Sstevek pr = cred->cr_prison; 417303863Sstevek if (!(pr->pr_flags & PR_IP4)) 418303863Sstevek return (0); 419303863Sstevek mtx_lock(&pr->pr_mtx); 420303863Sstevek if (!(pr->pr_flags & PR_IP4)) { 421303863Sstevek mtx_unlock(&pr->pr_mtx); 422303863Sstevek return (0); 423303863Sstevek } 424303863Sstevek if (pr->pr_ip4 == NULL) { 425303863Sstevek mtx_unlock(&pr->pr_mtx); 426303863Sstevek return (EAFNOSUPPORT); 427303863Sstevek } 428303863Sstevek 429303863Sstevek error = prison_check_ip4_locked(pr, ia); 430303863Sstevek mtx_unlock(&pr->pr_mtx); 431303863Sstevek return (error); 432303863Sstevek} 433