1303863Sstevek/*-
2303863Sstevek * Copyright (c) 1999 Poul-Henning Kamp.
3303863Sstevek * Copyright (c) 2008 Bjoern A. Zeeb.
4303863Sstevek * Copyright (c) 2009 James Gritton.
5303863Sstevek * All rights reserved.
6303863Sstevek *
7303863Sstevek * Redistribution and use in source and binary forms, with or without
8303863Sstevek * modification, are permitted provided that the following conditions
9303863Sstevek * are met:
10303863Sstevek * 1. Redistributions of source code must retain the above copyright
11303863Sstevek *    notice, this list of conditions and the following disclaimer.
12303863Sstevek * 2. Redistributions in binary form must reproduce the above copyright
13303863Sstevek *    notice, this list of conditions and the following disclaimer in the
14303863Sstevek *    documentation and/or other materials provided with the distribution.
15303863Sstevek *
16303863Sstevek * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17303863Sstevek * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18303863Sstevek * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19303863Sstevek * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20303863Sstevek * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21303863Sstevek * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22303863Sstevek * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23303863Sstevek * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24303863Sstevek * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25303863Sstevek * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26303863Sstevek * SUCH DAMAGE.
27303863Sstevek */
28303863Sstevek
29303863Sstevek#include <sys/cdefs.h>
30303863Sstevek__FBSDID("$FreeBSD: stable/11/sys/netinet/in_jail.c 316944 2017-04-14 22:02:08Z smh $");
31303863Sstevek
32303863Sstevek#include "opt_compat.h"
33303863Sstevek#include "opt_ddb.h"
34303863Sstevek#include "opt_inet.h"
35303863Sstevek#include "opt_inet6.h"
36303863Sstevek
37303863Sstevek#include <sys/param.h>
38303863Sstevek#include <sys/types.h>
39303863Sstevek#include <sys/kernel.h>
40303863Sstevek#include <sys/systm.h>
41303863Sstevek#include <sys/errno.h>
42303863Sstevek#include <sys/sysproto.h>
43303863Sstevek#include <sys/malloc.h>
44303863Sstevek#include <sys/osd.h>
45303863Sstevek#include <sys/priv.h>
46303863Sstevek#include <sys/proc.h>
47303863Sstevek#include <sys/taskqueue.h>
48303863Sstevek#include <sys/fcntl.h>
49303863Sstevek#include <sys/jail.h>
50303863Sstevek#include <sys/lock.h>
51303863Sstevek#include <sys/mutex.h>
52303863Sstevek#include <sys/racct.h>
53303863Sstevek#include <sys/refcount.h>
54303863Sstevek#include <sys/sx.h>
55303863Sstevek#include <sys/sysent.h>
56303863Sstevek#include <sys/namei.h>
57303863Sstevek#include <sys/mount.h>
58303863Sstevek#include <sys/queue.h>
59303863Sstevek#include <sys/socket.h>
60303863Sstevek#include <sys/syscallsubr.h>
61303863Sstevek#include <sys/sysctl.h>
62303863Sstevek#include <sys/vnode.h>
63303863Sstevek
64303863Sstevek#include <net/if.h>
65303863Sstevek#include <net/vnet.h>
66303863Sstevek
67303863Sstevek#include <netinet/in.h>
68303863Sstevek
/*
 * Three-way comparator for two IPv4 addresses.
 *
 * Both arguments point at a struct in_addr holding an address in network
 * byte order.  Returns -1, 0 or 1 when the first address is respectively
 * lower than, equal to, or higher than the second one.
 */
int
prison_qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs, *rhs;
	in_addr_t lhs_hbo, rhs_hbo;

	lhs = ip1;
	rhs = ip2;

	/*
	 * The ordering is defined on the host byte order values; comparing
	 * the raw network byte order words would yield a different order on
	 * little-endian machines.
	 */
	lhs_hbo = ntohl(lhs->s_addr);
	rhs_hbo = ntohl(rhs->s_addr);

	/*
	 * Compare explicitly: the difference of two 32-bit unsigned values
	 * does not fit into the int we have to return.
	 */
	if (lhs_hbo == rhs_hbo)
		return (0);
	return (lhs_hbo < rhs_hbo ? -1 : 1);
}
93303863Sstevek
94303863Sstevek/*
95303863Sstevek * Restrict a prison's IP address list with its parent's, possibly replacing
96303863Sstevek * it.  Return true if the replacement buffer was used (or would have been).
97303863Sstevek */
98303863Sstevekint
99303863Sstevekprison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
100303863Sstevek{
101303863Sstevek	int ii, ij, used;
102303863Sstevek	struct prison *ppr;
103303863Sstevek
104303863Sstevek	ppr = pr->pr_parent;
105303863Sstevek	if (!(pr->pr_flags & PR_IP4_USER)) {
106303863Sstevek		/* This has no user settings, so just copy the parent's list. */
107303863Sstevek		if (pr->pr_ip4s < ppr->pr_ip4s) {
108303863Sstevek			/*
109303863Sstevek			 * There's no room for the parent's list.  Use the
110303863Sstevek			 * new list buffer, which is assumed to be big enough
111303863Sstevek			 * (if it was passed).  If there's no buffer, try to
112303863Sstevek			 * allocate one.
113303863Sstevek			 */
114303863Sstevek			used = 1;
115303863Sstevek			if (newip4 == NULL) {
116303863Sstevek				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
117303863Sstevek				    M_PRISON, M_NOWAIT);
118303863Sstevek				if (newip4 != NULL)
119303863Sstevek					used = 0;
120303863Sstevek			}
121303863Sstevek			if (newip4 != NULL) {
122303863Sstevek				bcopy(ppr->pr_ip4, newip4,
123303863Sstevek				    ppr->pr_ip4s * sizeof(*newip4));
124303863Sstevek				free(pr->pr_ip4, M_PRISON);
125303863Sstevek				pr->pr_ip4 = newip4;
126303863Sstevek				pr->pr_ip4s = ppr->pr_ip4s;
127303863Sstevek			}
128303863Sstevek			return (used);
129303863Sstevek		}
130303863Sstevek		pr->pr_ip4s = ppr->pr_ip4s;
131303863Sstevek		if (pr->pr_ip4s > 0)
132303863Sstevek			bcopy(ppr->pr_ip4, pr->pr_ip4,
133303863Sstevek			    pr->pr_ip4s * sizeof(*newip4));
134303863Sstevek		else if (pr->pr_ip4 != NULL) {
135303863Sstevek			free(pr->pr_ip4, M_PRISON);
136303863Sstevek			pr->pr_ip4 = NULL;
137303863Sstevek		}
138303863Sstevek	} else if (pr->pr_ip4s > 0) {
139303863Sstevek		/* Remove addresses that aren't in the parent. */
140303863Sstevek		for (ij = 0; ij < ppr->pr_ip4s; ij++)
141303863Sstevek			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
142303863Sstevek				break;
143303863Sstevek		if (ij < ppr->pr_ip4s)
144303863Sstevek			ii = 1;
145303863Sstevek		else {
146303863Sstevek			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
147303863Sstevek			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
148303863Sstevek			ii = 0;
149303863Sstevek		}
150303863Sstevek		for (ij = 1; ii < pr->pr_ip4s; ) {
151303863Sstevek			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
152303863Sstevek				ii++;
153303863Sstevek				continue;
154303863Sstevek			}
155303863Sstevek			switch (ij >= ppr->pr_ip4s ? -1 :
156303863Sstevek				prison_qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
157303863Sstevek			case -1:
158303863Sstevek				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
159303863Sstevek				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
160303863Sstevek				break;
161303863Sstevek			case 0:
162303863Sstevek				ii++;
163303863Sstevek				ij++;
164303863Sstevek				break;
165303863Sstevek			case 1:
166303863Sstevek				ij++;
167303863Sstevek				break;
168303863Sstevek			}
169303863Sstevek		}
170303863Sstevek		if (pr->pr_ip4s == 0) {
171303863Sstevek			free(pr->pr_ip4, M_PRISON);
172303863Sstevek			pr->pr_ip4 = NULL;
173303863Sstevek		}
174303863Sstevek	}
175303863Sstevek	return (0);
176303863Sstevek}
177303863Sstevek
178303863Sstevek/*
179303863Sstevek * Pass back primary IPv4 address of this jail.
180303863Sstevek *
181303863Sstevek * If not restricted return success but do not alter the address.  Caller has
182303863Sstevek * to make sure to initialize it correctly (e.g. INADDR_ANY).
183303863Sstevek *
184303863Sstevek * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
185303863Sstevek * Address returned in NBO.
186303863Sstevek */
187303863Sstevekint
188303863Sstevekprison_get_ip4(struct ucred *cred, struct in_addr *ia)
189303863Sstevek{
190303863Sstevek	struct prison *pr;
191303863Sstevek
192303863Sstevek	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
193303863Sstevek	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
194303863Sstevek
195303863Sstevek	pr = cred->cr_prison;
196303863Sstevek	if (!(pr->pr_flags & PR_IP4))
197303863Sstevek		return (0);
198303863Sstevek	mtx_lock(&pr->pr_mtx);
199303863Sstevek	if (!(pr->pr_flags & PR_IP4)) {
200303863Sstevek		mtx_unlock(&pr->pr_mtx);
201303863Sstevek		return (0);
202303863Sstevek	}
203303863Sstevek	if (pr->pr_ip4 == NULL) {
204303863Sstevek		mtx_unlock(&pr->pr_mtx);
205303863Sstevek		return (EAFNOSUPPORT);
206303863Sstevek	}
207303863Sstevek
208303863Sstevek	ia->s_addr = pr->pr_ip4[0].s_addr;
209303863Sstevek	mtx_unlock(&pr->pr_mtx);
210303863Sstevek	return (0);
211303863Sstevek}
212303863Sstevek
213303863Sstevek/*
214303863Sstevek * Return 1 if we should do proper source address selection or are not jailed.
215303863Sstevek * We will return 0 if we should bypass source address selection in favour
216303863Sstevek * of the primary jail IPv4 address. Only in this case *ia will be updated and
217303863Sstevek * returned in NBO.
218303863Sstevek * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
219303863Sstevek */
220303863Sstevekint
221303863Sstevekprison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
222303863Sstevek{
223303863Sstevek	struct prison *pr;
224303863Sstevek	struct in_addr lia;
225303863Sstevek	int error;
226303863Sstevek
227303863Sstevek	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
228303863Sstevek	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
229303863Sstevek
230303863Sstevek	if (!jailed(cred))
231303863Sstevek		return (1);
232303863Sstevek
233303863Sstevek	pr = cred->cr_prison;
234303863Sstevek	if (pr->pr_flags & PR_IP4_SADDRSEL)
235303863Sstevek		return (1);
236303863Sstevek
237303863Sstevek	lia.s_addr = INADDR_ANY;
238303863Sstevek	error = prison_get_ip4(cred, &lia);
239303863Sstevek	if (error)
240303863Sstevek		return (error);
241303863Sstevek	if (lia.s_addr == INADDR_ANY)
242303863Sstevek		return (1);
243303863Sstevek
244303863Sstevek	ia->s_addr = lia.s_addr;
245303863Sstevek	return (0);
246303863Sstevek}
247303863Sstevek
248303863Sstevek/*
249303863Sstevek * Return true if pr1 and pr2 have the same IPv4 address restrictions.
250303863Sstevek */
251303863Sstevekint
252303863Sstevekprison_equal_ip4(struct prison *pr1, struct prison *pr2)
253303863Sstevek{
254303863Sstevek
255303863Sstevek	if (pr1 == pr2)
256303863Sstevek		return (1);
257303863Sstevek
258303863Sstevek	/*
259303863Sstevek	 * No need to lock since the PR_IP4_USER flag can't be altered for
260303863Sstevek	 * existing prisons.
261303863Sstevek	 */
262303863Sstevek	while (pr1 != &prison0 &&
263303863Sstevek#ifdef VIMAGE
264303863Sstevek	       !(pr1->pr_flags & PR_VNET) &&
265303863Sstevek#endif
266303863Sstevek	       !(pr1->pr_flags & PR_IP4_USER))
267303863Sstevek		pr1 = pr1->pr_parent;
268303863Sstevek	while (pr2 != &prison0 &&
269303863Sstevek#ifdef VIMAGE
270303863Sstevek	       !(pr2->pr_flags & PR_VNET) &&
271303863Sstevek#endif
272303863Sstevek	       !(pr2->pr_flags & PR_IP4_USER))
273303863Sstevek		pr2 = pr2->pr_parent;
274303863Sstevek	return (pr1 == pr2);
275303863Sstevek}
276303863Sstevek
277303863Sstevek/*
278303863Sstevek * Make sure our (source) address is set to something meaningful to this
279303863Sstevek * jail.
280303863Sstevek *
281303863Sstevek * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
282303863Sstevek * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
283303863Sstevek * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
284303863Sstevek */
285303863Sstevekint
286303863Sstevekprison_local_ip4(struct ucred *cred, struct in_addr *ia)
287303863Sstevek{
288303863Sstevek	struct prison *pr;
289303863Sstevek	struct in_addr ia0;
290303863Sstevek	int error;
291303863Sstevek
292303863Sstevek	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
293303863Sstevek	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
294303863Sstevek
295303863Sstevek	pr = cred->cr_prison;
296303863Sstevek	if (!(pr->pr_flags & PR_IP4))
297303863Sstevek		return (0);
298303863Sstevek	mtx_lock(&pr->pr_mtx);
299303863Sstevek	if (!(pr->pr_flags & PR_IP4)) {
300303863Sstevek		mtx_unlock(&pr->pr_mtx);
301303863Sstevek		return (0);
302303863Sstevek	}
303303863Sstevek	if (pr->pr_ip4 == NULL) {
304303863Sstevek		mtx_unlock(&pr->pr_mtx);
305303863Sstevek		return (EAFNOSUPPORT);
306303863Sstevek	}
307303863Sstevek
308303863Sstevek	ia0.s_addr = ntohl(ia->s_addr);
309303863Sstevek
310303863Sstevek	if (ia0.s_addr == INADDR_ANY) {
311303863Sstevek		/*
312303863Sstevek		 * In case there is only 1 IPv4 address, bind directly.
313303863Sstevek		 */
314303863Sstevek		if (pr->pr_ip4s == 1)
315303863Sstevek			ia->s_addr = pr->pr_ip4[0].s_addr;
316303863Sstevek		mtx_unlock(&pr->pr_mtx);
317303863Sstevek		return (0);
318303863Sstevek	}
319303863Sstevek
320303863Sstevek	error = prison_check_ip4_locked(pr, ia);
321316944Ssmh	if (error == EADDRNOTAVAIL && ia0.s_addr == INADDR_LOOPBACK) {
322316944Ssmh		ia->s_addr = pr->pr_ip4[0].s_addr;
323316944Ssmh		error = 0;
324316944Ssmh	}
325316944Ssmh
326303863Sstevek	mtx_unlock(&pr->pr_mtx);
327303863Sstevek	return (error);
328303863Sstevek}
329303863Sstevek
330303863Sstevek/*
331303863Sstevek * Rewrite destination address in case we will connect to loopback address.
332303863Sstevek *
333303863Sstevek * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
334303863Sstevek * Address passed in in NBO and returned in NBO.
335303863Sstevek */
336303863Sstevekint
337303863Sstevekprison_remote_ip4(struct ucred *cred, struct in_addr *ia)
338303863Sstevek{
339303863Sstevek	struct prison *pr;
340303863Sstevek
341303863Sstevek	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
342303863Sstevek	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
343303863Sstevek
344303863Sstevek	pr = cred->cr_prison;
345303863Sstevek	if (!(pr->pr_flags & PR_IP4))
346303863Sstevek		return (0);
347303863Sstevek	mtx_lock(&pr->pr_mtx);
348303863Sstevek	if (!(pr->pr_flags & PR_IP4)) {
349303863Sstevek		mtx_unlock(&pr->pr_mtx);
350303863Sstevek		return (0);
351303863Sstevek	}
352303863Sstevek	if (pr->pr_ip4 == NULL) {
353303863Sstevek		mtx_unlock(&pr->pr_mtx);
354303863Sstevek		return (EAFNOSUPPORT);
355303863Sstevek	}
356303863Sstevek
357316944Ssmh	if (ntohl(ia->s_addr) == INADDR_LOOPBACK &&
358316944Ssmh	    prison_check_ip4_locked(pr, ia) == EADDRNOTAVAIL) {
359303863Sstevek		ia->s_addr = pr->pr_ip4[0].s_addr;
360303863Sstevek		mtx_unlock(&pr->pr_mtx);
361303863Sstevek		return (0);
362303863Sstevek	}
363303863Sstevek
364303863Sstevek	/*
365303863Sstevek	 * Return success because nothing had to be changed.
366303863Sstevek	 */
367303863Sstevek	mtx_unlock(&pr->pr_mtx);
368303863Sstevek	return (0);
369303863Sstevek}
370303863Sstevek
371303863Sstevek/*
372303863Sstevek * Check if given address belongs to the jail referenced by cred/prison.
373303863Sstevek *
374316944Ssmh * Returns 0 if address belongs to jail,
375316944Ssmh * EADDRNOTAVAIL if the address doesn't belong to the jail.
376303863Sstevek */
377303863Sstevekint
378303863Sstevekprison_check_ip4_locked(const struct prison *pr, const struct in_addr *ia)
379303863Sstevek{
380303863Sstevek	int i, a, z, d;
381303863Sstevek
382303863Sstevek	/*
383303863Sstevek	 * Check the primary IP.
384303863Sstevek	 */
385303863Sstevek	if (pr->pr_ip4[0].s_addr == ia->s_addr)
386303863Sstevek		return (0);
387303863Sstevek
388303863Sstevek	/*
389303863Sstevek	 * All the other IPs are sorted so we can do a binary search.
390303863Sstevek	 */
391303863Sstevek	a = 0;
392303863Sstevek	z = pr->pr_ip4s - 2;
393303863Sstevek	while (a <= z) {
394303863Sstevek		i = (a + z) / 2;
395303863Sstevek		d = prison_qcmp_v4(&pr->pr_ip4[i+1], ia);
396303863Sstevek		if (d > 0)
397303863Sstevek			z = i - 1;
398303863Sstevek		else if (d < 0)
399303863Sstevek			a = i + 1;
400303863Sstevek		else
401303863Sstevek			return (0);
402303863Sstevek	}
403303863Sstevek
404303863Sstevek	return (EADDRNOTAVAIL);
405303863Sstevek}
406303863Sstevek
407303863Sstevekint
408303863Sstevekprison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
409303863Sstevek{
410303863Sstevek	struct prison *pr;
411303863Sstevek	int error;
412303863Sstevek
413303863Sstevek	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
414303863Sstevek	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
415303863Sstevek
416303863Sstevek	pr = cred->cr_prison;
417303863Sstevek	if (!(pr->pr_flags & PR_IP4))
418303863Sstevek		return (0);
419303863Sstevek	mtx_lock(&pr->pr_mtx);
420303863Sstevek	if (!(pr->pr_flags & PR_IP4)) {
421303863Sstevek		mtx_unlock(&pr->pr_mtx);
422303863Sstevek		return (0);
423303863Sstevek	}
424303863Sstevek	if (pr->pr_ip4 == NULL) {
425303863Sstevek		mtx_unlock(&pr->pr_mtx);
426303863Sstevek		return (EAFNOSUPPORT);
427303863Sstevek	}
428303863Sstevek
429303863Sstevek	error = prison_check_ip4_locked(pr, ia);
430303863Sstevek	mtx_unlock(&pr->pr_mtx);
431303863Sstevek	return (error);
432303863Sstevek}
433