1/*-
2 * Copyright (c) 1999 Poul-Henning Kamp.
3 * Copyright (c) 2008 Bjoern A. Zeeb.
4 * Copyright (c) 2009 James Gritton.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/netinet/in_jail.c 316944 2017-04-14 22:02:08Z smh $");
31
32#include "opt_compat.h"
33#include "opt_ddb.h"
34#include "opt_inet.h"
35#include "opt_inet6.h"
36
37#include <sys/param.h>
38#include <sys/types.h>
39#include <sys/kernel.h>
40#include <sys/systm.h>
41#include <sys/errno.h>
42#include <sys/sysproto.h>
43#include <sys/malloc.h>
44#include <sys/osd.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/taskqueue.h>
48#include <sys/fcntl.h>
49#include <sys/jail.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/racct.h>
53#include <sys/refcount.h>
54#include <sys/sx.h>
55#include <sys/sysent.h>
56#include <sys/namei.h>
57#include <sys/mount.h>
58#include <sys/queue.h>
59#include <sys/socket.h>
60#include <sys/syscallsubr.h>
61#include <sys/sysctl.h>
62#include <sys/vnode.h>
63
64#include <net/if.h>
65#include <net/vnet.h>
66
67#include <netinet/in.h>
68
/*
 * Three-way comparison of two IPv4 addresses, usable as a qsort-style
 * callback.
 *
 * ip1 and ip2 each point at a struct in_addr holding an address in network
 * byte order.  Both are converted to host byte order before comparing so
 * that the resulting order is the numeric one; comparing the raw network
 * order values would sort differently on little-endian machines.
 *
 * Returns -1, 0 or 1 when the first address is respectively lower than,
 * equal to, or higher than the second.
 */
int
prison_qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs, *rhs;
	in_addr_t lhost, rhost;

	lhs = ip1;
	rhs = ip2;
	lhost = ntohl(lhs->s_addr);
	rhost = ntohl(rhs->s_addr);

	/*
	 * A plain subtraction would not fit in an int, so derive the sign
	 * from the two relational results instead.
	 */
	return ((lhost > rhost) - (lhost < rhost));
}
93
94/*
95 * Restrict a prison's IP address list with its parent's, possibly replacing
96 * it.  Return true if the replacement buffer was used (or would have been).
97 */
98int
99prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
100{
101	int ii, ij, used;
102	struct prison *ppr;
103
104	ppr = pr->pr_parent;
105	if (!(pr->pr_flags & PR_IP4_USER)) {
106		/* This has no user settings, so just copy the parent's list. */
107		if (pr->pr_ip4s < ppr->pr_ip4s) {
108			/*
109			 * There's no room for the parent's list.  Use the
110			 * new list buffer, which is assumed to be big enough
111			 * (if it was passed).  If there's no buffer, try to
112			 * allocate one.
113			 */
114			used = 1;
115			if (newip4 == NULL) {
116				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
117				    M_PRISON, M_NOWAIT);
118				if (newip4 != NULL)
119					used = 0;
120			}
121			if (newip4 != NULL) {
122				bcopy(ppr->pr_ip4, newip4,
123				    ppr->pr_ip4s * sizeof(*newip4));
124				free(pr->pr_ip4, M_PRISON);
125				pr->pr_ip4 = newip4;
126				pr->pr_ip4s = ppr->pr_ip4s;
127			}
128			return (used);
129		}
130		pr->pr_ip4s = ppr->pr_ip4s;
131		if (pr->pr_ip4s > 0)
132			bcopy(ppr->pr_ip4, pr->pr_ip4,
133			    pr->pr_ip4s * sizeof(*newip4));
134		else if (pr->pr_ip4 != NULL) {
135			free(pr->pr_ip4, M_PRISON);
136			pr->pr_ip4 = NULL;
137		}
138	} else if (pr->pr_ip4s > 0) {
139		/* Remove addresses that aren't in the parent. */
140		for (ij = 0; ij < ppr->pr_ip4s; ij++)
141			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
142				break;
143		if (ij < ppr->pr_ip4s)
144			ii = 1;
145		else {
146			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
147			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
148			ii = 0;
149		}
150		for (ij = 1; ii < pr->pr_ip4s; ) {
151			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
152				ii++;
153				continue;
154			}
155			switch (ij >= ppr->pr_ip4s ? -1 :
156				prison_qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
157			case -1:
158				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
159				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
160				break;
161			case 0:
162				ii++;
163				ij++;
164				break;
165			case 1:
166				ij++;
167				break;
168			}
169		}
170		if (pr->pr_ip4s == 0) {
171			free(pr->pr_ip4, M_PRISON);
172			pr->pr_ip4 = NULL;
173		}
174	}
175	return (0);
176}
177
178/*
179 * Pass back primary IPv4 address of this jail.
180 *
181 * If not restricted return success but do not alter the address.  Caller has
182 * to make sure to initialize it correctly (e.g. INADDR_ANY).
183 *
184 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
185 * Address returned in NBO.
186 */
187int
188prison_get_ip4(struct ucred *cred, struct in_addr *ia)
189{
190	struct prison *pr;
191
192	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
193	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
194
195	pr = cred->cr_prison;
196	if (!(pr->pr_flags & PR_IP4))
197		return (0);
198	mtx_lock(&pr->pr_mtx);
199	if (!(pr->pr_flags & PR_IP4)) {
200		mtx_unlock(&pr->pr_mtx);
201		return (0);
202	}
203	if (pr->pr_ip4 == NULL) {
204		mtx_unlock(&pr->pr_mtx);
205		return (EAFNOSUPPORT);
206	}
207
208	ia->s_addr = pr->pr_ip4[0].s_addr;
209	mtx_unlock(&pr->pr_mtx);
210	return (0);
211}
212
213/*
214 * Return 1 if we should do proper source address selection or are not jailed.
215 * We will return 0 if we should bypass source address selection in favour
216 * of the primary jail IPv4 address. Only in this case *ia will be updated and
217 * returned in NBO.
218 * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
219 */
220int
221prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
222{
223	struct prison *pr;
224	struct in_addr lia;
225	int error;
226
227	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
228	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
229
230	if (!jailed(cred))
231		return (1);
232
233	pr = cred->cr_prison;
234	if (pr->pr_flags & PR_IP4_SADDRSEL)
235		return (1);
236
237	lia.s_addr = INADDR_ANY;
238	error = prison_get_ip4(cred, &lia);
239	if (error)
240		return (error);
241	if (lia.s_addr == INADDR_ANY)
242		return (1);
243
244	ia->s_addr = lia.s_addr;
245	return (0);
246}
247
248/*
249 * Return true if pr1 and pr2 have the same IPv4 address restrictions.
250 */
251int
252prison_equal_ip4(struct prison *pr1, struct prison *pr2)
253{
254
255	if (pr1 == pr2)
256		return (1);
257
258	/*
259	 * No need to lock since the PR_IP4_USER flag can't be altered for
260	 * existing prisons.
261	 */
262	while (pr1 != &prison0 &&
263#ifdef VIMAGE
264	       !(pr1->pr_flags & PR_VNET) &&
265#endif
266	       !(pr1->pr_flags & PR_IP4_USER))
267		pr1 = pr1->pr_parent;
268	while (pr2 != &prison0 &&
269#ifdef VIMAGE
270	       !(pr2->pr_flags & PR_VNET) &&
271#endif
272	       !(pr2->pr_flags & PR_IP4_USER))
273		pr2 = pr2->pr_parent;
274	return (pr1 == pr2);
275}
276
277/*
278 * Make sure our (source) address is set to something meaningful to this
279 * jail.
280 *
281 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
282 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
283 * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
284 */
285int
286prison_local_ip4(struct ucred *cred, struct in_addr *ia)
287{
288	struct prison *pr;
289	struct in_addr ia0;
290	int error;
291
292	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
293	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
294
295	pr = cred->cr_prison;
296	if (!(pr->pr_flags & PR_IP4))
297		return (0);
298	mtx_lock(&pr->pr_mtx);
299	if (!(pr->pr_flags & PR_IP4)) {
300		mtx_unlock(&pr->pr_mtx);
301		return (0);
302	}
303	if (pr->pr_ip4 == NULL) {
304		mtx_unlock(&pr->pr_mtx);
305		return (EAFNOSUPPORT);
306	}
307
308	ia0.s_addr = ntohl(ia->s_addr);
309
310	if (ia0.s_addr == INADDR_ANY) {
311		/*
312		 * In case there is only 1 IPv4 address, bind directly.
313		 */
314		if (pr->pr_ip4s == 1)
315			ia->s_addr = pr->pr_ip4[0].s_addr;
316		mtx_unlock(&pr->pr_mtx);
317		return (0);
318	}
319
320	error = prison_check_ip4_locked(pr, ia);
321	if (error == EADDRNOTAVAIL && ia0.s_addr == INADDR_LOOPBACK) {
322		ia->s_addr = pr->pr_ip4[0].s_addr;
323		error = 0;
324	}
325
326	mtx_unlock(&pr->pr_mtx);
327	return (error);
328}
329
330/*
331 * Rewrite destination address in case we will connect to loopback address.
332 *
333 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
334 * Address passed in in NBO and returned in NBO.
335 */
336int
337prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
338{
339	struct prison *pr;
340
341	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
342	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
343
344	pr = cred->cr_prison;
345	if (!(pr->pr_flags & PR_IP4))
346		return (0);
347	mtx_lock(&pr->pr_mtx);
348	if (!(pr->pr_flags & PR_IP4)) {
349		mtx_unlock(&pr->pr_mtx);
350		return (0);
351	}
352	if (pr->pr_ip4 == NULL) {
353		mtx_unlock(&pr->pr_mtx);
354		return (EAFNOSUPPORT);
355	}
356
357	if (ntohl(ia->s_addr) == INADDR_LOOPBACK &&
358	    prison_check_ip4_locked(pr, ia) == EADDRNOTAVAIL) {
359		ia->s_addr = pr->pr_ip4[0].s_addr;
360		mtx_unlock(&pr->pr_mtx);
361		return (0);
362	}
363
364	/*
365	 * Return success because nothing had to be changed.
366	 */
367	mtx_unlock(&pr->pr_mtx);
368	return (0);
369}
370
371/*
372 * Check if given address belongs to the jail referenced by cred/prison.
373 *
374 * Returns 0 if address belongs to jail,
375 * EADDRNOTAVAIL if the address doesn't belong to the jail.
376 */
377int
378prison_check_ip4_locked(const struct prison *pr, const struct in_addr *ia)
379{
380	int i, a, z, d;
381
382	/*
383	 * Check the primary IP.
384	 */
385	if (pr->pr_ip4[0].s_addr == ia->s_addr)
386		return (0);
387
388	/*
389	 * All the other IPs are sorted so we can do a binary search.
390	 */
391	a = 0;
392	z = pr->pr_ip4s - 2;
393	while (a <= z) {
394		i = (a + z) / 2;
395		d = prison_qcmp_v4(&pr->pr_ip4[i+1], ia);
396		if (d > 0)
397			z = i - 1;
398		else if (d < 0)
399			a = i + 1;
400		else
401			return (0);
402	}
403
404	return (EADDRNOTAVAIL);
405}
406
407int
408prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
409{
410	struct prison *pr;
411	int error;
412
413	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
414	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
415
416	pr = cred->cr_prison;
417	if (!(pr->pr_flags & PR_IP4))
418		return (0);
419	mtx_lock(&pr->pr_mtx);
420	if (!(pr->pr_flags & PR_IP4)) {
421		mtx_unlock(&pr->pr_mtx);
422		return (0);
423	}
424	if (pr->pr_ip4 == NULL) {
425		mtx_unlock(&pr->pr_mtx);
426		return (EAFNOSUPPORT);
427	}
428
429	error = prison_check_ip4_locked(pr, ia);
430	mtx_unlock(&pr->pr_mtx);
431	return (error);
432}
433