1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*	$FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.2 2001/07/03 11:01:46 ume Exp $	*/
29/*	$KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $	*/
30
31/*
32 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
33 * All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the project nor the names of its contributors
44 *    may be used to endorse or promote products derived from this software
45 *    without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 */
59/*
60 * My grandfather said that there's a devil inside tunnelling technology...
61 *
62 * We have surprisingly many protocols that want packets with IP protocol
63 * #4 or #41.  Here's a list of protocols that want protocol #41:
64 *	RFC1933 configured tunnel
65 *	RFC1933 automatic tunnel
66 *	RFC2401 IPsec tunnel
67 *	RFC2473 IPv6 generic packet tunnelling
68 *	RFC2529 6over4 tunnel
69 *	mobile-ip6 (uses RFC2473)
70 *	6to4 tunnel
 * Here's a list of protocols that want protocol #4:
72 *	RFC1853 IPv4-in-IPv4 tunnelling
73 *	RFC2003 IPv4 encapsulation within IPv4
74 *	RFC2344 reverse tunnelling for mobile-ip4
75 *	RFC2401 IPsec tunnel
 * Well, what can I say.  They each impose a different en/decapsulation
 * mechanism, so they need separate protocol handlers.  The only one
 * we can easily determine by protocol # is IPsec, which always has an
 * AH/ESP/IPComp header right after the outer IP header.
80 *
81 * So, clearly good old protosw does not work for protocol #4 and #41.
82 * The code will let you match protocol via src/dst address pair.
83 */
84/* XXX is M_NETADDR correct? */
85
86#include <sys/param.h>
87#include <sys/systm.h>
88#include <sys/socket.h>
89#include <sys/sockio.h>
90#include <sys/mbuf.h>
91#include <sys/mcache.h>
92#include <sys/errno.h>
93#include <sys/domain.h>
94#include <sys/protosw.h>
95#include <sys/queue.h>
96
97#include <net/if.h>
98#include <net/route.h>
99
100#include <netinet/in.h>
101#include <netinet/in_systm.h>
102#include <netinet/ip.h>
103#include <netinet/ip_var.h>
104#include <netinet/ip_encap.h>
105#if MROUTING
106#include <netinet/ip_mroute.h>
107#endif /* MROUTING */
108
109#if INET6
110#include <netinet/ip6.h>
111#include <netinet6/ip6_var.h>
112#include <netinet6/ip6protosw.h>
113#endif
114
115
116#include <net/net_osdep.h>
117
118#ifndef __APPLE__
119#include <sys/kernel.h>
120#include <sys/malloc.h>
121MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
122#endif
123
124static void encap_init(struct protosw *, struct domain *);
125static void encap_add(struct encaptab *);
126static int mask_match(const struct encaptab *, const struct sockaddr *,
127		const struct sockaddr *);
128static void encap_fillarg(struct mbuf *, const struct encaptab *);
129
130#ifndef LIST_HEAD_INITIALIZER
131/* rely upon BSS initialization */
132LIST_HEAD(, encaptab) encaptab;
133#else
134LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab);
135#endif
136
137static void
138encap_init(struct protosw *pp, struct domain *dp)
139{
140#pragma unused(dp)
141	static int encap_initialized = 0;
142
143	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
144
145	/* This gets called by more than one protocols, so initialize once */
146	if (encap_initialized)
147		return;
148	encap_initialized = 1;
149#if 0
150	/*
151	 * we cannot use LIST_INIT() here, since drivers may want to call
152	 * encap_attach(), on driver attach.  encap_init() will be called
153	 * on AF_INET{,6} initialization, which happens after driver
154	 * initialization - using LIST_INIT() here can nuke encap_attach()
155	 * from drivers.
156	 */
157	LIST_INIT(&encaptab);
158#endif
159}
160
/*
 * IPv4 initialization entry point; forwards straight to the shared
 * encap_init().
 */
void
encap4_init(struct protosw *pp, struct domain *dp)
{
	encap_init(pp, dp);
}
166
/*
 * IPv6 initialization entry point.  The ip6protosw pointer is cast to a
 * plain protosw pointer and handed to the shared encap_init().
 */
void
encap6_init(struct ip6protosw *pp, struct domain *dp)
{
	struct protosw *generic = (struct protosw *)pp;

	encap_init(generic, dp);
}
172
173#if INET
174void
175encap4_input(m, off)
176	struct mbuf *m;
177	int off;
178{
179	int proto;
180	struct ip *ip;
181	struct sockaddr_in s, d;
182	const struct protosw *psw;
183	struct encaptab *ep, *match;
184	int prio, matchprio;
185
186#ifndef __APPLE__
187	va_start(ap, m);
188	off = va_arg(ap, int);
189	proto = va_arg(ap, int);
190	va_end(ap);
191#endif
192
193	/* Expect 32-bit aligned data pointer on strict-align platforms */
194	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
195
196	ip = mtod(m, struct ip *);
197#ifdef __APPLE__
198	proto = ip->ip_p;
199#endif
200
201	bzero(&s, sizeof(s));
202	s.sin_family = AF_INET;
203	s.sin_len = sizeof(struct sockaddr_in);
204	s.sin_addr = ip->ip_src;
205	bzero(&d, sizeof(d));
206	d.sin_family = AF_INET;
207	d.sin_len = sizeof(struct sockaddr_in);
208	d.sin_addr = ip->ip_dst;
209
210	match = NULL;
211	matchprio = 0;
212	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
213		if (ep->af != AF_INET)
214			continue;
215		if (ep->proto >= 0 && ep->proto != proto)
216			continue;
217		if (ep->func)
218			prio = (*ep->func)(m, off, proto, ep->arg);
219		else {
220			/*
221			 * it's inbound traffic, we need to match in reverse
222			 * order
223			 */
224			prio = mask_match(ep, (struct sockaddr *)&d,
225			    (struct sockaddr *)&s);
226		}
227
228		/*
229		 * We prioritize the matches by using bit length of the
230		 * matches.  mask_match() and user-supplied matching function
231		 * should return the bit length of the matches (for example,
232		 * if both src/dst are matched for IPv4, 64 should be returned).
233		 * 0 or negative return value means "it did not match".
234		 *
235		 * The question is, since we have two "mask" portion, we
236		 * cannot really define total order between entries.
237		 * For example, which of these should be preferred?
238		 * mask_match() returns 48 (32 + 16) for both of them.
239		 *	src=3ffe::/16, dst=3ffe:501::/32
240		 *	src=3ffe:501::/32, dst=3ffe::/16
241		 *
242		 * We need to loop through all the possible candidates
243		 * to get the best match - the search takes O(n) for
244		 * n attachments (i.e. interfaces).
245		 */
246		if (prio <= 0)
247			continue;
248		if (prio > matchprio) {
249			matchprio = prio;
250			match = ep;
251		}
252	}
253
254	if (match) {
255		/* found a match, "match" has the best one */
256		psw = (const struct protosw *)match->psw;
257		if (psw && psw->pr_input) {
258			encap_fillarg(m, match);
259			(*psw->pr_input)(m, off);
260		} else
261			m_freem(m);
262		return;
263	}
264
265	/* for backward compatibility */
266# if MROUTING
267#  define COMPATFUNC	ipip_input
268# endif /*MROUTING*/
269
270#if COMPATFUNC
271	if (proto == IPPROTO_IPV4) {
272		COMPATFUNC(m, off);
273		return;
274	}
275#endif
276
277	/* last resort: inject to raw socket */
278	rip_input(m, off);
279}
280#endif
281
282#if INET6
283int
284encap6_input(struct mbuf **mp, int *offp, int proto)
285{
286	struct mbuf *m = *mp;
287	struct ip6_hdr *ip6;
288	struct sockaddr_in6 s, d;
289	const struct ip6protosw *psw;
290	struct encaptab *ep, *match;
291	int prio, matchprio;
292
293	/* Expect 32-bit aligned data pointer on strict-align platforms */
294	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
295
296	ip6 = mtod(m, struct ip6_hdr *);
297	bzero(&s, sizeof(s));
298	s.sin6_family = AF_INET6;
299	s.sin6_len = sizeof(struct sockaddr_in6);
300	s.sin6_addr = ip6->ip6_src;
301	bzero(&d, sizeof(d));
302	d.sin6_family = AF_INET6;
303	d.sin6_len = sizeof(struct sockaddr_in6);
304	d.sin6_addr = ip6->ip6_dst;
305
306	match = NULL;
307	matchprio = 0;
308	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
309		if (ep->af != AF_INET6)
310			continue;
311		if (ep->proto >= 0 && ep->proto != proto)
312			continue;
313		if (ep->func)
314			prio = (*ep->func)(m, *offp, proto, ep->arg);
315		else {
316			/*
317			 * it's inbound traffic, we need to match in reverse
318			 * order
319			 */
320			prio = mask_match(ep, (struct sockaddr *)&d,
321			    (struct sockaddr *)&s);
322		}
323
324		/* see encap4_input() for issues here */
325		if (prio <= 0)
326			continue;
327		if (prio > matchprio) {
328			matchprio = prio;
329			match = ep;
330		}
331	}
332
333	if (match) {
334		/* found a match */
335		psw = (const struct ip6protosw *)match->psw;
336		if (psw && psw->pr_input) {
337			encap_fillarg(m, match);
338			return (*psw->pr_input)(mp, offp, proto);
339		} else {
340			m_freem(m);
341			return IPPROTO_DONE;
342		}
343	}
344
345	/* last resort: inject to raw socket */
346	return rip6_input(mp, offp, proto);
347}
348#endif
349
350static void
351encap_add(ep)
352	struct encaptab *ep;
353{
354
355	LIST_INSERT_HEAD(&encaptab, ep, chain);
356}
357
358/*
359 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
360 * length of mask (sm and dm) is assumed to be same as sp/dp.
361 * Return value will be necessary as input (cookie) for encap_detach().
362 */
363const struct encaptab *
364encap_attach(af, proto, sp, sm, dp, dm, psw, arg)
365	int af;
366	int proto;
367	const struct sockaddr *sp, *sm;
368	const struct sockaddr *dp, *dm;
369	const struct protosw *psw;
370	void *arg;
371{
372	struct encaptab *ep;
373	int error;
374
375	/* sanity check on args */
376	if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) {
377		error = EINVAL;
378		goto fail;
379	}
380	if (sp->sa_len != dp->sa_len) {
381		error = EINVAL;
382		goto fail;
383	}
384	if (af != sp->sa_family || af != dp->sa_family) {
385		error = EINVAL;
386		goto fail;
387	}
388
389	/* check if anyone have already attached with exactly same config */
390	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
391		if (ep->af != af)
392			continue;
393		if (ep->proto != proto)
394			continue;
395		if (ep->src.ss_len != sp->sa_len ||
396		    bcmp(&ep->src, sp, sp->sa_len) != 0 ||
397		    bcmp(&ep->srcmask, sm, sp->sa_len) != 0)
398			continue;
399		if (ep->dst.ss_len != dp->sa_len ||
400		    bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
401		    bcmp(&ep->dstmask, dm, dp->sa_len) != 0)
402			continue;
403
404		error = EEXIST;
405		goto fail;
406	}
407
408	ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK);	/*XXX*/
409	if (ep == NULL) {
410		error = ENOBUFS;
411		goto fail;
412	}
413	bzero(ep, sizeof(*ep));
414
415	ep->af = af;
416	ep->proto = proto;
417	bcopy(sp, &ep->src, sp->sa_len);
418	bcopy(sm, &ep->srcmask, sp->sa_len);
419	bcopy(dp, &ep->dst, dp->sa_len);
420	bcopy(dm, &ep->dstmask, dp->sa_len);
421	ep->psw = psw;
422	ep->arg = arg;
423
424	encap_add(ep);
425
426	error = 0;
427	return ep;
428
429fail:
430	return NULL;
431}
432
433const struct encaptab *
434encap_attach_func(af, proto, func, psw, arg)
435	int af;
436	int proto;
437	int (*func)(const struct mbuf *, int, int, void *);
438	const struct protosw *psw;
439	void *arg;
440{
441	struct encaptab *ep;
442	int error;
443
444	/* sanity check on args */
445	if (!func) {
446		error = EINVAL;
447		goto fail;
448	}
449
450	ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK);	/*XXX*/
451	if (ep == NULL) {
452		error = ENOBUFS;
453		goto fail;
454	}
455	bzero(ep, sizeof(*ep));
456
457	ep->af = af;
458	ep->proto = proto;
459	ep->func = func;
460	ep->psw = psw;
461	ep->arg = arg;
462
463	encap_add(ep);
464
465	error = 0;
466	return ep;
467
468fail:
469	return NULL;
470}
471
472int
473encap_detach(cookie)
474	const struct encaptab *cookie;
475{
476	const struct encaptab *ep = cookie;
477	struct encaptab *p;
478
479	for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) {
480		if (p == ep) {
481			LIST_REMOVE(p, chain);
482			_FREE(p, M_NETADDR);	/*XXX*/
483			return 0;
484		}
485	}
486
487	return EINVAL;
488}
489
490static int
491mask_match(ep, sp, dp)
492	const struct encaptab *ep;
493	const struct sockaddr *sp;
494	const struct sockaddr *dp;
495{
496	struct sockaddr_storage s;
497	struct sockaddr_storage d;
498	int i;
499	const u_int8_t *p, *q;
500	u_int8_t *r;
501	int matchlen;
502
503	if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
504		return 0;
505	if (sp->sa_family != ep->af || dp->sa_family != ep->af)
506		return 0;
507	if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
508		return 0;
509
510	matchlen = 0;
511
512	p = (const u_int8_t *)sp;
513	q = (const u_int8_t *)&ep->srcmask;
514	r = (u_int8_t *)&s;
515	for (i = 0 ; i < sp->sa_len; i++) {
516		r[i] = p[i] & q[i];
517		/* XXX estimate */
518		matchlen += (q[i] ? 8 : 0);
519	}
520
521	p = (const u_int8_t *)dp;
522	q = (const u_int8_t *)&ep->dstmask;
523	r = (u_int8_t *)&d;
524	for (i = 0 ; i < dp->sa_len; i++) {
525		r[i] = p[i] & q[i];
526		/* XXX rough estimate */
527		matchlen += (q[i] ? 8 : 0);
528	}
529
530	/* need to overwrite len/family portion as we don't compare them */
531	s.ss_len = sp->sa_len;
532	s.ss_family = sp->sa_family;
533	d.ss_len = dp->sa_len;
534	d.ss_family = dp->sa_family;
535
536	if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
537	    bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
538		return matchlen;
539	} else
540		return 0;
541}
542
/*
 * Payload of the KERNEL_TAG_TYPE_ENCAP mbuf tag: the opaque argument of
 * the encaptab entry that matched the packet.  It is written by
 * encap_fillarg() and read back by encap_getarg().
 */
struct encaptabtag {
	void			*arg;
};
546
547static void
548encap_fillarg(
549	struct mbuf *m,
550	const struct encaptab *ep)
551{
552	struct m_tag	*tag;
553	struct encaptabtag *et;
554
555	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP,
556					  sizeof(struct encaptabtag), M_WAITOK, m);
557
558	if (tag != NULL) {
559		et = (struct encaptabtag*)(tag + 1);
560		et->arg = ep->arg;
561		m_tag_prepend(m, tag);
562	}
563}
564
565void *
566encap_getarg(m)
567	struct mbuf *m;
568{
569	struct m_tag	*tag;
570	struct encaptabtag *et;
571	void *p = NULL;
572
573	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL);
574	if (tag) {
575		et = (struct encaptabtag*)(tag + 1);
576		p = et->arg;
577		m_tag_delete(m, tag);
578	}
579
580	return p;
581}
582