1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.2 2001/07/03 11:01:46 ume Exp $ */ 29/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 30 31/* 32 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 33 * All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the project nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 */ 59/* 60 * My grandfather said that there's a devil inside tunnelling technology... 61 * 62 * We have surprisingly many protocols that want packets with IP protocol 63 * #4 or #41. Here's a list of protocols that want protocol #41: 64 * RFC1933 configured tunnel 65 * RFC1933 automatic tunnel 66 * RFC2401 IPsec tunnel 67 * RFC2473 IPv6 generic packet tunnelling 68 * RFC2529 6over4 tunnel 69 * mobile-ip6 (uses RFC2473) 70 * 6to4 tunnel 71 * Here's a list of protocol that want protocol #4: 72 * RFC1853 IPv4-in-IPv4 tunnelling 73 * RFC2003 IPv4 encapsulation within IPv4 74 * RFC2344 reverse tunnelling for mobile-ip4 75 * RFC2401 IPsec tunnel 76 * Well, what can I say. They impose different en/decapsulation mechanism 77 * from each other, so they need separate protocol handler. The only one 78 * we can easily determine by protocol # is IPsec, which always has 79 * AH/ESP/IPComp header right after outer IP header. 80 * 81 * So, clearly good old protosw does not work for protocol #4 and #41. 82 * The code will let you match protocol via src/dst address pair. 83 */ 84/* XXX is M_NETADDR correct? */ 85 86#include <sys/param.h> 87#include <sys/systm.h> 88#include <sys/socket.h> 89#include <sys/sockio.h> 90#include <sys/mbuf.h> 91#include <sys/mcache.h> 92#include <sys/errno.h> 93#include <sys/domain.h> 94#include <sys/protosw.h> 95#include <sys/queue.h> 96 97#include <net/if.h> 98#include <net/route.h> 99 100#include <netinet/in.h> 101#include <netinet/in_systm.h> 102#include <netinet/ip.h> 103#include <netinet/ip_var.h> 104#include <netinet/ip_encap.h> 105#if MROUTING 106#include <netinet/ip_mroute.h> 107#endif /* MROUTING */ 108 109#if INET6 110#include <netinet/ip6.h> 111#include <netinet6/ip6_var.h> 112#include <netinet6/ip6protosw.h> 113#endif 114 115 116#include <net/net_osdep.h> 117 118#ifndef __APPLE__ 119#include <sys/kernel.h> 120#include <sys/malloc.h> 121MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 122#endif 123 124static void encap_init(struct protosw *, struct domain *); 125static void encap_add(struct encaptab *); 126static int mask_match(const struct encaptab *, const struct sockaddr *, 127 const struct sockaddr *); 128static void encap_fillarg(struct mbuf *, const struct encaptab *); 129 130#ifndef LIST_HEAD_INITIALIZER 131/* rely upon BSS initialization */ 132LIST_HEAD(, encaptab) encaptab; 133#else 134LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 135#endif 136 137static void 138encap_init(struct protosw *pp, struct domain *dp) 139{ 140#pragma unused(dp) 141 static int encap_initialized = 0; 142 143 VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); 144 145 /* This gets called by more than one protocols, so initialize once */ 146 if (encap_initialized) 147 return; 148 encap_initialized = 1; 149#if 0 150 /* 151 * we cannot use LIST_INIT() here, since drivers may want to call 152 * encap_attach(), on driver attach. encap_init() will be called 153 * on AF_INET{,6} initialization, which happens after driver 154 * initialization - using LIST_INIT() here can nuke encap_attach() 155 * from drivers. 156 */ 157 LIST_INIT(&encaptab); 158#endif 159} 160 161void 162encap4_init(struct protosw *pp, struct domain *dp) 163{ 164 encap_init(pp, dp); 165} 166 167void 168encap6_init(struct ip6protosw *pp, struct domain *dp) 169{ 170 encap_init((struct protosw *)pp, dp); 171} 172 173#if INET 174void 175encap4_input(m, off) 176 struct mbuf *m; 177 int off; 178{ 179 int proto; 180 struct ip *ip; 181 struct sockaddr_in s, d; 182 const struct protosw *psw; 183 struct encaptab *ep, *match; 184 int prio, matchprio; 185 186#ifndef __APPLE__ 187 va_start(ap, m); 188 off = va_arg(ap, int); 189 proto = va_arg(ap, int); 190 va_end(ap); 191#endif 192 193 /* Expect 32-bit aligned data pointer on strict-align platforms */ 194 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); 195 196 ip = mtod(m, struct ip *); 197#ifdef __APPLE__ 198 proto = ip->ip_p; 199#endif 200 201 bzero(&s, sizeof(s)); 202 s.sin_family = AF_INET; 203 s.sin_len = sizeof(struct sockaddr_in); 204 s.sin_addr = ip->ip_src; 205 bzero(&d, sizeof(d)); 206 d.sin_family = AF_INET; 207 d.sin_len = sizeof(struct sockaddr_in); 208 d.sin_addr = ip->ip_dst; 209 210 match = NULL; 211 matchprio = 0; 212 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 213 if (ep->af != AF_INET) 214 continue; 215 if (ep->proto >= 0 && ep->proto != proto) 216 continue; 217 if (ep->func) 218 prio = (*ep->func)(m, off, proto, ep->arg); 219 else { 220 /* 221 * it's inbound traffic, we need to match in reverse 222 * order 223 */ 224 prio = mask_match(ep, (struct sockaddr *)&d, 225 (struct sockaddr *)&s); 226 } 227 228 /* 229 * We prioritize the matches by using bit length of the 230 * matches. mask_match() and user-supplied matching function 231 * should return the bit length of the matches (for example, 232 * if both src/dst are matched for IPv4, 64 should be returned). 233 * 0 or negative return value means "it did not match". 234 * 235 * The question is, since we have two "mask" portion, we 236 * cannot really define total order between entries. 237 * For example, which of these should be preferred? 238 * mask_match() returns 48 (32 + 16) for both of them. 239 * src=3ffe::/16, dst=3ffe:501::/32 240 * src=3ffe:501::/32, dst=3ffe::/16 241 * 242 * We need to loop through all the possible candidates 243 * to get the best match - the search takes O(n) for 244 * n attachments (i.e. interfaces). 245 */ 246 if (prio <= 0) 247 continue; 248 if (prio > matchprio) { 249 matchprio = prio; 250 match = ep; 251 } 252 } 253 254 if (match) { 255 /* found a match, "match" has the best one */ 256 psw = (const struct protosw *)match->psw; 257 if (psw && psw->pr_input) { 258 encap_fillarg(m, match); 259 (*psw->pr_input)(m, off); 260 } else 261 m_freem(m); 262 return; 263 } 264 265 /* for backward compatibility */ 266# if MROUTING 267# define COMPATFUNC ipip_input 268# endif /*MROUTING*/ 269 270#if COMPATFUNC 271 if (proto == IPPROTO_IPV4) { 272 COMPATFUNC(m, off); 273 return; 274 } 275#endif 276 277 /* last resort: inject to raw socket */ 278 rip_input(m, off); 279} 280#endif 281 282#if INET6 283int 284encap6_input(struct mbuf **mp, int *offp, int proto) 285{ 286 struct mbuf *m = *mp; 287 struct ip6_hdr *ip6; 288 struct sockaddr_in6 s, d; 289 const struct ip6protosw *psw; 290 struct encaptab *ep, *match; 291 int prio, matchprio; 292 293 /* Expect 32-bit aligned data pointer on strict-align platforms */ 294 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); 295 296 ip6 = mtod(m, struct ip6_hdr *); 297 bzero(&s, sizeof(s)); 298 s.sin6_family = AF_INET6; 299 s.sin6_len = sizeof(struct sockaddr_in6); 300 s.sin6_addr = ip6->ip6_src; 301 bzero(&d, sizeof(d)); 302 d.sin6_family = AF_INET6; 303 d.sin6_len = sizeof(struct sockaddr_in6); 304 d.sin6_addr = ip6->ip6_dst; 305 306 match = NULL; 307 matchprio = 0; 308 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 309 if (ep->af != AF_INET6) 310 continue; 311 if (ep->proto >= 0 && ep->proto != proto) 312 continue; 313 if (ep->func) 314 prio = (*ep->func)(m, *offp, proto, ep->arg); 315 else { 316 /* 317 * it's inbound traffic, we need to match in reverse 318 * order 319 */ 320 prio = mask_match(ep, (struct sockaddr *)&d, 321 (struct sockaddr *)&s); 322 } 323 324 /* see encap4_input() for issues here */ 325 if (prio <= 0) 326 continue; 327 if (prio > matchprio) { 328 matchprio = prio; 329 match = ep; 330 } 331 } 332 333 if (match) { 334 /* found a match */ 335 psw = (const struct ip6protosw *)match->psw; 336 if (psw && psw->pr_input) { 337 encap_fillarg(m, match); 338 return (*psw->pr_input)(mp, offp, proto); 339 } else { 340 m_freem(m); 341 return IPPROTO_DONE; 342 } 343 } 344 345 /* last resort: inject to raw socket */ 346 return rip6_input(mp, offp, proto); 347} 348#endif 349 350static void 351encap_add(ep) 352 struct encaptab *ep; 353{ 354 355 LIST_INSERT_HEAD(&encaptab, ep, chain); 356} 357 358/* 359 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 360 * length of mask (sm and dm) is assumed to be same as sp/dp. 361 * Return value will be necessary as input (cookie) for encap_detach(). 362 */ 363const struct encaptab * 364encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 365 int af; 366 int proto; 367 const struct sockaddr *sp, *sm; 368 const struct sockaddr *dp, *dm; 369 const struct protosw *psw; 370 void *arg; 371{ 372 struct encaptab *ep; 373 int error; 374 375 /* sanity check on args */ 376 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { 377 error = EINVAL; 378 goto fail; 379 } 380 if (sp->sa_len != dp->sa_len) { 381 error = EINVAL; 382 goto fail; 383 } 384 if (af != sp->sa_family || af != dp->sa_family) { 385 error = EINVAL; 386 goto fail; 387 } 388 389 /* check if anyone have already attached with exactly same config */ 390 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 391 if (ep->af != af) 392 continue; 393 if (ep->proto != proto) 394 continue; 395 if (ep->src.ss_len != sp->sa_len || 396 bcmp(&ep->src, sp, sp->sa_len) != 0 || 397 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 398 continue; 399 if (ep->dst.ss_len != dp->sa_len || 400 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 401 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 402 continue; 403 404 error = EEXIST; 405 goto fail; 406 } 407 408 ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/ 409 if (ep == NULL) { 410 error = ENOBUFS; 411 goto fail; 412 } 413 bzero(ep, sizeof(*ep)); 414 415 ep->af = af; 416 ep->proto = proto; 417 bcopy(sp, &ep->src, sp->sa_len); 418 bcopy(sm, &ep->srcmask, sp->sa_len); 419 bcopy(dp, &ep->dst, dp->sa_len); 420 bcopy(dm, &ep->dstmask, dp->sa_len); 421 ep->psw = psw; 422 ep->arg = arg; 423 424 encap_add(ep); 425 426 error = 0; 427 return ep; 428 429fail: 430 return NULL; 431} 432 433const struct encaptab * 434encap_attach_func(af, proto, func, psw, arg) 435 int af; 436 int proto; 437 int (*func)(const struct mbuf *, int, int, void *); 438 const struct protosw *psw; 439 void *arg; 440{ 441 struct encaptab *ep; 442 int error; 443 444 /* sanity check on args */ 445 if (!func) { 446 error = EINVAL; 447 goto fail; 448 } 449 450 ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/ 451 if (ep == NULL) { 452 error = ENOBUFS; 453 goto fail; 454 } 455 bzero(ep, sizeof(*ep)); 456 457 ep->af = af; 458 ep->proto = proto; 459 ep->func = func; 460 ep->psw = psw; 461 ep->arg = arg; 462 463 encap_add(ep); 464 465 error = 0; 466 return ep; 467 468fail: 469 return NULL; 470} 471 472int 473encap_detach(cookie) 474 const struct encaptab *cookie; 475{ 476 const struct encaptab *ep = cookie; 477 struct encaptab *p; 478 479 for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { 480 if (p == ep) { 481 LIST_REMOVE(p, chain); 482 _FREE(p, M_NETADDR); /*XXX*/ 483 return 0; 484 } 485 } 486 487 return EINVAL; 488} 489 490static int 491mask_match(ep, sp, dp) 492 const struct encaptab *ep; 493 const struct sockaddr *sp; 494 const struct sockaddr *dp; 495{ 496 struct sockaddr_storage s; 497 struct sockaddr_storage d; 498 int i; 499 const u_int8_t *p, *q; 500 u_int8_t *r; 501 int matchlen; 502 503 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 504 return 0; 505 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 506 return 0; 507 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 508 return 0; 509 510 matchlen = 0; 511 512 p = (const u_int8_t *)sp; 513 q = (const u_int8_t *)&ep->srcmask; 514 r = (u_int8_t *)&s; 515 for (i = 0 ; i < sp->sa_len; i++) { 516 r[i] = p[i] & q[i]; 517 /* XXX estimate */ 518 matchlen += (q[i] ? 8 : 0); 519 } 520 521 p = (const u_int8_t *)dp; 522 q = (const u_int8_t *)&ep->dstmask; 523 r = (u_int8_t *)&d; 524 for (i = 0 ; i < dp->sa_len; i++) { 525 r[i] = p[i] & q[i]; 526 /* XXX rough estimate */ 527 matchlen += (q[i] ? 8 : 0); 528 } 529 530 /* need to overwrite len/family portion as we don't compare them */ 531 s.ss_len = sp->sa_len; 532 s.ss_family = sp->sa_family; 533 d.ss_len = dp->sa_len; 534 d.ss_family = dp->sa_family; 535 536 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 537 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 538 return matchlen; 539 } else 540 return 0; 541} 542 543struct encaptabtag { 544 void* *arg; 545}; 546 547static void 548encap_fillarg( 549 struct mbuf *m, 550 const struct encaptab *ep) 551{ 552 struct m_tag *tag; 553 struct encaptabtag *et; 554 555 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, 556 sizeof(struct encaptabtag), M_WAITOK, m); 557 558 if (tag != NULL) { 559 et = (struct encaptabtag*)(tag + 1); 560 et->arg = ep->arg; 561 m_tag_prepend(m, tag); 562 } 563} 564 565void * 566encap_getarg(m) 567 struct mbuf *m; 568{ 569 struct m_tag *tag; 570 struct encaptabtag *et; 571 void *p = NULL; 572 573 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL); 574 if (tag) { 575 et = (struct encaptabtag*)(tag + 1); 576 p = et->arg; 577 m_tag_delete(m, tag); 578 } 579 580 return p; 581} 582