/* ip_encap.c revision 82884 */
1/* $FreeBSD: head/sys/netinet/ip_encap.c 82884 2001-09-03 20:03:55Z julian $ */ 2/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4/* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32/* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. 
Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * 6to4 tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57/* XXX is M_NETADDR correct? */ 58 59#include "opt_mrouting.h" 60#include "opt_inet.h" 61#include "opt_inet6.h" 62 63#include <sys/param.h> 64#include <sys/systm.h> 65#include <sys/socket.h> 66#include <sys/sockio.h> 67#include <sys/mbuf.h> 68#include <sys/errno.h> 69#include <sys/protosw.h> 70#include <sys/queue.h> 71 72#include <net/if.h> 73#include <net/route.h> 74 75#include <netinet/in.h> 76#include <netinet/in_systm.h> 77#include <netinet/ip.h> 78#include <netinet/ip_var.h> 79#include <netinet/ip_encap.h> 80 81#ifdef INET6 82#include <netinet/ip6.h> 83#include <netinet6/ip6_var.h> 84#include <netinet6/ip6protosw.h> 85#endif 86 87#include <machine/stdarg.h> 88 89#include <net/net_osdep.h> 90 91#include <sys/kernel.h> 92#include <sys/malloc.h> 93static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 94 95static void encap_add __P((struct encaptab *)); 96static int mask_match __P((const struct encaptab *, const struct sockaddr *, 97 const struct sockaddr *)); 98static void encap_fillarg __P((struct mbuf *, const struct encaptab *)); 99 100#ifndef 
LIST_HEAD_INITIALIZER 101/* rely upon BSS initialization */ 102LIST_HEAD(, encaptab) encaptab; 103#else 104LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 105#endif 106 107void 108encap_init() 109{ 110 static int initialized = 0; 111 112 if (initialized) 113 return; 114 initialized++; 115#if 0 116 /* 117 * we cannot use LIST_INIT() here, since drivers may want to call 118 * encap_attach(), on driver attach. encap_init() will be called 119 * on AF_INET{,6} initialization, which happens after driver 120 * initialization - using LIST_INIT() here can nuke encap_attach() 121 * from drivers. 122 */ 123 LIST_INIT(&encaptab); 124#endif 125} 126 127#ifdef INET 128void 129#if __STDC__ 130encap4_input(struct mbuf *m, ...) 131#else 132encap4_input(m, va_alist) 133 struct mbuf *m; 134 va_dcl 135#endif 136{ 137 int off, proto; 138 struct ip *ip; 139 struct sockaddr_in s, d; 140 const struct protosw *psw; 141 struct encaptab *ep, *match; 142 va_list ap; 143 int prio, matchprio; 144 145 va_start(ap, m); 146 off = va_arg(ap, int); 147 va_end(ap); 148 149 ip = mtod(m, struct ip *); 150 proto = ip->ip_p; 151 152 bzero(&s, sizeof(s)); 153 s.sin_family = AF_INET; 154 s.sin_len = sizeof(struct sockaddr_in); 155 s.sin_addr = ip->ip_src; 156 bzero(&d, sizeof(d)); 157 d.sin_family = AF_INET; 158 d.sin_len = sizeof(struct sockaddr_in); 159 d.sin_addr = ip->ip_dst; 160 161 match = NULL; 162 matchprio = 0; 163 LIST_FOREACH(ep, &encaptab, chain) { 164 if (ep->af != AF_INET) 165 continue; 166 if (ep->proto >= 0 && ep->proto != proto) 167 continue; 168 if (ep->func) 169 prio = (*ep->func)(m, off, proto, ep->arg); 170 else { 171 /* 172 * it's inbound traffic, we need to match in reverse 173 * order 174 */ 175 prio = mask_match(ep, (struct sockaddr *)&d, 176 (struct sockaddr *)&s); 177 } 178 179 /* 180 * We prioritize the matches by using bit length of the 181 * matches. 
mask_match() and user-supplied matching function 182 * should return the bit length of the matches (for example, 183 * if both src/dst are matched for IPv4, 64 should be returned). 184 * 0 or negative return value means "it did not match". 185 * 186 * The question is, since we have two "mask" portion, we 187 * cannot really define total order between entries. 188 * For example, which of these should be preferred? 189 * mask_match() returns 48 (32 + 16) for both of them. 190 * src=3ffe::/16, dst=3ffe:501::/32 191 * src=3ffe:501::/32, dst=3ffe::/16 192 * 193 * We need to loop through all the possible candidates 194 * to get the best match - the search takes O(n) for 195 * n attachments (i.e. interfaces). 196 */ 197 if (prio <= 0) 198 continue; 199 if (prio > matchprio) { 200 matchprio = prio; 201 match = ep; 202 } 203 } 204 205 if (match) { 206 /* found a match, "match" has the best one */ 207 psw = match->psw; 208 if (psw && psw->pr_input) { 209 encap_fillarg(m, match); 210 (*psw->pr_input)(m, off); 211 } else 212 m_freem(m); 213 return; 214 } 215 216 /* for backward compatibility - messy... 
*/ 217 if (proto == IPPROTO_IPV4) { 218 ipip_input(m, off); 219 return; 220 } 221 222 /* last resort: inject to raw socket */ 223 rip_input(m, off); 224} 225#endif 226 227#ifdef INET6 228int 229encap6_input(mp, offp, proto) 230 struct mbuf **mp; 231 int *offp; 232 int proto; 233{ 234 struct mbuf *m = *mp; 235 struct ip6_hdr *ip6; 236 struct sockaddr_in6 s, d; 237 const struct ip6protosw *psw; 238 struct encaptab *ep, *match; 239 int prio, matchprio; 240 241 ip6 = mtod(m, struct ip6_hdr *); 242 243 bzero(&s, sizeof(s)); 244 s.sin6_family = AF_INET6; 245 s.sin6_len = sizeof(struct sockaddr_in6); 246 s.sin6_addr = ip6->ip6_src; 247 bzero(&d, sizeof(d)); 248 d.sin6_family = AF_INET6; 249 d.sin6_len = sizeof(struct sockaddr_in6); 250 d.sin6_addr = ip6->ip6_dst; 251 252 match = NULL; 253 matchprio = 0; 254 LIST_FOREACH(ep, &encaptab, chain) { 255 if (ep->af != AF_INET6) 256 continue; 257 if (ep->proto >= 0 && ep->proto != proto) 258 continue; 259 if (ep->func) 260 prio = (*ep->func)(m, *offp, proto, ep->arg); 261 else { 262 /* 263 * it's inbound traffic, we need to match in reverse 264 * order 265 */ 266 prio = mask_match(ep, (struct sockaddr *)&d, 267 (struct sockaddr *)&s); 268 } 269 270 /* see encap4_input() for issues here */ 271 if (prio <= 0) 272 continue; 273 if (prio > matchprio) { 274 matchprio = prio; 275 match = ep; 276 } 277 } 278 279 if (match) { 280 /* found a match */ 281 psw = (const struct ip6protosw *)match->psw; 282 if (psw && psw->pr_input) { 283 encap_fillarg(m, match); 284 return (*psw->pr_input)(mp, offp, proto); 285 } else { 286 m_freem(m); 287 return IPPROTO_DONE; 288 } 289 } 290 291 /* last resort: inject to raw socket */ 292 return rip6_input(mp, offp, proto); 293} 294#endif 295 296static void 297encap_add(ep) 298 struct encaptab *ep; 299{ 300 301 LIST_INSERT_HEAD(&encaptab, ep, chain); 302} 303 304/* 305 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 306 * length of mask (sm and dm) is assumed to be same as sp/dp. 
307 * Return value will be necessary as input (cookie) for encap_detach(). 308 */ 309const struct encaptab * 310encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 311 int af; 312 int proto; 313 const struct sockaddr *sp, *sm; 314 const struct sockaddr *dp, *dm; 315 const struct protosw *psw; 316 void *arg; 317{ 318 struct encaptab *ep; 319 int error; 320 int s; 321 322 s = splnet(); 323 /* sanity check on args */ 324 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { 325 error = EINVAL; 326 goto fail; 327 } 328 if (sp->sa_len != dp->sa_len) { 329 error = EINVAL; 330 goto fail; 331 } 332 if (af != sp->sa_family || af != dp->sa_family) { 333 error = EINVAL; 334 goto fail; 335 } 336 337 /* check if anyone have already attached with exactly same config */ 338 LIST_FOREACH(ep, &encaptab, chain) { 339 if (ep->af != af) 340 continue; 341 if (ep->proto != proto) 342 continue; 343 if (ep->src.ss_len != sp->sa_len || 344 bcmp(&ep->src, sp, sp->sa_len) != 0 || 345 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 346 continue; 347 if (ep->dst.ss_len != dp->sa_len || 348 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 349 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 350 continue; 351 352 error = EEXIST; 353 goto fail; 354 } 355 356 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 357 if (ep == NULL) { 358 error = ENOBUFS; 359 goto fail; 360 } 361 bzero(ep, sizeof(*ep)); 362 363 ep->af = af; 364 ep->proto = proto; 365 bcopy(sp, &ep->src, sp->sa_len); 366 bcopy(sm, &ep->srcmask, sp->sa_len); 367 bcopy(dp, &ep->dst, dp->sa_len); 368 bcopy(dm, &ep->dstmask, dp->sa_len); 369 ep->psw = psw; 370 ep->arg = arg; 371 372 encap_add(ep); 373 374 error = 0; 375 splx(s); 376 return ep; 377 378fail: 379 splx(s); 380 return NULL; 381} 382 383const struct encaptab * 384encap_attach_func(af, proto, func, psw, arg) 385 int af; 386 int proto; 387 int (*func) __P((const struct mbuf *, int, int, void *)); 388 const struct protosw *psw; 389 void *arg; 390{ 391 struct encaptab *ep; 392 int error; 393 
int s; 394 395 s = splnet(); 396 /* sanity check on args */ 397 if (!func) { 398 error = EINVAL; 399 goto fail; 400 } 401 402 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 403 if (ep == NULL) { 404 error = ENOBUFS; 405 goto fail; 406 } 407 bzero(ep, sizeof(*ep)); 408 409 ep->af = af; 410 ep->proto = proto; 411 ep->func = func; 412 ep->psw = psw; 413 ep->arg = arg; 414 415 encap_add(ep); 416 417 error = 0; 418 splx(s); 419 return ep; 420 421fail: 422 splx(s); 423 return NULL; 424} 425 426int 427encap_detach(cookie) 428 const struct encaptab *cookie; 429{ 430 const struct encaptab *ep = cookie; 431 struct encaptab *p; 432 433 LIST_FOREACH(p, &encaptab, chain) { 434 if (p == ep) { 435 LIST_REMOVE(p, chain); 436 free(p, M_NETADDR); /*XXX*/ 437 return 0; 438 } 439 } 440 441 return EINVAL; 442} 443 444static int 445mask_match(ep, sp, dp) 446 const struct encaptab *ep; 447 const struct sockaddr *sp; 448 const struct sockaddr *dp; 449{ 450 struct sockaddr_storage s; 451 struct sockaddr_storage d; 452 int i; 453 const u_int8_t *p, *q; 454 u_int8_t *r; 455 int matchlen; 456 457 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 458 return 0; 459 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 460 return 0; 461 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 462 return 0; 463 464 matchlen = 0; 465 466 p = (const u_int8_t *)sp; 467 q = (const u_int8_t *)&ep->srcmask; 468 r = (u_int8_t *)&s; 469 for (i = 0 ; i < sp->sa_len; i++) { 470 r[i] = p[i] & q[i]; 471 /* XXX estimate */ 472 matchlen += (q[i] ? 8 : 0); 473 } 474 475 p = (const u_int8_t *)dp; 476 q = (const u_int8_t *)&ep->dstmask; 477 r = (u_int8_t *)&d; 478 for (i = 0 ; i < dp->sa_len; i++) { 479 r[i] = p[i] & q[i]; 480 /* XXX rough estimate */ 481 matchlen += (q[i] ? 
8 : 0); 482 } 483 484 /* need to overwrite len/family portion as we don't compare them */ 485 s.ss_len = sp->sa_len; 486 s.ss_family = sp->sa_family; 487 d.ss_len = dp->sa_len; 488 d.ss_family = dp->sa_family; 489 490 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 491 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 492 return matchlen; 493 } else 494 return 0; 495} 496 497static void 498encap_fillarg(m, ep) 499 struct mbuf *m; 500 const struct encaptab *ep; 501{ 502#if 0 503 m->m_pkthdr.aux = ep->arg; 504#else 505 struct mbuf *n; 506 507 n = m_aux_add(m, AF_INET, IPPROTO_IPV4); 508 if (n) { 509 *mtod(n, void **) = ep->arg; 510 n->m_len = sizeof(void *); 511 } 512#endif 513} 514 515void * 516encap_getarg(m) 517 struct mbuf *m; 518{ 519 void *p; 520#if 0 521 p = m->m_pkthdr.aux; 522 m->m_pkthdr.aux = NULL; 523 return p; 524#else 525 struct mbuf *n; 526 527 p = NULL; 528 n = m_aux_find(m, AF_INET, IPPROTO_IPV4); 529 if (n) { 530 if (n->m_len == sizeof(void *)) 531 p = *mtod(n, void **); 532 m_aux_delete(m, n); 533 } 534 return p; 535#endif 536} 537