ip_encap.c revision 335031
1/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 2 3/*- 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31/* 32 * My grandfather said that there's a devil inside tunnelling technology... 33 * 34 * We have surprisingly many protocols that want packets with IP protocol 35 * #4 or #41. Here's a list of protocols that want protocol #41: 36 * RFC1933 configured tunnel 37 * RFC1933 automatic tunnel 38 * RFC2401 IPsec tunnel 39 * RFC2473 IPv6 generic packet tunnelling 40 * RFC2529 6over4 tunnel 41 * mobile-ip6 (uses RFC2473) 42 * RFC3056 6to4 tunnel 43 * isatap tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57/* XXX is M_NETADDR correct? */ 58 59#include <sys/cdefs.h> 60__FBSDID("$FreeBSD: stable/11/sys/netinet/ip_encap.c 335031 2018-06-13 07:14:34Z ae $"); 61 62#include "opt_mrouting.h" 63#include "opt_inet.h" 64#include "opt_inet6.h" 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/lock.h> 69#include <sys/mutex.h> 70#include <sys/socket.h> 71#include <sys/sockio.h> 72#include <sys/mbuf.h> 73#include <sys/errno.h> 74#include <sys/protosw.h> 75#include <sys/queue.h> 76 77#include <net/if.h> 78#include <net/route.h> 79 80#include <netinet/in.h> 81#include <netinet/in_systm.h> 82#include <netinet/ip.h> 83#include <netinet/ip_var.h> 84#include <netinet/ip_encap.h> 85 86#ifdef INET6 87#include <netinet/ip6.h> 88#include <netinet6/ip6_var.h> 89#endif 90 91#include <machine/stdarg.h> 92 93#include <sys/kernel.h> 94#include <sys/malloc.h> 95static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure"); 96 97static void encap_add(struct encaptab *); 98static int mask_match(const struct encaptab *, const struct sockaddr *, 99 const struct sockaddr *); 100static void encap_fillarg(struct mbuf *, void *); 101 102/* 103 * All global variables in ip_encap.c are locked using encapmtx. 104 */ 105static struct mtx encapmtx; 106MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); 107static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab); 108 109#ifdef INET 110int 111encap4_input(struct mbuf **mp, int *offp, int proto) 112{ 113 struct ip *ip; 114 struct mbuf *m; 115 struct sockaddr_in s, d; 116 const struct protosw *psw; 117 struct encaptab *ep, *match; 118 void *arg; 119 int matchprio, off, prio; 120 121 m = *mp; 122 off = *offp; 123 ip = mtod(m, struct ip *); 124 125 bzero(&s, sizeof(s)); 126 s.sin_family = AF_INET; 127 s.sin_len = sizeof(struct sockaddr_in); 128 s.sin_addr = ip->ip_src; 129 bzero(&d, sizeof(d)); 130 d.sin_family = AF_INET; 131 d.sin_len = sizeof(struct sockaddr_in); 132 d.sin_addr = ip->ip_dst; 133 134 arg = NULL; 135 psw = NULL; 136 match = NULL; 137 matchprio = 0; 138 mtx_lock(&encapmtx); 139 LIST_FOREACH(ep, &encaptab, chain) { 140 if (ep->af != AF_INET) 141 continue; 142 if (ep->proto >= 0 && ep->proto != proto) 143 continue; 144 if (ep->func) 145 prio = (*ep->func)(m, off, proto, ep->arg); 146 else { 147 /* 148 * it's inbound traffic, we need to match in reverse 149 * order 150 */ 151 prio = mask_match(ep, (struct sockaddr *)&d, 152 (struct sockaddr *)&s); 153 } 154 155 /* 156 * We prioritize the matches by using bit length of the 157 * matches. mask_match() and user-supplied matching function 158 * should return the bit length of the matches (for example, 159 * if both src/dst are matched for IPv4, 64 should be returned). 160 * 0 or negative return value means "it did not match". 161 * 162 * The question is, since we have two "mask" portion, we 163 * cannot really define total order between entries. 164 * For example, which of these should be preferred? 165 * mask_match() returns 48 (32 + 16) for both of them. 166 * src=3ffe::/16, dst=3ffe:501::/32 167 * src=3ffe:501::/32, dst=3ffe::/16 168 * 169 * We need to loop through all the possible candidates 170 * to get the best match - the search takes O(n) for 171 * n attachments (i.e. interfaces). 172 */ 173 if (prio <= 0) 174 continue; 175 if (prio > matchprio) { 176 matchprio = prio; 177 match = ep; 178 } 179 } 180 if (match != NULL) { 181 psw = match->psw; 182 arg = match->arg; 183 } 184 mtx_unlock(&encapmtx); 185 186 if (match != NULL) { 187 /* found a match, "match" has the best one */ 188 if (psw != NULL && psw->pr_input != NULL) { 189 encap_fillarg(m, arg); 190 (*psw->pr_input)(mp, offp, proto); 191 } else 192 m_freem(m); 193 return (IPPROTO_DONE); 194 } 195 196 /* last resort: inject to raw socket */ 197 return (rip_input(mp, offp, proto)); 198} 199#endif 200 201#ifdef INET6 202int 203encap6_input(struct mbuf **mp, int *offp, int proto) 204{ 205 struct mbuf *m = *mp; 206 struct ip6_hdr *ip6; 207 struct sockaddr_in6 s, d; 208 const struct protosw *psw; 209 struct encaptab *ep, *match; 210 void *arg; 211 int prio, matchprio; 212 213 ip6 = mtod(m, struct ip6_hdr *); 214 215 bzero(&s, sizeof(s)); 216 s.sin6_family = AF_INET6; 217 s.sin6_len = sizeof(struct sockaddr_in6); 218 s.sin6_addr = ip6->ip6_src; 219 bzero(&d, sizeof(d)); 220 d.sin6_family = AF_INET6; 221 d.sin6_len = sizeof(struct sockaddr_in6); 222 d.sin6_addr = ip6->ip6_dst; 223 224 arg = NULL; 225 psw = NULL; 226 match = NULL; 227 matchprio = 0; 228 mtx_lock(&encapmtx); 229 LIST_FOREACH(ep, &encaptab, chain) { 230 if (ep->af != AF_INET6) 231 continue; 232 if (ep->proto >= 0 && ep->proto != proto) 233 continue; 234 if (ep->func) 235 prio = (*ep->func)(m, *offp, proto, ep->arg); 236 else { 237 /* 238 * it's inbound traffic, we need to match in reverse 239 * order 240 */ 241 prio = mask_match(ep, (struct sockaddr *)&d, 242 (struct sockaddr *)&s); 243 } 244 245 /* see encap4_input() for issues here */ 246 if (prio <= 0) 247 continue; 248 if (prio > matchprio) { 249 matchprio = prio; 250 match = ep; 251 } 252 } 253 if (match != NULL) { 254 psw = match->psw; 255 arg = match->arg; 256 } 257 mtx_unlock(&encapmtx); 258 259 if (match != NULL) { 260 /* found a match */ 261 if (psw != NULL && psw->pr_input != NULL) { 262 encap_fillarg(m, arg); 263 return (*psw->pr_input)(mp, offp, proto); 264 } else { 265 m_freem(m); 266 return (IPPROTO_DONE); 267 } 268 } 269 270 /* last resort: inject to raw socket */ 271 return rip6_input(mp, offp, proto); 272} 273#endif 274 275/*lint -sem(encap_add, custodial(1)) */ 276static void 277encap_add(struct encaptab *ep) 278{ 279 280 mtx_assert(&encapmtx, MA_OWNED); 281 LIST_INSERT_HEAD(&encaptab, ep, chain); 282} 283 284/* 285 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 286 * length of mask (sm and dm) is assumed to be same as sp/dp. 287 * Return value will be necessary as input (cookie) for encap_detach(). 288 */ 289const struct encaptab * 290encap_attach(int af, int proto, const struct sockaddr *sp, 291 const struct sockaddr *sm, const struct sockaddr *dp, 292 const struct sockaddr *dm, const struct protosw *psw, void *arg) 293{ 294 struct encaptab *ep; 295 296 /* sanity check on args */ 297 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) 298 return (NULL); 299 if (sp->sa_len != dp->sa_len) 300 return (NULL); 301 if (af != sp->sa_family || af != dp->sa_family) 302 return (NULL); 303 304 /* check if anyone have already attached with exactly same config */ 305 mtx_lock(&encapmtx); 306 LIST_FOREACH(ep, &encaptab, chain) { 307 if (ep->af != af) 308 continue; 309 if (ep->proto != proto) 310 continue; 311 if (ep->src.ss_len != sp->sa_len || 312 bcmp(&ep->src, sp, sp->sa_len) != 0 || 313 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 314 continue; 315 if (ep->dst.ss_len != dp->sa_len || 316 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 317 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 318 continue; 319 320 mtx_unlock(&encapmtx); 321 return (NULL); 322 } 323 324 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 325 if (ep == NULL) { 326 mtx_unlock(&encapmtx); 327 return (NULL); 328 } 329 bzero(ep, sizeof(*ep)); 330 331 ep->af = af; 332 ep->proto = proto; 333 bcopy(sp, &ep->src, sp->sa_len); 334 bcopy(sm, &ep->srcmask, sp->sa_len); 335 bcopy(dp, &ep->dst, dp->sa_len); 336 bcopy(dm, &ep->dstmask, dp->sa_len); 337 ep->psw = psw; 338 ep->arg = arg; 339 340 encap_add(ep); 341 mtx_unlock(&encapmtx); 342 return (ep); 343} 344 345const struct encaptab * 346encap_attach_func(int af, int proto, 347 int (*func)(const struct mbuf *, int, int, void *), 348 const struct protosw *psw, void *arg) 349{ 350 struct encaptab *ep; 351 352 /* sanity check on args */ 353 if (!func) 354 return (NULL); 355 356 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 357 if (ep == NULL) 358 return (NULL); 359 bzero(ep, sizeof(*ep)); 360 361 ep->af = af; 362 ep->proto = proto; 363 ep->func = func; 364 ep->psw = psw; 365 ep->arg = arg; 366 367 mtx_lock(&encapmtx); 368 encap_add(ep); 369 mtx_unlock(&encapmtx); 370 return (ep); 371} 372 373int 374encap_detach(const struct encaptab *cookie) 375{ 376 const struct encaptab *ep = cookie; 377 struct encaptab *p; 378 379 mtx_lock(&encapmtx); 380 LIST_FOREACH(p, &encaptab, chain) { 381 if (p == ep) { 382 LIST_REMOVE(p, chain); 383 mtx_unlock(&encapmtx); 384 free(p, M_NETADDR); /*XXX*/ 385 return 0; 386 } 387 } 388 mtx_unlock(&encapmtx); 389 390 return EINVAL; 391} 392 393static int 394mask_match(const struct encaptab *ep, const struct sockaddr *sp, 395 const struct sockaddr *dp) 396{ 397 struct sockaddr_storage s; 398 struct sockaddr_storage d; 399 int i; 400 const u_int8_t *p, *q; 401 u_int8_t *r; 402 int matchlen; 403 404 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 405 return 0; 406 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 407 return 0; 408 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 409 return 0; 410 411 matchlen = 0; 412 413 p = (const u_int8_t *)sp; 414 q = (const u_int8_t *)&ep->srcmask; 415 r = (u_int8_t *)&s; 416 for (i = 0 ; i < sp->sa_len; i++) { 417 r[i] = p[i] & q[i]; 418 /* XXX estimate */ 419 matchlen += (q[i] ? 8 : 0); 420 } 421 422 p = (const u_int8_t *)dp; 423 q = (const u_int8_t *)&ep->dstmask; 424 r = (u_int8_t *)&d; 425 for (i = 0 ; i < dp->sa_len; i++) { 426 r[i] = p[i] & q[i]; 427 /* XXX rough estimate */ 428 matchlen += (q[i] ? 8 : 0); 429 } 430 431 /* need to overwrite len/family portion as we don't compare them */ 432 s.ss_len = sp->sa_len; 433 s.ss_family = sp->sa_family; 434 d.ss_len = dp->sa_len; 435 d.ss_family = dp->sa_family; 436 437 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 438 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 439 return matchlen; 440 } else 441 return 0; 442} 443 444static void 445encap_fillarg(struct mbuf *m, void *arg) 446{ 447 struct m_tag *tag; 448 449 if (arg != NULL) { 450 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); 451 if (tag != NULL) { 452 *(void**)(tag+1) = arg; 453 m_tag_prepend(m, tag); 454 } 455 } 456} 457 458void * 459encap_getarg(struct mbuf *m) 460{ 461 void *p = NULL; 462 struct m_tag *tag; 463 464 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 465 if (tag) { 466 p = *(void**)(tag+1); 467 m_tag_delete(m, tag); 468 } 469 return p; 470} 471