1/* $NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $ */ 2/* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */ 3 4/* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32/* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * RFC3056 6to4 tunnel 43 * isatap tunnel 44 * mobile-ip6 (uses RFC2473) 45 * Here's a list of protocol that want protocol #4: 46 * RFC1853 IPv4-in-IPv4 tunnelling 47 * RFC2003 IPv4 encapsulation within IPv4 48 * RFC2344 reverse tunnelling for mobile-ip4 49 * RFC2401 IPsec tunnel 50 * Well, what can I say. They impose different en/decapsulation mechanism 51 * from each other, so they need separate protocol handler. The only one 52 * we can easily determine by protocol # is IPsec, which always has 53 * AH/ESP/IPComp header right after outer IP header. 54 * 55 * So, clearly good old protosw does not work for protocol #4 and #41. 56 * The code will let you match protocol via src/dst address pair. 57 */ 58/* XXX is M_NETADDR correct? */ 59 60#include <sys/cdefs.h> 61__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.77 2022/12/07 08:33:02 knakahara Exp $"); 62 63#ifdef _KERNEL_OPT 64#include "opt_mrouting.h" 65#include "opt_inet.h" 66#include "opt_net_mpsafe.h" 67#endif 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/socket.h> 72#include <sys/socketvar.h> /* for softnet_lock */ 73#include <sys/sockio.h> 74#include <sys/mbuf.h> 75#include <sys/errno.h> 76#include <sys/queue.h> 77#include <sys/kmem.h> 78#include <sys/mutex.h> 79#include <sys/condvar.h> 80#include <sys/psref.h> 81#include <sys/pslist.h> 82#include <sys/thmap.h> 83 84#include <net/if.h> 85 86#include <netinet/in.h> 87#include <netinet/in_systm.h> 88#include <netinet/ip.h> 89#include <netinet/ip_var.h> 90#include <netinet/ip_encap.h> 91#ifdef MROUTING 92#include <netinet/ip_mroute.h> 93#endif /* MROUTING */ 94 95#ifdef INET6 96#include <netinet/ip6.h> 97#include <netinet6/ip6_var.h> 98#include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */ 99#include <netinet6/in6_var.h> 100#include <netinet6/in6_pcb.h> 101#include <netinet/icmp6.h> 102#endif 103 104#ifdef NET_MPSAFE 105#define ENCAP_MPSAFE 1 106#endif 107 108enum direction { INBOUND, OUTBOUND }; 109 110#ifdef INET 111static struct encaptab *encap4_lookup(struct mbuf *, int, int, enum direction, 112 struct psref *); 113#endif 114#ifdef INET6 115static struct encaptab *encap6_lookup(struct mbuf *, int, int, enum direction, 116 struct psref *); 117#endif 118static int encap_add(struct encaptab *); 119static int encap_remove(struct encaptab *); 120static void encap_afcheck(int, const struct sockaddr *, const struct sockaddr *); 121static void encap_key_init(struct encap_key *, const struct sockaddr *, 122 const struct sockaddr *); 123static void encap_key_inc(struct encap_key *); 124 125/* 126 * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking 127 * encap_table. So, it cannot use pserialize_read_enter() 128 */ 129static struct { 130 struct pslist_head list; 131 pserialize_t psz; 132 struct psref_class *elem_class; /* for the element of et_list */ 133} encaptab __cacheline_aligned = { 134 .list = PSLIST_INITIALIZER, 135}; 136#define encap_table encaptab.list 137 138static struct { 139 kmutex_t lock; 140 kcondvar_t cv; 141 struct lwp *busy; 142} encap_whole __cacheline_aligned; 143 144static thmap_t *encap_map[2]; /* 0 for AF_INET, 1 for AF_INET6 */ 145 146static bool encap_initialized = false; 147/* 148 * must be done before other encap interfaces initialization. 149 */ 150void 151encapinit(void) 152{ 153 154 if (encap_initialized) 155 return; 156 157 encaptab.psz = pserialize_create(); 158 encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET); 159 160 mutex_init(&encap_whole.lock, MUTEX_DEFAULT, IPL_NONE); 161 cv_init(&encap_whole.cv, "ip_encap cv"); 162 encap_whole.busy = NULL; 163 164 encap_initialized = true; 165} 166 167void 168encap_init(void) 169{ 170 static int initialized = 0; 171 172 if (initialized) 173 return; 174 initialized++; 175#if 0 176 /* 177 * we cannot use LIST_INIT() here, since drivers may want to call 178 * encap_attach(), on driver attach. encap_init() will be called 179 * on AF_INET{,6} initialization, which happens after driver 180 * initialization - using LIST_INIT() here can nuke encap_attach() 181 * from drivers. 182 */ 183 PSLIST_INIT(&encap_table); 184#endif 185 186 encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY); 187#ifdef INET6 188 encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY); 189#endif 190} 191 192#ifdef INET 193static struct encaptab * 194encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir, 195 struct psref *match_psref) 196{ 197 struct ip *ip; 198 struct ip_pack4 pack; 199 struct encaptab *ep, *match; 200 int prio, matchprio; 201 int s; 202 thmap_t *emap = encap_map[0]; 203 struct encap_key key; 204 205 KASSERT(m->m_len >= sizeof(*ip)); 206 207 ip = mtod(m, struct ip *); 208 209 memset(&pack, 0, sizeof(pack)); 210 pack.p.sp_len = sizeof(pack); 211 pack.mine.sin_family = pack.yours.sin_family = AF_INET; 212 pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in); 213 if (dir == INBOUND) { 214 pack.mine.sin_addr = ip->ip_dst; 215 pack.yours.sin_addr = ip->ip_src; 216 } else { 217 pack.mine.sin_addr = ip->ip_src; 218 pack.yours.sin_addr = ip->ip_dst; 219 } 220 221 match = NULL; 222 matchprio = 0; 223 224 s = pserialize_read_enter(); 225 226 encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours)); 227 while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { 228 struct psref elem_psref; 229 230 KASSERT(ep->af == AF_INET); 231 232 if (ep->proto >= 0 && ep->proto != proto) { 233 encap_key_inc(&key); 234 continue; 235 } 236 237 psref_acquire(&elem_psref, &ep->psref, 238 encaptab.elem_class); 239 if (ep->func) { 240 pserialize_read_exit(s); 241 prio = (*ep->func)(m, off, proto, ep->arg); 242 s = pserialize_read_enter(); 243 } else { 244 prio = pack.mine.sin_len + pack.yours.sin_len; 245 } 246 247 if (prio <= 0) { 248 psref_release(&elem_psref, &ep->psref, 249 encaptab.elem_class); 250 encap_key_inc(&key); 251 continue; 252 } 253 if (prio > matchprio) { 254 /* release last matched ep */ 255 if (match != NULL) 256 psref_release(match_psref, &match->psref, 257 encaptab.elem_class); 258 259 psref_copy(match_psref, &elem_psref, 260 encaptab.elem_class); 261 matchprio = prio; 262 match = ep; 263 } 264 265 psref_release(&elem_psref, &ep->psref, 266 encaptab.elem_class); 267 encap_key_inc(&key); 268 } 269 270 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 271 struct psref elem_psref; 272 273 if (ep->af != AF_INET) 274 continue; 275 if (ep->proto >= 0 && ep->proto != proto) 276 continue; 277 278 psref_acquire(&elem_psref, &ep->psref, 279 encaptab.elem_class); 280 pserialize_read_exit(s); 281 /* ep->func is sleepable. e.g. rtalloc1 */ 282 prio = (*ep->func)(m, off, proto, ep->arg); 283 s = pserialize_read_enter(); 284 285 /* 286 * We prioritize the matches by using bit length of the 287 * matches. user-supplied matching function 288 * should return the bit length of the matches (for example, 289 * if both src/dst are matched for IPv4, 64 should be returned). 290 * 0 or negative return value means "it did not match". 291 * 292 * We need to loop through all the possible candidates 293 * to get the best match - the search takes O(n) for 294 * n attachments (i.e. interfaces). 295 */ 296 if (prio <= 0) { 297 psref_release(&elem_psref, &ep->psref, 298 encaptab.elem_class); 299 continue; 300 } 301 if (prio > matchprio) { 302 /* release last matched ep */ 303 if (match != NULL) 304 psref_release(match_psref, &match->psref, 305 encaptab.elem_class); 306 307 psref_copy(match_psref, &elem_psref, 308 encaptab.elem_class); 309 matchprio = prio; 310 match = ep; 311 } 312 KASSERTMSG((match == NULL) || psref_held(&match->psref, 313 encaptab.elem_class), 314 "current match = %p, but not hold its psref", match); 315 316 psref_release(&elem_psref, &ep->psref, 317 encaptab.elem_class); 318 } 319 pserialize_read_exit(s); 320 321 return match; 322} 323 324void 325encap4_input(struct mbuf *m, int off, int proto) 326{ 327 const struct encapsw *esw; 328 struct encaptab *match; 329 struct psref match_psref; 330 331 match = encap4_lookup(m, off, proto, INBOUND, &match_psref); 332 if (match) { 333 /* found a match, "match" has the best one */ 334 esw = match->esw; 335 if (esw && esw->encapsw4.pr_input) { 336 (*esw->encapsw4.pr_input)(m, off, proto, match->arg); 337 psref_release(&match_psref, &match->psref, 338 encaptab.elem_class); 339 } else { 340 psref_release(&match_psref, &match->psref, 341 encaptab.elem_class); 342 m_freem(m); 343 } 344 return; 345 } 346 347 /* last resort: inject to raw socket */ 348 SOFTNET_LOCK_IF_NET_MPSAFE(); 349 rip_input(m, off, proto); 350 SOFTNET_UNLOCK_IF_NET_MPSAFE(); 351} 352#endif 353 354#ifdef INET6 355static struct encaptab * 356encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir, 357 struct psref *match_psref) 358{ 359 struct ip6_hdr *ip6; 360 struct ip_pack6 pack; 361 int prio, matchprio; 362 int s; 363 struct encaptab *ep, *match; 364 thmap_t *emap = encap_map[1]; 365 struct encap_key key; 366 367 KASSERT(m->m_len >= sizeof(*ip6)); 368 369 ip6 = mtod(m, struct ip6_hdr *); 370 371 memset(&pack, 0, sizeof(pack)); 372 pack.p.sp_len = sizeof(pack); 373 pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6; 374 pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6); 375 if (dir == INBOUND) { 376 pack.mine.sin6_addr = ip6->ip6_dst; 377 pack.yours.sin6_addr = ip6->ip6_src; 378 } else { 379 pack.mine.sin6_addr = ip6->ip6_src; 380 pack.yours.sin6_addr = ip6->ip6_dst; 381 } 382 383 match = NULL; 384 matchprio = 0; 385 386 s = pserialize_read_enter(); 387 388 encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours)); 389 while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { 390 struct psref elem_psref; 391 392 KASSERT(ep->af == AF_INET6); 393 394 if (ep->proto >= 0 && ep->proto != proto) { 395 encap_key_inc(&key); 396 continue; 397 } 398 399 psref_acquire(&elem_psref, &ep->psref, 400 encaptab.elem_class); 401 if (ep->func) { 402 pserialize_read_exit(s); 403 prio = (*ep->func)(m, off, proto, ep->arg); 404 s = pserialize_read_enter(); 405 } else { 406 prio = pack.mine.sin6_len + pack.yours.sin6_len; 407 } 408 409 if (prio <= 0) { 410 psref_release(&elem_psref, &ep->psref, 411 encaptab.elem_class); 412 encap_key_inc(&key); 413 continue; 414 } 415 if (prio > matchprio) { 416 /* release last matched ep */ 417 if (match != NULL) 418 psref_release(match_psref, &match->psref, 419 encaptab.elem_class); 420 421 psref_copy(match_psref, &elem_psref, 422 encaptab.elem_class); 423 matchprio = prio; 424 match = ep; 425 } 426 psref_release(&elem_psref, &ep->psref, 427 encaptab.elem_class); 428 encap_key_inc(&key); 429 } 430 431 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 432 struct psref elem_psref; 433 434 if (ep->af != AF_INET6) 435 continue; 436 if (ep->proto >= 0 && ep->proto != proto) 437 continue; 438 439 psref_acquire(&elem_psref, &ep->psref, 440 encaptab.elem_class); 441 442 pserialize_read_exit(s); 443 /* ep->func is sleepable. e.g. rtalloc1 */ 444 prio = (*ep->func)(m, off, proto, ep->arg); 445 s = pserialize_read_enter(); 446 447 /* see encap4_lookup() for issues here */ 448 if (prio <= 0) { 449 psref_release(&elem_psref, &ep->psref, 450 encaptab.elem_class); 451 continue; 452 } 453 if (prio > matchprio) { 454 /* release last matched ep */ 455 if (match != NULL) 456 psref_release(match_psref, &match->psref, 457 encaptab.elem_class); 458 459 psref_copy(match_psref, &elem_psref, 460 encaptab.elem_class); 461 matchprio = prio; 462 match = ep; 463 } 464 KASSERTMSG((match == NULL) || psref_held(&match->psref, 465 encaptab.elem_class), 466 "current match = %p, but not hold its psref", match); 467 468 psref_release(&elem_psref, &ep->psref, 469 encaptab.elem_class); 470 } 471 pserialize_read_exit(s); 472 473 return match; 474} 475 476int 477encap6_input(struct mbuf **mp, int *offp, int proto) 478{ 479 struct mbuf *m = *mp; 480 const struct encapsw *esw; 481 struct encaptab *match; 482 struct psref match_psref; 483 int rv; 484 485 match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref); 486 487 if (match) { 488 /* found a match */ 489 esw = match->esw; 490 if (esw && esw->encapsw6.pr_input) { 491 int ret; 492 ret = (*esw->encapsw6.pr_input)(mp, offp, proto, 493 match->arg); 494 psref_release(&match_psref, &match->psref, 495 encaptab.elem_class); 496 return ret; 497 } else { 498 psref_release(&match_psref, &match->psref, 499 encaptab.elem_class); 500 m_freem(m); 501 return IPPROTO_DONE; 502 } 503 } 504 505 /* last resort: inject to raw socket */ 506 SOFTNET_LOCK_IF_NET_MPSAFE(); 507 rv = rip6_input(mp, offp, proto); 508 SOFTNET_UNLOCK_IF_NET_MPSAFE(); 509 return rv; 510} 511#endif 512 513static int 514encap_add(struct encaptab *ep) 515{ 516 517 KASSERT(encap_lock_held()); 518 519 PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain); 520 521 return 0; 522} 523 524static int 525encap_remove(struct encaptab *ep) 526{ 527 int error = 0; 528 529 KASSERT(encap_lock_held()); 530 531 PSLIST_WRITER_REMOVE(ep, chain); 532 533 return error; 534} 535 536static void 537encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp) 538{ 539 540 KASSERT(sp != NULL && dp != NULL); 541 KASSERT(sp->sa_len == dp->sa_len); 542 KASSERT(af == sp->sa_family && af == dp->sa_family); 543 544 socklen_t len __diagused = sockaddr_getsize_by_family(af); 545 KASSERT(len != 0 && len == sp->sa_len && len == dp->sa_len); 546} 547 548const struct encaptab * 549encap_attach_func(int af, int proto, 550 encap_priofunc_t *func, 551 const struct encapsw *esw, void *arg) 552{ 553 struct encaptab *ep; 554 int error; 555#ifndef ENCAP_MPSAFE 556 int s; 557 558 s = splsoftnet(); 559#endif 560 561 ASSERT_SLEEPABLE(); 562 563 /* sanity check on args */ 564 KASSERT(func != NULL); 565 KASSERT(af == AF_INET 566#ifdef INET6 567 || af == AF_INET6 568#endif 569 ); 570 571 ep = kmem_alloc(sizeof(*ep), KM_SLEEP); 572 memset(ep, 0, sizeof(*ep)); 573 574 ep->af = af; 575 ep->proto = proto; 576 ep->func = func; 577 ep->esw = esw; 578 ep->arg = arg; 579 psref_target_init(&ep->psref, encaptab.elem_class); 580 581 error = encap_add(ep); 582 if (error) 583 goto gc; 584 585 error = 0; 586#ifndef ENCAP_MPSAFE 587 splx(s); 588#endif 589 return ep; 590 591gc: 592 kmem_free(ep, sizeof(*ep)); 593#ifndef ENCAP_MPSAFE 594 splx(s); 595#endif 596 return NULL; 597} 598 599static void 600encap_key_init(struct encap_key *key, 601 const struct sockaddr *local, const struct sockaddr *remote) 602{ 603 604 memset(key, 0, sizeof(*key)); 605 606 sockaddr_copy(&key->local_sa, sizeof(key->local_u), local); 607 sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote); 608} 609 610static void 611encap_key_inc(struct encap_key *key) 612{ 613 614 (key->seq)++; 615} 616 617static void 618encap_key_dec(struct encap_key *key) 619{ 620 621 (key->seq)--; 622} 623 624static void 625encap_key_copy(struct encap_key *dst, const struct encap_key *src) 626{ 627 628 memset(dst, 0, sizeof(*dst)); 629 *dst = *src; 630} 631 632/* 633 * src is always my side, and dst is always remote side. 634 * Return value will be necessary as input (cookie) for encap_detach(). 635 */ 636const struct encaptab * 637encap_attach_addr(int af, int proto, 638 const struct sockaddr *src, const struct sockaddr *dst, 639 encap_priofunc_t *func, 640 const struct encapsw *esw, void *arg) 641{ 642 struct encaptab *ep; 643 size_t l; 644 thmap_t *emap; 645 void *retep; 646 struct ip_pack4 *pack4; 647#ifdef INET6 648 struct ip_pack6 *pack6; 649#endif 650 651 ASSERT_SLEEPABLE(); 652 653 encap_afcheck(af, src, dst); 654 655 switch (af) { 656 case AF_INET: 657 l = sizeof(*pack4); 658 emap = encap_map[0]; 659 break; 660#ifdef INET6 661 case AF_INET6: 662 l = sizeof(*pack6); 663 emap = encap_map[1]; 664 break; 665#endif 666 default: 667 return NULL; 668 } 669 670 ep = kmem_zalloc(sizeof(*ep), KM_SLEEP); 671 ep->addrpack = kmem_zalloc(l, KM_SLEEP); 672 ep->addrpack->sa_len = l & 0xff; 673 ep->af = af; 674 ep->proto = proto; 675 ep->flag = IP_ENCAP_ADDR_ENABLE; 676 switch (af) { 677 case AF_INET: 678 pack4 = (struct ip_pack4 *)ep->addrpack; 679 ep->src = (struct sockaddr *)&pack4->mine; 680 ep->dst = (struct sockaddr *)&pack4->yours; 681 break; 682#ifdef INET6 683 case AF_INET6: 684 pack6 = (struct ip_pack6 *)ep->addrpack; 685 ep->src = (struct sockaddr *)&pack6->mine; 686 ep->dst = (struct sockaddr *)&pack6->yours; 687 break; 688#endif 689 } 690 memcpy(ep->src, src, src->sa_len); 691 memcpy(ep->dst, dst, dst->sa_len); 692 ep->esw = esw; 693 ep->arg = arg; 694 ep->func = func; 695 psref_target_init(&ep->psref, encaptab.elem_class); 696 697 encap_key_init(&ep->key, src, dst); 698 while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep) 699 encap_key_inc(&ep->key); 700 return ep; 701} 702 703 704/* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */ 705 706#ifdef INET6 707void * 708encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0) 709{ 710 void *d = d0; 711 struct ip6_hdr *ip6; 712 struct mbuf *m; 713 int off; 714 struct ip6ctlparam *ip6cp = NULL; 715 int nxt; 716 int s; 717 struct encaptab *ep; 718 const struct encapsw *esw; 719 720 if (sa->sa_family != AF_INET6 || 721 sa->sa_len != sizeof(struct sockaddr_in6)) 722 return NULL; 723 724 if ((unsigned)cmd >= PRC_NCMDS) 725 return NULL; 726 if (cmd == PRC_HOSTDEAD) 727 d = NULL; 728 else if (cmd == PRC_MSGSIZE) 729 ; /* special code is present, see below */ 730 else if (inet6ctlerrmap[cmd] == 0) 731 return NULL; 732 733 /* if the parameter is from icmp6, decode it. */ 734 if (d != NULL) { 735 ip6cp = (struct ip6ctlparam *)d; 736 m = ip6cp->ip6c_m; 737 ip6 = ip6cp->ip6c_ip6; 738 off = ip6cp->ip6c_off; 739 nxt = ip6cp->ip6c_nxt; 740 741 if (ip6 && cmd == PRC_MSGSIZE) { 742 int valid = 0; 743 struct encaptab *match; 744 struct psref elem_psref; 745 746 /* 747 * Check to see if we have a valid encap configuration. 748 */ 749 match = encap6_lookup(m, off, nxt, OUTBOUND, 750 &elem_psref); 751 if (match) { 752 valid++; 753 psref_release(&elem_psref, &match->psref, 754 encaptab.elem_class); 755 } 756 757 /* 758 * Depending on the value of "valid" and routing table 759 * size (mtudisc_{hi,lo}wat), we will: 760 * - recalcurate the new MTU and create the 761 * corresponding routing entry, or 762 * - ignore the MTU change notification. 763 */ 764 icmp6_mtudisc_update((struct ip6ctlparam *)d, valid); 765 } 766 } else { 767 m = NULL; 768 ip6 = NULL; 769 nxt = -1; 770 } 771 772 /* inform all listeners */ 773 774 s = pserialize_read_enter(); 775 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { 776 struct psref elem_psref; 777 778 if (ep->af != AF_INET6) 779 continue; 780 if (ep->proto >= 0 && ep->proto != nxt) 781 continue; 782 783 /* should optimize by looking at address pairs */ 784 785 /* XXX need to pass ep->arg or ep itself to listeners */ 786 psref_acquire(&elem_psref, &ep->psref, 787 encaptab.elem_class); 788 esw = ep->esw; 789 if (esw && esw->encapsw6.pr_ctlinput) { 790 pserialize_read_exit(s); 791 /* pr_ctlinput is sleepable. e.g. rtcache_free */ 792 (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg); 793 s = pserialize_read_enter(); 794 } 795 psref_release(&elem_psref, &ep->psref, 796 encaptab.elem_class); 797 } 798 pserialize_read_exit(s); 799 800 rip6_ctlinput(cmd, sa, d0); 801 return NULL; 802} 803#endif 804 805static int 806encap_detach_addr(const struct encaptab *ep) 807{ 808 thmap_t *emap; 809 struct encaptab *retep; 810 struct encaptab *target; 811 void *thgc; 812 struct encap_key key; 813 814 KASSERT(encap_lock_held()); 815 KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE); 816 817 switch (ep->af) { 818 case AF_INET: 819 emap = encap_map[0]; 820 break; 821#ifdef INET6 822 case AF_INET6: 823 emap = encap_map[1]; 824 break; 825#endif 826 default: 827 return EINVAL; 828 } 829 830 retep = thmap_del(emap, &ep->key, sizeof(ep->key)); 831 if (retep != ep) { 832 return ENOENT; 833 } 834 target = retep; 835 836 /* 837 * To keep continuity, decrement seq after detached encaptab. 838 */ 839 encap_key_copy(&key, &ep->key); 840 encap_key_inc(&key); 841 while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) { 842 void *pp; 843 844 encap_key_dec(&retep->key); 845 pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep); 846 KASSERT(retep == pp); 847 848 encap_key_inc(&key); 849 } 850 851 thgc = thmap_stage_gc(emap); 852 pserialize_perform(encaptab.psz); 853 thmap_gc(emap, thgc); 854 psref_target_destroy(&target->psref, encaptab.elem_class); 855 kmem_free(target->addrpack, target->addrpack->sa_len); 856 kmem_free(target, sizeof(*target)); 857 858 return 0; 859} 860 861int 862encap_detach(const struct encaptab *cookie) 863{ 864 const struct encaptab *ep = cookie; 865 struct encaptab *p; 866 int error; 867 868 KASSERT(encap_lock_held()); 869 870 if (ep->flag & IP_ENCAP_ADDR_ENABLE) 871 return encap_detach_addr(ep); 872 873 PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) { 874 if (p == ep) { 875 error = encap_remove(p); 876 if (error) 877 return error; 878 else 879 break; 880 } 881 } 882 if (p == NULL) 883 return ENOENT; 884 885 pserialize_perform(encaptab.psz); 886 psref_target_destroy(&p->psref, 887 encaptab.elem_class); 888 kmem_free(p, sizeof(*p)); 889 890 return 0; 891} 892 893int 894encap_lock_enter(void) 895{ 896 int error; 897 898 mutex_enter(&encap_whole.lock); 899 while (encap_whole.busy != NULL) { 900 error = cv_wait_sig(&encap_whole.cv, &encap_whole.lock); 901 if (error) { 902 mutex_exit(&encap_whole.lock); 903 return error; 904 } 905 } 906 KASSERT(encap_whole.busy == NULL); 907 encap_whole.busy = curlwp; 908 mutex_exit(&encap_whole.lock); 909 910 return 0; 911} 912 913void 914encap_lock_exit(void) 915{ 916 917 mutex_enter(&encap_whole.lock); 918 KASSERT(encap_whole.busy == curlwp); 919 encap_whole.busy = NULL; 920 cv_broadcast(&encap_whole.cv); 921 mutex_exit(&encap_whole.lock); 922} 923 924bool 925encap_lock_held(void) 926{ 927 928 return (encap_whole.busy == curlwp); 929} 930