/* in6_src.c revision 148417 */
/*	$FreeBSD: head/sys/netinet6/in6_src.c 148417 2005-07-26 11:46:15Z ume $	*/
/*	$KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $	*/

/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
60 * 61 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 62 */ 63 64#include "opt_inet.h" 65#include "opt_inet6.h" 66 67#include <sys/param.h> 68#include <sys/systm.h> 69#include <sys/malloc.h> 70#include <sys/mbuf.h> 71#include <sys/protosw.h> 72#include <sys/socket.h> 73#include <sys/socketvar.h> 74#include <sys/sockio.h> 75#include <sys/sysctl.h> 76#include <sys/errno.h> 77#include <sys/time.h> 78#include <sys/kernel.h> 79 80#include <net/if.h> 81#include <net/route.h> 82 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85#include <netinet/in_systm.h> 86#include <netinet/ip.h> 87#include <netinet/in_pcb.h> 88#include <netinet6/in6_var.h> 89#include <netinet/ip6.h> 90#include <netinet6/in6_pcb.h> 91#include <netinet6/ip6_var.h> 92#include <netinet6/scope6_var.h> 93#include <netinet6/nd6.h> 94 95#include <net/net_osdep.h> 96 97static struct mtx addrsel_lock; 98#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 99#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 100#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 101#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 102 103#define ADDR_LABEL_NOTAPP (-1) 104struct in6_addrpolicy defaultaddrpolicy; 105 106int ip6_prefer_tempaddr = 0; 107 108static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 109 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 110 struct rtentry **, int, int)); 111static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 112 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); 113 114static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 115 116static void init_policy_queue __P((void)); 117static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 118static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 119static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 120 void *)); 121static int dump_addrsel_policyent __P((struct 
in6_addrpolicy *, void *)); 122static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 123 124/* 125 * Return an IPv6 address, which is the most appropriate for a given 126 * destination and user specified options. 127 * If necessary, this function lookups the routing table and returns 128 * an entry to the caller for later use. 129 */ 130#define REPLACE(r) do {\ 131 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 132 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 133 ip6stat.ip6s_sources_rule[(r)]++; \ 134 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 135 goto replace; \ 136} while(0) 137#define NEXT(r) do {\ 138 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 139 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 140 ip6stat.ip6s_sources_rule[(r)]++; \ 141 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? 
ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 142 goto next; /* XXX: we can't use 'continue' here */ \ 143} while(0) 144#define BREAK(r) do { \ 145 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 146 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 147 ip6stat.ip6s_sources_rule[(r)]++; \ 148 goto out; /* XXX: we can't use 'break' here */ \ 149} while(0) 150 151struct in6_addr * 152in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp) 153 struct sockaddr_in6 *dstsock; 154 struct ip6_pktopts *opts; 155 struct ip6_moptions *mopts; 156 struct route_in6 *ro; 157 struct in6_addr *laddr; 158 struct ifnet **ifpp; 159 int *errorp; 160{ 161 struct in6_addr dst; 162 struct ifnet *ifp = NULL; 163 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 164 struct in6_pktinfo *pi = NULL; 165 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 166 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 167 u_int32_t odstzone; 168 int prefer_tempaddr; 169 170 dst = dstsock->sin6_addr; /* make a copy for local operation */ 171 *errorp = 0; 172 if (ifpp) 173 *ifpp = NULL; 174 175 /* 176 * If the source address is explicitly specified by the caller, 177 * check if the requested source address is indeed a unicast address 178 * assigned to the node, and can be used as the packet's source 179 * address. If everything is okay, use the address as source. 180 */ 181 if (opts && (pi = opts->ip6po_pktinfo) && 182 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 183 struct sockaddr_in6 srcsock; 184 struct in6_ifaddr *ia6; 185 186 /* get the outgoing interface */ 187 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 188 != 0) { 189 return (NULL); 190 } 191 192 /* 193 * determine the appropriate zone id of the source based on 194 * the zone of the destination and the outgoing interface. 
195 * If the specified address is ambiguous wrt the scope zone, 196 * the interface must be specified; otherwise, ifa_ifwithaddr() 197 * will fail matching the address. 198 */ 199 bzero(&srcsock, sizeof(srcsock)); 200 srcsock.sin6_family = AF_INET6; 201 srcsock.sin6_len = sizeof(srcsock); 202 srcsock.sin6_addr = pi->ipi6_addr; 203 if (ifp) { 204 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 205 if (*errorp != 0) 206 return (NULL); 207 } 208 209 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 210 if (ia6 == NULL || 211 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 212 *errorp = EADDRNOTAVAIL; 213 return (NULL); 214 } 215 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 216 if (ifpp) 217 *ifpp = ifp; 218 return (&ia6->ia_addr.sin6_addr); 219 } 220 221 /* 222 * Otherwise, if the socket has already bound the source, just use it. 223 */ 224 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 225 return (laddr); 226 227 /* 228 * If the address is not specified, choose the best one based on 229 * the outgoing interface and the destination address. 230 */ 231 /* get the outgoing interface */ 232 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 233 return (NULL); 234 235#ifdef DIAGNOSTIC 236 if (ifp == NULL) /* this should not happen */ 237 panic("in6_selectsrc: NULL ifp"); 238#endif 239 *errorp = in6_setscope(&dst, ifp, &odstzone); 240 if (*errorp != 0) 241 return (NULL); 242 243 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 244 int new_scope = -1, new_matchlen = -1; 245 struct in6_addrpolicy *new_policy = NULL; 246 u_int32_t srczone, osrczone, dstzone; 247 struct in6_addr src; 248 struct ifnet *ifp1 = ia->ia_ifp; 249 250 /* 251 * We'll never take an address that breaks the scope zone 252 * of the destination. We also skip an address if its zone 253 * does not contain the outgoing interface. 254 * XXX: we should probably use sin6_scope_id here. 
255 */ 256 if (in6_setscope(&dst, ifp1, &dstzone) || 257 odstzone != dstzone) { 258 continue; 259 } 260 src = ia->ia_addr.sin6_addr; 261 if (in6_setscope(&src, ifp, &osrczone) || 262 in6_setscope(&src, ifp1, &srczone) || 263 osrczone != srczone) { 264 continue; 265 } 266 267 /* avoid unusable addresses */ 268 if ((ia->ia6_flags & 269 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 270 continue; 271 } 272 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 273 continue; 274 275 /* Rule 1: Prefer same address */ 276 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 277 ia_best = ia; 278 BREAK(1); /* there should be no better candidate */ 279 } 280 281 if (ia_best == NULL) 282 REPLACE(0); 283 284 /* Rule 2: Prefer appropriate scope */ 285 if (dst_scope < 0) 286 dst_scope = in6_addrscope(&dst); 287 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 288 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 289 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 290 REPLACE(2); 291 NEXT(2); 292 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 293 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 294 NEXT(2); 295 REPLACE(2); 296 } 297 298 /* 299 * Rule 3: Avoid deprecated addresses. Note that the case of 300 * !ip6_use_deprecated is already rejected above. 301 */ 302 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 303 NEXT(3); 304 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 305 REPLACE(3); 306 307 /* Rule 4: Prefer home addresses */ 308 /* 309 * XXX: This is a TODO. We should probably merge the MIP6 310 * case above. 311 */ 312 313 /* Rule 5: Prefer outgoing interface */ 314 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 315 NEXT(5); 316 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 317 REPLACE(5); 318 319 /* 320 * Rule 6: Prefer matching label 321 * Note that best_policy should be non-NULL here. 
322 */ 323 if (dst_policy == NULL) 324 dst_policy = lookup_addrsel_policy(dstsock); 325 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 326 new_policy = lookup_addrsel_policy(&ia->ia_addr); 327 if (dst_policy->label == best_policy->label && 328 dst_policy->label != new_policy->label) 329 NEXT(6); 330 if (dst_policy->label != best_policy->label && 331 dst_policy->label == new_policy->label) 332 REPLACE(6); 333 } 334 335 /* 336 * Rule 7: Prefer public addresses. 337 * We allow users to reverse the logic by configuring 338 * a sysctl variable, so that privacy conscious users can 339 * always prefer temporary addresses. 340 */ 341 if (opts == NULL || 342 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 343 prefer_tempaddr = ip6_prefer_tempaddr; 344 } else if (opts->ip6po_prefer_tempaddr == 345 IP6PO_TEMPADDR_NOTPREFER) { 346 prefer_tempaddr = 0; 347 } else 348 prefer_tempaddr = 1; 349 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 350 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 351 if (prefer_tempaddr) 352 REPLACE(7); 353 else 354 NEXT(7); 355 } 356 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 357 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 358 if (prefer_tempaddr) 359 NEXT(7); 360 else 361 REPLACE(7); 362 } 363 364 /* 365 * Rule 8: prefer addresses on alive interfaces. 366 * This is a KAME specific rule. 367 */ 368 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 369 !(ia->ia_ifp->if_flags & IFF_UP)) 370 NEXT(8); 371 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 372 (ia->ia_ifp->if_flags & IFF_UP)) 373 REPLACE(8); 374 375 /* 376 * Rule 14: Use longest matching prefix. 377 * Note: in the address selection draft, this rule is 378 * documented as "Rule 8". However, since it is also 379 * documented that this rule can be overridden, we assign 380 * a large number so that it is easy to assign smaller numbers 381 * to more preferred rules. 
382 */ 383 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 384 if (best_matchlen < new_matchlen) 385 REPLACE(14); 386 if (new_matchlen < best_matchlen) 387 NEXT(14); 388 389 /* Rule 15 is reserved. */ 390 391 /* 392 * Last resort: just keep the current candidate. 393 * Or, do we need more rules? 394 */ 395 continue; 396 397 replace: 398 ia_best = ia; 399 best_scope = (new_scope >= 0 ? new_scope : 400 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 401 best_policy = (new_policy ? new_policy : 402 lookup_addrsel_policy(&ia_best->ia_addr)); 403 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 404 in6_matchlen(&ia_best->ia_addr.sin6_addr, 405 &dst)); 406 407 next: 408 continue; 409 410 out: 411 break; 412 } 413 414 if ((ia = ia_best) == NULL) { 415 *errorp = EADDRNOTAVAIL; 416 return (NULL); 417 } 418 419 if (ifpp) 420 *ifpp = ifp; 421 422 return (&ia->ia_addr.sin6_addr); 423} 424 425static int 426selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok) 427 struct sockaddr_in6 *dstsock; 428 struct ip6_pktopts *opts; 429 struct ip6_moptions *mopts; 430 struct route_in6 *ro; 431 struct ifnet **retifp; 432 struct rtentry **retrt; 433 int clone; /* meaningful only for bsdi and freebsd. */ 434 int norouteok; 435{ 436 int error = 0; 437 struct ifnet *ifp = NULL; 438 struct rtentry *rt = NULL; 439 struct sockaddr_in6 *sin6_next; 440 struct in6_pktinfo *pi = NULL; 441 struct in6_addr *dst = &dstsock->sin6_addr; 442 443#if 0 444 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 445 dstsock->sin6_addr.s6_addr32[1] == 0 && 446 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 447 printf("in6_selectroute: strange destination %s\n", 448 ip6_sprintf(&dstsock->sin6_addr)); 449 } else { 450 printf("in6_selectroute: destination = %s%%%d\n", 451 ip6_sprintf(&dstsock->sin6_addr), 452 dstsock->sin6_scope_id); /* for debug */ 453 } 454#endif 455 456 /* If the caller specify the outgoing interface explicitly, use it. 
*/ 457 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 458 /* XXX boundary check is assumed to be already done. */ 459 ifp = ifnet_byindex(pi->ipi6_ifindex); 460 if (ifp != NULL && 461 (norouteok || retrt == NULL || 462 IN6_IS_ADDR_MULTICAST(dst))) { 463 /* 464 * we do not have to check nor get the route for 465 * multicast. 466 */ 467 goto done; 468 } else 469 goto getroute; 470 } 471 472 /* 473 * If the destination address is a multicast address and the outgoing 474 * interface for the address is specified by the caller, use it. 475 */ 476 if (IN6_IS_ADDR_MULTICAST(dst) && 477 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 478 goto done; /* we do not need a route for multicast. */ 479 } 480 481 getroute: 482 /* 483 * If the next hop address for the packet is specified by the caller, 484 * use it as the gateway. 485 */ 486 if (opts && opts->ip6po_nexthop) { 487 struct route_in6 *ron; 488 489 sin6_next = satosin6(opts->ip6po_nexthop); 490 491 /* at this moment, we only support AF_INET6 next hops */ 492 if (sin6_next->sin6_family != AF_INET6) { 493 error = EAFNOSUPPORT; /* or should we proceed? */ 494 goto done; 495 } 496 497 /* 498 * If the next hop is an IPv6 address, then the node identified 499 * by that address must be a neighbor of the sending host. 500 */ 501 ron = &opts->ip6po_nextroute; 502 if ((ron->ro_rt && 503 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 504 (RTF_UP | RTF_LLINFO)) || 505 !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { 506 if (ron->ro_rt) { 507 RTFREE(ron->ro_rt); 508 ron->ro_rt = NULL; 509 } 510 *satosin6(&ron->ro_dst) = *sin6_next; 511 } 512 if (ron->ro_rt == NULL) { 513 rtalloc((struct route *)ron); /* multi path case? 
*/ 514 if (ron->ro_rt == NULL || 515 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 516 if (ron->ro_rt) { 517 RTFREE(ron->ro_rt); 518 ron->ro_rt = NULL; 519 } 520 error = EHOSTUNREACH; 521 goto done; 522 } 523 } 524 rt = ron->ro_rt; 525 ifp = rt->rt_ifp; 526 527 /* 528 * When cloning is required, try to allocate a route to the 529 * destination so that the caller can store path MTU 530 * information. 531 */ 532 if (!clone) 533 goto done; 534 } 535 536 /* 537 * Use a cached route if it exists and is valid, else try to allocate 538 * a new one. Note that we should check the address family of the 539 * cached destination, in case of sharing the cache with IPv4. 540 */ 541 if (ro) { 542 if (ro->ro_rt && 543 (!(ro->ro_rt->rt_flags & RTF_UP) || 544 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 545 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 546 dst))) { 547 RTFREE(ro->ro_rt); 548 ro->ro_rt = (struct rtentry *)NULL; 549 } 550 if (ro->ro_rt == (struct rtentry *)NULL) { 551 struct sockaddr_in6 *sa6; 552 553 /* No route yet, so try to acquire one */ 554 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 555 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 556 *sa6 = *dstsock; 557 sa6->sin6_scope_id = 0; 558 559 if (clone) { 560 rtalloc((struct route *)ro); 561 } else { 562 ro->ro_rt = rtalloc1(&((struct route *)ro) 563 ->ro_dst, 0, 0UL); 564 if (ro->ro_rt) 565 RT_UNLOCK(ro->ro_rt); 566 } 567 } 568 569 /* 570 * do not care about the result if we have the nexthop 571 * explicitly specified. 572 */ 573 if (opts && opts->ip6po_nexthop) 574 goto done; 575 576 if (ro->ro_rt) { 577 ifp = ro->ro_rt->rt_ifp; 578 579 if (ifp == NULL) { /* can this really happen? */ 580 RTFREE(ro->ro_rt); 581 ro->ro_rt = NULL; 582 } 583 } 584 if (ro->ro_rt == NULL) 585 error = EHOSTUNREACH; 586 rt = ro->ro_rt; 587 588 /* 589 * Check if the outgoing interface conflicts with 590 * the interface specified by ipi6_ifindex (if specified). 591 * Note that loopback interface is always okay. 
592 * (this may happen when we are sending a packet to one of 593 * our own addresses.) 594 */ 595 if (ifp && opts && opts->ip6po_pktinfo && 596 opts->ip6po_pktinfo->ipi6_ifindex) { 597 if (!(ifp->if_flags & IFF_LOOPBACK) && 598 ifp->if_index != 599 opts->ip6po_pktinfo->ipi6_ifindex) { 600 error = EHOSTUNREACH; 601 goto done; 602 } 603 } 604 } 605 606 done: 607 if (ifp == NULL && rt == NULL) { 608 /* 609 * This can happen if the caller did not pass a cached route 610 * nor any other hints. We treat this case an error. 611 */ 612 error = EHOSTUNREACH; 613 } 614 if (error == EHOSTUNREACH) 615 ip6stat.ip6s_noroute++; 616 617 if (retifp != NULL) 618 *retifp = ifp; 619 if (retrt != NULL) 620 *retrt = rt; /* rt may be NULL */ 621 622 return (error); 623} 624 625static int 626in6_selectif(dstsock, opts, mopts, ro, retifp) 627 struct sockaddr_in6 *dstsock; 628 struct ip6_pktopts *opts; 629 struct ip6_moptions *mopts; 630 struct route_in6 *ro; 631 struct ifnet **retifp; 632{ 633 int error; 634 struct route_in6 sro; 635 struct rtentry *rt = NULL; 636 637 if (ro == NULL) { 638 bzero(&sro, sizeof(sro)); 639 ro = &sro; 640 } 641 642 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 643 &rt, 0, 1)) != 0) { 644 if (rt && rt == sro.ro_rt) 645 RTFREE(rt); 646 return (error); 647 } 648 649 /* 650 * do not use a rejected or black hole route. 651 * XXX: this check should be done in the L2 output routine. 652 * However, if we skipped this check here, we'd see the following 653 * scenario: 654 * - install a rejected route for a scoped address prefix 655 * (like fe80::/10) 656 * - send a packet to a destination that matches the scoped prefix, 657 * with ambiguity about the scope zone. 658 * - pick the outgoing interface from the route, and disambiguate the 659 * scope zone with the interface. 660 * - ip6_output() would try to get another route with the "new" 661 * destination, which may be valid. 662 * - we'd see no error on output. 
663 * Although this may not be very harmful, it should still be confusing. 664 * We thus reject the case here. 665 */ 666 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 667 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 668 669 if (rt && rt == sro.ro_rt) 670 RTFREE(rt); 671 return (flags); 672 } 673 674 /* 675 * Adjust the "outgoing" interface. If we're going to loop the packet 676 * back to ourselves, the ifp would be the loopback interface. 677 * However, we'd rather know the interface associated to the 678 * destination address (which should probably be one of our own 679 * addresses.) 680 */ 681 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 682 *retifp = rt->rt_ifa->ifa_ifp; 683 684 if (rt && rt == sro.ro_rt) 685 RTFREE(rt); 686 return (0); 687} 688 689int 690in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 691 struct sockaddr_in6 *dstsock; 692 struct ip6_pktopts *opts; 693 struct ip6_moptions *mopts; 694 struct route_in6 *ro; 695 struct ifnet **retifp; 696 struct rtentry **retrt; 697 int clone; /* meaningful only for bsdi and freebsd. */ 698{ 699 return (selectroute(dstsock, opts, mopts, ro, retifp, 700 retrt, clone, 0)); 701} 702 703/* 704 * Default hop limit selection. The precedence is as follows: 705 * 1. Hoplimit value specified via ioctl. 706 * 2. (If the outgoing interface is detected) the current 707 * hop limit of the interface specified by router advertisement. 708 * 3. The system default hoplimit. 
709 */ 710int 711in6_selecthlim(in6p, ifp) 712 struct in6pcb *in6p; 713 struct ifnet *ifp; 714{ 715 if (in6p && in6p->in6p_hops >= 0) 716 return (in6p->in6p_hops); 717 else if (ifp) 718 return (ND_IFINFO(ifp)->chlim); 719 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 720 struct route_in6 ro6; 721 struct ifnet *lifp; 722 723 bzero(&ro6, sizeof(ro6)); 724 ro6.ro_dst.sin6_family = AF_INET6; 725 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 726 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 727 rtalloc((struct route *)&ro6); 728 if (ro6.ro_rt) { 729 lifp = ro6.ro_rt->rt_ifp; 730 RTFREE(ro6.ro_rt); 731 if (lifp) 732 return (ND_IFINFO(lifp)->chlim); 733 } else 734 return (ip6_defhlim); 735 } 736 return (ip6_defhlim); 737} 738 739/* 740 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 741 * share this function by all *bsd*... 742 */ 743int 744in6_pcbsetport(laddr, inp, cred) 745 struct in6_addr *laddr; 746 struct inpcb *inp; 747 struct ucred *cred; 748{ 749 struct socket *so = inp->inp_socket; 750 u_int16_t lport = 0, first, last, *lastport; 751 int count, error = 0, wild = 0; 752 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 753 754 /* XXX: this is redundant when called from in6_pcbbind */ 755 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 756 wild = INPLOOKUP_WILDCARD; 757 758 inp->inp_flags |= INP_ANONPORT; 759 760 if (inp->inp_flags & INP_HIGHPORT) { 761 first = ipport_hifirstauto; /* sysctl */ 762 last = ipport_hilastauto; 763 lastport = &pcbinfo->lasthi; 764 } else if (inp->inp_flags & INP_LOWPORT) { 765 if ((error = suser_cred(cred, 0))) 766 return error; 767 first = ipport_lowfirstauto; /* 1023 */ 768 last = ipport_lowlastauto; /* 600 */ 769 lastport = &pcbinfo->lastlow; 770 } else { 771 first = ipport_firstauto; /* sysctl */ 772 last = ipport_lastauto; 773 lastport = &pcbinfo->lastport; 774 } 775 /* 776 * Simple check to ensure all ports are not used up causing 777 * a deadlock here. 
778 * 779 * We split the two cases (up and down) so that the direction 780 * is not being tested on each round of the loop. 781 */ 782 if (first > last) { 783 /* 784 * counting down 785 */ 786 count = first - last; 787 788 do { 789 if (count-- < 0) { /* completely used? */ 790 /* 791 * Undo any address bind that may have 792 * occurred above. 793 */ 794 inp->in6p_laddr = in6addr_any; 795 return (EAGAIN); 796 } 797 --*lastport; 798 if (*lastport > first || *lastport < last) 799 *lastport = first; 800 lport = htons(*lastport); 801 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 802 lport, wild)); 803 } else { 804 /* 805 * counting up 806 */ 807 count = last - first; 808 809 do { 810 if (count-- < 0) { /* completely used? */ 811 /* 812 * Undo any address bind that may have 813 * occurred above. 814 */ 815 inp->in6p_laddr = in6addr_any; 816 return (EAGAIN); 817 } 818 ++*lastport; 819 if (*lastport < first || *lastport > last) 820 *lastport = first; 821 lport = htons(*lastport); 822 } while (in6_pcblookup_local(pcbinfo, 823 &inp->in6p_laddr, lport, wild)); 824 } 825 826 inp->inp_lport = lport; 827 if (in_pcbinshash(inp) != 0) { 828 inp->in6p_laddr = in6addr_any; 829 inp->inp_lport = 0; 830 return (EAGAIN); 831 } 832 833 return (0); 834} 835 836void 837addrsel_policy_init() 838{ 839 ADDRSEL_LOCK_INIT(); 840 841 init_policy_queue(); 842 843 /* initialize the "last resort" policy */ 844 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 845 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 846} 847 848static struct in6_addrpolicy * 849lookup_addrsel_policy(key) 850 struct sockaddr_in6 *key; 851{ 852 struct in6_addrpolicy *match = NULL; 853 854 ADDRSEL_LOCK(); 855 match = match_addrsel_policy(key); 856 857 if (match == NULL) 858 match = &defaultaddrpolicy; 859 else 860 match->use++; 861 ADDRSEL_UNLOCK(); 862 863 return (match); 864} 865 866/* 867 * Subroutines to manage the address selection policy table via sysctl. 
868 */ 869struct walkarg { 870 struct sysctl_req *w_req; 871}; 872 873static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 874SYSCTL_DECL(_net_inet6_ip6); 875SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 876 CTLFLAG_RD, in6_src_sysctl, ""); 877 878static int 879in6_src_sysctl(SYSCTL_HANDLER_ARGS) 880{ 881 struct walkarg w; 882 883 if (req->newptr) 884 return EPERM; 885 886 bzero(&w, sizeof(w)); 887 w.w_req = req; 888 889 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 890} 891 892int 893in6_src_ioctl(cmd, data) 894 u_long cmd; 895 caddr_t data; 896{ 897 int i; 898 struct in6_addrpolicy ent0; 899 900 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 901 return (EOPNOTSUPP); /* check for safety */ 902 903 ent0 = *(struct in6_addrpolicy *)data; 904 905 if (ent0.label == ADDR_LABEL_NOTAPP) 906 return (EINVAL); 907 /* check if the prefix mask is consecutive. */ 908 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 909 return (EINVAL); 910 /* clear trailing garbages (if any) of the prefix address. */ 911 for (i = 0; i < 4; i++) { 912 ent0.addr.sin6_addr.s6_addr32[i] &= 913 ent0.addrmask.sin6_addr.s6_addr32[i]; 914 } 915 ent0.use = 0; 916 917 switch (cmd) { 918 case SIOCAADDRCTL_POLICY: 919 return (add_addrsel_policyent(&ent0)); 920 case SIOCDADDRCTL_POLICY: 921 return (delete_addrsel_policyent(&ent0)); 922 } 923 924 return (0); /* XXX: compromise compilers */ 925} 926 927/* 928 * The followings are implementation of the policy table using a 929 * simple tail queue. 930 * XXX such details should be hidden. 931 * XXX implementation using binary tree should be more efficient. 
932 */ 933struct addrsel_policyent { 934 TAILQ_ENTRY(addrsel_policyent) ape_entry; 935 struct in6_addrpolicy ape_policy; 936}; 937 938TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 939 940struct addrsel_policyhead addrsel_policytab; 941 942static void 943init_policy_queue() 944{ 945 TAILQ_INIT(&addrsel_policytab); 946} 947 948static int 949add_addrsel_policyent(newpolicy) 950 struct in6_addrpolicy *newpolicy; 951{ 952 struct addrsel_policyent *new, *pol; 953 954 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 955 M_WAITOK); 956 ADDRSEL_LOCK(); 957 958 /* duplication check */ 959 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 960 pol = TAILQ_NEXT(pol, ape_entry)) { 961 if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, 962 &pol->ape_policy.addr) && 963 SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, 964 &pol->ape_policy.addrmask)) { 965 ADDRSEL_UNLOCK(); 966 FREE(new, M_IFADDR); 967 return (EEXIST); /* or override it? */ 968 } 969 } 970 971 bzero(new, sizeof(*new)); 972 973 /* XXX: should validate entry */ 974 new->ape_policy = *newpolicy; 975 976 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 977 ADDRSEL_UNLOCK(); 978 979 return (0); 980} 981 982static int 983delete_addrsel_policyent(key) 984 struct in6_addrpolicy *key; 985{ 986 struct addrsel_policyent *pol; 987 988 ADDRSEL_LOCK(); 989 990 /* search for the entry in the table */ 991 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 992 pol = TAILQ_NEXT(pol, ape_entry)) { 993 if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && 994 SA6_ARE_ADDR_EQUAL(&key->addrmask, 995 &pol->ape_policy.addrmask)) { 996 break; 997 } 998 } 999 if (pol == NULL) { 1000 ADDRSEL_UNLOCK(); 1001 return (ESRCH); 1002 } 1003 1004 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1005 ADDRSEL_UNLOCK(); 1006 1007 return (0); 1008} 1009 1010static int 1011walk_addrsel_policy(callback, w) 1012 int (*callback) __P((struct in6_addrpolicy *, void *)); 1013 void *w; 1014{ 1015 struct addrsel_policyent *pol; 1016 int error = 0; 
1017 1018 ADDRSEL_LOCK(); 1019 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1020 pol = TAILQ_NEXT(pol, ape_entry)) { 1021 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1022 ADDRSEL_UNLOCK(); 1023 return (error); 1024 } 1025 } 1026 ADDRSEL_UNLOCK(); 1027 1028 return (error); 1029} 1030 1031static int 1032dump_addrsel_policyent(pol, arg) 1033 struct in6_addrpolicy *pol; 1034 void *arg; 1035{ 1036 int error = 0; 1037 struct walkarg *w = arg; 1038 1039 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1040 1041 return (error); 1042} 1043 1044static struct in6_addrpolicy * 1045match_addrsel_policy(key) 1046 struct sockaddr_in6 *key; 1047{ 1048 struct addrsel_policyent *pent; 1049 struct in6_addrpolicy *bestpol = NULL, *pol; 1050 int matchlen, bestmatchlen = -1; 1051 u_char *mp, *ep, *k, *p, m; 1052 1053 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1054 pent = TAILQ_NEXT(pent, ape_entry)) { 1055 matchlen = 0; 1056 1057 pol = &pent->ape_policy; 1058 mp = (u_char *)&pol->addrmask.sin6_addr; 1059 ep = mp + 16; /* XXX: scope field? */ 1060 k = (u_char *)&key->sin6_addr; 1061 p = (u_char *)&pol->addr.sin6_addr; 1062 for (; mp < ep && *mp; mp++, k++, p++) { 1063 m = *mp; 1064 if ((*k & m) != *p) 1065 goto next; /* not match */ 1066 if (m == 0xff) /* short cut for a typical case */ 1067 matchlen += 8; 1068 else { 1069 while (m >= 0x80) { 1070 matchlen++; 1071 m <<= 1; 1072 } 1073 } 1074 } 1075 1076 /* matched. check if this is better than the current best. */ 1077 if (bestpol == NULL || 1078 matchlen > bestmatchlen) { 1079 bestpol = pol; 1080 bestmatchlen = matchlen; 1081 } 1082 1083 next: 1084 continue; 1085 } 1086 1087 return (bestpol); 1088} 1089