in6_src.c revision 122581
1/* $FreeBSD: head/sys/netinet6/in6_src.c 122581 2003-11-12 21:39:12Z ume $ */ 2/* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ 3 4/* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33/* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 66 */ 67 68#include "opt_inet.h" 69#include "opt_inet6.h" 70 71#include <sys/param.h> 72#include <sys/systm.h> 73#include <sys/malloc.h> 74#include <sys/mbuf.h> 75#include <sys/protosw.h> 76#include <sys/socket.h> 77#include <sys/socketvar.h> 78#include <sys/sockio.h> 79#include <sys/sysctl.h> 80#include <sys/errno.h> 81#include <sys/time.h> 82#include <sys/kernel.h> 83 84#include <net/if.h> 85#include <net/route.h> 86 87#include <netinet/in.h> 88#include <netinet/in_var.h> 89#include <netinet/in_systm.h> 90#include <netinet/ip.h> 91#include <netinet/in_pcb.h> 92#include <netinet6/in6_var.h> 93#include <netinet/ip6.h> 94#include <netinet6/in6_pcb.h> 95#include <netinet6/ip6_var.h> 96#include <netinet6/nd6.h> 97#ifdef ENABLE_DEFAULT_SCOPE 98#include <netinet6/scope6_var.h> 99#endif 100 101#include <net/net_osdep.h> 102 103static struct mtx addrsel_lock; 104#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 105#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 106#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 107#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 108 109#define ADDR_LABEL_NOTAPP (-1) 110struct in6_addrpolicy defaultaddrpolicy; 111 112int ip6_prefer_tempaddr = 0; 113 114static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 115 struct ip6_moptions *, 116 struct route_in6 *ro, 117 struct ifnet **)); 118 119static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 120 121static void init_policy_queue __P((void)); 122static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 123static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 124static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 125 void *)); 126static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 127static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 128 129/* 130 * Return an IPv6 address, which is the most appropriate for a given 131 * destination and user specified options. 132 * If necessary, this function lookups the routing table and returns 133 * an entry to the caller for later use. 134 */ 135#define REPLACE(r) do {\ 136 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 137 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 138 ip6stat.ip6s_sources_rule[(r)]++; \ 139 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 140 goto replace; \ 141} while(0) 142#define NEXT(r) do {\ 143 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 144 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 145 ip6stat.ip6s_sources_rule[(r)]++; \ 146 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 147 goto next; /* XXX: we can't use 'continue' here */ \ 148} while(0) 149#define BREAK(r) do { \ 150 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 151 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 152 ip6stat.ip6s_sources_rule[(r)]++; \ 153 goto out; /* XXX: we can't use 'break' here */ \ 154} while(0) 155 156struct in6_addr * 157in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) 158 struct sockaddr_in6 *dstsock; 159 struct ip6_pktopts *opts; 160 struct ip6_moptions *mopts; 161 struct route_in6 *ro; 162 struct in6_addr *laddr; 163 int *errorp; 164{ 165 struct in6_addr *dst; 166 struct ifnet *ifp = NULL; 167 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 168 struct in6_pktinfo *pi = NULL; 169 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 170 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 171 u_int32_t odstzone; 172 int prefer_tempaddr; 173 struct sockaddr_in6 dstsock0; 174 175 dstsock0 = *dstsock; 176 if (IN6_IS_SCOPE_LINKLOCAL(&dstsock0.sin6_addr) || 177 IN6_IS_ADDR_MC_INTFACELOCAL(&dstsock0.sin6_addr)) { 178 /* KAME assumption: link id == interface id */ 179 if (opts && opts->ip6po_pktinfo && 180 opts->ip6po_pktinfo->ipi6_ifindex) { 181 ifp = ifnet_byindex(opts->ip6po_pktinfo->ipi6_ifindex); 182 dstsock0.sin6_addr.s6_addr16[1] = 183 htons(opts->ip6po_pktinfo->ipi6_ifindex); 184 } else if (mopts && 185 IN6_IS_ADDR_MULTICAST(&dstsock0.sin6_addr) && 186 mopts->im6o_multicast_ifp) { 187 ifp = mopts->im6o_multicast_ifp; 188 dstsock0.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 189 } else if ((*errorp = in6_embedscope(&dstsock0.sin6_addr, 190 &dstsock0, NULL, NULL)) != 0) 191 return (NULL); 192 } 193 dstsock = &dstsock0; 194 195 dst = &dstsock->sin6_addr; 196 *errorp = 0; 197 198 /* 199 * If the source address is explicitly specified by the caller, 200 * check if the requested source address is indeed a unicast address 201 * assigned to the node, and can be used as the packet's source 202 * address. If everything is okay, use the address as source. 203 */ 204 if (opts && (pi = opts->ip6po_pktinfo) && 205 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 206 struct sockaddr_in6 srcsock; 207 struct in6_ifaddr *ia6; 208 209 /* get the outgoing interface */ 210 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 211 != 0) { 212 return (NULL); 213 } 214 215 /* 216 * determine the appropriate zone id of the source based on 217 * the zone of the destination and the outgoing interface. 218 */ 219 bzero(&srcsock, sizeof(srcsock)); 220 srcsock.sin6_family = AF_INET6; 221 srcsock.sin6_len = sizeof(srcsock); 222 srcsock.sin6_addr = pi->ipi6_addr; 223 if (ifp) { 224 if (in6_addr2zoneid(ifp, &pi->ipi6_addr, 225 &srcsock.sin6_scope_id)) { 226 *errorp = EINVAL; /* XXX */ 227 return (NULL); 228 } 229 } 230 if ((*errorp = in6_embedscope(&srcsock.sin6_addr, &srcsock, 231 NULL, NULL)) != 0) { 232 return (NULL); 233 } 234 srcsock.sin6_scope_id = 0; /* XXX: ifa_ifwithaddr expects 0 */ 235 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 236 if (ia6 == NULL || 237 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 238 *errorp = EADDRNOTAVAIL; 239 return (NULL); 240 } 241 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 242 return (&ia6->ia_addr.sin6_addr); 243 } 244 245 /* 246 * Otherwise, if the socket has already bound the source, just use it. 247 */ 248 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 249 return (laddr); 250 251 /* 252 * If the address is not specified, choose the best one based on 253 * the outgoing interface and the destination address. 254 */ 255 /* get the outgoing interface */ 256 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 257 return (NULL); 258 259#ifdef DIAGNOSTIC 260 if (ifp == NULL) /* this should not happen */ 261 panic("in6_selectsrc: NULL ifp"); 262#endif 263 if (in6_addr2zoneid(ifp, dst, &odstzone)) { /* impossible */ 264 *errorp = EIO; /* XXX */ 265 return (NULL); 266 } 267 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 268 int new_scope = -1, new_matchlen = -1; 269 struct in6_addrpolicy *new_policy = NULL; 270 u_int32_t srczone, osrczone, dstzone; 271 struct ifnet *ifp1 = ia->ia_ifp; 272 273 /* 274 * We'll never take an address that breaks the scope zone 275 * of the destination. We also skip an address if its zone 276 * does not contain the outgoing interface. 277 * XXX: we should probably use sin6_scope_id here. 278 */ 279 if (in6_addr2zoneid(ifp1, dst, &dstzone) || 280 odstzone != dstzone) { 281 continue; 282 } 283 if (in6_addr2zoneid(ifp, &ia->ia_addr.sin6_addr, &osrczone) || 284 in6_addr2zoneid(ifp1, &ia->ia_addr.sin6_addr, &srczone) || 285 osrczone != srczone) { 286 continue; 287 } 288 289 /* avoid unusable addresses */ 290 if ((ia->ia6_flags & 291 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 292 continue; 293 } 294 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 295 continue; 296 297 /* Rule 1: Prefer same address */ 298 if (IN6_ARE_ADDR_EQUAL(dst, &ia->ia_addr.sin6_addr)) { 299 ia_best = ia; 300 BREAK(1); /* there should be no better candidate */ 301 } 302 303 if (ia_best == NULL) 304 REPLACE(0); 305 306 /* Rule 2: Prefer appropriate scope */ 307 if (dst_scope < 0) 308 dst_scope = in6_addrscope(dst); 309 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 310 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 311 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 312 REPLACE(2); 313 NEXT(2); 314 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 315 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 316 NEXT(2); 317 REPLACE(2); 318 } 319 320 /* 321 * Rule 3: Avoid deprecated addresses. Note that the case of 322 * !ip6_use_deprecated is already rejected above. 323 */ 324 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 325 NEXT(3); 326 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 327 REPLACE(3); 328 329 /* Rule 4: Prefer home addresses */ 330 /* 331 * XXX: This is a TODO. We should probably merge the MIP6 332 * case above. 333 */ 334 335 /* Rule 5: Prefer outgoing interface */ 336 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 337 NEXT(5); 338 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 339 REPLACE(5); 340 341 /* 342 * Rule 6: Prefer matching label 343 * Note that best_policy should be non-NULL here. 344 */ 345 if (dst_policy == NULL) 346 dst_policy = lookup_addrsel_policy(dstsock); 347 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 348 new_policy = lookup_addrsel_policy(&ia->ia_addr); 349 if (dst_policy->label == best_policy->label && 350 dst_policy->label != new_policy->label) 351 NEXT(6); 352 if (dst_policy->label != best_policy->label && 353 dst_policy->label == new_policy->label) 354 REPLACE(6); 355 } 356 357 /* 358 * Rule 7: Prefer public addresses. 359 * We allow users to reverse the logic by configuring 360 * a sysctl variable, so that privacy conscious users can 361 * always prefer temporary addresses. 362 */ 363 if (opts == NULL || 364 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 365 prefer_tempaddr = ip6_prefer_tempaddr; 366 } else if (opts->ip6po_prefer_tempaddr == 367 IP6PO_TEMPADDR_NOTPREFER) { 368 prefer_tempaddr = 0; 369 } else 370 prefer_tempaddr = 1; 371 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 372 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 373 if (prefer_tempaddr) 374 REPLACE(7); 375 else 376 NEXT(7); 377 } 378 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 379 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 380 if (prefer_tempaddr) 381 NEXT(7); 382 else 383 REPLACE(7); 384 } 385 386 /* 387 * Rule 8: prefer addresses on alive interfaces. 388 * This is a KAME specific rule. 389 */ 390 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 391 !(ia->ia_ifp->if_flags & IFF_UP)) 392 NEXT(8); 393 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 394 (ia->ia_ifp->if_flags & IFF_UP)) 395 REPLACE(8); 396 397 /* 398 * Rule 14: Use longest matching prefix. 399 * Note: in the address selection draft, this rule is 400 * documented as "Rule 8". However, since it is also 401 * documented that this rule can be overridden, we assign 402 * a large number so that it is easy to assign smaller numbers 403 * to more preferred rules. 404 */ 405 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, dst); 406 if (best_matchlen < new_matchlen) 407 REPLACE(14); 408 if (new_matchlen < best_matchlen) 409 NEXT(14); 410 411 /* Rule 15 is reserved. */ 412 413 /* 414 * Last resort: just keep the current candidate. 415 * Or, do we need more rules? 416 */ 417 continue; 418 419 replace: 420 ia_best = ia; 421 best_scope = (new_scope >= 0 ? new_scope : 422 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 423 best_policy = (new_policy ? new_policy : 424 lookup_addrsel_policy(&ia_best->ia_addr)); 425 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 426 in6_matchlen(&ia_best->ia_addr.sin6_addr, 427 dst)); 428 429 next: 430 continue; 431 432 out: 433 break; 434 } 435 436 if ((ia = ia_best) == NULL) { 437 *errorp = EADDRNOTAVAIL; 438 return (NULL); 439 } 440 441 return (&ia->ia_addr.sin6_addr); 442} 443 444static int 445in6_selectif(dstsock, opts, mopts, ro, retifp) 446 struct sockaddr_in6 *dstsock; 447 struct ip6_pktopts *opts; 448 struct ip6_moptions *mopts; 449 struct route_in6 *ro; 450 struct ifnet **retifp; 451{ 452 int error, clone; 453 struct rtentry *rt = NULL; 454 455 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1; 456 if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp, 457 &rt, clone)) != 0) { 458 return (error); 459 } 460 461 /* 462 * do not use a rejected or black hole route. 463 * XXX: this check should be done in the L2 output routine. 464 * However, if we skipped this check here, we'd see the following 465 * scenario: 466 * - install a rejected route for a scoped address prefix 467 * (like fe80::/10) 468 * - send a packet to a destination that matches the scoped prefix, 469 * with ambiguity about the scope zone. 470 * - pick the outgoing interface from the route, and disambiguate the 471 * scope zone with the interface. 472 * - ip6_output() would try to get another route with the "new" 473 * destination, which may be valid. 474 * - we'd see no error on output. 475 * Although this may not be very harmful, it should still be confusing. 476 * We thus reject the case here. 477 */ 478 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 479 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 480 } 481 482 /* 483 * Adjust the "outgoing" interface. If we're going to loop the packet 484 * back to ourselves, the ifp would be the loopback interface. 485 * However, we'd rather know the interface associated to the 486 * destination address (which should probably be one of our own 487 * addresses.) 488 */ 489 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 490 *retifp = rt->rt_ifa->ifa_ifp; 491 492 return (0); 493} 494 495int 496in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 497 struct sockaddr_in6 *dstsock; 498 struct ip6_pktopts *opts; 499 struct ip6_moptions *mopts; 500 struct route_in6 *ro; 501 struct ifnet **retifp; 502 struct rtentry **retrt; 503 int clone; /* meaningful only for bsdi and freebsd. */ 504{ 505 int error = 0; 506 struct ifnet *ifp = NULL; 507 struct rtentry *rt = NULL; 508 struct sockaddr_in6 *sin6_next; 509 struct in6_pktinfo *pi = NULL; 510 struct in6_addr *dst = &dstsock->sin6_addr; 511 512#if 0 513 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 514 dstsock->sin6_addr.s6_addr32[1] == 0 && 515 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 516 printf("in6_selectroute: strange destination %s\n", 517 ip6_sprintf(&dstsock->sin6_addr)); 518 } else { 519 printf("in6_selectroute: destination = %s%%%d\n", 520 ip6_sprintf(&dstsock->sin6_addr), 521 dstsock->sin6_scope_id); /* for debug */ 522 } 523#endif 524 525 /* If the caller specify the outgoing interface explicitly, use it. */ 526 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 527 /* XXX boundary check is assumed to be already done. */ 528 ifp = ifnet_byindex(pi->ipi6_ifindex); 529 if (ifp != NULL && 530 (retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { 531 /* 532 * we do not have to check nor get the route for 533 * multicast. 534 */ 535 goto done; 536 } else 537 goto getroute; 538 } 539 540 /* 541 * If the destination address is a multicast address and the outgoing 542 * interface for the address is specified by the caller, use it. 543 */ 544 if (IN6_IS_ADDR_MULTICAST(dst) && 545 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 546 goto done; /* we do not need a route for multicast. */ 547 } 548 549 getroute: 550 /* 551 * If the next hop address for the packet is specified by the caller, 552 * use it as the gateway. 553 */ 554 if (opts && opts->ip6po_nexthop) { 555 struct route_in6 *ron; 556 557 sin6_next = satosin6(opts->ip6po_nexthop); 558 559 /* at this moment, we only support AF_INET6 next hops */ 560 if (sin6_next->sin6_family != AF_INET6) { 561 error = EAFNOSUPPORT; /* or should we proceed? */ 562 goto done; 563 } 564 565 /* 566 * If the next hop is an IPv6 address, then the node identified 567 * by that address must be a neighbor of the sending host. 568 */ 569 ron = &opts->ip6po_nextroute; 570 if ((ron->ro_rt && 571 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 572 (RTF_UP | RTF_LLINFO)) || 573 !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { 574 if (ron->ro_rt) { 575 RTFREE(ron->ro_rt); 576 ron->ro_rt = NULL; 577 } 578 *satosin6(&ron->ro_dst) = *sin6_next; 579 } 580 if (ron->ro_rt == NULL) { 581 rtalloc((struct route *)ron); /* multi path case? */ 582 if (ron->ro_rt == NULL || 583 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 584 if (ron->ro_rt) { 585 RTFREE(ron->ro_rt); 586 ron->ro_rt = NULL; 587 } 588 error = EHOSTUNREACH; 589 goto done; 590 } 591 } 592 rt = ron->ro_rt; 593 ifp = rt->rt_ifp; 594 595 /* 596 * When cloning is required, try to allocate a route to the 597 * destination so that the caller can store path MTU 598 * information. 599 */ 600 if (!clone) 601 goto done; 602 } 603 604 /* 605 * Use a cached route if it exists and is valid, else try to allocate 606 * a new one. Note that we should check the address family of the 607 * cached destination, in case of sharing the cache with IPv4. 608 */ 609 if (ro) { 610 if (ro->ro_rt && 611 (!(ro->ro_rt->rt_flags & RTF_UP) || 612 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 613 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 614 dst))) { 615 RTFREE(ro->ro_rt); 616 ro->ro_rt = (struct rtentry *)NULL; 617 } 618 if (ro->ro_rt == (struct rtentry *)NULL) { 619 struct sockaddr_in6 *sa6; 620 621 /* No route yet, so try to acquire one */ 622 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 623 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 624 *sa6 = *dstsock; 625 sa6->sin6_scope_id = 0; 626 if (clone) { 627 rtalloc((struct route *)ro); 628 } else { 629 ro->ro_rt = rtalloc1(&((struct route *)ro) 630 ->ro_dst, NULL, 0UL); 631 if (ro->ro_rt) 632 RT_UNLOCK(ro->ro_rt); 633 } 634 } 635 636 /* 637 * do not care about the result if we have the nexthop 638 * explicitly specified. 639 */ 640 if (opts && opts->ip6po_nexthop) 641 goto done; 642 643 if (ro->ro_rt) { 644 ifp = ro->ro_rt->rt_ifp; 645 646 if (ifp == NULL) { /* can this really happen? */ 647 RTFREE(ro->ro_rt); 648 ro->ro_rt = NULL; 649 } 650 } 651 if (ro->ro_rt == NULL) 652 error = EHOSTUNREACH; 653 rt = ro->ro_rt; 654 655 /* 656 * Check if the outgoing interface conflicts with 657 * the interface specified by ipi6_ifindex (if specified). 658 * Note that loopback interface is always okay. 659 * (this may happen when we are sending a packet to one of 660 * our own addresses.) 661 */ 662 if (opts && opts->ip6po_pktinfo 663 && opts->ip6po_pktinfo->ipi6_ifindex) { 664 if (!(ifp->if_flags & IFF_LOOPBACK) && 665 ifp->if_index != 666 opts->ip6po_pktinfo->ipi6_ifindex) { 667 error = EHOSTUNREACH; 668 goto done; 669 } 670 } 671 } 672 673 done: 674 if (ifp == NULL && rt == NULL) { 675 /* 676 * This can happen if the caller did not pass a cached route 677 * nor any other hints. We treat this case an error. 678 */ 679 error = EHOSTUNREACH; 680 } 681 if (error == EHOSTUNREACH) 682 ip6stat.ip6s_noroute++; 683 684 if (retifp != NULL) 685 *retifp = ifp; 686 if (retrt != NULL) 687 *retrt = rt; /* rt may be NULL */ 688 689 return (error); 690} 691 692/* 693 * Default hop limit selection. The precedence is as follows: 694 * 1. Hoplimit value specified via ioctl. 695 * 2. (If the outgoing interface is detected) the current 696 * hop limit of the interface specified by router advertisement. 697 * 3. The system default hoplimit. 698*/ 699int 700in6_selecthlim(in6p, ifp) 701 struct in6pcb *in6p; 702 struct ifnet *ifp; 703{ 704 if (in6p && in6p->in6p_hops >= 0) 705 return (in6p->in6p_hops); 706 else if (ifp) 707 return (ND_IFINFO(ifp)->chlim); 708 else 709 return (ip6_defhlim); 710} 711 712/* 713 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 714 * share this function by all *bsd*... 715 */ 716int 717in6_pcbsetport(laddr, inp, td) 718 struct in6_addr *laddr; 719 struct inpcb *inp; 720 struct thread *td; 721{ 722 struct socket *so = inp->inp_socket; 723 u_int16_t lport = 0, first, last, *lastport; 724 int count, error = 0, wild = 0; 725 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 726 727 /* XXX: this is redundant when called from in6_pcbbind */ 728 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 729 wild = INPLOOKUP_WILDCARD; 730 731 inp->inp_flags |= INP_ANONPORT; 732 733 if (inp->inp_flags & INP_HIGHPORT) { 734 first = ipport_hifirstauto; /* sysctl */ 735 last = ipport_hilastauto; 736 lastport = &pcbinfo->lasthi; 737 } else if (inp->inp_flags & INP_LOWPORT) { 738 if (td && (error = suser(td))) 739 return error; 740 first = ipport_lowfirstauto; /* 1023 */ 741 last = ipport_lowlastauto; /* 600 */ 742 lastport = &pcbinfo->lastlow; 743 } else { 744 first = ipport_firstauto; /* sysctl */ 745 last = ipport_lastauto; 746 lastport = &pcbinfo->lastport; 747 } 748 /* 749 * Simple check to ensure all ports are not used up causing 750 * a deadlock here. 751 * 752 * We split the two cases (up and down) so that the direction 753 * is not being tested on each round of the loop. 754 */ 755 if (first > last) { 756 /* 757 * counting down 758 */ 759 count = first - last; 760 761 do { 762 if (count-- < 0) { /* completely used? */ 763 /* 764 * Undo any address bind that may have 765 * occurred above. 766 */ 767 inp->in6p_laddr = in6addr_any; 768 return (EAGAIN); 769 } 770 --*lastport; 771 if (*lastport > first || *lastport < last) 772 *lastport = first; 773 lport = htons(*lastport); 774 } while (in6_pcblookup_local(pcbinfo, 775 &inp->in6p_laddr, lport, wild)); 776 } else { 777 /* 778 * counting up 779 */ 780 count = last - first; 781 782 do { 783 if (count-- < 0) { /* completely used? */ 784 /* 785 * Undo any address bind that may have 786 * occurred above. 787 */ 788 inp->in6p_laddr = in6addr_any; 789 return (EAGAIN); 790 } 791 ++*lastport; 792 if (*lastport < first || *lastport > last) 793 *lastport = first; 794 lport = htons(*lastport); 795 } while (in6_pcblookup_local(pcbinfo, 796 &inp->in6p_laddr, lport, wild)); 797 } 798 799 inp->inp_lport = lport; 800 if (in_pcbinshash(inp) != 0) { 801 inp->in6p_laddr = in6addr_any; 802 inp->inp_lport = 0; 803 return (EAGAIN); 804 } 805 806 return (0); 807} 808 809/* 810 * Generate kernel-internal form (scopeid embedded into s6_addr16[1]). 811 * If the address scope of is link-local, embed the interface index in the 812 * address. The routine determines our precedence 813 * between advanced API scope/interface specification and basic API 814 * specification. 815 * 816 * This function should be nuked in the future, when we get rid of embedded 817 * scopeid thing. 818 * 819 * XXX actually, it is over-specification to return ifp against sin6_scope_id. 820 * there can be multiple interfaces that belong to a particular scope zone 821 * (in specification, we have 1:N mapping between a scope zone and interfaces). 822 * we may want to change the function to return something other than ifp. 823 */ 824int 825in6_embedscope(in6, sin6, in6p, ifpp) 826 struct in6_addr *in6; 827 const struct sockaddr_in6 *sin6; 828 struct in6pcb *in6p; 829 struct ifnet **ifpp; 830{ 831 struct ifnet *ifp = NULL; 832 u_int32_t zoneid = sin6->sin6_scope_id; 833 834 *in6 = sin6->sin6_addr; 835 if (ifpp) 836 *ifpp = NULL; 837 838 /* 839 * don't try to read sin6->sin6_addr beyond here, since the caller may 840 * ask us to overwrite existing sockaddr_in6 841 */ 842 843#ifdef ENABLE_DEFAULT_SCOPE 844 if (zoneid == 0) 845 zoneid = scope6_addr2default(in6); 846#endif 847 848 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 849 struct in6_pktinfo *pi; 850 851 /* KAME assumption: link id == interface id */ 852 if (in6p && in6p->in6p_outputopts && 853 (pi = in6p->in6p_outputopts->ip6po_pktinfo) && 854 pi->ipi6_ifindex) { 855 ifp = ifnet_byindex(pi->ipi6_ifindex); 856 in6->s6_addr16[1] = htons(pi->ipi6_ifindex); 857 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && 858 in6p->in6p_moptions && 859 in6p->in6p_moptions->im6o_multicast_ifp) { 860 ifp = in6p->in6p_moptions->im6o_multicast_ifp; 861 in6->s6_addr16[1] = htons(ifp->if_index); 862 } else if (zoneid) { 863 if (if_index < zoneid) 864 return (ENXIO); /* XXX EINVAL? */ 865 ifp = ifnet_byindex(zoneid); 866 867 /* XXX assignment to 16bit from 32bit variable */ 868 in6->s6_addr16[1] = htons(zoneid & 0xffff); 869 } 870 871 if (ifpp) 872 *ifpp = ifp; 873 } 874 875 return 0; 876} 877 878/* 879 * generate standard sockaddr_in6 from embedded form. 880 * touches sin6_addr and sin6_scope_id only. 881 * 882 * this function should be nuked in the future, when we get rid of 883 * embedded scopeid thing. 884 */ 885int 886in6_recoverscope(sin6, in6, ifp) 887 struct sockaddr_in6 *sin6; 888 const struct in6_addr *in6; 889 struct ifnet *ifp; 890{ 891 u_int32_t zoneid; 892 893 sin6->sin6_addr = *in6; 894 895 /* 896 * don't try to read *in6 beyond here, since the caller may 897 * ask us to overwrite existing sockaddr_in6 898 */ 899 900 sin6->sin6_scope_id = 0; 901 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 902 /* 903 * KAME assumption: link id == interface id 904 */ 905 zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); 906 if (zoneid) { 907 /* sanity check */ 908 if (zoneid < 0 || if_index < zoneid) 909 return ENXIO; 910 if (ifp && ifp->if_index != zoneid) 911 return ENXIO; 912 sin6->sin6_addr.s6_addr16[1] = 0; 913 sin6->sin6_scope_id = zoneid; 914 } 915 } 916 917 return 0; 918} 919 920/* 921 * just clear the embedded scope identifier. 922 */ 923void 924in6_clearscope(addr) 925 struct in6_addr *addr; 926{ 927 if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) 928 addr->s6_addr16[1] = 0; 929} 930 931void 932addrsel_policy_init() 933{ 934 ADDRSEL_LOCK_INIT(); 935 936 init_policy_queue(); 937 938 /* initialize the "last resort" policy */ 939 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 940 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 941} 942 943static struct in6_addrpolicy * 944lookup_addrsel_policy(key) 945 struct sockaddr_in6 *key; 946{ 947 struct in6_addrpolicy *match = NULL; 948 949 ADDRSEL_LOCK(); 950 match = match_addrsel_policy(key); 951 952 if (match == NULL) 953 match = &defaultaddrpolicy; 954 else 955 match->use++; 956 ADDRSEL_UNLOCK(); 957 958 return (match); 959} 960 961/* 962 * Subroutines to manage the address selection policy table via sysctl. 963 */ 964struct walkarg { 965 struct sysctl_req *w_req; 966}; 967 968static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 969SYSCTL_DECL(_net_inet6_ip6); 970SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 971 CTLFLAG_RD, in6_src_sysctl, ""); 972 973static int 974in6_src_sysctl(SYSCTL_HANDLER_ARGS) 975{ 976 struct walkarg w; 977 978 if (req->newptr) 979 return EPERM; 980 981 bzero(&w, sizeof(w)); 982 w.w_req = req; 983 984 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 985} 986 987int 988in6_src_ioctl(cmd, data) 989 u_long cmd; 990 caddr_t data; 991{ 992 int i; 993 struct in6_addrpolicy ent0; 994 995 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 996 return (EOPNOTSUPP); /* check for safety */ 997 998 ent0 = *(struct in6_addrpolicy *)data; 999 1000 if (ent0.label == ADDR_LABEL_NOTAPP) 1001 return (EINVAL); 1002 /* check if the prefix mask is consecutive. */ 1003 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1004 return (EINVAL); 1005 /* clear trailing garbages (if any) of the prefix address. */ 1006 for (i = 0; i < 4; i++) { 1007 ent0.addr.sin6_addr.s6_addr32[i] &= 1008 ent0.addrmask.sin6_addr.s6_addr32[i]; 1009 } 1010 ent0.use = 0; 1011 1012 switch (cmd) { 1013 case SIOCAADDRCTL_POLICY: 1014 return (add_addrsel_policyent(&ent0)); 1015 case SIOCDADDRCTL_POLICY: 1016 return (delete_addrsel_policyent(&ent0)); 1017 } 1018 1019 return (0); /* XXX: compromise compilers */ 1020} 1021 1022/* 1023 * The followings are implementation of the policy table using a 1024 * simple tail queue. 1025 * XXX such details should be hidden. 1026 * XXX implementation using binary tree should be more efficient. 1027 */ 1028struct addrsel_policyent { 1029 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1030 struct in6_addrpolicy ape_policy; 1031}; 1032 1033TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1034 1035struct addrsel_policyhead addrsel_policytab; 1036 1037static void 1038init_policy_queue() 1039{ 1040 TAILQ_INIT(&addrsel_policytab); 1041} 1042 1043static int 1044add_addrsel_policyent(newpolicy) 1045 struct in6_addrpolicy *newpolicy; 1046{ 1047 struct addrsel_policyent *new, *pol; 1048 1049 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1050 M_WAITOK); 1051 ADDRSEL_LOCK(); 1052 1053 /* duplication check */ 1054 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1055 pol = TAILQ_NEXT(pol, ape_entry)) { 1056 if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, 1057 &pol->ape_policy.addr) && 1058 SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, 1059 &pol->ape_policy.addrmask)) { 1060 ADDRSEL_UNLOCK(); 1061 FREE(new, M_IFADDR); 1062 return (EEXIST); /* or override it? */ 1063 } 1064 } 1065 1066 bzero(new, sizeof(*new)); 1067 1068 /* XXX: should validate entry */ 1069 new->ape_policy = *newpolicy; 1070 1071 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1072 ADDRSEL_UNLOCK(); 1073 1074 return (0); 1075} 1076 1077static int 1078delete_addrsel_policyent(key) 1079 struct in6_addrpolicy *key; 1080{ 1081 struct addrsel_policyent *pol; 1082 1083 ADDRSEL_LOCK(); 1084 1085 /* search for the entry in the table */ 1086 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1087 pol = TAILQ_NEXT(pol, ape_entry)) { 1088 if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && 1089 SA6_ARE_ADDR_EQUAL(&key->addrmask, 1090 &pol->ape_policy.addrmask)) { 1091 break; 1092 } 1093 } 1094 if (pol == NULL) { 1095 ADDRSEL_UNLOCK(); 1096 return (ESRCH); 1097 } 1098 1099 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1100 ADDRSEL_UNLOCK(); 1101 1102 return (0); 1103} 1104 1105static int 1106walk_addrsel_policy(callback, w) 1107 int (*callback) __P((struct in6_addrpolicy *, void *)); 1108 void *w; 1109{ 1110 struct addrsel_policyent *pol; 1111 int error = 0; 1112 1113 ADDRSEL_LOCK(); 1114 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1115 pol = TAILQ_NEXT(pol, ape_entry)) { 1116 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1117 ADDRSEL_UNLOCK(); 1118 return (error); 1119 } 1120 } 1121 ADDRSEL_UNLOCK(); 1122 1123 return (error); 1124} 1125 1126static int 1127dump_addrsel_policyent(pol, arg) 1128 struct in6_addrpolicy *pol; 1129 void *arg; 1130{ 1131 int error = 0; 1132 struct walkarg *w = arg; 1133 1134 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1135 1136 return (error); 1137} 1138 1139static struct in6_addrpolicy * 1140match_addrsel_policy(key) 1141 struct sockaddr_in6 *key; 1142{ 1143 struct addrsel_policyent *pent; 1144 struct in6_addrpolicy *bestpol = NULL, *pol; 1145 int matchlen, bestmatchlen = -1; 1146 u_char *mp, *ep, *k, *p, m; 1147 1148 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1149 pent = TAILQ_NEXT(pent, ape_entry)) { 1150 matchlen = 0; 1151 1152 pol = &pent->ape_policy; 1153 mp = (u_char *)&pol->addrmask.sin6_addr; 1154 ep = mp + 16; /* XXX: scope field? */ 1155 k = (u_char *)&key->sin6_addr; 1156 p = (u_char *)&pol->addr.sin6_addr; 1157 for (; mp < ep && *mp; mp++, k++, p++) { 1158 m = *mp; 1159 if ((*k & m) != *p) 1160 goto next; /* not match */ 1161 if (m == 0xff) /* short cut for a typical case */ 1162 matchlen += 8; 1163 else { 1164 while (m >= 0x80) { 1165 matchlen++; 1166 m <<= 1; 1167 } 1168 } 1169 } 1170 1171 /* matched. check if this is better than the current best. */ 1172 if (bestpol == NULL || 1173 matchlen > bestmatchlen) { 1174 bestpol = pol; 1175 bestmatchlen = matchlen; 1176 } 1177 1178 next: 1179 continue; 1180 } 1181 1182 return (bestpol); 1183} 1184