1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */ 30/* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */ 31 32/* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61/* 62 * Copyright (c) 1982, 1986, 1991, 1993 63 * The Regents of the University of California. All rights reserved. 64 * 65 * Redistribution and use in source and binary forms, with or without 66 * modification, are permitted provided that the following conditions 67 * are met: 68 * 1. Redistributions of source code must retain the above copyright 69 * notice, this list of conditions and the following disclaimer. 70 * 2. Redistributions in binary form must reproduce the above copyright 71 * notice, this list of conditions and the following disclaimer in the 72 * documentation and/or other materials provided with the distribution. 73 * 3. 
All advertising materials mentioning features or use of this software 74 * must display the following acknowledgement: 75 * This product includes software developed by the University of 76 * California, Berkeley and its contributors. 77 * 4. Neither the name of the University nor the names of its contributors 78 * may be used to endorse or promote products derived from this software 79 * without specific prior written permission. 80 * 81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 91 * SUCH DAMAGE. 
 *
 *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <kern/lock.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>

#include <net/net_osdep.h>

#include "loop.h"

SYSCTL_DECL(_net_inet6_ip6);

/*
 * Run-time writable switch (exported as the "select_srcif_debug" sysctl);
 * when non-zero, selectroute() logs its source interface decisions.
 */
static int ip6_select_srcif_debug = 0;
SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
	"log source interface selection debug info");

/*
 * Policy label value for which in6_selectsrc() skips the "matching label"
 * comparison (Rule 6).
 */
#define ADDR_LABEL_NOTAPP (-1)
struct in6_addrpolicy defaultaddrpolicy;

/*
 * System-wide default consulted by Rule 7 of in6_selectsrc() when the
 * caller's packet options do not override the temporary address preference.
 */
int ip6_prefer_tempaddr = 1;

/*
 * The address selection policy lock macros expand to nothing unless
 * ENABLE_ADDRSEL is defined.
 */
#ifdef ENABLE_ADDRSEL
extern lck_mtx_t *addrsel_mutex;
#define ADDRSEL_LOCK()		lck_mtx_lock(addrsel_mutex)
#define ADDRSEL_UNLOCK()	lck_mtx_unlock(addrsel_mutex)
#else
#define ADDRSEL_LOCK()
#define ADDRSEL_UNLOCK()
#endif

static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
    struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
    struct ifnet **, struct rtentry **, int, int,
    const struct ip6_out_args *ip6oa);
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
    struct ip6_moptions *, struct route_in6 *ro,
    const struct ip6_out_args *, struct ifnet **);
static void init_policy_queue(void);
static int add_addrsel_policyent(const struct in6_addrpolicy *);
#ifdef ENABLE_ADDRSEL
static int delete_addrsel_policyent(const struct in6_addrpolicy *);
#endif
static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
    void *);
static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
void addrsel_policy_init(void);

/*
 * Return an IPv6 address, which is the most appropriate for a given
 * destination and user specified options.
 * If necessary, this function lookups the routing table and returns
 * an entry to the caller for later use.
 */

/*
 * Helper macros for the candidate loop in in6_selectsrc().  Each one bumps
 * the per-rule counter ip6stat.ip6s_sources_rule[r] (only when r is within
 * the bounds of that array) and then jumps to a label that must exist in
 * the enclosing function:
 *
 *	REPLACE(r)	goto replace	(the current candidate becomes the best)
 *	NEXTSRC(r)	goto next	(the current candidate is rejected)
 *	BREAK(r)	goto out	(the current candidate wins; stop scanning)
 */
#define REPLACE(r) do {\
	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		ip6stat.ip6s_sources_rule[(r)]++; \
	goto replace; \
} while(0)
#define NEXTSRC(r) do {\
	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		ip6stat.ip6s_sources_rule[(r)]++; \
	goto next;		/* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		ip6stat.ip6s_sources_rule[(r)]++; \
	goto out;		/* XXX: we can't use 'break' here */ \
} while(0)

/*
 * Regardless of error, it will return an ifp with a reference held if the
 * caller provides a non-NULL ifpp.  The caller is responsible for checking
 * if the returned ifp is valid and release its reference at all times.
 *
 * Parameters:
 *	dstsock		destination; its sin6_addr is copied before any scope
 *			embedding is applied.
 *	opts		optional packet options; ip6po_pktinfo may force the
 *			source address and ip6po_prefer_tempaddr tunes Rule 7.
 *	inp		optional pcb; supplies the multicast options, the
 *			INP_NO_IFT_CELLULAR restriction and an already bound
 *			local address.
 *	ro		route placeholder handed to in6_selectif().
 *	ifpp		optional; receives the outgoing interface.  When NULL
 *			the interface reference is dropped before returning.
 *	src_storage	buffer that receives the chosen address.
 *	ifscope		first initializer of the local ip6_out_args that is
 *			passed to in6_selectif(); IFSCOPE_NONE means unbound.
 *	errorp		must be non-NULL; cleared on entry and set to an errno
 *			value on failure.
 *
 * Returns src_storage holding the selected address, &inp->in6p_laddr when
 * the socket is already bound to a source, or NULL on failure.
 */
struct in6_addr *
in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct inpcb *inp, struct route_in6 *ro,
    struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
    int *errorp)
{
	struct in6_addr dst;
	struct ifnet *ifp = NULL;
	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
	struct in6_pktinfo *pi = NULL;
	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
	u_int32_t odstzone;
	int prefer_tempaddr;
	struct ip6_moptions *mopts;
	struct timeval timenow;
	struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF };
	boolean_t islocal = FALSE;

	/*
	 * NOTE(review): timenow is not referenced by name anywhere below;
	 * presumably IFA6_IS_DEPRECATED() expands to an expression that
	 * reads it -- confirm before removing.
	 */
	getmicrotime(&timenow);

	dst = dstsock->sin6_addr; /* make a copy for local operation */
	*errorp = 0;
	if (ifpp != NULL)
		*ifpp = NULL;

	if (inp != NULL) {
		mopts = inp->in6p_moptions;
		if (inp->inp_flags & INP_NO_IFT_CELLULAR)
			ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
	} else {
		mopts = NULL;
	}

	if (ip6oa.ip6oa_boundif != IFSCOPE_NONE)
		ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;

	/*
	 * If the source address is explicitly specified by the caller,
	 * check if the requested source address is indeed a unicast address
	 * assigned to the node, and can be used as the packet's source
	 * address.  If everything is okay, use the address as source.
	 */
	if (opts && (pi = opts->ip6po_pktinfo) &&
	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
		struct sockaddr_in6 srcsock;
		struct in6_ifaddr *ia6;

		/* get the outgoing interface */
		if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
		    &ifp)) != 0) {
			src_storage = NULL;
			goto done;
		}

		/*
		 * determine the appropriate zone id of the source based on
		 * the zone of the destination and the outgoing interface.
		 * If the specified address is ambiguous wrt the scope zone,
		 * the interface must be specified; otherwise, ifa_ifwithaddr()
		 * will fail matching the address.
		 */
		bzero(&srcsock, sizeof(srcsock));
		srcsock.sin6_family = AF_INET6;
		srcsock.sin6_len = sizeof(srcsock);
		srcsock.sin6_addr = pi->ipi6_addr;
		if (ifp != NULL) {
			*errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
			if (*errorp != 0) {
				src_storage = NULL;
				goto done;
			}
		}
		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)
		    (&srcsock));
		if (ia6 == NULL) {
			*errorp = EADDRNOTAVAIL;
			src_storage = NULL;
			goto done;
		}
		IFA_LOCK_SPIN(&ia6->ia_ifa);
		/*
		 * Refuse anycast or not-ready addresses, and addresses on a
		 * cellular interface when cellular use is prohibited.
		 */
		if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
		    ((ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
		    (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
			IFA_UNLOCK(&ia6->ia_ifa);
			IFA_REMREF(&ia6->ia_ifa);
			*errorp = EADDRNOTAVAIL;
			src_storage = NULL;
			goto done;
		}

		*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
		IFA_UNLOCK(&ia6->ia_ifa);
		IFA_REMREF(&ia6->ia_ifa);
		goto done;
	}

	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 * Note that the returned pointer then refers to the pcb itself and
	 * not to the caller's src_storage.
	 */
	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		src_storage = &inp->in6p_laddr;
		goto done;
	}

	/*
	 * If the address is not specified, choose the best one based on
	 * the outgoing interface and the destination address.
	 */

	/* get the outgoing interface */
	if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
	    &ifp)) != 0) {
		src_storage = NULL;
		goto done;
	}

	*errorp = in6_setscope(&dst, ifp, &odstzone);
	if (*errorp != 0) {
		src_storage = NULL;
		goto done;
	}
	lck_rw_lock_shared(&in6_ifaddr_rwlock);

	/*
	 * Walk every configured address.  Each candidate is locked for the
	 * duration of its evaluation; every exit from the loop body goes
	 * through the continue statement at the end of the rule chain or
	 * one of the replace/next/out labels, all of which unlock it.
	 */
	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
		int new_scope = -1, new_matchlen = -1;
		struct in6_addrpolicy *new_policy = NULL;
		u_int32_t srczone, osrczone, dstzone;
		struct in6_addr src;
		struct ifnet *ifp1 = ia->ia_ifp;

		IFA_LOCK(&ia->ia_ifa);
		/*
		 * We'll never take an address that breaks the scope zone
		 * of the destination.  We also skip an address if its zone
		 * does not contain the outgoing interface.
		 * XXX: we should probably use sin6_scope_id here.
		 */
		if (in6_setscope(&dst, ifp1, &dstzone) ||
		    odstzone != dstzone)
			goto next;

		src = ia->ia_addr.sin6_addr;
		if (in6_setscope(&src, ifp, &osrczone) ||
		    in6_setscope(&src, ifp1, &srczone) ||
		    osrczone != srczone)
			goto next;

		/* avoid unusable addresses */
		if ((ia->ia6_flags &
		    (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED)))
			goto next;

		if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
			goto next;

		if (!nd6_optimistic_dad &&
		    (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
			goto next;

		/* Rule 1: Prefer same address */
		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
			BREAK(1); /* there should be no better candidate */

		/* The first usable candidate becomes the initial best. */
		if (ia_best == NULL)
			REPLACE(0);

		/* Rule 2: Prefer appropriate scope */
		if (dst_scope < 0)
			dst_scope = in6_addrscope(&dst);
		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
				REPLACE(2);
			NEXTSRC(2);
		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
				NEXTSRC(2);
			REPLACE(2);
		}

		/*
		 * Rule 3: Avoid deprecated addresses.  Note that the case of
		 * !ip6_use_deprecated is already rejected above.
		 */
		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
			NEXTSRC(3);
		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
			REPLACE(3);

		/*
		 * RFC 4429 says that optimistic addresses are equivalent to
		 * deprecated addresses, so avoid them here.
		 */
		if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 &&
		    (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
			NEXTSRC(3);
		if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 &&
		    (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0)
			REPLACE(3);

		/* Rule 4: Prefer home addresses */
		/*
		 * XXX: This is a TODO.  We should probably merge the MIP6
		 * case above.
		 */

		/* Rule 5: Prefer outgoing interface */
		if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
			NEXTSRC(5);
		if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
			REPLACE(5);

		/*
		 * Rule 6: Prefer matching label
		 * Note that best_policy should be non-NULL here.
		 */
		if (dst_policy == NULL)
			dst_policy = in6_addrsel_lookup_policy(dstsock);
		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
			new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
			if (dst_policy->label == best_policy->label &&
			    dst_policy->label != new_policy->label)
				NEXTSRC(6);
			if (dst_policy->label != best_policy->label &&
			    dst_policy->label == new_policy->label)
				REPLACE(6);
		}

		/*
		 * Rule 7: Prefer public addresses.
		 * We allow users to reverse the logic by configuring
		 * a sysctl variable, so that privacy conscious users can
		 * always prefer temporary addresses.
		 * Don't use temporary addresses for local destinations or
		 * for multicast addresses unless we were passed in an option.
		 *
		 * NOTE(review): islocal lives at function scope and is never
		 * reset to FALSE, so once one best candidate marks the
		 * destination as local it stays local for every later
		 * candidate -- confirm this is intended.
		 */
		if (IN6_IS_ADDR_MULTICAST(&dst) ||
		    in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >=
		    in6_mask2len(&ia_best->ia_prefixmask.sin6_addr, NULL))
			islocal = TRUE;
		if (opts == NULL ||
		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
			prefer_tempaddr = islocal ? 0 : ip6_prefer_tempaddr;
		} else if (opts->ip6po_prefer_tempaddr ==
		    IP6PO_TEMPADDR_NOTPREFER) {
			prefer_tempaddr = 0;
		} else
			prefer_tempaddr = 1;
		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
			if (prefer_tempaddr)
				REPLACE(7);
			else
				NEXTSRC(7);
		}
		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
			if (prefer_tempaddr)
				NEXTSRC(7);
			else
				REPLACE(7);
		}

		/*
		 * Rule 8: prefer addresses on alive interfaces.
		 * This is a KAME specific rule.
		 */
		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
		    !(ia->ia_ifp->if_flags & IFF_UP))
			NEXTSRC(8);
		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
		    (ia->ia_ifp->if_flags & IFF_UP))
			REPLACE(8);

		/*
		 * Rule 14: Use longest matching prefix.
		 * Note: in the address selection draft, this rule is
		 * documented as "Rule 8".  However, since it is also
		 * documented that this rule can be overridden, we assign
		 * a large number so that it is easy to assign smaller numbers
		 * to more preferred rules.
		 */
		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
		if (best_matchlen < new_matchlen)
			REPLACE(14);
		if (new_matchlen < best_matchlen)
			NEXTSRC(14);

		/* Rule 15 is reserved. */

		/*
		 * Last resort: just keep the current candidate.
		 * Or, do we need more rules?
		 */
		IFA_UNLOCK(&ia->ia_ifa);
		continue;

replace:
		/*
		 * Adopt ia as the new best candidate.  Values that the rule
		 * chain did not get far enough to compute (still at their
		 * -1/NULL sentinels) are computed here.
		 */
		best_scope = (new_scope >= 0 ? new_scope :
		    in6_addrscope(&ia->ia_addr.sin6_addr));
		best_policy = (new_policy ? new_policy :
		    in6_addrsel_lookup_policy(&ia->ia_addr));
		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
		    in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
		IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for ia_best */
		IFA_UNLOCK(&ia->ia_ifa);
		if (ia_best != NULL)
			IFA_REMREF(&ia_best->ia_ifa);
		ia_best = ia;
		continue;

next:
		/* Candidate rejected; just drop its lock. */
		IFA_UNLOCK(&ia->ia_ifa);
		continue;

out:
		/* Candidate wins outright (Rule 1); take it and stop. */
		IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for ia_best */
		IFA_UNLOCK(&ia->ia_ifa);
		if (ia_best != NULL)
			IFA_REMREF(&ia_best->ia_ifa);
		ia_best = ia;
		break;
	}

	lck_rw_done(&in6_ifaddr_rwlock);

	/* A winner on a cellular interface is unusable if cellular is barred. */
	if (ia_best != NULL &&
	    (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
	    ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
		IFA_REMREF(&ia_best->ia_ifa);
		ia_best = NULL;
	}

	if ((ia = ia_best) == NULL) {
		*errorp = EADDRNOTAVAIL;
		src_storage = NULL;
		goto done;
	}

	/* Copy the address out, then drop the reference held for ia_best. */
	IFA_LOCK_SPIN(&ia->ia_ifa);
	*src_storage = satosin6(&ia->ia_addr)->sin6_addr;
	IFA_UNLOCK(&ia->ia_ifa);
	IFA_REMREF(&ia->ia_ifa);
done:
	if (ifpp != NULL) {
		/* if ifp is non-NULL, refcnt held in in6_selectif() */
		*ifpp = ifp;
	} else if (ifp != NULL) {
		ifnet_release(ifp);
	}
	return (src_storage);
}

/*
 * Given a source IPv6 address (and route, if available), determine the best
 * interface to send the packet from.  Checking for (and updating) the
 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
 * without any locks, based on the assumption that in the event this is
 * called from ip6_output(), the output operation is single-threaded per-pcb,
 * i.e. for any given pcb there can only be one thread performing output at
 * the IPv6 layer.
 *
 * This routine is analogous to in_selectsrcif() for IPv4.  Regardless of
 * error, it will return an ifp with a reference held if the caller provides
 * a non-NULL retifp.
The caller is responsible for checking if the 566 * returned ifp is valid and release its reference at all times. 567 * 568 * clone - meaningful only for bsdi and freebsd 569 */ 570static int 571selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, 572 struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, 573 struct ifnet **retifp, struct rtentry **retrt, int clone, 574 int norouteok, const struct ip6_out_args *ip6oa) 575{ 576 int error = 0; 577 struct ifnet *ifp = NULL, *ifp0 = NULL; 578 struct route_in6 *route = NULL; 579 struct sockaddr_in6 *sin6_next; 580 struct in6_pktinfo *pi = NULL; 581 struct in6_addr *dst = &dstsock->sin6_addr; 582 struct ifaddr *ifa = NULL; 583 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN]; 584 boolean_t select_srcif, proxied_ifa = FALSE; 585 unsigned int ifscope = ip6oa->ip6oa_boundif; 586 587#if 0 588 char ip6buf[INET6_ADDRSTRLEN]; 589 590 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 591 dstsock->sin6_addr.s6_addr32[1] == 0 && 592 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 593 printf("in6_selectroute: strange destination %s\n", 594 ip6_sprintf(ip6buf, &dstsock->sin6_addr)); 595 } else { 596 printf("in6_selectroute: destination = %s%%%d\n", 597 ip6_sprintf(ip6buf, &dstsock->sin6_addr), 598 dstsock->sin6_scope_id); /* for debug */ 599 } 600#endif 601 602 if (retifp != NULL) 603 *retifp = NULL; 604 605 if (retrt != NULL) 606 *retrt = NULL; 607 608 if (ip6_select_srcif_debug) { 609 struct in6_addr src; 610 src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any; 611 (void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src)); 612 (void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst)); 613 } 614 615 /* 616 * If the destination address is UNSPECIFIED addr, bail out. 617 */ 618 if (IN6_IS_ADDR_UNSPECIFIED(dst)) { 619 error = EHOSTUNREACH; 620 goto done; 621 } 622 623 /* 624 * Perform source interface selection only if Scoped Routing 625 * is enabled and a source address that isn't unspecified. 
626 */ 627 select_srcif = (ip6_doscopedroute && srcsock != NULL && 628 !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr)); 629 630 /* 631 * If Scoped Routing is disabled, ignore the given ifscope. 632 * Otherwise even if source selection won't be performed, 633 * we still obey IPV6_BOUND_IF. 634 */ 635 if (!ip6_doscopedroute && ifscope != IFSCOPE_NONE) 636 ifscope = IFSCOPE_NONE; 637 638 /* If the caller specified the outgoing interface explicitly, use it */ 639 if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL && 640 pi->ipi6_ifindex != 0) { 641 /* 642 * If IPV6_PKTINFO takes precedence over IPV6_BOUND_IF. 643 */ 644 ifscope = pi->ipi6_ifindex; 645 ifnet_head_lock_shared(); 646 /* ifp may be NULL if detached or out of range */ 647 ifp = ifp0 = 648 ((ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL); 649 ifnet_head_done(); 650 if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) { 651 /* 652 * We do not have to check or get the route for 653 * multicast. If the caller didn't ask/care for 654 * the route and we have no interface to use, 655 * it's an error. 656 */ 657 if (ifp == NULL) 658 error = EHOSTUNREACH; 659 goto done; 660 } else { 661 goto getsrcif; 662 } 663 } 664 665 /* 666 * If the destination address is a multicast address and the outgoing 667 * interface for the address is specified by the caller, use it. 668 */ 669 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) { 670 IM6O_LOCK(mopts); 671 if ((ifp = ifp0 = mopts->im6o_multicast_ifp) != NULL) { 672 IM6O_UNLOCK(mopts); 673 goto done; /* we do not need a route for multicast. */ 674 } 675 IM6O_UNLOCK(mopts); 676 } 677 678getsrcif: 679 /* 680 * If the outgoing interface was not set via IPV6_BOUND_IF or 681 * IPV6_PKTINFO, use the scope ID in the destination address. 
682 */ 683 if (ip6_doscopedroute && ifscope == IFSCOPE_NONE) 684 ifscope = dstsock->sin6_scope_id; 685 686 /* 687 * Perform source interface selection; the source IPv6 address 688 * must belong to one of the addresses of the interface used 689 * by the route. For performance reasons, do this only if 690 * there is no route, or if the routing table has changed, 691 * or if we haven't done source interface selection on this 692 * route (for this PCB instance) before. 693 */ 694 if (!select_srcif || (ro != NULL && ro->ro_rt != NULL && 695 (ro->ro_rt->rt_flags & RTF_UP) && 696 ro->ro_rt->generation_id == route_generation && 697 (ro->ro_flags & ROF_SRCIF_SELECTED))) { 698 if (ro != NULL && ro->ro_rt != NULL) { 699 ifa = ro->ro_rt->rt_ifa; 700 IFA_ADDREF(ifa); 701 } 702 goto getroute; 703 } 704 705 /* 706 * Given the source IPv6 address, find a suitable source interface 707 * to use for transmission; if a scope ID has been specified, 708 * optimize the search by looking at the addresses only for that 709 * interface. This is still suboptimal, however, as we need to 710 * traverse the per-interface list. 711 */ 712 if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) { 713 unsigned int scope = ifscope; 714 struct ifnet *rt_ifp; 715 716 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL; 717 718 /* 719 * If no scope is specified and the route is stale (pointing 720 * to a defunct interface) use the current primary interface; 721 * this happens when switching between interfaces configured 722 * with the same IPv6 address. Otherwise pick up the scope 723 * information from the route; the ULP may have looked up a 724 * correct route and we just need to verify it here and mark 725 * it with the ROF_SRCIF_SELECTED flag below. 
726 */ 727 if (scope == IFSCOPE_NONE) { 728 scope = rt_ifp->if_index; 729 if (scope != get_primary_ifscope(AF_INET6) && 730 ro->ro_rt->generation_id != route_generation) 731 scope = get_primary_ifscope(AF_INET6); 732 } 733 734 ifa = (struct ifaddr *) 735 ifa_foraddr6_scoped(&srcsock->sin6_addr, scope); 736 737 /* 738 * If we are forwarding and proxying prefix(es), see if the 739 * source address is one of ours and is a proxied address; 740 * if so, use it. 741 */ 742 if (ifa == NULL && ip6_forwarding && nd6_prproxy) { 743 ifa = (struct ifaddr *) 744 ifa_foraddr6(&srcsock->sin6_addr); 745 if (ifa != NULL && !(proxied_ifa = 746 nd6_prproxy_ifaddr((struct in6_ifaddr *)ifa))) { 747 IFA_REMREF(ifa); 748 ifa = NULL; 749 } 750 } 751 752 if (ip6_select_srcif_debug && ifa != NULL) { 753 if (ro->ro_rt != NULL) { 754 printf("%s->%s ifscope %d->%d ifa_if %s " 755 "ro_if %s\n", s_src, s_dst, ifscope, 756 scope, if_name(ifa->ifa_ifp), 757 if_name(rt_ifp)); 758 } else { 759 printf("%s->%s ifscope %d->%d ifa_if %s\n", 760 s_src, s_dst, ifscope, scope, 761 if_name(ifa->ifa_ifp)); 762 } 763 } 764 } 765 766 /* 767 * Slow path; search for an interface having the corresponding source 768 * IPv6 address if the scope was not specified by the caller, and: 769 * 770 * 1) There currently isn't any route, or, 771 * 2) The interface used by the route does not own that source 772 * IPv6 address; in this case, the route will get blown away 773 * and we'll do a more specific scoped search using the newly 774 * found interface. 
775 */ 776 if (ifa == NULL && ifscope == IFSCOPE_NONE) { 777 ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr); 778 779 if (ip6_select_srcif_debug && ifa != NULL) { 780 printf("%s->%s ifscope %d ifa_if %s\n", 781 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp)); 782 } 783 784 } 785 786getroute: 787 if (ifa != NULL && !proxied_ifa) 788 ifscope = ifa->ifa_ifp->if_index; 789 790 /* 791 * If the next hop address for the packet is specified by the caller, 792 * use it as the gateway. 793 */ 794 if (opts != NULL && opts->ip6po_nexthop != NULL) { 795 struct route_in6 *ron; 796 797 sin6_next = satosin6(opts->ip6po_nexthop); 798 799 /* at this moment, we only support AF_INET6 next hops */ 800 if (sin6_next->sin6_family != AF_INET6) { 801 error = EAFNOSUPPORT; /* or should we proceed? */ 802 goto done; 803 } 804 805 /* 806 * If the next hop is an IPv6 address, then the node identified 807 * by that address must be a neighbor of the sending host. 808 */ 809 ron = &opts->ip6po_nextroute; 810 if (ron->ro_rt != NULL) 811 RT_LOCK(ron->ro_rt); 812 if ((ron->ro_rt != NULL && 813 ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 814 (RTF_UP | RTF_LLINFO) || 815 ron->ro_rt->generation_id != route_generation || 816 (select_srcif && (ifa == NULL || 817 (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) || 818 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 819 &sin6_next->sin6_addr)) { 820 if (ron->ro_rt != NULL) { 821 RT_UNLOCK(ron->ro_rt); 822 rtfree(ron->ro_rt); 823 ron->ro_rt = NULL; 824 } 825 *satosin6(&ron->ro_dst) = *sin6_next; 826 } 827 if (ron->ro_rt == NULL) { 828 rtalloc_scoped((struct route *)ron, ifscope); 829 if (ron->ro_rt != NULL) 830 RT_LOCK(ron->ro_rt); 831 if (ron->ro_rt == NULL || 832 !(ron->ro_rt->rt_flags & RTF_LLINFO) || 833 !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))-> 834 sin6_addr, &sin6_next->sin6_addr)) { 835 if (ron->ro_rt != NULL) { 836 RT_UNLOCK(ron->ro_rt); 837 rtfree(ron->ro_rt); 838 ron->ro_rt = NULL; 839 } 840 error = 
EHOSTUNREACH; 841 goto done; 842 } 843 } 844 route = ron; 845 ifp = ifp0 = ron->ro_rt->rt_ifp; 846 847 /* 848 * When cloning is required, try to allocate a route to the 849 * destination so that the caller can store path MTU 850 * information. 851 */ 852 if (!clone) { 853 if (select_srcif) { 854 /* Keep the route locked */ 855 goto validateroute; 856 } 857 RT_UNLOCK(ron->ro_rt); 858 goto done; 859 } 860 RT_UNLOCK(ron->ro_rt); 861 } 862 863 /* 864 * Use a cached route if it exists and is valid, else try to allocate 865 * a new one. Note that we should check the address family of the 866 * cached destination, in case of sharing the cache with IPv4. 867 */ 868 if (ro == NULL) 869 goto done; 870 if (ro->ro_rt != NULL) 871 RT_LOCK(ro->ro_rt); 872 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || 873 satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || 874 ro->ro_rt->generation_id != route_generation || 875 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) || 876 (select_srcif && (ifa == NULL || 877 (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa))))) { 878 RT_UNLOCK(ro->ro_rt); 879 rtfree(ro->ro_rt); 880 ro->ro_rt = NULL; 881 } 882 if (ro->ro_rt == NULL) { 883 struct sockaddr_in6 *sa6; 884 885 if (ro->ro_rt != NULL) 886 RT_UNLOCK(ro->ro_rt); 887 /* No route yet, so try to acquire one */ 888 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 889 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 890 sa6->sin6_family = AF_INET6; 891 sa6->sin6_len = sizeof(struct sockaddr_in6); 892 sa6->sin6_addr = *dst; 893 if (IN6_IS_ADDR_MULTICAST(dst)) { 894 ro->ro_rt = rtalloc1_scoped( 895 &((struct route *)ro)->ro_dst, 0, 0, ifscope); 896 } else { 897 rtalloc_scoped((struct route *)ro, ifscope); 898 } 899 if (ro->ro_rt != NULL) 900 RT_LOCK(ro->ro_rt); 901 } 902 903 /* 904 * Do not care about the result if we have the nexthop 905 * explicitly specified (in case we're asked to clone.) 
906 */ 907 if (opts != NULL && opts->ip6po_nexthop != NULL) { 908 if (ro->ro_rt != NULL) 909 RT_UNLOCK(ro->ro_rt); 910 goto done; 911 } 912 913 if (ro->ro_rt != NULL) { 914 RT_LOCK_ASSERT_HELD(ro->ro_rt); 915 ifp = ifp0 = ro->ro_rt->rt_ifp; 916 } else { 917 error = EHOSTUNREACH; 918 } 919 route = ro; 920 921validateroute: 922 if (select_srcif) { 923 boolean_t has_route = (route != NULL && route->ro_rt != NULL); 924 boolean_t srcif_selected = FALSE; 925 926 if (has_route) 927 RT_LOCK_ASSERT_HELD(route->ro_rt); 928 /* 929 * If there is a non-loopback route with the wrong interface, 930 * or if there is no interface configured with such an address, 931 * blow it away. Except for local/loopback, we look for one 932 * with a matching interface scope/index. 933 */ 934 if (has_route && (ifa == NULL || 935 (ifa->ifa_ifp != ifp && ifp != lo_ifp) || 936 !(route->ro_rt->rt_flags & RTF_UP))) { 937 /* 938 * If the destination address belongs to a proxied 939 * prefix, relax the requirement and allow the packet 940 * to come out of the proxy interface with the source 941 * address of the real interface. 
942 */ 943 if (ifa != NULL && proxied_ifa && 944 (route->ro_rt->rt_flags & (RTF_UP|RTF_PROXY)) == 945 (RTF_UP|RTF_PROXY)) { 946 srcif_selected = TRUE; 947 } else { 948 if (ip6_select_srcif_debug) { 949 if (ifa != NULL) { 950 printf("%s->%s ifscope %d " 951 "ro_if %s != ifa_if %s " 952 "(cached route cleared)\n", 953 s_src, s_dst, 954 ifscope, if_name(ifp), 955 if_name(ifa->ifa_ifp)); 956 } else { 957 printf("%s->%s ifscope %d " 958 "ro_if %s (no ifa_if " 959 "found)\n", s_src, s_dst, 960 ifscope, if_name(ifp)); 961 } 962 } 963 RT_UNLOCK(route->ro_rt); 964 rtfree(route->ro_rt); 965 route->ro_rt = NULL; 966 route->ro_flags &= ~ROF_SRCIF_SELECTED; 967 error = EHOSTUNREACH; 968 /* Undo the settings done above */ 969 route = NULL; 970 ifp = NULL; /* ditch ifp; keep ifp0 */ 971 has_route = FALSE; 972 } 973 } else if (has_route) { 974 srcif_selected = TRUE; 975 } 976 977 if (srcif_selected) { 978 VERIFY(has_route); 979 route->ro_flags |= ROF_SRCIF_SELECTED; 980 route->ro_rt->generation_id = route_generation; 981 RT_UNLOCK(route->ro_rt); 982 } 983 } else { 984 if (ro->ro_rt != NULL) 985 RT_UNLOCK(ro->ro_rt); 986 if (ifp != NULL && opts != NULL && 987 opts->ip6po_pktinfo != NULL && 988 opts->ip6po_pktinfo->ipi6_ifindex != 0) { 989 /* 990 * Check if the outgoing interface conflicts with the 991 * interface specified by ipi6_ifindex (if specified). 992 * Note that loopback interface is always okay. 993 * (this may happen when we are sending a packet to 994 * one of our own addresses.) 
995 */ 996 if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != 997 opts->ip6po_pktinfo->ipi6_ifindex) { 998 error = EHOSTUNREACH; 999 goto done; 1000 } 1001 } 1002 } 1003 1004done: 1005 if (error == 0) { 1006 if ((ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) && 1007 ((ifp != NULL && ifp->if_type == IFT_CELLULAR) || 1008 (route != NULL && route->ro_rt != NULL && 1009 route->ro_rt->rt_ifp->if_type == IFT_CELLULAR))) { 1010 if (route != NULL && route->ro_rt != NULL) { 1011 rtfree(route->ro_rt); 1012 route->ro_rt = NULL; 1013 route->ro_flags &= ~ROF_SRCIF_SELECTED; 1014 route = NULL; 1015 } 1016 ifp = NULL; /* ditch ifp; keep ifp0 */ 1017 error = EHOSTUNREACH; 1018 } 1019 } 1020 1021 if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) { 1022 /* 1023 * This can happen if the caller did not pass a cached route 1024 * nor any other hints. We treat this case an error. 1025 */ 1026 error = EHOSTUNREACH; 1027 } 1028 if (error == EHOSTUNREACH) 1029 ip6stat.ip6s_noroute++; 1030 1031 /* 1032 * We'll return ifp regardless of error, so pick it up from ifp0 1033 * in case it was nullified above. Caller is responsible for 1034 * releasing the ifp if it is non-NULL. 1035 */ 1036 ifp = ifp0; 1037 if (retifp != NULL) { 1038 if (ifp != NULL) 1039 ifnet_reference(ifp); /* for caller */ 1040 *retifp = ifp; 1041 } 1042 1043 if (error == 0) { 1044 if (retrt != NULL && route != NULL) 1045 *retrt = route->ro_rt; /* ro_rt may be NULL */ 1046 } else if (select_srcif && ip6_select_srcif_debug) { 1047 printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n", 1048 s_src, s_dst, ifscope, 1049 (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE", 1050 (ifp != NULL) ? if_name(ifp) : "NONE", error); 1051 } 1052 1053 if (ifa != NULL) 1054 IFA_REMREF(ifa); 1055 1056 return (error); 1057} 1058 1059/* 1060 * Regardless of error, it will return an ifp with a reference held if the 1061 * caller provides a non-NULL retifp. 
The caller is responsible for checking 1062 * if the returned ifp is valid and release its reference at all times. 1063 */ 1064static int 1065in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 1066 struct ip6_moptions *mopts, struct route_in6 *ro, 1067 const struct ip6_out_args *ip6oa, struct ifnet **retifp) 1068{ 1069 int err = 0; 1070 struct route_in6 sro; 1071 struct rtentry *rt = NULL; 1072 1073 if (ro == NULL) { 1074 bzero(&sro, sizeof(sro)); 1075 ro = &sro; 1076 } 1077 1078 if ((err = selectroute(NULL, dstsock, opts, mopts, ro, retifp, 1079 &rt, 0, 1, ip6oa)) != 0) 1080 goto done; 1081 1082 /* 1083 * do not use a rejected or black hole route. 1084 * XXX: this check should be done in the L2 output routine. 1085 * However, if we skipped this check here, we'd see the following 1086 * scenario: 1087 * - install a rejected route for a scoped address prefix 1088 * (like fe80::/10) 1089 * - send a packet to a destination that matches the scoped prefix, 1090 * with ambiguity about the scope zone. 1091 * - pick the outgoing interface from the route, and disambiguate the 1092 * scope zone with the interface. 1093 * - ip6_output() would try to get another route with the "new" 1094 * destination, which may be valid. 1095 * - we'd see no error on output. 1096 * Although this may not be very harmful, it should still be confusing. 1097 * We thus reject the case here. 1098 */ 1099 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 1100 err = ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); 1101 goto done; 1102 } 1103 1104 /* 1105 * Adjust the "outgoing" interface. If we're going to loop the packet 1106 * back to ourselves, the ifp would be the loopback interface. 1107 * However, we'd rather know the interface associated to the 1108 * destination address (which should probably be one of our own 1109 * addresses.) 
1110 */ 1111 if (rt != NULL && rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp != NULL && 1112 retifp != NULL) { 1113 ifnet_reference(rt->rt_ifa->ifa_ifp); 1114 if (*retifp != NULL) 1115 ifnet_release(*retifp); 1116 *retifp = rt->rt_ifa->ifa_ifp; 1117 } 1118 1119done: 1120 if (ro == &sro && rt && rt == sro.ro_rt) 1121 rtfree(rt); 1122 1123 /* 1124 * retifp might point to a valid ifp with a reference held; 1125 * caller is responsible for releasing it if non-NULL. 1126 */ 1127 return (err); 1128} 1129 1130/* 1131 * Regardless of error, it will return an ifp with a reference held if the 1132 * caller provides a non-NULL retifp. The caller is responsible for checking 1133 * if the returned ifp is valid and release its reference at all times. 1134 * 1135 * clone - meaningful only for bsdi and freebsd 1136 */ 1137int 1138in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, 1139 struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, 1140 struct ifnet **retifp, struct rtentry **retrt, int clone, 1141 const struct ip6_out_args *ip6oa) 1142{ 1143 1144 return (selectroute(srcsock, dstsock, opts, mopts, ro, retifp, 1145 retrt, clone, 0, ip6oa)); 1146} 1147 1148/* 1149 * Default hop limit selection. The precedence is as follows: 1150 * 1. Hoplimit value specified via ioctl. 1151 * 2. (If the outgoing interface is detected) the current 1152 * hop limit of the interface specified by router advertisement. 1153 * 3. The system default hoplimit. 
1154*/ 1155int 1156in6_selecthlim( 1157 struct in6pcb *in6p, 1158 struct ifnet *ifp) 1159{ 1160 if (in6p && in6p->in6p_hops >= 0) { 1161 return(in6p->in6p_hops); 1162 } else { 1163 lck_rw_lock_shared(nd_if_rwlock); 1164 if (ifp && ifp->if_index < nd_ifinfo_indexlim) { 1165 u_int8_t chlim; 1166 struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; 1167 1168 if (ndi->initialized) { 1169 lck_mtx_lock(&ndi->lock); 1170 chlim = ndi->chlim; 1171 lck_mtx_unlock(&ndi->lock); 1172 } else { 1173 chlim = ip6_defhlim; 1174 } 1175 lck_rw_done(nd_if_rwlock); 1176 return (chlim); 1177 } else { 1178 lck_rw_done(nd_if_rwlock); 1179 return(ip6_defhlim); 1180 } 1181 } 1182} 1183 1184/* 1185 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 1186 * share this function by all *bsd*... 1187 */ 1188int 1189in6_pcbsetport( 1190 __unused struct in6_addr *laddr, 1191 struct inpcb *inp, 1192 struct proc *p, 1193 int locked) 1194{ 1195 struct socket *so = inp->inp_socket; 1196 u_int16_t lport = 0, first, last, *lastport; 1197 int count, error = 0, wild = 0; 1198 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1199 kauth_cred_t cred; 1200 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */ 1201 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { 1202 socket_unlock(inp->inp_socket, 0); 1203 lck_rw_lock_exclusive(pcbinfo->mtx); 1204 socket_lock(inp->inp_socket, 0); 1205 } 1206 } 1207 1208 /* XXX: this is redundant when called from in6_pcbbind */ 1209 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 1210 wild = INPLOOKUP_WILDCARD; 1211 1212 inp->inp_flags |= INP_ANONPORT; 1213 1214 if (inp->inp_flags & INP_HIGHPORT) { 1215 first = ipport_hifirstauto; /* sysctl */ 1216 last = ipport_hilastauto; 1217 lastport = &pcbinfo->lasthi; 1218 } else if (inp->inp_flags & INP_LOWPORT) { 1219 cred = kauth_cred_proc_ref(p); 1220 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 1221 kauth_cred_unref(&cred); 1222 if (error != 0) { 1223 if (!locked) 1224 
lck_rw_done(pcbinfo->mtx); 1225 return error; 1226 } 1227 first = ipport_lowfirstauto; /* 1023 */ 1228 last = ipport_lowlastauto; /* 600 */ 1229 lastport = &pcbinfo->lastlow; 1230 } else { 1231 first = ipport_firstauto; /* sysctl */ 1232 last = ipport_lastauto; 1233 lastport = &pcbinfo->lastport; 1234 } 1235 /* 1236 * Simple check to ensure all ports are not used up causing 1237 * a deadlock here. 1238 * 1239 * We split the two cases (up and down) so that the direction 1240 * is not being tested on each round of the loop. 1241 */ 1242 if (first > last) { 1243 /* 1244 * counting down 1245 */ 1246 count = first - last; 1247 1248 do { 1249 if (count-- < 0) { /* completely used? */ 1250 /* 1251 * Undo any address bind that may have 1252 * occurred above. 1253 */ 1254 inp->in6p_laddr = in6addr_any; 1255 inp->in6p_last_outifp = NULL; 1256 if (!locked) 1257 lck_rw_done(pcbinfo->mtx); 1258 return (EAGAIN); 1259 } 1260 --*lastport; 1261 if (*lastport > first || *lastport < last) 1262 *lastport = first; 1263 lport = htons(*lastport); 1264 } while (in6_pcblookup_local(pcbinfo, 1265 &inp->in6p_laddr, lport, wild)); 1266 } else { 1267 /* 1268 * counting up 1269 */ 1270 count = last - first; 1271 1272 do { 1273 if (count-- < 0) { /* completely used? */ 1274 /* 1275 * Undo any address bind that may have 1276 * occurred above. 
1277 */ 1278 inp->in6p_laddr = in6addr_any; 1279 inp->in6p_last_outifp = NULL; 1280 if (!locked) 1281 lck_rw_done(pcbinfo->mtx); 1282 return (EAGAIN); 1283 } 1284 ++*lastport; 1285 if (*lastport < first || *lastport > last) 1286 *lastport = first; 1287 lport = htons(*lastport); 1288 } while (in6_pcblookup_local(pcbinfo, 1289 &inp->in6p_laddr, lport, wild)); 1290 } 1291 1292 inp->inp_lport = lport; 1293 if (in_pcbinshash(inp, 1) != 0) { 1294 inp->in6p_laddr = in6addr_any; 1295 inp->inp_lport = 0; 1296 inp->in6p_last_outifp = NULL; 1297 if (!locked) 1298 lck_rw_done(pcbinfo->mtx); 1299 return (EAGAIN); 1300 } 1301 1302 if (!locked) 1303 lck_rw_done(pcbinfo->mtx); 1304 return(0); 1305} 1306 1307/* 1308 * * The followings are implementation of the policy table using a 1309 * * simple tail queue. 1310 * * XXX such details should be hidden. 1311 * * XXX implementation using binary tree should be more efficient. 1312 * */ 1313struct addrsel_policyent { 1314 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1315 struct in6_addrpolicy ape_policy; 1316}; 1317 1318TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1319 1320struct addrsel_policyhead addrsel_policytab; 1321 1322static void 1323init_policy_queue(void) 1324{ 1325 1326 TAILQ_INIT(&addrsel_policytab); 1327} 1328 1329void 1330addrsel_policy_init(void) 1331{ 1332 /* 1333 * Default address selection policy based on RFC 3484 and 1334 * draft-arifumi-6man-rfc3484-revise-03. 
1335 */ 1336 static const struct in6_addrpolicy defaddrsel[] = { 1337 /* localhost */ 1338 { .addr = { .sin6_family = AF_INET6, 1339 .sin6_addr = IN6ADDR_LOOPBACK_INIT, 1340 .sin6_len = sizeof(struct sockaddr_in6) }, 1341 .addrmask = { .sin6_family = AF_INET6, 1342 .sin6_addr = IN6MASK128, 1343 .sin6_len = sizeof(struct sockaddr_in6) }, 1344 .preced = 60, 1345 .label = 0 }, 1346 /* ULA */ 1347 { .addr = { .sin6_family = AF_INET6, 1348 .sin6_addr = {{{ 0xfc }}}, 1349 .sin6_len = sizeof(struct sockaddr_in6) }, 1350 .addrmask = { .sin6_family = AF_INET6, 1351 .sin6_addr = IN6MASK7, 1352 .sin6_len = sizeof(struct sockaddr_in6) }, 1353 .preced = 50, 1354 .label = 1 }, 1355 /* any IPv6 src */ 1356 { .addr = { .sin6_family = AF_INET6, 1357 .sin6_addr = IN6ADDR_ANY_INIT, 1358 .sin6_len = sizeof(struct sockaddr_in6) }, 1359 .addrmask = { .sin6_family = AF_INET6, 1360 .sin6_addr = IN6MASK0, 1361 .sin6_len = sizeof(struct sockaddr_in6) }, 1362 .preced = 40, 1363 .label = 2 }, 1364 /* any IPv4 src */ 1365 { .addr = { .sin6_family = AF_INET6, 1366 .sin6_addr = IN6ADDR_V4MAPPED_INIT, 1367 .sin6_len = sizeof(struct sockaddr_in6) }, 1368 .addrmask = { .sin6_family = AF_INET6, 1369 .sin6_addr = IN6MASK96, 1370 .sin6_len = sizeof(struct sockaddr_in6) }, 1371 .preced = 30, 1372 .label = 3 }, 1373 /* 6to4 */ 1374 { .addr = { .sin6_family = AF_INET6, 1375 .sin6_addr = {{{ 0x20, 0x02 }}}, 1376 .sin6_len = sizeof(struct sockaddr_in6) }, 1377 .addrmask = { .sin6_family = AF_INET6, 1378 .sin6_addr = IN6MASK16, 1379 .sin6_len = sizeof(struct sockaddr_in6) }, 1380 .preced = 20, 1381 .label = 4 }, 1382 /* Teredo */ 1383 { .addr = { .sin6_family = AF_INET6, 1384 .sin6_addr = {{{ 0x20, 0x01 }}}, 1385 .sin6_len = sizeof(struct sockaddr_in6) }, 1386 .addrmask = { .sin6_family = AF_INET6, 1387 .sin6_addr = IN6MASK32, 1388 .sin6_len = sizeof(struct sockaddr_in6) }, 1389 .preced = 10, 1390 .label = 5 }, 1391 /* v4 compat addresses */ 1392 { .addr = { .sin6_family = AF_INET6, 1393 .sin6_addr = 
IN6ADDR_ANY_INIT, 1394 .sin6_len = sizeof(struct sockaddr_in6) }, 1395 .addrmask = { .sin6_family = AF_INET6, 1396 .sin6_addr = IN6MASK96, 1397 .sin6_len = sizeof(struct sockaddr_in6) }, 1398 .preced = 1, 1399 .label = 10 }, 1400 /* site-local (deprecated) */ 1401 { .addr = { .sin6_family = AF_INET6, 1402 .sin6_addr = {{{ 0xfe, 0xc0 }}}, 1403 .sin6_len = sizeof(struct sockaddr_in6) }, 1404 .addrmask = { .sin6_family = AF_INET6, 1405 .sin6_addr = IN6MASK16, 1406 .sin6_len = sizeof(struct sockaddr_in6) }, 1407 .preced = 1, 1408 .label = 11 }, 1409 /* 6bone (deprecated) */ 1410 { .addr = { .sin6_family = AF_INET6, 1411 .sin6_addr = {{{ 0x3f, 0xfe }}}, 1412 .sin6_len = sizeof(struct sockaddr_in6) }, 1413 .addrmask = { .sin6_family = AF_INET6, 1414 .sin6_addr = IN6MASK16, 1415 .sin6_len = sizeof(struct sockaddr_in6) }, 1416 .preced = 1, 1417 .label = 12 }, 1418 }; 1419 int i; 1420 1421 init_policy_queue(); 1422 1423 /* initialize the "last resort" policy */ 1424 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 1425 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 1426 1427 for (i = 0; i < sizeof(defaddrsel) / sizeof(defaddrsel[0]); i++) 1428 add_addrsel_policyent(&defaddrsel[i]); 1429 1430} 1431 1432struct in6_addrpolicy * 1433in6_addrsel_lookup_policy(struct sockaddr_in6 *key) 1434{ 1435 struct in6_addrpolicy *match = NULL; 1436 1437 ADDRSEL_LOCK(); 1438 match = match_addrsel_policy(key); 1439 1440 if (match == NULL) 1441 match = &defaultaddrpolicy; 1442 else 1443 match->use++; 1444 ADDRSEL_UNLOCK(); 1445 1446 return (match); 1447} 1448 1449static struct in6_addrpolicy * 1450match_addrsel_policy(struct sockaddr_in6 *key) 1451{ 1452 struct addrsel_policyent *pent; 1453 struct in6_addrpolicy *bestpol = NULL, *pol; 1454 int matchlen, bestmatchlen = -1; 1455 u_char *mp, *ep, *k, *p, m; 1456 1457 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) { 1458 matchlen = 0; 1459 1460 pol = &pent->ape_policy; 1461 mp = (u_char *)&pol->addrmask.sin6_addr; 1462 ep = mp + 16; /* XXX: 
scope field? */ 1463 k = (u_char *)&key->sin6_addr; 1464 p = (u_char *)&pol->addr.sin6_addr; 1465 for (; mp < ep && *mp; mp++, k++, p++) { 1466 m = *mp; 1467 if ((*k & m) != *p) 1468 goto next; /* not match */ 1469 if (m == 0xff) /* short cut for a typical case */ 1470 matchlen += 8; 1471 else { 1472 while (m >= 0x80) { 1473 matchlen++; 1474 m <<= 1; 1475 } 1476 } 1477 } 1478 1479 /* matched. check if this is better than the current best. */ 1480 if (bestpol == NULL || 1481 matchlen > bestmatchlen) { 1482 bestpol = pol; 1483 bestmatchlen = matchlen; 1484 } 1485 1486 next: 1487 continue; 1488 } 1489 1490 return (bestpol); 1491} 1492 1493static int 1494add_addrsel_policyent(const struct in6_addrpolicy *newpolicy) 1495{ 1496 struct addrsel_policyent *new, *pol; 1497 1498 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1499 M_WAITOK); 1500 1501 ADDRSEL_LOCK(); 1502 1503 /* duplication check */ 1504 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 1505 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1506 &pol->ape_policy.addr.sin6_addr) && 1507 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1508 &pol->ape_policy.addrmask.sin6_addr)) { 1509 ADDRSEL_UNLOCK(); 1510 FREE(new, M_IFADDR); 1511 return (EEXIST); /* or override it? 
*/ 1512 } 1513 } 1514 1515 bzero(new, sizeof(*new)); 1516 1517 /* XXX: should validate entry */ 1518 new->ape_policy = *newpolicy; 1519 1520 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1521 ADDRSEL_UNLOCK(); 1522 1523 return (0); 1524} 1525#ifdef ENABLE_ADDRSEL 1526static int 1527delete_addrsel_policyent(const struct in6_addrpolicy *key) 1528{ 1529 struct addrsel_policyent *pol; 1530 1531 1532 ADDRSEL_LOCK(); 1533 1534 /* search for the entry in the table */ 1535 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 1536 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1537 &pol->ape_policy.addr.sin6_addr) && 1538 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1539 &pol->ape_policy.addrmask.sin6_addr)) { 1540 break; 1541 } 1542 } 1543 if (pol == NULL) { 1544 ADDRSEL_UNLOCK(); 1545 return (ESRCH); 1546 } 1547 1548 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1549 FREE(pol, M_IFADDR); 1550 pol = NULL; 1551 ADDRSEL_UNLOCK(); 1552 1553 return (0); 1554} 1555#endif /* ENABLE_ADDRSEL */ 1556 1557int 1558walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *), 1559 void *w) 1560{ 1561 struct addrsel_policyent *pol; 1562 int error = 0; 1563 1564 ADDRSEL_LOCK(); 1565 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 1566 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1567 ADDRSEL_UNLOCK(); 1568 return (error); 1569 } 1570 } 1571 ADDRSEL_UNLOCK(); 1572 return (error); 1573} 1574/* 1575 * Subroutines to manage the address selection policy table via sysctl. 
1576 */ 1577struct walkarg { 1578 struct sysctl_req *w_req; 1579}; 1580 1581 1582static int 1583dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg) 1584{ 1585 int error = 0; 1586 struct walkarg *w = arg; 1587 1588 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1589 1590 return (error); 1591} 1592 1593static int 1594in6_src_sysctl SYSCTL_HANDLER_ARGS 1595{ 1596#pragma unused(oidp, arg1, arg2) 1597struct walkarg w; 1598 1599 if (req->newptr) 1600 return EPERM; 1601 bzero(&w, sizeof(w)); 1602 w.w_req = req; 1603 1604 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 1605} 1606 1607 1608SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 1609 CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, ""); 1610int 1611in6_src_ioctl(u_long cmd, caddr_t data) 1612{ 1613 int i; 1614 struct in6_addrpolicy ent0; 1615 1616 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1617 return (EOPNOTSUPP); /* check for safety */ 1618 1619 bcopy(data, &ent0, sizeof (ent0)); 1620 1621 if (ent0.label == ADDR_LABEL_NOTAPP) 1622 return (EINVAL); 1623 /* check if the prefix mask is consecutive. */ 1624 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1625 return (EINVAL); 1626 /* clear trailing garbages (if any) of the prefix address. */ 1627 for (i = 0; i < 4; i++) { 1628 ent0.addr.sin6_addr.s6_addr32[i] &= 1629 ent0.addrmask.sin6_addr.s6_addr32[i]; 1630 } 1631 ent0.use = 0; 1632 1633 switch (cmd) { 1634 case SIOCAADDRCTL_POLICY: 1635#ifdef ENABLE_ADDRSEL 1636 return (add_addrsel_policyent(&ent0)); 1637#else 1638 return (ENOTSUP); 1639#endif 1640 case SIOCDADDRCTL_POLICY: 1641#ifdef ENABLE_ADDRSEL 1642 return (delete_addrsel_policyent(&ent0)); 1643#else 1644 return (ENOTSUP); 1645#endif 1646 } 1647 1648 return (0); /* XXX: compromise compilers */ 1649} 1650 1651/* 1652 * generate kernel-internal form (scopeid embedded into s6_addr16[1]). 1653 * If the address scope of is link-local, embed the interface index in the 1654 * address. 
The routine determines our precedence 1655 * between advanced API scope/interface specification and basic API 1656 * specification. 1657 * 1658 * this function should be nuked in the future, when we get rid of 1659 * embedded scopeid thing. 1660 * 1661 * XXX actually, it is over-specification to return ifp against sin6_scope_id. 1662 * there can be multiple interfaces that belong to a particular scope zone 1663 * (in specification, we have 1:N mapping between a scope zone and interfaces). 1664 * we may want to change the function to return something other than ifp. 1665 */ 1666int 1667in6_embedscope( 1668 struct in6_addr *in6, 1669 const struct sockaddr_in6 *sin6, 1670 struct in6pcb *in6p, 1671 struct ifnet **ifpp, 1672 struct ip6_pktopts *opt) 1673{ 1674 struct ifnet *ifp = NULL; 1675 u_int32_t scopeid; 1676 struct ip6_pktopts *optp = NULL; 1677 1678 *in6 = sin6->sin6_addr; 1679 scopeid = sin6->sin6_scope_id; 1680 if (ifpp != NULL) 1681 *ifpp = NULL; 1682 1683 /* 1684 * don't try to read sin6->sin6_addr beyond here, since the caller may 1685 * ask us to overwrite existing sockaddr_in6 1686 */ 1687 1688#ifdef ENABLE_DEFAULT_SCOPE 1689 if (scopeid == 0) 1690 scopeid = scope6_addr2default(in6); 1691#endif 1692 1693 if (IN6_IS_SCOPE_LINKLOCAL(in6)) { 1694 struct in6_pktinfo *pi; 1695 struct ifnet *im6o_multicast_ifp = NULL; 1696 1697 if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) && 1698 in6p->in6p_moptions != NULL) { 1699 IM6O_LOCK(in6p->in6p_moptions); 1700 im6o_multicast_ifp = 1701 in6p->in6p_moptions->im6o_multicast_ifp; 1702 IM6O_UNLOCK(in6p->in6p_moptions); 1703 } 1704 1705 if (opt) 1706 optp = opt; 1707 else if (in6p) 1708 optp = in6p->in6p_outputopts; 1709 /* 1710 * KAME assumption: link id == interface id 1711 */ 1712 ifnet_head_lock_shared(); 1713 if (in6p && optp && (pi = optp->ip6po_pktinfo) && 1714 pi->ipi6_ifindex) { 1715 ifp = ifindex2ifnet[pi->ipi6_ifindex]; 1716 in6->s6_addr16[1] = htons(pi->ipi6_ifindex); 1717 } else if (in6p && 
IN6_IS_ADDR_MULTICAST(in6) && 1718 in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) { 1719 ifp = im6o_multicast_ifp; 1720 in6->s6_addr16[1] = htons(ifp->if_index); 1721 } else if (scopeid) { 1722 /* 1723 * Since scopeid is unsigned, we only have to check it 1724 * against if_index 1725 */ 1726 if (if_index < scopeid) { 1727 ifnet_head_done(); 1728 return ENXIO; /* XXX EINVAL? */ 1729 1730 } 1731 ifp = ifindex2ifnet[scopeid]; 1732 /*XXX assignment to 16bit from 32bit variable */ 1733 in6->s6_addr16[1] = htons(scopeid & 0xffff); 1734 } 1735 ifnet_head_done(); 1736 1737 if (ifpp != NULL) { 1738 if (ifp != NULL) 1739 ifnet_reference(ifp); /* for caller */ 1740 *ifpp = ifp; 1741 } 1742 } 1743 1744 return 0; 1745} 1746 1747/* 1748 * generate standard sockaddr_in6 from embedded form. 1749 * touches sin6_addr and sin6_scope_id only. 1750 * 1751 * this function should be nuked in the future, when we get rid of 1752 * embedded scopeid thing. 1753 */ 1754int 1755in6_recoverscope( 1756 struct sockaddr_in6 *sin6, 1757 const struct in6_addr *in6, 1758 struct ifnet *ifp) 1759{ 1760 u_int32_t scopeid; 1761 1762 sin6->sin6_addr = *in6; 1763 1764 /* 1765 * don't try to read *in6 beyond here, since the caller may 1766 * ask us to overwrite existing sockaddr_in6 1767 */ 1768 1769 sin6->sin6_scope_id = 0; 1770 if (IN6_IS_SCOPE_LINKLOCAL(in6)) { 1771 /* 1772 * KAME assumption: link id == interface id 1773 */ 1774 scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]); 1775 if (scopeid) { 1776 /* 1777 * sanity check 1778 * 1779 * Since scopeid is unsigned, we only have to check it 1780 * against if_index 1781 */ 1782 if (if_index < scopeid) 1783 return ENXIO; 1784 if (ifp && ifp->if_index != scopeid) 1785 return ENXIO; 1786 sin6->sin6_addr.s6_addr16[1] = 0; 1787 sin6->sin6_scope_id = scopeid; 1788 } 1789 } 1790 1791 return 0; 1792} 1793