1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* 30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. 
Neither the name of the project nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58/* 59 * XXX 60 * KAME 970409 note: 61 * BSD/OS version heavily modifies this code, related to llinfo. 62 * Since we don't have BSD/OS version of net/route.c in our hand, 63 * I left the code mostly as it was in 970310. 
-- itojun
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/mcache.h>

#include <dev/random/randomdev.h>

#include <kern/queue.h>
#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/route.h>
#include <net/dlil.h>
#include <net/ntstat.h>
#include <net/net_osdep.h>

#include <netinet/in.h>
#include <netinet/in_arp.h>
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/scope6_var.h>
#include <netinet/icmp6.h>

#include "loop.h"

#define	ND6_SLOWTIMER_INTERVAL		(60 * 60)	/* 1 hour */
#define	ND6_RECALC_REACHTM_INTERVAL	(60 * 120)	/* 2 hours */

/* sockaddr equality: compare the first sa_len bytes of (a1) against (a2) */
#define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)

/* timer values */
int	nd6_prune = 1;		/* walk list every 1 second */
int	nd6_prune_lazy = 5;	/* lazily walk list every 5 seconds */
int	nd6_delay = 5;		/* delay first probe time 5 seconds */
int	nd6_umaxtries = 3;	/* maximum unicast query */
int	nd6_mmaxtries = 3;	/* maximum multicast query */
int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
int	nd6_gctimer = (60 * 60 * 24);	/* 1 day: garbage collection timer */

/* preventing too many loops in ND option parsing */
int nd6_maxndopt = 10;	/* max # of ND options allowed */

int nd6_maxqueuelen = 1;	/* max # of packets cached in unresolved ND entries */

#if ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/*
 * Bitmask selecting which classes of addresses perform optimistic DAD
 * (RFC 4429); every class is enabled by default.
 */
int nd6_optimistic_dad =
	(ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF|
	ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC|
	ND6_OPTIMISTIC_DAD_SECURED|ND6_OPTIMISTIC_DAD_MANUAL);

/* for debugging? */
static int nd6_inuse, nd6_allocated;

/*
 * Synchronization notes:
 *
 * The global list of ND entries are stored in llinfo_nd6; an entry
 * gets inserted into the list when the route is created and gets
 * removed from the list when it is deleted; this is done as part
 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
 *
 * Because rnh_lock and rt_lock for the entry are held during those
 * operations, the same locks (and thus lock ordering) must be used
 * elsewhere to access the relevant data structure fields:
 *
 * ln_next, ln_prev, ln_rt
 *
 *	- Routing lock (rnh_lock)
 *
 * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags,
 * ln_llreach, ln_lastused
 *
 *	- Routing entry lock (rt_lock)
 *
 * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
 * as the route entry itself.  When a route is deleted (RTM_DELETE),
 * it is simply removed from the global list but the memory is not
 * freed until the route itself is freed.
 */
/* Head of the circular doubly-linked list of all ND entries. */
struct llinfo_nd6 llinfo_nd6 = {
	.ln_next = &llinfo_nd6,
	.ln_prev = &llinfo_nd6,
};

/* Protected by nd_if_rwlock */
size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
struct nd_ifinfo *nd_ifinfo = NULL;

/* Lock group/attributes backing nd_if_rwlock; set up once in nd6_init(). */
static lck_grp_attr_t	*nd_if_lock_grp_attr;
static lck_grp_t	*nd_if_lock_grp;
static lck_attr_t	*nd_if_lock_attr;
decl_lck_rw_data(, nd_if_rwlock_data);
lck_rw_t *nd_if_rwlock = &nd_if_rwlock_data;

/* Protected by nd6_mutex */
struct nd_drhead nd_defrouter;
struct nd_prhead nd_prefix = { 0 };

/*
 * nd6_timeout() is scheduled on a demand basis.  nd6_timeout_run is used
 * to indicate whether or not a timeout has been scheduled.  The rnh_lock
 * mutex is used to protect this scheduling; it is a natural choice given
 * the work done in the timer callback.  Unfortunately, there are cases
 * when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily
 * held, due to lock ordering.  In those cases, we utilize a "demand" counter
 * nd6_sched_timeout_want which can be atomically incremented without
 * having to hold rnh_lock.  On places where we acquire rnh_lock, such as
 * nd6_rtrequest(), we check this counter and schedule the timer if it is
 * non-zero.  The increment happens on various places when we allocate
 * new ND entries, default routers, prefixes and addresses.
 */
static int nd6_timeout_run;		/* nd6_timeout is scheduled to run */
static void nd6_timeout(void *);
int nd6_sched_timeout_want;		/* demand count for timer to be sched */
static boolean_t nd6_fast_timer_on = FALSE;

/* Serialization variables for nd6_service(), protected by rnh_lock */
static boolean_t nd6_service_busy;
static void *nd6_service_wc = &nd6_service_busy;
static int nd6_service_waiters = 0;

int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
static struct sockaddr_in6 all1_sa;	/* all-ones sockaddr, built in nd6_init() */

static int regen_tmpaddr(struct in6_ifaddr *);
extern lck_mtx_t *nd6_mutex;

static struct llinfo_nd6 *nd6_llinfo_alloc(int);
static void nd6_llinfo_free(void *);
static void nd6_llinfo_purge(struct rtentry *);
static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
static uint64_t ln_getexpire(struct llinfo_nd6 *);

static void nd6_service(void *);
static void nd6_slowtimo(void *);
static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
static int nd6_siocgdrlst(void *, int);
static int nd6_siocgprlst(void *, int);

static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS;
static int
nd6_sysctl_prlist SYSCTL_HANDLER_ARGS;

/*
 * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
 */
#define	LN_DEQUEUE(_ln) do {						\
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);			\
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);				\
	(_ln)->ln_next->ln_prev = (_ln)->ln_prev;			\
	(_ln)->ln_prev->ln_next = (_ln)->ln_next;			\
	(_ln)->ln_prev = (_ln)->ln_next = NULL;				\
	(_ln)->ln_flags &= ~ND6_LNF_IN_USE;				\
} while (0)

#define	LN_INSERTHEAD(_ln) do {						\
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);			\
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);				\
	(_ln)->ln_next = llinfo_nd6.ln_next;				\
	llinfo_nd6.ln_next = (_ln);					\
	(_ln)->ln_prev = &llinfo_nd6;					\
	(_ln)->ln_next->ln_prev = (_ln);				\
	(_ln)->ln_flags |= ND6_LNF_IN_USE;				\
} while (0)

static struct zone *llinfo_nd6_zone;
#define	LLINFO_ND6_ZONE_MAX	256		/* maximum elements in zone */
#define	LLINFO_ND6_ZONE_NAME	"llinfo_nd6"	/* name for zone */

extern int tvtohz(struct timeval *);

static int nd6_init_done;	/* guards against double nd6_init() */

SYSCTL_DECL(_net_inet6_icmp6);

SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	nd6_sysctl_drlist, "S,in6_defrouter", "");

/*
 * NOTE(review): this handler exports the prefix list, yet the format
 * string reuses "S,in6_defrouter" -- presumably it should name a prefix
 * structure; confirm against the sysctl consumers before changing.
 */
SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	nd6_sysctl_prlist, "S,in6_defrouter", "");

/*
 * One-time initialization of the ND subsystem, called during IPv6 startup.
 * Builds the all-ones sockaddr (all1_sa), initializes the default router
 * list, the nd_if_rwlock and its lock group, and the llinfo_nd6 zone;
 * then initializes the NS/RS submodules and arms the slow timer.
 * Panics if the zone cannot be created; must only ever run once
 * (VERIFY on nd6_init_done).
 */
void
nd6_init(void)
{
	int i;

	VERIFY(!nd6_init_done);

	all1_sa.sin6_family = AF_INET6;
	all1_sa.sin6_len = sizeof (struct sockaddr_in6);
	for (i = 0; i < sizeof (all1_sa.sin6_addr); i++)
		all1_sa.sin6_addr.s6_addr[i] = 0xff;

	/* initialization of the default router list */
	TAILQ_INIT(&nd_defrouter);

	nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
	nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
	nd_if_lock_attr = lck_attr_alloc_init();
	lck_rw_init(nd_if_rwlock, nd_if_lock_grp, nd_if_lock_attr);

	llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
	    LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
	    LLINFO_ND6_ZONE_NAME);
	if (llinfo_nd6_zone == NULL)
		panic("%s: failed allocating llinfo_nd6_zone", __func__);

	zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
	zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);

	nd6_nbr_init();
	nd6_rtr_init();
	nd6_prproxy_init();

	nd6_init_done = 1;

	/* start timer */
	timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
}

/*
 * Allocate a zeroed llinfo_nd6 entry from the zone.  Blocks only when
 * "how" is M_WAITOK; otherwise uses the non-blocking zone allocator and
 * may return NULL.
 */
static struct llinfo_nd6 *
nd6_llinfo_alloc(int how)
{
	struct llinfo_nd6 *ln;

	ln = (how == M_WAITOK) ? zalloc(llinfo_nd6_zone) :
	    zalloc_noblock(llinfo_nd6_zone);
	if (ln != NULL)
		bzero(ln, sizeof (*ln));

	return (ln);
}

/*
 * Release an llinfo_nd6 entry back to the zone.  The entry must already
 * be off the global list (ln_next/ln_prev NULL), else we panic.  Any
 * packet still held for resolution is freed, and cached link-layer
 * reachability info is purged via the route's rt_llinfo_purge hook.
 */
static void
nd6_llinfo_free(void *arg)
{
	struct llinfo_nd6 *ln = arg;

	if (ln->ln_next != NULL || ln->ln_prev != NULL) {
		panic("%s: trying to free %p when it is in use", __func__, ln);
		/* NOTREACHED */
	}

	/* Just in case there's anything there, free it */
	if (ln->ln_hold != NULL) {
		m_freem(ln->ln_hold);
		ln->ln_hold = NULL;
	}

	/* Purge any link-layer info caching */
	VERIFY(ln->ln_rt->rt_llinfo == ln);
	if (ln->ln_rt->rt_llinfo_purge != NULL)
		ln->ln_rt->rt_llinfo_purge(ln->ln_rt);

	zfree(llinfo_nd6_zone, ln);
}

/*
 * rt_llinfo_purge callback: drop the entry's if_llreach reference (if
 * any) and reset the last-used timestamp.  Called with rt_lock held;
 * the lock is converted before releasing the llreach record.
 */
static void
nd6_llinfo_purge(struct rtentry *rt)
{
	struct llinfo_nd6 *ln = rt->rt_llinfo;

	RT_LOCK_ASSERT_HELD(rt);
	VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);

	if (ln->ln_llreach != NULL) {
		RT_CONVERT_LOCK(rt);
		ifnet_llreach_free(ln->ln_llreach);
		ln->ln_llreach = NULL;
	}
	ln->ln_lastused = 0;
}

/*
 * rt_llinfo_get_ri callback: fill in a rt_reach_info from the entry's
 * cached link-layer reachability record, or with "unknown" defaults when
 * there is none.
 */
static void
nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
{
	struct llinfo_nd6 *ln = rt->rt_llinfo;
	struct if_llreach *lr =
	    ln->ln_llreach;

	if (lr == NULL) {
		bzero(ri, sizeof (*ri));
		ri->ri_rssi = IFNET_RSSI_UNKNOWN;
		ri->ri_lqm = IFNET_LQM_THRESH_OFF;
		ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
	} else {
		IFLR_LOCK(lr);
		/* Export to rt_reach_info structure */
		ifnet_lr2ri(lr, ri);
		/* Export ND6 send expiration (calendar) time */
		ri->ri_snd_expire =
		    ifnet_llreach_up2calexp(lr, ln->ln_lastused);
		IFLR_UNLOCK(lr);
	}
}

/*
 * rt_llinfo_get_iflri callback: same as nd6_llinfo_get_ri() but exports
 * an ifnet_llreach_info, with the send expiration reported in uptime
 * rather than calendar time.
 */
static void
nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
{
	struct llinfo_nd6 *ln = rt->rt_llinfo;
	struct if_llreach *lr = ln->ln_llreach;

	if (lr == NULL) {
		bzero(iflri, sizeof (*iflri));
		iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
		iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
		iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
	} else {
		IFLR_LOCK(lr);
		/* Export to ifnet_llreach_info structure */
		ifnet_lr2iflri(lr, iflri);
		/* Export ND6 send expiration (uptime) time */
		iflri->iflri_snd_expire =
		    ifnet_llreach_up2upexp(lr, ln->ln_lastused);
		IFLR_UNLOCK(lr);
	}
}

/*
 * Set the entry's expiration (uptime-based); 0 means "does not expire".
 */
void
ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
{
	ln->ln_expire = expiry;
}

/*
 * Return the entry's expiration converted to calendar time, compensating
 * the route's calendar base for any system clock change since the base
 * uptime was recorded.  Returns 0 when the entry has no expiration.
 */
static uint64_t
ln_getexpire(struct llinfo_nd6 *ln)
{
	struct timeval caltime;
	uint64_t expiry;

	if (ln->ln_expire != 0) {
		struct rtentry *rt = ln->ln_rt;

		VERIFY(rt != NULL);
		/* account for system time change */
		getmicrotime(&caltime);

		rt->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    rt->base_calendartime, net_uptime(), rt->base_uptime);

		expiry = rt->base_calendartime +
		    ln->ln_expire - rt->base_uptime;
	} else {
		expiry = 0;
	}
	return (expiry);
}

/*
 * Reset the per-interface ND parameters (link MTU, hop limit, reachable
 * and retransmit timers) to their defaults.  Caller must hold
 * nd_if_rwlock (shared or exclusive) and the nd_ifinfo entry's mutex.
 */
void
nd6_ifreset(struct ifnet *ifp)
{
	struct nd_ifinfo *ndi;

	lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_HELD);
	VERIFY(ifp != NULL && ifp->if_index < nd_ifinfo_indexlim);
	ndi = &nd_ifinfo[ifp->if_index];

	VERIFY(ndi->initialized);
	lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED);
	ndi->linkmtu = ifp->if_mtu;
	ndi->chlim = IPV6_DEFHLIM;
	ndi->basereachable = REACHABLE_TIME;
	ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
	ndi->retrans = RETRANS_TIMER;
}

/*
 * Attach ND state to an interface: grow the if_index-indexed nd_ifinfo
 * array if needed, initialize this interface's slot on first use, and
 * reset its parameters.  Interfaces without IFF_MULTICAST are marked
 * ND6_IFF_IFDISABLED.  Returns 0, or ENOBUFS if the array cannot grow.
 */
int
nd6_ifattach(struct ifnet *ifp)
{
	size_t newlim;
	struct nd_ifinfo *ndi;

	/*
	 * We have some arrays that should be indexed by if_index.
	 * since if_index will grow dynamically, they should grow too.
	 */
	lck_rw_lock_shared(nd_if_rwlock);
	newlim = nd_ifinfo_indexlim;
	if (nd_ifinfo == NULL || if_index >= newlim) {
		/* upgrade to writer; on failure the lock was dropped */
		if (!lck_rw_lock_shared_to_exclusive(nd_if_rwlock))
			lck_rw_lock_exclusive(nd_if_rwlock);
		lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_EXCLUSIVE);

		/* re-check: another thread may have grown the array */
		newlim = nd_ifinfo_indexlim;
		if (nd_ifinfo == NULL || if_index >= newlim) {
			size_t n;
			caddr_t q;

			/* double the limit until if_index fits */
			while (if_index >= newlim)
				newlim <<= 1;

			/* grow nd_ifinfo */
			n = newlim * sizeof (struct nd_ifinfo);
			q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
			if (q == NULL) {
				lck_rw_done(nd_if_rwlock);
				return (ENOBUFS);
			}
			bzero(q, n);
			if (nd_ifinfo != NULL) {
				/* old array was half the new size (n/2) */
				bcopy((caddr_t)nd_ifinfo, q, n/2);
				/*
				 * We might want to pattern fill the old
				 * array to catch use-after-free cases.
				 */
				FREE((caddr_t)nd_ifinfo, M_IP6NDP);
			}
			nd_ifinfo = (struct nd_ifinfo *)(void *)q;
			nd_ifinfo_indexlim = newlim;
		}
	}

	VERIFY(ifp != NULL);
	ndi = &nd_ifinfo[ifp->if_index];
	if (!ndi->initialized) {
		lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr);
		ndi->flags = ND6_IFF_PERFORMNUD;
		ndi->initialized = TRUE;
	}

	lck_mtx_lock(&ndi->lock);

	/* ND needs multicast; disable ND6 on non-multicast interfaces */
	if (!(ifp->if_flags & IFF_MULTICAST))
		ndi->flags |= ND6_IFF_IFDISABLED;

	nd6_ifreset(ifp);
	lck_mtx_unlock(&ndi->lock);

	lck_rw_done(nd_if_rwlock);

	nd6_setmtu(ifp);

	return (0);
}

/*
 * Reset ND level link MTU. This function is called when the physical MTU
 * changes, which means we might have to adjust the ND level MTU.
 */
void
nd6_setmtu(struct ifnet *ifp)
{
	struct nd_ifinfo *ndi;
	u_int32_t oldmaxmtu, maxmtu;

	/*
	 * Make sure IPv6 is enabled for the interface first,
	 * because this can be called directly from SIOCSIFMTU for IPv4
	 */
	lck_rw_lock_shared(nd_if_rwlock);
	if (ifp->if_index >= nd_ifinfo_indexlim ||
	    !nd_ifinfo[ifp->if_index].initialized) {
		lck_rw_done(nd_if_rwlock);
		return; /* nd_ifinfo out of bound, or not yet initialized */
	}

	ndi = &nd_ifinfo[ifp->if_index];
	VERIFY(ndi->initialized);
	lck_mtx_lock(&ndi->lock);
	oldmaxmtu = ndi->maxmtu;

	/*
	 * The ND level maxmtu is somewhat redundant to the interface MTU
	 * and is an implementation artifact of KAME.  Instead of hard-
	 * limiting the maxmtu based on the interface type here, we simply
	 * take the if_mtu value since SIOCSIFMTU would have taken care of
	 * the sanity checks related to the maximum MTU allowed for the
	 * interface (a value that is known only by the interface layer),
	 * by sending the request down via ifnet_ioctl().  The use of the
	 * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
	 * does further checking against if_mtu.
	 */
	maxmtu = ndi->maxmtu = ifp->if_mtu;

	/*
	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
	 * undesirable situation.  We thus notify the operator of the change
	 * explicitly.  The check for oldmaxmtu is necessary to restrict the
	 * log to the case of changing the MTU, not initializing it.
	 */
	if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
		log(LOG_NOTICE, "nd6_setmtu: "
		    "new link MTU on %s (%u) is too small for IPv6\n",
		    if_name(ifp), (uint32_t)ndi->maxmtu);
	}
	ndi->linkmtu = ifp->if_mtu;
	lck_mtx_unlock(&ndi->lock);
	lck_rw_done(nd_if_rwlock);

	/* also adjust in6_maxmtu if necessary. */
	if (maxmtu > in6_maxmtu)
		in6_setmaxmtu();
}

/*
 * Prepare an nd_opts cursor for walking icmp6len bytes of ND options
 * starting at "opt".  A zero length marks the cursor done immediately.
 */
void
nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
{
	bzero(ndopts, sizeof (*ndopts));
	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
	ndopts->nd_opts_last =
	    (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);

	if (icmp6len == 0) {
		ndopts->nd_opts_done = 1;
		ndopts->nd_opts_search = NULL;
	}
}

/*
 * Take one ND option.
 *
 * Returns the option at the cursor and advances it, or NULL when the
 * options are exhausted or malformed.  On a malformed option (length
 * field outside the buffer, zero length, or overrun past the end) the
 * whole ndopts state is zeroed so callers can detect the error via
 * nd_opts_last == NULL.
 */
struct nd_opt_hdr *
nd6_option(union nd_opts *ndopts)
{
	struct nd_opt_hdr *nd_opt;
	int olen;

	if (!ndopts)
		panic("ndopts == NULL in nd6_option\n");
	if (!ndopts->nd_opts_last)
		panic("uninitialized ndopts in nd6_option\n");
	if (!ndopts->nd_opts_search)
		return (NULL);
	if (ndopts->nd_opts_done)
		return (NULL);

	nd_opt = ndopts->nd_opts_search;

	/* make sure nd_opt_len is inside the buffer */
	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
		bzero(ndopts, sizeof (*ndopts));
		return (NULL);
	}

	/* option length is expressed in units of 8 octets */
	olen = nd_opt->nd_opt_len << 3;
	if (olen == 0) {
		/*
		 * Message validation requires that all included
		 * options have a length that is greater than zero.
		 */
		bzero(ndopts, sizeof (*ndopts));
		return (NULL);
	}

	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
		/* option overruns the end of buffer, invalid */
		bzero(ndopts, sizeof (*ndopts));
		return (NULL);
	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
		/* reached the end of options chain */
		ndopts->nd_opts_done = 1;
		ndopts->nd_opts_search = NULL;
	}
	return (nd_opt);
}

/*
 * Parse multiple ND options.
 * This function is much easier to use, for ND routines that do not need
 * multiple options of the same type.
 *
 * Indexes recognized options into ndopts->nd_opt_array; returns 0 on
 * success or -1 when a malformed option was encountered (the icmp6
 * bad-option counter is bumped and ndopts is zeroed).
 */
int
nd6_options(union nd_opts *ndopts)
{
	struct nd_opt_hdr *nd_opt;
	int i = 0;

	if (ndopts == NULL)
		panic("ndopts == NULL in nd6_options");
	if (ndopts->nd_opts_last == NULL)
		panic("uninitialized ndopts in nd6_options");
	if (ndopts->nd_opts_search == NULL)
		return (0);

	while (1) {
		nd_opt = nd6_option(ndopts);
		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
			/*
			 * Message validation requires that all included
			 * options have a length that is greater than zero.
			 */
			icmp6stat.icp6s_nd_badopt++;
			bzero(ndopts, sizeof (*ndopts));
			return (-1);
		}

		if (nd_opt == NULL)
			goto skip1;

		switch (nd_opt->nd_opt_type) {
		case ND_OPT_SOURCE_LINKADDR:
		case ND_OPT_TARGET_LINKADDR:
		case ND_OPT_MTU:
		case ND_OPT_REDIRECTED_HEADER:
			/* single-instance options: keep the first only */
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
				nd6log((LOG_INFO,
				    "duplicated ND6 option found (type=%d)\n",
				    nd_opt->nd_opt_type));
				/* XXX bark? */
			} else {
				ndopts->nd_opt_array[nd_opt->nd_opt_type] =
				    nd_opt;
			}
			break;
		case ND_OPT_PREFIX_INFORMATION:
			/* may repeat; record first, track last in pi_end */
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
				ndopts->nd_opt_array[nd_opt->nd_opt_type] =
				    nd_opt;
			}
			ndopts->nd_opts_pi_end =
			    (struct nd_opt_prefix_info *)nd_opt;
			break;
		case ND_OPT_RDNSS:
			/* ignore */
			break;
		default:
			/*
			 * Unknown options must be silently ignored,
			 * to accommodate future extension to the protocol.
			 */
			nd6log((LOG_DEBUG,
			    "nd6_options: unsupported option %d - "
			    "option ignored\n", nd_opt->nd_opt_type));
		}

skip1:
		i++;
		/* cap option count to avoid looping on hostile packets */
		if (i > nd6_maxndopt) {
			icmp6stat.icp6s_nd_toomanyopt++;
			nd6log((LOG_INFO, "too many loop in nd opt\n"));
			break;
		}

		if (ndopts->nd_opts_done)
			break;
	}

	return (0);
}

/* Accounting passed to/filled by nd6_service() during one sweep. */
struct nd6svc_arg {
	int draining;		/* in: purge aggressively if set */
	uint32_t killed;	/* out: entries deleted */
	uint32_t aging_lazy;	/* out: entries on the lazy schedule */
	uint32_t aging;		/* out: entries needing the fast timer */
	uint32_t sticky;	/* out: non-expiring/static entries */
	uint32_t found;		/* out: total entries visited */
};

/*
 * ND6 service routine to expire default route list and prefix list
 */
static void
nd6_service(void *arg)
{
	struct nd6svc_arg *ap = arg;
	struct llinfo_nd6 *ln;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	struct ifnet *ifp = NULL;
	struct in6_ifaddr *ia6, *nia6;
	uint64_t timenow;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Since we may drop rnh_lock and nd6_mutex below, we want
	 * to run this entire operation single
threaded. 757 */ 758 while (nd6_service_busy) { 759 nd6log2((LOG_DEBUG, "%s: %s is blocked by %d waiters\n", 760 __func__, ap->draining ? "drainer" : "timer", 761 nd6_service_waiters)); 762 nd6_service_waiters++; 763 (void) msleep(nd6_service_wc, rnh_lock, (PZERO-1), 764 __func__, NULL); 765 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 766 } 767 768 /* We are busy now; tell everyone else to go away */ 769 nd6_service_busy = TRUE; 770 771 net_update_uptime(); 772 timenow = net_uptime(); 773again: 774 /* 775 * The global list llinfo_nd6 is modified by nd6_request() and is 776 * therefore protected by rnh_lock. For obvious reasons, we cannot 777 * hold rnh_lock across calls that might lead to code paths which 778 * attempt to acquire rnh_lock, else we deadlock. Hence for such 779 * cases we drop rt_lock and rnh_lock, make the calls, and repeat the 780 * loop. To ensure that we don't process the same entry more than 781 * once in a single timeout, we mark the "already-seen" entries with 782 * ND6_LNF_TIMER_SKIP flag. At the end of the loop, we do a second 783 * pass thru the entries and clear the flag so they can be processed 784 * during the next timeout. 
785 */ 786 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 787 788 ln = llinfo_nd6.ln_next; 789 while (ln != NULL && ln != &llinfo_nd6) { 790 struct rtentry *rt; 791 struct sockaddr_in6 *dst; 792 struct llinfo_nd6 *next; 793 u_int32_t retrans, flags; 794 795 /* ln_next/prev/rt is protected by rnh_lock */ 796 next = ln->ln_next; 797 rt = ln->ln_rt; 798 RT_LOCK(rt); 799 800 /* We've seen this already; skip it */ 801 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) { 802 RT_UNLOCK(rt); 803 ln = next; 804 continue; 805 } 806 ap->found++; 807 808 /* rt->rt_ifp should never be NULL */ 809 if ((ifp = rt->rt_ifp) == NULL) { 810 panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__, 811 ln, rt); 812 /* NOTREACHED */ 813 } 814 815 /* rt_llinfo must always be equal to ln */ 816 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) { 817 panic("%s: rt_llinfo(%p) is not equal to ln(%p)", 818 __func__, rt->rt_llinfo, ln); 819 /* NOTREACHED */ 820 } 821 822 /* rt_key should never be NULL */ 823 dst = SIN6(rt_key(rt)); 824 if (dst == NULL) { 825 panic("%s: rt(%p) key is NULL ln(%p)", __func__, 826 rt, ln); 827 /* NOTREACHED */ 828 } 829 830 /* Set the flag in case we jump to "again" */ 831 ln->ln_flags |= ND6_LNF_TIMER_SKIP; 832 833 if (ln->ln_expire == 0 || (rt->rt_flags & RTF_STATIC)) { 834 ap->sticky++; 835 } else if (ap->draining && (rt->rt_refcnt == 0)) { 836 /* 837 * If we are draining, immediately purge non-static 838 * entries without oustanding route refcnt. 839 */ 840 if (ln->ln_state > ND6_LLINFO_INCOMPLETE) 841 ln->ln_state = ND6_LLINFO_STALE; 842 else 843 ln->ln_state = ND6_LLINFO_PURGE; 844 ln_setexpire(ln, timenow); 845 } 846 847 /* 848 * If the entry has not expired, skip it. Take note on the 849 * state, as entries that are in the STALE state are simply 850 * waiting to be garbage collected, in which case we can 851 * relax the callout scheduling (use nd6_prune_lazy). 
852 */ 853 if (ln->ln_expire > timenow) { 854 switch (ln->ln_state) { 855 case ND6_LLINFO_STALE: 856 ap->aging_lazy++; 857 break; 858 default: 859 ap->aging++; 860 break; 861 } 862 RT_UNLOCK(rt); 863 ln = next; 864 continue; 865 } 866 867 lck_rw_lock_shared(nd_if_rwlock); 868 if (ifp->if_index >= nd_ifinfo_indexlim) { 869 /* 870 * In the event the nd_ifinfo[] array is not in synch 871 * by now, we don't want to hold on to the llinfo entry 872 * forever; just purge it rather than have it consume 873 * resources. That's better than transmitting out of 874 * the interface as the rest of the layers may not be 875 * ready as well. 876 * 877 * We can retire this logic once we get rid of the 878 * separate array and utilize a per-ifnet structure. 879 */ 880 retrans = RETRANS_TIMER; 881 flags = ND6_IFF_PERFORMNUD; 882 if (ln->ln_expire != 0) { 883 ln->ln_state = ND6_LLINFO_PURGE; 884 log (LOG_ERR, "%s: purging rt(0x%llx) " 885 "ln(0x%llx) dst %s, if_index %d >= %d\n", 886 __func__, (uint64_t)VM_KERNEL_ADDRPERM(rt), 887 (uint64_t)VM_KERNEL_ADDRPERM(ln), 888 ip6_sprintf(&dst->sin6_addr), ifp->if_index, 889 nd_ifinfo_indexlim); 890 } 891 } else { 892 struct nd_ifinfo *ndi = ND_IFINFO(ifp); 893 VERIFY(ndi->initialized); 894 retrans = ndi->retrans; 895 flags = ndi->flags; 896 } 897 lck_rw_done(nd_if_rwlock); 898 899 RT_LOCK_ASSERT_HELD(rt); 900 901 switch (ln->ln_state) { 902 case ND6_LLINFO_INCOMPLETE: 903 if (ln->ln_asked < nd6_mmaxtries) { 904 struct ifnet *exclifp = ln->ln_exclifp; 905 ln->ln_asked++; 906 ln_setexpire(ln, timenow + retrans / 1000); 907 RT_ADDREF_LOCKED(rt); 908 RT_UNLOCK(rt); 909 lck_mtx_unlock(rnh_lock); 910 if (ip6_forwarding) { 911 nd6_prproxy_ns_output(ifp, exclifp, 912 NULL, &dst->sin6_addr, ln); 913 } else { 914 nd6_ns_output(ifp, NULL, 915 &dst->sin6_addr, ln, 0); 916 } 917 RT_REMREF(rt); 918 ap->aging++; 919 lck_mtx_lock(rnh_lock); 920 } else { 921 struct mbuf *m = ln->ln_hold; 922 ln->ln_hold = NULL; 923 if (m != NULL) { 924 /* 925 * Fake rcvif to 
make ICMP error 926 * more helpful in diagnosing 927 * for the receiver. 928 * XXX: should we consider 929 * older rcvif? 930 */ 931 m->m_pkthdr.rcvif = ifp; 932 RT_ADDREF_LOCKED(rt); 933 RT_UNLOCK(rt); 934 lck_mtx_unlock(rnh_lock); 935 icmp6_error(m, ICMP6_DST_UNREACH, 936 ICMP6_DST_UNREACH_ADDR, 0); 937 } else { 938 RT_ADDREF_LOCKED(rt); 939 RT_UNLOCK(rt); 940 lck_mtx_unlock(rnh_lock); 941 } 942 nd6_free(rt); 943 ap->killed++; 944 lck_mtx_lock(rnh_lock); 945 rtfree_locked(rt); 946 } 947 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 948 goto again; 949 950 case ND6_LLINFO_REACHABLE: 951 if (ln->ln_expire != 0) { 952 ln->ln_state = ND6_LLINFO_STALE; 953 ln_setexpire(ln, timenow + nd6_gctimer); 954 ap->aging_lazy++; 955 } 956 RT_UNLOCK(rt); 957 break; 958 959 case ND6_LLINFO_STALE: 960 case ND6_LLINFO_PURGE: 961 /* Garbage Collection(RFC 4861 5.3) */ 962 if (ln->ln_expire != 0) { 963 RT_ADDREF_LOCKED(rt); 964 RT_UNLOCK(rt); 965 lck_mtx_unlock(rnh_lock); 966 nd6_free(rt); 967 ap->killed++; 968 lck_mtx_lock(rnh_lock); 969 rtfree_locked(rt); 970 goto again; 971 } else { 972 RT_UNLOCK(rt); 973 } 974 break; 975 976 case ND6_LLINFO_DELAY: 977 if ((flags & ND6_IFF_PERFORMNUD) != 0) { 978 /* We need NUD */ 979 ln->ln_asked = 1; 980 ln->ln_state = ND6_LLINFO_PROBE; 981 ln_setexpire(ln, timenow + retrans / 1000); 982 RT_ADDREF_LOCKED(rt); 983 RT_UNLOCK(rt); 984 lck_mtx_unlock(rnh_lock); 985 nd6_ns_output(ifp, &dst->sin6_addr, 986 &dst->sin6_addr, ln, 0); 987 RT_REMREF(rt); 988 ap->aging++; 989 lck_mtx_lock(rnh_lock); 990 goto again; 991 } 992 ln->ln_state = ND6_LLINFO_STALE; /* XXX */ 993 ln_setexpire(ln, timenow + nd6_gctimer); 994 RT_UNLOCK(rt); 995 ap->aging_lazy++; 996 break; 997 998 case ND6_LLINFO_PROBE: 999 if (ln->ln_asked < nd6_umaxtries) { 1000 ln->ln_asked++; 1001 ln_setexpire(ln, timenow + retrans / 1000); 1002 RT_ADDREF_LOCKED(rt); 1003 RT_UNLOCK(rt); 1004 lck_mtx_unlock(rnh_lock); 1005 nd6_ns_output(ifp, &dst->sin6_addr, 1006 &dst->sin6_addr, ln, 0); 1007 
RT_REMREF(rt); 1008 ap->aging++; 1009 lck_mtx_lock(rnh_lock); 1010 } else { 1011 RT_ADDREF_LOCKED(rt); 1012 RT_UNLOCK(rt); 1013 lck_mtx_unlock(rnh_lock); 1014 nd6_free(rt); 1015 ap->killed++; 1016 lck_mtx_lock(rnh_lock); 1017 rtfree_locked(rt); 1018 } 1019 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 1020 goto again; 1021 1022 default: 1023 RT_UNLOCK(rt); 1024 break; 1025 } 1026 ln = next; 1027 } 1028 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); 1029 1030 /* Now clear the flag from all entries */ 1031 ln = llinfo_nd6.ln_next; 1032 while (ln != NULL && ln != &llinfo_nd6) { 1033 struct rtentry *rt = ln->ln_rt; 1034 struct llinfo_nd6 *next = ln->ln_next; 1035 1036 RT_LOCK_SPIN(rt); 1037 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) 1038 ln->ln_flags &= ~ND6_LNF_TIMER_SKIP; 1039 RT_UNLOCK(rt); 1040 ln = next; 1041 } 1042 lck_mtx_unlock(rnh_lock); 1043 1044 /* expire default router list */ 1045 lck_mtx_lock(nd6_mutex); 1046 dr = TAILQ_FIRST(&nd_defrouter); 1047 while (dr) { 1048 ap->found++; 1049 if (dr->expire != 0 && dr->expire < timenow) { 1050 struct nd_defrouter *t; 1051 t = TAILQ_NEXT(dr, dr_entry); 1052 defrtrlist_del(dr); 1053 dr = t; 1054 ap->killed++; 1055 } else { 1056 if (dr->expire == 0 || (dr->stateflags & NDDRF_STATIC)) 1057 ap->sticky++; 1058 else 1059 ap->aging_lazy++; 1060 dr = TAILQ_NEXT(dr, dr_entry); 1061 } 1062 } 1063 lck_mtx_unlock(nd6_mutex); 1064 1065 /* 1066 * expire interface addresses. 1067 * in the past the loop was inside prefix expiry processing. 1068 * However, from a stricter speci-confrmance standpoint, we should 1069 * rather separate address lifetimes and prefix lifetimes. 
1070 */ 1071addrloop: 1072 lck_rw_lock_exclusive(&in6_ifaddr_rwlock); 1073 for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) { 1074 ap->found++; 1075 nia6 = ia6->ia_next; 1076 IFA_LOCK(&ia6->ia_ifa); 1077 /* 1078 * Extra reference for ourselves; it's no-op if 1079 * we don't have to regenerate temporary address, 1080 * otherwise it protects the address from going 1081 * away since we drop in6_ifaddr_rwlock below. 1082 */ 1083 IFA_ADDREF_LOCKED(&ia6->ia_ifa); 1084 /* check address lifetime */ 1085 if (IFA6_IS_INVALID(ia6, timenow)) { 1086 /* 1087 * If the expiring address is temporary, try 1088 * regenerating a new one. This would be useful when 1089 * we suspended a laptop PC, then turned it on after a 1090 * period that could invalidate all temporary 1091 * addresses. Although we may have to restart the 1092 * loop (see below), it must be after purging the 1093 * address. Otherwise, we'd see an infinite loop of 1094 * regeneration. 1095 */ 1096 if (ip6_use_tempaddr && 1097 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { 1098 /* 1099 * NOTE: We have to drop the lock here 1100 * because regen_tmpaddr() eventually calls 1101 * in6_update_ifa(), which must take the lock 1102 * and would otherwise cause a hang. This is 1103 * safe because the goto addrloop leads to a 1104 * re-evaluation of the in6_ifaddrs list 1105 */ 1106 IFA_UNLOCK(&ia6->ia_ifa); 1107 lck_rw_done(&in6_ifaddr_rwlock); 1108 (void) regen_tmpaddr(ia6); 1109 } else { 1110 IFA_UNLOCK(&ia6->ia_ifa); 1111 lck_rw_done(&in6_ifaddr_rwlock); 1112 } 1113 1114 /* 1115 * Purging the address would have caused 1116 * in6_ifaddr_rwlock to be dropped and reacquired; 1117 * therefore search again from the beginning 1118 * of in6_ifaddrs list. 1119 */ 1120 in6_purgeaddr(&ia6->ia_ifa); 1121 ap->killed++; 1122 1123 /* Release extra reference taken above */ 1124 IFA_REMREF(&ia6->ia_ifa); 1125 goto addrloop; 1126 } 1127 /* 1128 * The lazy timer runs every nd6_prune_lazy seconds with at 1129 * most "2 * nd6_prune_lazy - 1" leeway. 
We consider the worst 1130 * case here and make sure we schedule the regular timer if an 1131 * interface address is about to expire. 1132 */ 1133 if (IFA6_IS_INVALID(ia6, timenow + 3 * nd6_prune_lazy)) 1134 ap->aging++; 1135 else 1136 ap->aging_lazy++; 1137 IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa); 1138 if (IFA6_IS_DEPRECATED(ia6, timenow)) { 1139 int oldflags = ia6->ia6_flags; 1140 1141 ia6->ia6_flags |= IN6_IFF_DEPRECATED; 1142 1143 /* 1144 * If a temporary address has just become deprecated, 1145 * regenerate a new one if possible. 1146 */ 1147 if (ip6_use_tempaddr && 1148 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && 1149 (oldflags & IN6_IFF_DEPRECATED) == 0) { 1150 1151 /* see NOTE above */ 1152 IFA_UNLOCK(&ia6->ia_ifa); 1153 lck_rw_done(&in6_ifaddr_rwlock); 1154 if (regen_tmpaddr(ia6) == 0) { 1155 /* 1156 * A new temporary address is 1157 * generated. 1158 * XXX: this means the address chain 1159 * has changed while we are still in 1160 * the loop. Although the change 1161 * would not cause disaster (because 1162 * it's not a deletion, but an 1163 * addition,) we'd rather restart the 1164 * loop just for safety. Or does this 1165 * significantly reduce performance?? 1166 */ 1167 /* Release extra reference */ 1168 IFA_REMREF(&ia6->ia_ifa); 1169 goto addrloop; 1170 } 1171 lck_rw_lock_exclusive(&in6_ifaddr_rwlock); 1172 } else { 1173 IFA_UNLOCK(&ia6->ia_ifa); 1174 } 1175 } else { 1176 /* 1177 * A new RA might have made a deprecated address 1178 * preferred. 1179 */ 1180 ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; 1181 IFA_UNLOCK(&ia6->ia_ifa); 1182 } 1183 lck_rw_assert(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); 1184 /* Release extra reference taken above */ 1185 IFA_REMREF(&ia6->ia_ifa); 1186 } 1187 lck_rw_done(&in6_ifaddr_rwlock); 1188 1189 lck_mtx_lock(nd6_mutex); 1190 /* expire prefix list */ 1191 pr = nd_prefix.lh_first; 1192 while (pr != NULL) { 1193 ap->found++; 1194 /* 1195 * check prefix lifetime. 
1196 * since pltime is just for autoconf, pltime processing for 1197 * prefix is not necessary. 1198 */ 1199 NDPR_LOCK(pr); 1200 if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE || 1201 pr->ndpr_stateflags & NDPRF_DEFUNCT) { 1202 pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; 1203 NDPR_UNLOCK(pr); 1204 pr = pr->ndpr_next; 1205 continue; 1206 } 1207 if (pr->ndpr_expire != 0 && pr->ndpr_expire < timenow) { 1208 /* 1209 * address expiration and prefix expiration are 1210 * separate. NEVER perform in6_purgeaddr here. 1211 */ 1212 pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; 1213 NDPR_ADDREF_LOCKED(pr); 1214 prelist_remove(pr); 1215 NDPR_UNLOCK(pr); 1216 NDPR_REMREF(pr); 1217 pfxlist_onlink_check(); 1218 pr = nd_prefix.lh_first; 1219 ap->killed++; 1220 } else { 1221 if (pr->ndpr_expire == 0 || 1222 (pr->ndpr_stateflags & NDPRF_STATIC)) 1223 ap->sticky++; 1224 else 1225 ap->aging_lazy++; 1226 pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; 1227 NDPR_UNLOCK(pr); 1228 pr = pr->ndpr_next; 1229 } 1230 } 1231 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) { 1232 NDPR_LOCK(pr); 1233 pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE; 1234 NDPR_UNLOCK(pr); 1235 } 1236 lck_mtx_unlock(nd6_mutex); 1237 1238 lck_mtx_lock(rnh_lock); 1239 /* We're done; let others enter */ 1240 nd6_service_busy = FALSE; 1241 if (nd6_service_waiters > 0) { 1242 nd6_service_waiters = 0; 1243 wakeup(nd6_service_wc); 1244 } 1245} 1246 1247void 1248nd6_drain(void *arg) 1249{ 1250#pragma unused(arg) 1251 struct nd6svc_arg sarg; 1252 1253 nd6log2((LOG_DEBUG, "%s: draining ND6 entries\n", __func__)); 1254 1255 lck_mtx_lock(rnh_lock); 1256 bzero(&sarg, sizeof (sarg)); 1257 sarg.draining = 1; 1258 nd6_service(&sarg); 1259 nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, " 1260 "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy, 1261 sarg.aging, sarg.sticky, sarg.killed)); 1262 lck_mtx_unlock(rnh_lock); 1263} 1264 1265/* 1266 * We use the ``arg'' variable to decide whether or not the timer 
we're
 * running is the fast timer.  We do this to reset the nd6_fast_timer_on
 * variable so that later we don't end up ignoring a ``fast timer''
 * request if the 5 second timer is running (see nd6_sched_timeout).
 */
static void
nd6_timeout(void *arg)
{
	struct nd6svc_arg sarg;
	uint32_t buf;

	lck_mtx_lock(rnh_lock);
	bzero(&sarg, sizeof (sarg));
	nd6_service(&sarg);
	nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
	    "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
	    sarg.aging, sarg.sticky, sarg.killed));
	/* re-arm the timer if there's work to do */
	nd6_timeout_run--;
	VERIFY(nd6_timeout_run >= 0 && nd6_timeout_run < 2);
	/* arg identifies the fast timer: it is &nd6_fast_timer_on only then */
	if (arg == &nd6_fast_timer_on)
		nd6_fast_timer_on = FALSE;
	if (sarg.aging_lazy > 0 || sarg.aging > 0 || nd6_sched_timeout_want) {
		struct timeval atv, ltv, *leeway;
		int lazy = nd6_prune_lazy;

		if (sarg.aging > 0 || lazy < 1) {
			/* entries aging on the fast schedule: no leeway */
			atv.tv_usec = 0;
			atv.tv_sec = nd6_prune;
			leeway = NULL;
		} else {
			/* only lazy work: longer period plus random leeway */
			VERIFY(lazy >= 1);
			atv.tv_usec = 0;
			atv.tv_sec = MAX(nd6_prune, lazy);
			ltv.tv_usec = 0;
			read_frandom(&buf, sizeof(buf));
			ltv.tv_sec = MAX(buf % lazy, 1) * 2;
			leeway = &ltv;
		}
		nd6_sched_timeout(&atv, leeway);
	} else if (nd6_debug) {
		nd6log2((LOG_DEBUG, "%s: not rescheduling timer\n", __func__));
	}
	lck_mtx_unlock(rnh_lock);
}

/*
 * Arrange for nd6_timeout() to run.
 *
 * atv: requested interval; NULL means "use the default fast interval"
 *	(MAX(nd6_prune, 1) seconds, leeway ignored).
 * ltv: optional leeway; NULL schedules the "fast" (exact) timer.
 *
 * Caller must hold rnh_lock.  At most two timeouts may be pending
 * (nd6_timeout_run is kept in [0, 2]); a fast timer may be scheduled
 * on top of a pending lazy one, but never the other way around.
 */
void
nd6_sched_timeout(struct timeval *atv, struct timeval *ltv)
{
	struct timeval tv;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	if (atv == NULL) {
		tv.tv_usec = 0;
		tv.tv_sec = MAX(nd6_prune, 1);
		atv = &tv;
		ltv = NULL;	/* ignore leeway */
	}
	/* see comments on top of this file */
	if (nd6_timeout_run == 0) {
		if (ltv == NULL) {
			nd6log2((LOG_DEBUG, "%s: timer scheduled in "
			    "T+%llus.%lluu (demand %d)\n", __func__,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
			    nd6_sched_timeout_want));
			nd6_fast_timer_on = TRUE;
			timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
		} else {
			nd6log2((LOG_DEBUG, "%s: timer scheduled in "
			    "T+%llus.%lluu with %llus.%lluu leeway "
			    "(demand %d)\n", __func__, (uint64_t)atv->tv_sec,
			    (uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec,
			    (uint64_t)ltv->tv_usec, nd6_sched_timeout_want));
			nd6_fast_timer_on = FALSE;
			timeout_with_leeway(nd6_timeout, NULL,
			    tvtohz(atv), tvtohz(ltv));
		}
		nd6_timeout_run++;
		nd6_sched_timeout_want = 0;
	} else if (nd6_timeout_run == 1 && ltv == NULL &&
	    nd6_fast_timer_on == FALSE) {
		/* a lazy timer is pending; schedule the fast one alongside */
		nd6log2((LOG_DEBUG, "%s: fast timer scheduled in "
		    "T+%llus.%lluu (demand %d)\n", __func__,
		    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
		    nd6_sched_timeout_want));
		nd6_fast_timer_on = TRUE;
		nd6_sched_timeout_want = 0;
		nd6_timeout_run++;
		timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
	} else {
		/* nothing to do; an adequate timer is already pending */
		if (ltv == NULL) {
			nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
			    "timers %d, fast_timer %d, T+%llus.%lluu\n",
			    __func__, nd6_timeout_run, nd6_fast_timer_on,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec));
		} else {
			nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
			    "timers %d, fast_timer %d, T+%llus.%lluu "
			    "with %llus.%lluu leeway\n", __func__,
			    nd6_timeout_run, nd6_fast_timer_on,
			    (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
			    (uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec));
		}
	}
}

/*
 * ND6 router advertisement kernel notification
 *
 * Posts one KEV_ND6_SUBCLASS kernel event per prefix in prefix_list
 * (up to list_length entries), carrying the router's link-layer
 * address (truncated to ND6_ROUTER_LL_SIZE) and, when valid, the MTU
 * advertised in the RA.
 */
void
nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
    u_int32_t list_length, u_int32_t mtu, char *dl_addr, u_int32_t dl_addr_len)
{
	struct kev_msg ev_msg;
	struct kev_nd6_ra_data nd6_ra_msg_data;
	struct nd_prefix_list *itr = prefix_list;

	bzero(&ev_msg, sizeof (struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_ND6_SUBCLASS;
	ev_msg.event_code = code;

	bzero(&nd6_ra_msg_data, sizeof (nd6_ra_msg_data));
	/* clamp the copied link-layer address to the event buffer size */
	nd6_ra_msg_data.lladdrlen = (dl_addr_len <= ND6_ROUTER_LL_SIZE) ?
	    dl_addr_len : ND6_ROUTER_LL_SIZE;
	bcopy(dl_addr, &nd6_ra_msg_data.lladdr, nd6_ra_msg_data.lladdrlen);

	if (mtu > 0 && mtu >= IPV6_MMTU) {
		nd6_ra_msg_data.mtu = mtu;
		nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_MTU;
	}

	if (list_length > 0 && prefix_list != NULL) {
		nd6_ra_msg_data.list_length = list_length;
		nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_PREFIX;
	}

	while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
		bcopy(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
		    sizeof (nd6_ra_msg_data.prefix.prefix));
		nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
		nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
		nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
		nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
		nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
		nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr);
		nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
		nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
		nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;

		/* send the message up */
		ev_msg.dv[0].data_ptr = &nd6_ra_msg_data;
		ev_msg.dv[0].data_length = sizeof (nd6_ra_msg_data);
		ev_msg.dv[1].data_length = 0;
		kev_post_msg(&ev_msg);

		/* clean up for the next prefix */
		bzero(&nd6_ra_msg_data.prefix, sizeof (nd6_ra_msg_data.prefix));
		itr = itr->next;
		nd6_ra_msg_data.list_index++;
	}
}

/*
 * Regenerate deprecated/invalidated temporary address
 *
 * Scans ia6's interface for a public autoconf address sharing ia6's
 * prefix; if a preferred one exists (and no still-preferred temporary
 * address does), derives a fresh temporary address from it via
 * in6_tmpifadd().  Returns 0 on success, -1 otherwise.
 */
static int
regen_tmpaddr(struct in6_ifaddr *ia6)
{
	struct ifaddr *ifa;
	struct ifnet *ifp;
	struct in6_ifaddr *public_ifa6 = NULL;
	uint64_t timenow = net_uptime();

	ifp = ia6->ia_ifa.ifa_ifp;
	ifnet_lock_shared(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		struct in6_ifaddr *it6;

		IFA_LOCK(ifa);
		if (ifa->ifa_addr->sa_family != AF_INET6) {
			IFA_UNLOCK(ifa);
			continue;
		}
		it6 = (struct in6_ifaddr *)ifa;

		/* ignore no autoconf addresses. */
		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
			IFA_UNLOCK(ifa);
			continue;
		}
		/* ignore autoconf addresses with different prefixes. */
		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
			IFA_UNLOCK(ifa);
			continue;
		}
		/*
		 * Now we are looking at an autoconf address with the same
		 * prefix as ours.  If the address is temporary and is still
		 * preferred, do not create another one.  It would be rare, but
		 * could happen, for example, when we resume a laptop PC after
		 * a long period.
		 */
		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
		    !IFA6_IS_DEPRECATED(it6, timenow)) {
			IFA_UNLOCK(ifa);
			if (public_ifa6 != NULL)
				IFA_REMREF(&public_ifa6->ia_ifa);
			public_ifa6 = NULL;
			break;
		}

		/*
		 * This is a public autoconf address that has the same prefix
		 * as ours.  If it is preferred, keep it.  We can't break the
		 * loop here, because there may be a still-preferred temporary
		 * address with the prefix.
		 */
		if (!IFA6_IS_DEPRECATED(it6, timenow)) {
			IFA_ADDREF_LOCKED(ifa);	/* for public_ifa6 */
			IFA_UNLOCK(ifa);
			if (public_ifa6 != NULL)
				IFA_REMREF(&public_ifa6->ia_ifa);
			public_ifa6 = it6;
		} else {
			IFA_UNLOCK(ifa);
		}
	}
	ifnet_lock_done(ifp);

	if (public_ifa6 != NULL) {
		int e;

		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
			    " tmp addr,errno=%d\n", e);
			IFA_REMREF(&public_ifa6->ia_ifa);
			return (-1);
		}
		IFA_REMREF(&public_ifa6->ia_ifa);
		return (0);
	}

	return (-1);
}

/*
 * Nuke neighbor cache/prefix/default router management table, right before
 * ifp goes away.
 */
void
nd6_purge(struct ifnet *ifp)
{
	struct llinfo_nd6 *ln;
	struct nd_defrouter *dr, *ndr;
	struct nd_prefix *pr, *npr;
	boolean_t removed;

	/* Nuke default router list entries toward ifp */
	lck_mtx_lock(nd6_mutex);
	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
		/*
		 * The first entry of the list may be stored in
		 * the routing table, so we'll delete it later.
		 */
		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
			ndr = TAILQ_NEXT(dr, dr_entry);
			/* defer NDDRF_INSTALLED entries to the pass below */
			if (dr->stateflags & NDDRF_INSTALLED)
				continue;
			if (dr->ifp == ifp)
				defrtrlist_del(dr);
		}
		dr = TAILQ_FIRST(&nd_defrouter);
		if (dr->ifp == ifp)
			defrtrlist_del(dr);
	}

	/* second pass: now remove the installed entries for ifp */
	for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = ndr) {
		ndr = TAILQ_NEXT(dr, dr_entry);
		if (!(dr->stateflags & NDDRF_INSTALLED))
			continue;

		if (dr->ifp == ifp)
			defrtrlist_del(dr);
	}

	/* Nuke prefix list entries toward ifp */
	removed = FALSE;
	for (pr = nd_prefix.lh_first; pr; pr = npr) {
		NDPR_LOCK(pr);
		npr = pr->ndpr_next;
		if (pr->ndpr_ifp == ifp &&
		    !(pr->ndpr_stateflags & NDPRF_DEFUNCT)) {
			/*
			 * Because if_detach() does *not* release prefixes
			 * while purging addresses the reference count will
			 * still be above zero. We therefore reset it to
			 * make sure that the prefix really gets purged.
			 */
			pr->ndpr_addrcnt = 0;

			/*
			 * Previously, pr->ndpr_addr is removed as well,
			 * but I strongly believe we don't have to do it.
			 * nd6_purge() is only called from in6_ifdetach(),
			 * which removes all the associated interface addresses
			 * by itself.
			 * (jinmei@kame.net 20010129)
			 */
			NDPR_ADDREF_LOCKED(pr);
			prelist_remove(pr);
			NDPR_UNLOCK(pr);
			NDPR_REMREF(pr);
			removed = TRUE;
			/* list may have changed; restart from the head */
			npr = nd_prefix.lh_first;
		} else {
			NDPR_UNLOCK(pr);
		}
	}
	if (removed)
		pfxlist_onlink_check();
	lck_mtx_unlock(nd6_mutex);

	/* cancel default outgoing interface setting */
	if (nd6_defifindex == ifp->if_index) {
		nd6_setdefaultiface(0);
	}

	/*
	 * Perform default router selection even when we are a router,
	 * if Scoped Routing is enabled.
	 */
	if (ip6_doscopedroute || !ip6_forwarding) {
		lck_mtx_lock(nd6_mutex);
		/* refresh default router list */
		defrouter_select(ifp);
		lck_mtx_unlock(nd6_mutex);
	}

	/*
	 * Nuke neighbor cache entries for the ifp.
	 * Note that rt->rt_ifp may not be the same as ifp,
	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
	 * nd6_rtrequest(), and ip6_input().
	 */
again:
	lck_mtx_lock(rnh_lock);
	ln = llinfo_nd6.ln_next;
	while (ln != NULL && ln != &llinfo_nd6) {
		struct rtentry *rt;
		struct llinfo_nd6 *nln;

		nln = ln->ln_next;
		rt = ln->ln_rt;
		RT_LOCK(rt);
		if (rt->rt_gateway != NULL &&
		    rt->rt_gateway->sa_family == AF_LINK &&
		    SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
			RT_ADDREF_LOCKED(rt);
			RT_UNLOCK(rt);
			lck_mtx_unlock(rnh_lock);
			/*
			 * See comments on nd6_service() for reasons why
			 * this loop is repeated; we bite the costs of
			 * going thru the same llinfo_nd6 more than once
			 * here, since this purge happens during detach,
			 * and that unlike the timer case, it's possible
			 * there's more than one purges happening at the
			 * same time (thus a flag wouldn't buy anything).
			 */
			nd6_free(rt);
			RT_REMREF(rt);
			lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
			goto again;
		} else {
			RT_UNLOCK(rt);
		}
		ln = nln;
	}
	lck_mtx_unlock(rnh_lock);
}

/*
 * Upon success, the returned route will be locked and the caller is
 * responsible for releasing the reference and doing RT_UNLOCK(rt).
 * This routine does not require rnh_lock to be held by the caller,
 * although it needs to be indicated of such a case in order to call
 * the correct variant of the relevant routing routines.
 */
struct rtentry *
nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp, int rt_locked)
{
	/*
	 * addr6:     neighbor address to look up (scope derived from ifp).
	 * create:    non-zero to create a neighbor-cache route when none
	 *            exists (requires ifp != NULL).
	 * ifp:       interface restricting the scoped lookup; may be NULL.
	 * rt_locked: non-zero when the caller already holds rnh_lock, so
	 *            the _locked routing variants must be used.
	 */
	struct rtentry *rt;
	struct sockaddr_in6 sin6;
	unsigned int ifscope;

	bzero(&sin6, sizeof (sin6));
	sin6.sin6_len = sizeof (struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = *addr6;

	ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
	if (rt_locked) {
		lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
		rt = rtalloc1_scoped_locked(SA(&sin6), create, 0, ifscope);
	} else {
		rt = rtalloc1_scoped(SA(&sin6), create, 0, ifscope);
	}

	if (rt != NULL) {
		RT_LOCK(rt);
		if ((rt->rt_flags & RTF_LLINFO) == 0) {
			/*
			 * This is the case for the default route.
			 * If we want to create a neighbor cache for the
			 * address, we should free the route for the
			 * destination and allocate an interface route.
			 */
			if (create) {
				RT_UNLOCK(rt);
				if (rt_locked)
					rtfree_locked(rt);
				else
					rtfree(rt);
				rt = NULL;
			}
		}
	}
	if (rt == NULL) {
		if (create && ifp) {
			struct ifaddr *ifa;
			u_int32_t ifa_flags;
			int e;

			/*
			 * If no route is available and create is set,
			 * we allocate a host route for the destination
			 * and treat it like an interface route.
			 * This hack is necessary for a neighbor which can't
			 * be covered by our own prefix.
			 */
			ifa = ifaof_ifpforaddr(SA(&sin6), ifp);
			if (ifa == NULL)
				return (NULL);

			/*
			 * Create a new route.  RTF_LLINFO is necessary
			 * to create a Neighbor Cache entry for the
			 * destination in nd6_rtrequest which will be
			 * called in rtrequest via ifa->ifa_rtrequest.
			 */
			if (!rt_locked)
				lck_mtx_lock(rnh_lock);
			IFA_LOCK_SPIN(ifa);
			ifa_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			if ((e = rtrequest_scoped_locked(RTM_ADD,
			    SA(&sin6), ifa->ifa_addr, SA(&all1_sa),
			    (ifa_flags | RTF_HOST | RTF_LLINFO) &
			    ~RTF_CLONING, &rt, ifscope)) != 0) {
				if (e != EEXIST)
					log(LOG_ERR, "%s: failed to add route "
					    "for a neighbor(%s), errno=%d\n",
					    __func__, ip6_sprintf(addr6), e);
			}
			if (!rt_locked)
				lck_mtx_unlock(rnh_lock);
			IFA_REMREF(ifa);
			if (rt == NULL)
				return (NULL);

			RT_LOCK(rt);
			if (rt->rt_llinfo) {
				struct llinfo_nd6 *ln = rt->rt_llinfo;
				ln->ln_state = ND6_LLINFO_NOSTATE;
			}
		} else {
			return (NULL);
		}
	}
	RT_LOCK_ASSERT_HELD(rt);
	/*
	 * Validation for the entry.
	 * Note that the check for rt_llinfo is necessary because a cloned
	 * route from a parent route that has the L flag (e.g. the default
	 * route to a p2p interface) may have the flag, too, while the
	 * destination is not actually a neighbor.
	 * XXX: we can't use rt->rt_ifp to check for the interface, since
	 *	it might be the loopback interface if the entry is for our
	 *	own address on a non-loopback interface. Instead, we should
	 *	use rt->rt_ifa->ifa_ifp, which would specify the REAL
	 *	interface.
	 * Note also that ifa_ifp and ifp may differ when we connect two
	 * interfaces to a same link, install a link prefix to an interface,
	 * and try to install a neighbor cache on an interface that does not
	 * have a route to the prefix.
	 *
	 * If the address is from a proxied prefix, the ifa_ifp and ifp might
	 * not match, because nd6_na_input() could have modified the ifp
	 * of the route to point to the interface where the NA arrived on,
	 * hence the test for RTF_PROXY.
	 */
	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
	    (ifp && rt->rt_ifa->ifa_ifp != ifp &&
	    !(rt->rt_flags & RTF_PROXY))) {
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		if (create) {
			log(LOG_DEBUG, "%s: failed to lookup %s "
			    "(if = %s)\n", __func__, ip6_sprintf(addr6),
			    ifp ? if_name(ifp) : "unspec");
			/* xxx more logs... kazu */
		}
		return (NULL);
	}
	/*
	 * Caller needs to release reference and call RT_UNLOCK(rt).
	 */
	return (rt);
}

/*
 * Test whether a given IPv6 address is a neighbor or not, ignoring
 * the actual neighbor cache.  The neighbor cache is ignored in order
 * to not reenter the routing code from within itself.
 *
 * Returns 1 if addr is a neighbor on ifp, 0 otherwise.
 * Caller must hold nd6_mutex.
 */
static int
nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
	struct nd_prefix *pr;
	struct ifaddr *dstaddr;

	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);

	/*
	 * A link-local address is always a neighbor.
	 * XXX: a link does not necessarily specify a single interface.
	 */
	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
		struct sockaddr_in6 sin6_copy;
		u_int32_t zone;

		/*
		 * We need sin6_copy since sa6_recoverscope() may modify the
		 * content (XXX).
		 */
		sin6_copy = *addr;
		if (sa6_recoverscope(&sin6_copy, FALSE))
			return (0); /* XXX: should be impossible */
		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
			return (0);
		/* a neighbor only if the address's zone matches ifp's */
		if (sin6_copy.sin6_scope_id == zone)
			return (1);
		else
			return (0);
	}

	/*
	 * If the address matches one of our addresses,
	 * it should be a neighbor.
	 * If the address matches one of our on-link prefixes, it should be a
	 * neighbor.
	 */
	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
		NDPR_LOCK(pr);
		if (pr->ndpr_ifp != ifp) {
			NDPR_UNLOCK(pr);
			continue;
		}
		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
			NDPR_UNLOCK(pr);
			continue;
		}
		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
		    &addr->sin6_addr, &pr->ndpr_mask)) {
			NDPR_UNLOCK(pr);
			return (1);
		}
		NDPR_UNLOCK(pr);
	}

	/*
	 * If the address is assigned on the node of the other side of
	 * a p2p interface, the address should be a neighbor.
	 */
	dstaddr = ifa_ifwithdstaddr(SA(addr));
	if (dstaddr != NULL) {
		if (dstaddr->ifa_ifp == ifp) {
			IFA_REMREF(dstaddr);
			return (1);
		}
		IFA_REMREF(dstaddr);
		dstaddr = NULL;
	}

	/*
	 * If the default router list is empty, all addresses are regarded
	 * as on-link, and thus, as a neighbor.
	 * XXX: we restrict the condition to hosts, because routers usually do
	 * not have the "default router list".
	 * XXX: this block should eventually be removed (it is disabled when
	 * Scoped Routing is in effect); treating all destinations as on-link
	 * in the absence of a router is rather harmful.
	 */
	if (!ip6_doscopedroute && !ip6_forwarding &&
	    TAILQ_FIRST(&nd_defrouter) == NULL &&
	    nd6_defifindex == ifp->if_index) {
		return (1);
	}

	return (0);
}


/*
 * Detect if a given IPv6 address identifies a neighbor on a given link.
 * XXX: should take care of the destination of a p2p link?
1885 */ 1886int 1887nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, 1888 int rt_locked) 1889{ 1890 struct rtentry *rt; 1891 1892 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED); 1893 lck_mtx_lock(nd6_mutex); 1894 if (nd6_is_new_addr_neighbor(addr, ifp)) { 1895 lck_mtx_unlock(nd6_mutex); 1896 return (1); 1897 } 1898 lck_mtx_unlock(nd6_mutex); 1899 1900 /* 1901 * Even if the address matches none of our addresses, it might be 1902 * in the neighbor cache. 1903 */ 1904 if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) { 1905 RT_LOCK_ASSERT_HELD(rt); 1906 RT_REMREF_LOCKED(rt); 1907 RT_UNLOCK(rt); 1908 return (1); 1909 } 1910 1911 return (0); 1912} 1913 1914/* 1915 * Free an nd6 llinfo entry. 1916 * Since the function would cause significant changes in the kernel, DO NOT 1917 * make it global, unless you have a strong reason for the change, and are sure 1918 * that the change is safe. 1919 */ 1920void 1921nd6_free(struct rtentry *rt) 1922{ 1923 struct llinfo_nd6 *ln; 1924 struct in6_addr in6; 1925 struct nd_defrouter *dr; 1926 1927 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); 1928 RT_LOCK_ASSERT_NOTHELD(rt); 1929 lck_mtx_lock(nd6_mutex); 1930 1931 RT_LOCK(rt); 1932 RT_ADDREF_LOCKED(rt); /* Extra ref */ 1933 ln = rt->rt_llinfo; 1934 in6 = SIN6(rt_key(rt))->sin6_addr; 1935 1936 /* 1937 * Prevent another thread from modifying rt_key, rt_gateway 1938 * via rt_setgate() after the rt_lock is dropped by marking 1939 * the route as defunct. 1940 */ 1941 rt->rt_flags |= RTF_CONDEMNED; 1942 1943 /* 1944 * We used to have pfctlinput(PRC_HOSTDEAD) here. Even though it is 1945 * not harmful, it was not really necessary. Perform default router 1946 * selection even when we are a router, if Scoped Routing is enabled. 
1947 */ 1948 if (ip6_doscopedroute || !ip6_forwarding) { 1949 dr = defrouter_lookup(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp); 1950 1951 if ((ln && ln->ln_router) || dr) { 1952 /* 1953 * rt6_flush must be called whether or not the neighbor 1954 * is in the Default Router List. 1955 * See a corresponding comment in nd6_na_input(). 1956 */ 1957 RT_UNLOCK(rt); 1958 lck_mtx_unlock(nd6_mutex); 1959 rt6_flush(&in6, rt->rt_ifp); 1960 lck_mtx_lock(nd6_mutex); 1961 } else { 1962 RT_UNLOCK(rt); 1963 } 1964 1965 if (dr) { 1966 NDDR_REMREF(dr); 1967 /* 1968 * Unreachablity of a router might affect the default 1969 * router selection and on-link detection of advertised 1970 * prefixes. 1971 */ 1972 1973 /* 1974 * Temporarily fake the state to choose a new default 1975 * router and to perform on-link determination of 1976 * prefixes correctly. 1977 * Below the state will be set correctly, 1978 * or the entry itself will be deleted. 1979 */ 1980 RT_LOCK_SPIN(rt); 1981 ln->ln_state = ND6_LLINFO_INCOMPLETE; 1982 1983 /* 1984 * Since defrouter_select() does not affect the 1985 * on-link determination and MIP6 needs the check 1986 * before the default router selection, we perform 1987 * the check now. 1988 */ 1989 RT_UNLOCK(rt); 1990 pfxlist_onlink_check(); 1991 1992 /* 1993 * refresh default router list 1994 */ 1995 defrouter_select(rt->rt_ifp); 1996 } 1997 RT_LOCK_ASSERT_NOTHELD(rt); 1998 } else { 1999 RT_UNLOCK(rt); 2000 } 2001 2002 lck_mtx_unlock(nd6_mutex); 2003 /* 2004 * Detach the route from the routing tree and the list of neighbor 2005 * caches, and disable the route entry not to be used in already 2006 * cached routes. 
 */
	(void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);

	/* Extra ref held above; now free it */
	rtfree(rt);
}

/*
 * Routing-table callback for Neighbor Discovery route entries.
 * Invoked by the route code on RTM_ADD/RTM_RESOLVE/RTM_DELETE to set up
 * or tear down the per-route llinfo_nd6 neighbor cache state.
 * Called with rnh_lock and the route's rt_lock held (asserted below);
 * rt_lock is briefly dropped around nd6_is_addr_neighbor() to avoid a
 * deadlock with nd6_lookup().
 */
void
nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
{
#pragma unused(sa)
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_nd6 *ln = rt->rt_llinfo;
	static struct sockaddr_dl null_sdl =
	    { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
	struct ifnet *ifp = rt->rt_ifp;
	struct ifaddr *ifa;
	uint64_t timenow;
	char buf[MAX_IPv6_STR_LEN];

	VERIFY(nd6_init_done);
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * We have rnh_lock held, see if we need to schedule the timer;
	 * we might do this again below during RTM_RESOLVE, but doing it
	 * now handles all other cases.
	 */
	if (nd6_sched_timeout_want)
		nd6_sched_timeout(NULL, NULL);

	/* Indirect routes go through a gateway; no neighbor cache needed */
	if (rt->rt_flags & RTF_GATEWAY)
		return;

	if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) {
		/*
		 * This is probably an interface direct route for a link
		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
		 * We do not need special treatment below for such a route.
		 * Moreover, the RTF_LLINFO flag which would be set below
		 * would annoy the ndp(8) command.
		 */
		return;
	}

	if (req == RTM_RESOLVE) {
		int no_nd_cache;

		if (!nd6_need_cache(ifp)) {	/* stf case */
			no_nd_cache = 1;
		} else {
			struct sockaddr_in6 sin6;

			rtkey_to_sa6(rt, &sin6);
			/*
			 * nd6_is_addr_neighbor() may call nd6_lookup(),
			 * therefore we drop rt_lock to avoid deadlock
			 * during the lookup.  The extra reference keeps
			 * the route from going away while unlocked.
			 */
			RT_ADDREF_LOCKED(rt);
			RT_UNLOCK(rt);
			no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
			RT_LOCK(rt);
			RT_REMREF_LOCKED(rt);
		}

		/*
		 * FreeBSD and BSD/OS often make a cloned host route based
		 * on a less-specific route (e.g. the default route).
		 * If the less specific route does not have a "gateway"
		 * (this is the case when the route just goes to a p2p or an
		 * stf interface), we'll mistakenly make a neighbor cache for
		 * the host route, and will see strange neighbor solicitation
		 * for the corresponding destination.  In order to avoid the
		 * confusion, we check if the destination of the route is
		 * a neighbor in terms of neighbor discovery, and stop the
		 * process if not.  Additionally, we remove the LLINFO flag
		 * so that ndp(8) will not try to get the neighbor information
		 * of the destination.
		 */
		if (no_nd_cache) {
			rt->rt_flags &= ~RTF_LLINFO;
			return;
		}
	}

	timenow = net_uptime();

	switch (req) {
	case RTM_ADD:
		/*
		 * There is no backward compatibility :)
		 *
		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
		 *	SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
		 *		rt->rt_flags |= RTF_CLONING;
		 */
		if ((rt->rt_flags & RTF_CLONING) ||
		    ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
			/*
			 * Case 1: This route should come from a route to
			 * interface (RTF_CLONING case) or the route should be
			 * treated as on-link but is currently not
			 * (RTF_LLINFO && ln == NULL case).
			 */
			if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
				gate = rt->rt_gateway;
				SDL(gate)->sdl_type = ifp->if_type;
				SDL(gate)->sdl_index = ifp->if_index;
				/*
				 * In case we're called before 1.0 sec.
				 * has elapsed.
				 */
				if (ln != NULL) {
					ln_setexpire(ln,
					    (ifp->if_eflags & IFEF_IPV6_ND6ALT)
					    ? 0 : MAX(timenow, 1));
				}
			}
			if (rt->rt_flags & RTF_CLONING)
				break;
		}
		/*
		 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
		 * We don't do that here since llinfo is not ready yet.
		 *
		 * There are also couple of other things to be discussed:
		 * - unsolicited NA code needs improvement beforehand
		 * - RFC4861 says we MAY send multicast unsolicited NA
		 *   (7.2.6 paragraph 4), however, it also says that we
		 *   SHOULD provide a mechanism to prevent multicast NA storm.
		 *   we don't have anything like it right now.
		 *   note that the mechanism needs a mutual agreement
		 *   between proxies, which means that we need to implement
		 *   a new protocol, or a new kludge.
		 * - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA.
		 *   we need to check ip6forwarding before sending it.
		 *   (or should we allow proxy ND configuration only for
		 *   routers?  there's no mention about proxy ND from hosts)
		 */
		/* FALLTHROUGH */
	case RTM_RESOLVE:
		if (!(ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK))) {
			/*
			 * Address resolution isn't necessary for a point to
			 * point link, so we can skip this test for a p2p link.
			 */
			if (gate->sa_family != AF_LINK ||
			    gate->sa_len < sizeof (null_sdl)) {
				/* Don't complain in case of RTM_ADD */
				if (req == RTM_RESOLVE) {
					log(LOG_ERR, "%s: route to %s has bad "
					    "gateway address (sa_family %u "
					    "sa_len %u) on %s\n", __func__,
					    inet_ntop(AF_INET6,
					    &SIN6(rt_key(rt))->sin6_addr, buf,
					    sizeof (buf)), gate->sa_family,
					    gate->sa_len, if_name(ifp));
				}
				break;
			}
			SDL(gate)->sdl_type = ifp->if_type;
			SDL(gate)->sdl_index = ifp->if_index;
		}
		if (ln != NULL)
			break;	/* This happens on a route change */
		/*
		 * Case 2: This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		rt->rt_llinfo = ln = nd6_llinfo_alloc(M_WAITOK);
		if (ln == NULL)
			break;

		nd6_allocated++;
		rt->rt_llinfo_get_ri = nd6_llinfo_get_ri;
		rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
		rt->rt_llinfo_purge = nd6_llinfo_purge;
		rt->rt_llinfo_free = nd6_llinfo_free;
		rt->rt_flags |= RTF_LLINFO;
		ln->ln_rt = rt;
		/* this is required for "ndp" command. - shin */
		if (req == RTM_ADD) {
			/*
			 * gate should have some valid AF_LINK entry,
			 * and ln->ln_expire should have some lifetime
			 * which is specified by ndp command.
			 */
			ln->ln_state = ND6_LLINFO_REACHABLE;
		} else {
			/*
			 * When req == RTM_RESOLVE, rt is created and
			 * initialized in rtrequest(), so rt_expire is 0.
			 */
			ln->ln_state = ND6_LLINFO_NOSTATE;

			/* In case we're called before 1.0 sec. has elapsed */
			ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) ?
			    0 : MAX(timenow, 1));
		}
		LN_INSERTHEAD(ln);
		nd6_inuse++;

		/* We have at least one entry; arm the timer if not already */
		nd6_sched_timeout(NULL, NULL);

		/*
		 * If we have too many cache entries, initiate immediate
		 * purging for some "less recently used" entries.  Note that
		 * we cannot directly call nd6_free() here because it would
		 * cause re-entering rtable related routines triggering an LOR
		 * problem.
		 */
		if (ip6_neighborgcthresh > 0 &&
		    nd6_inuse >= ip6_neighborgcthresh) {
			int i;

			/* Expire up to 10 entries from the tail (LRU end) */
			for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
				struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
				struct rtentry *rt_end = ln_end->ln_rt;

				/* Move this entry to the head */
				RT_LOCK(rt_end);
				LN_DEQUEUE(ln_end);
				LN_INSERTHEAD(ln_end);

				if (ln_end->ln_expire == 0) {
					RT_UNLOCK(rt_end);
					continue;
				}
				if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
					ln_end->ln_state = ND6_LLINFO_STALE;
				else
					ln_end->ln_state = ND6_LLINFO_PURGE;
				ln_setexpire(ln_end, timenow);
				RT_UNLOCK(rt_end);
			}
		}

		/*
		 * check if rt_key(rt) is one of my address assigned
		 * to the interface.
		 */
		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
		    &SIN6(rt_key(rt))->sin6_addr);
		if (ifa != NULL) {
			caddr_t macp = nd6_ifptomac(ifp);
			ln_setexpire(ln, 0);
			ln->ln_state = ND6_LLINFO_REACHABLE;
			if (macp != NULL) {
				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
				SDL(gate)->sdl_alen = ifp->if_addrlen;
			}
			if (nd6_useloopback) {
				if (rt->rt_ifp != lo_ifp) {
					/*
					 * Purge any link-layer info caching.
					 */
					if (rt->rt_llinfo_purge != NULL)
						rt->rt_llinfo_purge(rt);

					/*
					 * Adjust route ref count for the
					 * interfaces.
					 */
					if (rt->rt_if_ref_fn != NULL) {
						rt->rt_if_ref_fn(lo_ifp, 1);
						rt->rt_if_ref_fn(rt->rt_ifp,
						    -1);
					}
				}
				rt->rt_ifp = lo_ifp;
				/*
				 * If rmx_mtu is not locked, update it
				 * to the MTU used by the new interface.
				 */
				if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
					rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				/*
				 * Make sure rt_ifa be equal to the ifaddr
				 * corresponding to the address.
				 * We need this because when we refer
				 * rt_ifa->ia6_flags in ip6_input, we assume
				 * that the rt_ifa points to the address instead
				 * of the loopback address.
				 */
				if (ifa != rt->rt_ifa) {
					rtsetifa(rt, ifa);
				}
			}
			IFA_REMREF(ifa);
		} else if (rt->rt_flags & RTF_ANNOUNCE) {
			/* Proxy ND entry: treat as reachable, no expiry */
			ln_setexpire(ln, 0);
			ln->ln_state = ND6_LLINFO_REACHABLE;

			/* join solicited node multicast for proxy ND */
			if (ifp->if_flags & IFF_MULTICAST) {
				struct in6_addr llsol;
				struct in6_multi *in6m;
				int error;

				llsol = SIN6(rt_key(rt))->sin6_addr;
				llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
				llsol.s6_addr32[1] = 0;
				llsol.s6_addr32[2] = htonl(1);
				llsol.s6_addr8[12] = 0xff;
				if (in6_setscope(&llsol, ifp, NULL))
					break;
				error = in6_mc_join(ifp, &llsol,
				    NULL, &in6m, 0);
				if (error) {
					nd6log((LOG_ERR, "%s: failed to join "
					    "%s (errno=%d)\n", if_name(ifp),
					    ip6_sprintf(&llsol), error));
				} else {
					IN6M_REMREF(in6m);
				}
			}
		}
		break;

	case RTM_DELETE:
		if (ln == NULL)
			break;
		/* leave from solicited node multicast for proxy ND */
		if ((rt->rt_flags & RTF_ANNOUNCE) &&
		    (ifp->if_flags & IFF_MULTICAST)) {
			struct in6_addr llsol;
			struct in6_multi *in6m;

			llsol = SIN6(rt_key(rt))->sin6_addr;
			llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
			llsol.s6_addr32[1] = 0;
			llsol.s6_addr32[2] = htonl(1);
			llsol.s6_addr8[12] = 0xff;
			if (in6_setscope(&llsol, ifp, NULL) == 0) {
				in6_multihead_lock_shared();
				IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
				in6_multihead_lock_done();
				if (in6m != NULL) {
					in6_mc_leave(in6m, NULL);
					IN6M_REMREF(in6m);
				}
			}
		}
		nd6_inuse--;
		/*
		 * Unchain it but defer the actual freeing until the route
		 * itself is to be freed.  rt->rt_llinfo still points to
		 * llinfo_nd6, and likewise, ln->ln_rt still points to this
		 * route entry, except that RTF_LLINFO is now cleared.
		 */
		if (ln->ln_flags & ND6_LNF_IN_USE)
			LN_DEQUEUE(ln);

		/*
		 * Purge any link-layer info caching.
		 */
		if (rt->rt_llinfo_purge != NULL)
			rt->rt_llinfo_purge(rt);

		rt->rt_flags &= ~RTF_LLINFO;
		if (ln->ln_hold != NULL) {
			m_freem(ln->ln_hold);
			ln->ln_hold = NULL;
		}
	}
}

/*
 * Copy the default router list out to a user-supplied in6_drlist_{32,64}
 * buffer for the obsolete SIOCGDRLST_IN6_{32,64} ioctls.  Caller must hold
 * nd6_mutex (asserted).  Returns 0 or ENOMEM.
 */
static int
nd6_siocgdrlst(void *data, int data_is_64)
{
	struct in6_drlist_32 *drl_32;
	struct nd_defrouter *dr;
	int i = 0;

	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);

	dr = TAILQ_FIRST(&nd_defrouter);

	/* For 64-bit process */
	if (data_is_64) {
		struct in6_drlist_64 *drl_64;

		drl_64 = _MALLOC(sizeof (*drl_64), M_TEMP, M_WAITOK|M_ZERO);
		if (drl_64 == NULL)
			return (ENOMEM);

		/* preserve the interface name */
		bcopy(data, drl_64, sizeof (drl_64->ifname));

		while (dr && i < DRLSTSIZ) {
			drl_64->defrouter[i].rtaddr = dr->rtaddr;
			if (IN6_IS_ADDR_LINKLOCAL(
			    &drl_64->defrouter[i].rtaddr)) {
				/* XXX: need to this hack for KAME stack */
				drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
			} else {
				log(LOG_ERR,
				    "default router list contains a "
				    "non-linklocal address(%s)\n",
				    ip6_sprintf(&drl_64->defrouter[i].rtaddr));
			}
			drl_64->defrouter[i].flags = dr->flags;
			drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
			drl_64->defrouter[i].expire = nddr_getexpire(dr);
			drl_64->defrouter[i].if_index = dr->ifp->if_index;
			i++;
			dr = TAILQ_NEXT(dr, dr_entry);
		}
		bcopy(drl_64, data, sizeof (*drl_64));
		_FREE(drl_64, M_TEMP);
		return (0);
	}

	/* For 32-bit process */
	drl_32 = _MALLOC(sizeof (*drl_32), M_TEMP, M_WAITOK|M_ZERO);
	if (drl_32 == NULL)
		return (ENOMEM);

	/* preserve the interface name */
	bcopy(data, drl_32, sizeof (drl_32->ifname));

	while (dr != NULL && i < DRLSTSIZ) {
		drl_32->defrouter[i].rtaddr = dr->rtaddr;
		if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
			/* XXX: need to this hack for KAME stack */
			drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
		} else {
			log(LOG_ERR,
			    "default router list contains a "
			    "non-linklocal address(%s)\n",
			    ip6_sprintf(&drl_32->defrouter[i].rtaddr));
		}
		drl_32->defrouter[i].flags = dr->flags;
		drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
		drl_32->defrouter[i].expire = nddr_getexpire(dr);
		drl_32->defrouter[i].if_index = dr->ifp->if_index;
		i++;
		dr = TAILQ_NEXT(dr, dr_entry);
	}
	bcopy(drl_32, data, sizeof (*drl_32));
	_FREE(drl_32, M_TEMP);
	return (0);
}

/*
 * XXX meaning of fields, especially "raflags", is very
 * different between RA prefix list and RR/static prefix list.
 * how about separating ioctls into two?
 */
/*
 * Copy the ND prefix list out to a user-supplied in6_prlist_{32,64}
 * buffer for the obsolete SIOCGPRLST_IN6_{32,64} ioctls.  Caller must
 * hold nd6_mutex (asserted).  Returns 0 or ENOMEM.
 */
static int
nd6_siocgprlst(void *data, int data_is_64)
{
	struct in6_prlist_32 *prl_32;
	struct nd_prefix *pr;
	int i = 0;

	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);

	pr = nd_prefix.lh_first;

	/* For 64-bit process */
	if (data_is_64) {
		struct in6_prlist_64 *prl_64;

		prl_64 = _MALLOC(sizeof (*prl_64), M_TEMP, M_WAITOK|M_ZERO);
		if (prl_64 == NULL)
			return (ENOMEM);

		/* preserve the interface name */
		bcopy(data, prl_64, sizeof (prl_64->ifname));

		while (pr && i < PRLSTSIZ) {
			struct nd_pfxrouter *pfr;
			int j;

			NDPR_LOCK(pr);
			(void) in6_embedscope(&prl_64->prefix[i].prefix,
			    &pr->ndpr_prefix, NULL, NULL, NULL);
			prl_64->prefix[i].raflags = pr->ndpr_raf;
			prl_64->prefix[i].prefixlen = pr->ndpr_plen;
			prl_64->prefix[i].vltime = pr->ndpr_vltime;
			prl_64->prefix[i].pltime = pr->ndpr_pltime;
			prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
			prl_64->prefix[i].expire = ndpr_getexpire(pr);

			pfr = pr->ndpr_advrtrs.lh_first;
			j = 0;
			while (pfr) {
				if (j < DRLSTSIZ) {
#define	RTRADDR	prl_64->prefix[i].advrtr[j]
					RTRADDR = pfr->router->rtaddr;
					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
						/* XXX: hack for KAME */
						RTRADDR.s6_addr16[1] = 0;
					} else {
						log(LOG_ERR,
						    "a router(%s) advertises "
						    "a prefix with "
						    "non-link local address\n",
						    ip6_sprintf(&RTRADDR));
					}
#undef RTRADDR
				}
				j++;
				pfr = pfr->pfr_next;
			}
			/* j may exceed DRLSTSIZ; reports total advertisers */
			prl_64->prefix[i].advrtrs = j;
			prl_64->prefix[i].origin = PR_ORIG_RA;
			NDPR_UNLOCK(pr);

			i++;
			pr = pr->ndpr_next;
		}
		bcopy(prl_64, data, sizeof (*prl_64));
		_FREE(prl_64, M_TEMP);
		return (0);
	}

	/* For 32-bit process */
	prl_32 = _MALLOC(sizeof (*prl_32), M_TEMP, M_WAITOK|M_ZERO);
	if (prl_32 == NULL)
		return (ENOMEM);

	/* preserve the interface name */
	bcopy(data, prl_32, sizeof (prl_32->ifname));

	while (pr && i < PRLSTSIZ) {
		struct nd_pfxrouter *pfr;
		int j;

		NDPR_LOCK(pr);
		(void) in6_embedscope(&prl_32->prefix[i].prefix,
		    &pr->ndpr_prefix, NULL, NULL, NULL);
		prl_32->prefix[i].raflags = pr->ndpr_raf;
		prl_32->prefix[i].prefixlen = pr->ndpr_plen;
		prl_32->prefix[i].vltime = pr->ndpr_vltime;
		prl_32->prefix[i].pltime = pr->ndpr_pltime;
		prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
		prl_32->prefix[i].expire = ndpr_getexpire(pr);

		pfr = pr->ndpr_advrtrs.lh_first;
		j = 0;
		while (pfr) {
			if (j < DRLSTSIZ) {
#define	RTRADDR	prl_32->prefix[i].advrtr[j]
				RTRADDR = pfr->router->rtaddr;
				if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
					/* XXX: hack for KAME */
					RTRADDR.s6_addr16[1] = 0;
				} else {
					log(LOG_ERR,
					    "a router(%s) advertises "
					    "a prefix with "
					    "non-link local address\n",
					    ip6_sprintf(&RTRADDR));
				}
#undef RTRADDR
			}
			j++;
			pfr = pfr->pfr_next;
		}
		prl_32->prefix[i].advrtrs = j;
		prl_32->prefix[i].origin = PR_ORIG_RA;
		NDPR_UNLOCK(pr);

		i++;
		pr = pr->ndpr_next;
	}
	bcopy(prl_32, data, sizeof (*prl_32));
	_FREE(prl_32, M_TEMP);
	return (0);
}

/*
 * Neighbor Discovery ioctl handler: router/prefix list queries and
 * flushes, per-interface ND parameters, neighbor info, and the default
 * ND interface.  Acquires nd6_mutex / nd_if_rwlock internally as needed.
 * Returns 0 or an errno.
 */
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	struct rtentry *rt;
	int i, error = 0;

	VERIFY(ifp != NULL);
	i = ifp->if_index;

	switch (cmd) {
	case SIOCGDRLST_IN6_32:		/* struct in6_drlist_32 */
	case SIOCGDRLST_IN6_64:		/* struct in6_drlist_64 */
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 */
		lck_mtx_lock(nd6_mutex);
		error = nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
		lck_mtx_unlock(nd6_mutex);
		break;

	case SIOCGPRLST_IN6_32:		/* struct in6_prlist_32 */
	case SIOCGPRLST_IN6_64:		/* struct in6_prlist_64 */
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 */
		lck_mtx_lock(nd6_mutex);
		error = nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
		lck_mtx_unlock(nd6_mutex);
		break;

	case OSIOCGIFINFO_IN6:		/* struct in6_ondireq */
	case SIOCGIFINFO_IN6: {		/* struct in6_ondireq */
		u_int32_t linkmtu;
		struct in6_ondireq *ondi = (struct in6_ondireq *)(void *)data;
		struct nd_ifinfo *ndi;
		/*
		 * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
		 * instead of in6_ndireq, so we treat it as such.
		 */
		lck_rw_lock_shared(nd_if_rwlock);
		ndi = ND_IFINFO(ifp);
		if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
		    !ndi->initialized) {
			lck_rw_done(nd_if_rwlock);
			error = EINVAL;
			break;
		}
		lck_mtx_lock(&ndi->lock);
		linkmtu = IN6_LINKMTU(ifp);
		bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
		bcopy(&nd_ifinfo[i].maxmtu, &ondi->ndi.maxmtu,
		    sizeof (u_int32_t));
		bcopy(&nd_ifinfo[i].basereachable, &ondi->ndi.basereachable,
		    sizeof (u_int32_t));
		bcopy(&nd_ifinfo[i].reachable, &ondi->ndi.reachable,
		    sizeof (u_int32_t));
		bcopy(&nd_ifinfo[i].retrans, &ondi->ndi.retrans,
		    sizeof (u_int32_t));
		bcopy(&nd_ifinfo[i].flags, &ondi->ndi.flags,
		    sizeof (u_int32_t));
		bcopy(&nd_ifinfo[i].recalctm, &ondi->ndi.recalctm,
		    sizeof (int));
		ondi->ndi.chlim = nd_ifinfo[i].chlim;
		ondi->ndi.receivedra = 0;
		lck_mtx_unlock(&ndi->lock);
		lck_rw_done(nd_if_rwlock);
		break;
	}

	case SIOCSIFINFO_FLAGS: {	/* struct in6_ndireq */
		struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
		u_int32_t oflags, flags;
		struct nd_ifinfo *ndi;

		/* XXX: almost all other fields of cndi->ndi is unused */
		lck_rw_lock_shared(nd_if_rwlock);
		ndi = ND_IFINFO(ifp);
		if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
		    !ndi->initialized) {
			lck_rw_done(nd_if_rwlock);
			error = EINVAL;
			break;
		}
		lck_mtx_lock(&ndi->lock);
		oflags = nd_ifinfo[i].flags;
		bcopy(&cndi->ndi.flags, &nd_ifinfo[i].flags, sizeof (flags));
		flags = nd_ifinfo[i].flags;
		lck_mtx_unlock(&ndi->lock);
		lck_rw_done(nd_if_rwlock);

		if (oflags == flags)
			break;

		/* Notify the stack only when the flags actually changed */
		error = nd6_setifinfo(ifp, oflags, flags);
		break;
	}

	case SIOCSNDFLUSH_IN6:	/* struct in6_ifreq */
		/* flush default router list */
		/*
		 * xxx sumikawa: should not delete route if default
		 * route equals to the top of default router list
		 */
		lck_mtx_lock(nd6_mutex);
		defrouter_reset();
		defrouter_select(ifp);
		lck_mtx_unlock(nd6_mutex);
		/* xxx sumikawa: flush prefix list */
		break;

	case SIOCSPFXFLUSH_IN6: {	/* struct in6_ifreq */
		/* flush all the prefix advertised by routers */
		struct nd_prefix *next;

		lck_mtx_lock(nd6_mutex);
		for (pr = nd_prefix.lh_first; pr; pr = next) {
			struct in6_ifaddr *ia;

			next = pr->ndpr_next;

			NDPR_LOCK(pr);
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
				NDPR_UNLOCK(pr);
				continue; /* XXX */
			}
			if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
				NDPR_UNLOCK(pr);
				continue;
			}
			/* do we really have to remove addresses as well? */
			NDPR_ADDREF_LOCKED(pr);
			NDPR_UNLOCK(pr);
			lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
			ia = in6_ifaddrs;
			while (ia != NULL) {
				IFA_LOCK(&ia->ia_ifa);
				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
					IFA_UNLOCK(&ia->ia_ifa);
					ia = ia->ia_next;
					continue;
				}

				if (ia->ia6_ndpr == pr) {
					IFA_ADDREF_LOCKED(&ia->ia_ifa);
					IFA_UNLOCK(&ia->ia_ifa);
					lck_rw_done(&in6_ifaddr_rwlock);
					lck_mtx_unlock(nd6_mutex);
					in6_purgeaddr(&ia->ia_ifa);
					IFA_REMREF(&ia->ia_ifa);
					lck_mtx_lock(nd6_mutex);
					lck_rw_lock_exclusive(
					    &in6_ifaddr_rwlock);
					/*
					 * Purging the address caused
					 * in6_ifaddr_rwlock to be
					 * dropped and
					 * reacquired; therefore search again
					 * from the beginning of in6_ifaddrs.
					 * The same applies for the prefix list.
					 */
					ia = in6_ifaddrs;
					next = nd_prefix.lh_first;
					continue;

				}
				IFA_UNLOCK(&ia->ia_ifa);
				ia = ia->ia_next;
			}
			lck_rw_done(&in6_ifaddr_rwlock);
			NDPR_LOCK(pr);
			prelist_remove(pr);
			NDPR_UNLOCK(pr);
			pfxlist_onlink_check();
			/*
			 * If we were trying to restart this loop
			 * above by changing the value of 'next', we might
			 * end up freeing the only element on the list
			 * when we call NDPR_REMREF().
			 * When this happens, we also have get out of this
			 * loop because we have nothing else to do.
			 */
			if (pr == next)
				next = NULL;
			NDPR_REMREF(pr);
		}
		lck_mtx_unlock(nd6_mutex);
		break;
	}

	case SIOCSRTRFLUSH_IN6: {	/* struct in6_ifreq */
		/* flush all the default routers */
		struct nd_defrouter *next;

		lck_mtx_lock(nd6_mutex);
		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
			/*
			 * The first entry of the list may be stored in
			 * the routing table, so we'll delete it later.
			 */
			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
				next = TAILQ_NEXT(dr, dr_entry);
				if (ifp == lo_ifp || dr->ifp == ifp)
					defrtrlist_del(dr);
			}
			if (ifp == lo_ifp ||
			    TAILQ_FIRST(&nd_defrouter)->ifp == ifp)
				defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
		}
		lck_mtx_unlock(nd6_mutex);
		break;
	}

	case SIOCGNBRINFO_IN6_32: {	/* struct in6_nbrinfo_32 */
		struct llinfo_nd6 *ln;
		struct in6_nbrinfo_32 nbi_32;
		struct in6_addr nb_addr; /* make local for safety */

		bcopy(data, &nbi_32, sizeof (nbi_32));
		nb_addr = nbi_32.addr;
		/*
		 * XXX: KAME specific hack for scoped addresses
		 *      XXXX: for other scopes than link-local?
		 */
		if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) ||
		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) {
			u_int16_t *idp =
			    (u_int16_t *)(void *)&nb_addr.s6_addr[2];

			if (*idp == 0)
				*idp = htons(ifp->if_index);
		}

		/* Callee returns a locked route upon success */
		if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
			error = EINVAL;
			break;
		}
		RT_LOCK_ASSERT_HELD(rt);
		ln = rt->rt_llinfo;
		nbi_32.state = ln->ln_state;
		nbi_32.asked = ln->ln_asked;
		nbi_32.isrouter = ln->ln_router;
		nbi_32.expire = ln_getexpire(ln);
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		bcopy(&nbi_32, data, sizeof (nbi_32));
		break;
	}

	case SIOCGNBRINFO_IN6_64: {	/* struct in6_nbrinfo_64 */
		struct llinfo_nd6 *ln;
		struct in6_nbrinfo_64 nbi_64;
		struct in6_addr nb_addr; /* make local for safety */

		bcopy(data, &nbi_64, sizeof (nbi_64));
		nb_addr = nbi_64.addr;
		/*
		 * XXX: KAME specific hack for scoped addresses
		 *      XXXX: for other scopes than link-local?
		 */
		if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) ||
		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) {
			u_int16_t *idp =
			    (u_int16_t *)(void *)&nb_addr.s6_addr[2];

			if (*idp == 0)
				*idp = htons(ifp->if_index);
		}

		/* Callee returns a locked route upon success */
		if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
			error = EINVAL;
			break;
		}
		RT_LOCK_ASSERT_HELD(rt);
		ln = rt->rt_llinfo;
		nbi_64.state = ln->ln_state;
		nbi_64.asked = ln->ln_asked;
		nbi_64.isrouter = ln->ln_router;
		nbi_64.expire = ln_getexpire(ln);
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		bcopy(&nbi_64, data, sizeof (nbi_64));
		break;
	}

	case SIOCGDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */
	case SIOCGDEFIFACE_IN6_64: {	/* struct in6_ndifreq_64 */
		struct in6_ndifreq_64 *ndif_64 =
		    (struct in6_ndifreq_64 *)(void *)data;
		struct in6_ndifreq_32 *ndif_32 =
		    (struct in6_ndifreq_32 *)(void *)data;

		if (cmd == SIOCGDEFIFACE_IN6_64) {
			u_int64_t j = nd6_defifindex;
			bcopy(&j, &ndif_64->ifindex, sizeof (j));
		} else {
			bcopy(&nd6_defifindex, &ndif_32->ifindex,
			    sizeof (u_int32_t));
		}
		break;
	}

	case SIOCSDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */
	case SIOCSDEFIFACE_IN6_64: {	/* struct in6_ndifreq_64 */
		struct in6_ndifreq_64 *ndif_64 =
		    (struct in6_ndifreq_64 *)(void *)data;
		struct in6_ndifreq_32 *ndif_32 =
		    (struct in6_ndifreq_32 *)(void *)data;
		u_int32_t idx;

		if (cmd == SIOCSDEFIFACE_IN6_64) {
			u_int64_t j;
			bcopy(&ndif_64->ifindex, &j, sizeof (j));
			idx = (u_int32_t)j;
		} else {
			bcopy(&ndif_32->ifindex, &idx, sizeof (idx));
		}

		error = nd6_setdefaultiface(idx);
		return (error);
		/* NOTREACHED */
	}
	}
	return (error);
}

/*
 * Create neighbor cache entry and cache link-layer address,
 * on reception of inbound ND6
 * packets.  (RS/RA/NS/redirect)
 */
void
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
    int lladdrlen, int type, int code)
{
#pragma unused(lladdrlen)
	struct rtentry *rt = NULL;
	struct llinfo_nd6 *ln = NULL;
	int is_newentry;
	struct sockaddr_dl *sdl = NULL;
	int do_update;
	int olladdr;
	int llchange;
	int newstate = 0;
	uint64_t timenow;
	boolean_t sched_timeout = FALSE;

	if (ifp == NULL)
		panic("ifp == NULL in nd6_cache_lladdr");
	if (from == NULL)
		panic("from == NULL in nd6_cache_lladdr");

	/* nothing must be updated for unspecified address */
	if (IN6_IS_ADDR_UNSPECIFIED(from))
		return;

	/*
	 * Validation about ifp->if_addrlen and lladdrlen must be done in
	 * the caller.
	 */
	timenow = net_uptime();

	/* Look up existing entry; create one (2nd arg == 1) if missing */
	rt = nd6_lookup(from, 0, ifp, 0);
	if (rt == NULL) {
		if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
			return;
		RT_LOCK_ASSERT_HELD(rt);
		is_newentry = 1;
	} else {
		RT_LOCK_ASSERT_HELD(rt);
		/* do nothing if static ndp is set */
		if (rt->rt_flags & RTF_STATIC) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			return;
		}
		is_newentry = 0;
	}

	if (rt == NULL)
		return;
	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
fail:
		RT_UNLOCK(rt);
		nd6_free(rt);
		rtfree(rt);
		return;
	}
	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	if (ln == NULL)
		goto fail;
	if (rt->rt_gateway == NULL)
		goto fail;
	if (rt->rt_gateway->sa_family != AF_LINK)
		goto fail;
	sdl = SDL(rt->rt_gateway);

	/* olladdr: entry already had a cached link-layer address */
	olladdr = (sdl->sdl_alen) ? 1 : 0;
	if (olladdr && lladdr) {
		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
			llchange = 1;
		else
			llchange = 0;
	} else
		llchange = 0;

	/*
	 * newentry olladdr  lladdr  llchange	(*=record)
	 *	0	n	n	--	(1)
	 *	0	y	n	--	(2)
	 *	0	n	y	--	(3) * STALE
	 *	0	y	y	n	(4) *
	 *	0	y	y	y	(5) * STALE
	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
	 *	1	--	y	--	(7) * STALE
	 */

	if (lladdr) {		/* (3-5) and (7) */
		/*
		 * Record source link-layer address
		 * XXX is it dependent to ifp->if_type?
		 */
		sdl->sdl_alen = ifp->if_addrlen;
		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);

		/* cache the gateway (sender HW) address */
		nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
	}

	if (!is_newentry) {
		if ((!olladdr && lladdr != NULL) ||	/* (3) */
		    (olladdr && lladdr != NULL && llchange)) {	/* (5) */
			do_update = 1;
			newstate = ND6_LLINFO_STALE;
		} else					/* (1-2,4) */
			do_update = 0;
	} else {
		do_update = 1;
		if (lladdr == NULL)			/* (6) */
			newstate = ND6_LLINFO_NOSTATE;
		else					/* (7) */
			newstate = ND6_LLINFO_STALE;
	}

	if (do_update) {
		/*
		 * Update the state of the neighbor cache.
		 */
		ln->ln_state = newstate;

		if (ln->ln_state == ND6_LLINFO_STALE) {
			struct mbuf *m = ln->ln_hold;
			/*
			 * XXX: since nd6_output() below will cause
			 * state transition to DELAY and reset the timer,
			 * we must set the timer now, although it is actually
			 * meaningless.
			 */
			ln_setexpire(ln, timenow + nd6_gctimer);
			ln->ln_hold = NULL;

			if (m != NULL) {
				struct sockaddr_in6 sin6;

				rtkey_to_sa6(rt, &sin6);
				/*
				 * we assume ifp is not a p2p here, so just
				 * set the 2nd argument as the 1st one.
				 */
				RT_UNLOCK(rt);
				nd6_output(ifp, ifp, m, &sin6, rt, NULL);
				RT_LOCK(rt);
			}
		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
			/* probe right away */
			ln_setexpire(ln, timenow);
			sched_timeout = TRUE;
		}
	}

	/*
	 * ICMP6 type dependent behavior.
	 *
	 * NS: clear IsRouter if new entry
	 * RS: clear IsRouter
	 * RA: set IsRouter if there's lladdr
	 * redir: clear IsRouter if new entry
	 *
	 * RA case, (1):
	 * The spec says that we must set IsRouter in the following cases:
	 * - If lladdr exist, set IsRouter.  This means (1-5).
	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
	 * A question arises for (1) case.  (1) case has no lladdr in the
	 * neighbor cache, this is similar to (6).
	 * This case is rare but we figured that we MUST NOT set IsRouter.
	 *
	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
	 *							D R
	 *	0	n	n	--	(1)	c   ?     s
	 *	0	y	n	--	(2)	c   s     s
	 *	0	n	y	--	(3)	c   s     s
	 *	0	y	y	n	(4)	c   s     s
	 *	0	y	y	y	(5)	c   s     s
	 *	1	--	n	--	(6) c	c	c s
	 *	1	--	y	--	(7) c	c   s	c s
	 *
	 *					(c=clear s=set)
	 */
	switch (type & 0xff) {
	case ND_NEIGHBOR_SOLICIT:
		/*
		 * New entry must have is_router flag cleared.
		 */
		if (is_newentry)	/* (6-7) */
			ln->ln_router = 0;
		break;
	case ND_REDIRECT:
		/*
		 * If the ICMP message is a Redirect to a better router, always
		 * set the is_router flag.  Otherwise, if the entry is newly
		 * created, then clear the flag.  [RFC 4861, sec 8.3]
		 */
		if (code == ND_REDIRECT_ROUTER)
			ln->ln_router = 1;
		else if (is_newentry)	/* (6-7) */
			ln->ln_router = 0;
		break;
	case ND_ROUTER_SOLICIT:
		/*
		 * is_router flag must always be cleared.
		 */
		ln->ln_router = 0;
		break;
	case ND_ROUTER_ADVERT:
		/*
		 * Mark an entry with lladdr as a router.
		 */
		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
		    (is_newentry && lladdr)) {			/* (7) */
			ln->ln_router = 1;
		}
		break;
	}

	/*
	 * When the link-layer address of a router changes, select the
	 * best router again.  In particular, when the neighbor entry is newly
	 * created, it might affect the selection policy.
	 * Question: can we restrict the first condition to the "is_newentry"
	 * case?
	 *
	 * Note: Perform default router selection even when we are a router,
	 * if Scoped Routing is enabled.
	 */
	if (do_update && ln->ln_router &&
	    (ip6_doscopedroute || !ip6_forwarding)) {
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		lck_mtx_lock(nd6_mutex);
		defrouter_select(ifp);
		lck_mtx_unlock(nd6_mutex);
	} else {
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
	}
	if (sched_timeout) {
		lck_mtx_lock(rnh_lock);
		nd6_sched_timeout(NULL, NULL);
		lck_mtx_unlock(rnh_lock);
	}
}

/*
 * Periodic housekeeping: recompute the per-interface ND reachable time
 * when its recalculation timer expires.  Self-rearms via timeout() every
 * ND6_SLOWTIMER_INTERVAL.
 */
static void
nd6_slowtimo(void *arg)
{
#pragma unused(arg)
	int i;
	struct nd_ifinfo *nd6if;

	lck_rw_lock_shared(nd_if_rwlock);
	for (i = 1; i < if_index + 1; i++) {
		if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
			break;
		nd6if = &nd_ifinfo[i];
		if (!nd6if->initialized)
			break;
		lck_mtx_lock(&nd6if->lock);
		if (nd6if->basereachable && /* already initialized */
		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
			/*
			 * Since reachable time rarely changes by router
			 * advertisements, we SHOULD insure that a new random
			 * value gets recomputed at least once every few hours.
			 * (RFC 4861, 6.3.4)
			 */
			nd6if->recalctm = nd6_recalc_reachtm_interval;
			nd6if->reachable =
			    ND_COMPUTE_RTIME(nd6if->basereachable);
		}
		lck_mtx_unlock(&nd6if->lock);
	}
	lck_rw_done(nd_if_rwlock);
	timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
}

#define	senderr(e) { error = (e); goto bad; }
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
    struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
{
	struct mbuf *m = m0;
	struct rtentry *rt = hint0, *hint = hint0;
	struct llinfo_nd6 *ln = NULL;
	int error = 0;
	uint64_t timenow;
	struct rtentry *rtrele = NULL;
	struct nd_ifinfo *ndi;

	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		RT_ADDREF_LOCKED(rt);
	}

	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
		if (rt != NULL)
			RT_UNLOCK(rt);
		goto sendpkt;
	}

	/*
	 * Next hop determination.  Because we may involve the gateway route
	 * in addition to the original route, locking is rather complicated.
	 * The general concept is that regardless of whether the route points
	 * to the original route or to the gateway route, this routine takes
	 * an extra reference on such a route.  This extra reference will be
	 * released at the end.
	 *
	 * Care must be taken to ensure that the "hint0" route never gets freed
	 * via rtfree(), since the caller may have stored it inside a struct
	 * route with a reference held for that placeholder.
	 *
	 * This logic is similar to, though not exactly the same as the one
	 * used by route_to_gwroute().
	 */
	if (rt != NULL) {
		/*
		 * We have a reference to "rt" by now (or below via rtalloc1),
		 * which will either be released or freed at the end of this
		 * routine.
3235 */ 3236 RT_LOCK_ASSERT_HELD(rt); 3237 if (!(rt->rt_flags & RTF_UP)) { 3238 RT_REMREF_LOCKED(rt); 3239 RT_UNLOCK(rt); 3240 if ((hint = rt = rtalloc1_scoped(SA(dst), 1, 0, 3241 ifp->if_index)) != NULL) { 3242 RT_LOCK_SPIN(rt); 3243 if (rt->rt_ifp != ifp) { 3244 /* XXX: loop care? */ 3245 RT_UNLOCK(rt); 3246 error = nd6_output(ifp, origifp, m0, 3247 dst, rt, adv); 3248 rtfree(rt); 3249 return (error); 3250 } 3251 } else { 3252 senderr(EHOSTUNREACH); 3253 } 3254 } 3255 3256 if (rt->rt_flags & RTF_GATEWAY) { 3257 struct rtentry *gwrt; 3258 struct in6_ifaddr *ia6 = NULL; 3259 struct sockaddr_in6 gw6; 3260 3261 rtgw_to_sa6(rt, &gw6); 3262 /* 3263 * Must drop rt_lock since nd6_is_addr_neighbor() 3264 * calls nd6_lookup() and acquires rnh_lock. 3265 */ 3266 RT_UNLOCK(rt); 3267 3268 /* 3269 * We skip link-layer address resolution and NUD 3270 * if the gateway is not a neighbor from ND point 3271 * of view, regardless of the value of nd_ifinfo.flags. 3272 * The second condition is a bit tricky; we skip 3273 * if the gateway is our own address, which is 3274 * sometimes used to install a route to a p2p link. 3275 */ 3276 if (!nd6_is_addr_neighbor(&gw6, ifp, 0) || 3277 (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) { 3278 /* 3279 * We allow this kind of tricky route only 3280 * when the outgoing interface is p2p. 3281 * XXX: we may need a more generic rule here. 
3282 */ 3283 if (ia6 != NULL) 3284 IFA_REMREF(&ia6->ia_ifa); 3285 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) 3286 senderr(EHOSTUNREACH); 3287 goto sendpkt; 3288 } 3289 3290 RT_LOCK_SPIN(rt); 3291 gw6 = *(SIN6(rt->rt_gateway)); 3292 3293 /* If hint is now down, give up */ 3294 if (!(rt->rt_flags & RTF_UP)) { 3295 RT_UNLOCK(rt); 3296 senderr(EHOSTUNREACH); 3297 } 3298 3299 /* If there's no gateway route, look it up */ 3300 if ((gwrt = rt->rt_gwroute) == NULL) { 3301 RT_UNLOCK(rt); 3302 goto lookup; 3303 } 3304 /* Become a regular mutex */ 3305 RT_CONVERT_LOCK(rt); 3306 3307 /* 3308 * Take gwrt's lock while holding route's lock; 3309 * this is okay since gwrt never points back 3310 * to rt, so no lock ordering issues. 3311 */ 3312 RT_LOCK_SPIN(gwrt); 3313 if (!(gwrt->rt_flags & RTF_UP)) { 3314 rt->rt_gwroute = NULL; 3315 RT_UNLOCK(gwrt); 3316 RT_UNLOCK(rt); 3317 rtfree(gwrt); 3318lookup: 3319 lck_mtx_lock(rnh_lock); 3320 gwrt = rtalloc1_scoped_locked(SA(&gw6), 1, 0, 3321 ifp->if_index); 3322 3323 RT_LOCK(rt); 3324 /* 3325 * Bail out if the route is down, no route 3326 * to gateway, circular route, or if the 3327 * gateway portion of "rt" has changed. 3328 */ 3329 if (!(rt->rt_flags & RTF_UP) || 3330 gwrt == NULL || gwrt == rt || 3331 !equal(SA(&gw6), rt->rt_gateway)) { 3332 if (gwrt == rt) { 3333 RT_REMREF_LOCKED(gwrt); 3334 gwrt = NULL; 3335 } 3336 RT_UNLOCK(rt); 3337 if (gwrt != NULL) 3338 rtfree_locked(gwrt); 3339 lck_mtx_unlock(rnh_lock); 3340 senderr(EHOSTUNREACH); 3341 } 3342 VERIFY(gwrt != NULL); 3343 /* 3344 * Set gateway route; callee adds ref to gwrt; 3345 * gwrt has an extra ref from rtalloc1() for 3346 * this routine. 
3347 */ 3348 rt_set_gwroute(rt, rt_key(rt), gwrt); 3349 RT_UNLOCK(rt); 3350 lck_mtx_unlock(rnh_lock); 3351 /* Remember to release/free "rt" at the end */ 3352 rtrele = rt; 3353 rt = gwrt; 3354 } else { 3355 RT_ADDREF_LOCKED(gwrt); 3356 RT_UNLOCK(gwrt); 3357 RT_UNLOCK(rt); 3358 /* Remember to release/free "rt" at the end */ 3359 rtrele = rt; 3360 rt = gwrt; 3361 } 3362 VERIFY(rt == gwrt); 3363 3364 /* 3365 * This is an opportunity to revalidate the parent 3366 * route's gwroute, in case it now points to a dead 3367 * route entry. Parent route won't go away since the 3368 * clone (hint) holds a reference to it. rt == gwrt. 3369 */ 3370 RT_LOCK_SPIN(hint); 3371 if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) == 3372 (RTF_WASCLONED | RTF_UP)) { 3373 struct rtentry *prt = hint->rt_parent; 3374 VERIFY(prt != NULL); 3375 3376 RT_CONVERT_LOCK(hint); 3377 RT_ADDREF(prt); 3378 RT_UNLOCK(hint); 3379 rt_revalidate_gwroute(prt, rt); 3380 RT_REMREF(prt); 3381 } else { 3382 RT_UNLOCK(hint); 3383 } 3384 3385 RT_LOCK_SPIN(rt); 3386 /* rt == gwrt; if it is now down, give up */ 3387 if (!(rt->rt_flags & RTF_UP)) { 3388 RT_UNLOCK(rt); 3389 rtfree(rt); 3390 rt = NULL; 3391 /* "rtrele" == original "rt" */ 3392 senderr(EHOSTUNREACH); 3393 } 3394 } 3395 3396 /* Become a regular mutex */ 3397 RT_CONVERT_LOCK(rt); 3398 } 3399 3400 /* 3401 * Address resolution or Neighbor Unreachability Detection 3402 * for the next hop. 3403 * At this point, the destination of the packet must be a unicast 3404 * or an anycast address(i.e. not a multicast). 3405 */ 3406 3407 /* Look up the neighbor cache for the nexthop */ 3408 if (rt && (rt->rt_flags & RTF_LLINFO) != 0) { 3409 ln = rt->rt_llinfo; 3410 } else { 3411 struct sockaddr_in6 sin6; 3412 /* 3413 * Clear out Scope ID field in case it is set. 3414 */ 3415 sin6 = *dst; 3416 sin6.sin6_scope_id = 0; 3417 /* 3418 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), 3419 * the condition below is not very efficient. 
But we believe 3420 * it is tolerable, because this should be a rare case. 3421 * Must drop rt_lock since nd6_is_addr_neighbor() calls 3422 * nd6_lookup() and acquires rnh_lock. 3423 */ 3424 if (rt != NULL) 3425 RT_UNLOCK(rt); 3426 if (nd6_is_addr_neighbor(&sin6, ifp, 0)) { 3427 /* "rtrele" may have been used, so clean up "rt" now */ 3428 if (rt != NULL) { 3429 /* Don't free "hint0" */ 3430 if (rt == hint0) 3431 RT_REMREF(rt); 3432 else 3433 rtfree(rt); 3434 } 3435 /* Callee returns a locked route upon success */ 3436 rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0); 3437 if (rt != NULL) { 3438 RT_LOCK_ASSERT_HELD(rt); 3439 ln = rt->rt_llinfo; 3440 } 3441 } else if (rt != NULL) { 3442 RT_LOCK(rt); 3443 } 3444 } 3445 3446 if (!ln || !rt) { 3447 if (rt != NULL) 3448 RT_UNLOCK(rt); 3449 lck_rw_lock_shared(nd_if_rwlock); 3450 ndi = ND_IFINFO(ifp); 3451 VERIFY(ndi != NULL && ndi->initialized); 3452 lck_mtx_lock(&ndi->lock); 3453 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && 3454 !(ndi->flags & ND6_IFF_PERFORMNUD)) { 3455 lck_mtx_unlock(&ndi->lock); 3456 lck_rw_done(nd_if_rwlock); 3457 log(LOG_DEBUG, 3458 "nd6_output: can't allocate llinfo for %s " 3459 "(ln=0x%llx, rt=0x%llx)\n", 3460 ip6_sprintf(&dst->sin6_addr), 3461 (uint64_t)VM_KERNEL_ADDRPERM(ln), 3462 (uint64_t)VM_KERNEL_ADDRPERM(rt)); 3463 senderr(EIO); /* XXX: good error? */ 3464 } 3465 lck_mtx_unlock(&ndi->lock); 3466 lck_rw_done(nd_if_rwlock); 3467 3468 goto sendpkt; /* send anyway */ 3469 } 3470 3471 net_update_uptime(); 3472 timenow = net_uptime(); 3473 3474 /* We don't have to do link-layer address resolution on a p2p link. 
*/ 3475 if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && 3476 ln->ln_state < ND6_LLINFO_REACHABLE) { 3477 ln->ln_state = ND6_LLINFO_STALE; 3478 ln_setexpire(ln, timenow + nd6_gctimer); 3479 } 3480 3481 /* 3482 * The first time we send a packet to a neighbor whose entry is 3483 * STALE, we have to change the state to DELAY and a sets a timer to 3484 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do 3485 * neighbor unreachability detection on expiration. 3486 * (RFC 4861 7.3.3) 3487 */ 3488 if (ln->ln_state == ND6_LLINFO_STALE) { 3489 ln->ln_asked = 0; 3490 ln->ln_state = ND6_LLINFO_DELAY; 3491 ln_setexpire(ln, timenow + nd6_delay); 3492 /* N.B.: we will re-arm the timer below. */ 3493 _CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE); 3494 } 3495 3496 /* 3497 * If the neighbor cache entry has a state other than INCOMPLETE 3498 * (i.e. its link-layer address is already resolved), just 3499 * send the packet. 3500 */ 3501 if (ln->ln_state > ND6_LLINFO_INCOMPLETE) { 3502 RT_UNLOCK(rt); 3503 /* 3504 * Move this entry to the head of the queue so that it is 3505 * less likely for this entry to be a target of forced 3506 * garbage collection (see nd6_rtrequest()). Do this only 3507 * if the entry is non-permanent (as permanent ones will 3508 * never be purged), and if the number of active entries 3509 * is at least half of the threshold. 
3510 */ 3511 if (ln->ln_state == ND6_LLINFO_DELAY || 3512 (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && 3513 nd6_inuse >= (ip6_neighborgcthresh >> 1))) { 3514 lck_mtx_lock(rnh_lock); 3515 if (ln->ln_state == ND6_LLINFO_DELAY) 3516 nd6_sched_timeout(NULL, NULL); 3517 if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && 3518 nd6_inuse >= (ip6_neighborgcthresh >> 1)) { 3519 RT_LOCK_SPIN(rt); 3520 if (ln->ln_flags & ND6_LNF_IN_USE) { 3521 LN_DEQUEUE(ln); 3522 LN_INSERTHEAD(ln); 3523 } 3524 RT_UNLOCK(rt); 3525 } 3526 lck_mtx_unlock(rnh_lock); 3527 } 3528 goto sendpkt; 3529 } 3530 3531 /* 3532 * If this is a prefix proxy route, record the inbound interface 3533 * so that it can be excluded from the list of interfaces eligible 3534 * for forwarding the proxied NS in nd6_prproxy_ns_output(). 3535 */ 3536 if (rt->rt_flags & RTF_PROXY) 3537 ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp); 3538 3539 /* 3540 * There is a neighbor cache entry, but no ethernet address 3541 * response yet. Replace the held mbuf (if any) with this 3542 * latest one. 3543 * 3544 * This code conforms to the rate-limiting rule described in Section 3545 * 7.2.2 of RFC 4861, because the timer is set correctly after sending 3546 * an NS below. 
3547 */ 3548 if (ln->ln_state == ND6_LLINFO_NOSTATE) 3549 ln->ln_state = ND6_LLINFO_INCOMPLETE; 3550 if (ln->ln_hold) 3551 m_freem(ln->ln_hold); 3552 ln->ln_hold = m; 3553 if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries && 3554 ln->ln_expire <= timenow) { 3555 ln->ln_asked++; 3556 lck_rw_lock_shared(nd_if_rwlock); 3557 ndi = ND_IFINFO(ifp); 3558 VERIFY(ndi != NULL && ndi->initialized); 3559 lck_mtx_lock(&ndi->lock); 3560 ln_setexpire(ln, timenow + ndi->retrans / 1000); 3561 lck_mtx_unlock(&ndi->lock); 3562 lck_rw_done(nd_if_rwlock); 3563 RT_UNLOCK(rt); 3564 /* We still have a reference on rt (for ln) */ 3565 if (ip6_forwarding) 3566 nd6_prproxy_ns_output(ifp, origifp, NULL, 3567 &dst->sin6_addr, ln); 3568 else 3569 nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); 3570 lck_mtx_lock(rnh_lock); 3571 nd6_sched_timeout(NULL, NULL); 3572 lck_mtx_unlock(rnh_lock); 3573 } else { 3574 RT_UNLOCK(rt); 3575 } 3576 /* 3577 * Move this entry to the head of the queue so that it is 3578 * less likely for this entry to be a target of forced 3579 * garbage collection (see nd6_rtrequest()). Do this only 3580 * if the entry is non-permanent (as permanent ones will 3581 * never be purged), and if the number of active entries 3582 * is at least half of the threshold. 
3583 */ 3584 if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && 3585 nd6_inuse >= (ip6_neighborgcthresh >> 1)) { 3586 lck_mtx_lock(rnh_lock); 3587 RT_LOCK_SPIN(rt); 3588 if (ln->ln_flags & ND6_LNF_IN_USE) { 3589 LN_DEQUEUE(ln); 3590 LN_INSERTHEAD(ln); 3591 } 3592 /* Clean up "rt" now while we can */ 3593 if (rt == hint0) { 3594 RT_REMREF_LOCKED(rt); 3595 RT_UNLOCK(rt); 3596 } else { 3597 RT_UNLOCK(rt); 3598 rtfree_locked(rt); 3599 } 3600 rt = NULL; /* "rt" has been taken care of */ 3601 lck_mtx_unlock(rnh_lock); 3602 } 3603 error = 0; 3604 goto release; 3605 3606sendpkt: 3607 if (rt != NULL) 3608 RT_LOCK_ASSERT_NOTHELD(rt); 3609 3610 /* discard the packet if IPv6 operation is disabled on the interface */ 3611 if (ifp->if_eflags & IFEF_IPV6_DISABLED) { 3612 error = ENETDOWN; /* better error? */ 3613 goto bad; 3614 } 3615 3616 if (ifp->if_flags & IFF_LOOPBACK) { 3617 /* forwarding rules require the original scope_id */ 3618 m->m_pkthdr.rcvif = origifp; 3619 error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, 3620 SA(dst), 0, adv); 3621 goto release; 3622 } else { 3623 /* Do not allow loopback address to wind up on a wire */ 3624 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 3625 3626 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || 3627 IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { 3628 ip6stat.ip6s_badscope++; 3629 error = EADDRNOTAVAIL; 3630 goto bad; 3631 } 3632 } 3633 3634 if (rt != NULL) { 3635 RT_LOCK_SPIN(rt); 3636 /* Mark use timestamp */ 3637 if (rt->rt_llinfo != NULL) 3638 nd6_llreach_use(rt->rt_llinfo); 3639 RT_UNLOCK(rt); 3640 } 3641 3642 if (hint != NULL && nstat_collect) { 3643 int scnt; 3644 3645 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && 3646 (m->m_pkthdr.tso_segsz > 0)) 3647 scnt = m->m_pkthdr.len / m->m_pkthdr.tso_segsz; 3648 else 3649 scnt = 1; 3650 3651 nstat_route_tx(hint, scnt, m->m_pkthdr.len, 0); 3652 } 3653 3654 m->m_pkthdr.rcvif = NULL; 3655 error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, SA(dst), 0, adv); 3656 goto release; 3657 
3658bad: 3659 if (m != NULL) 3660 m_freem(m); 3661 3662release: 3663 /* Clean up "rt" unless it's already been done */ 3664 if (rt != NULL) { 3665 RT_LOCK_SPIN(rt); 3666 if (rt == hint0) { 3667 RT_REMREF_LOCKED(rt); 3668 RT_UNLOCK(rt); 3669 } else { 3670 RT_UNLOCK(rt); 3671 rtfree(rt); 3672 } 3673 } 3674 /* And now clean up "rtrele" if there is any */ 3675 if (rtrele != NULL) { 3676 RT_LOCK_SPIN(rtrele); 3677 if (rtrele == hint0) { 3678 RT_REMREF_LOCKED(rtrele); 3679 RT_UNLOCK(rtrele); 3680 } else { 3681 RT_UNLOCK(rtrele); 3682 rtfree(rtrele); 3683 } 3684 } 3685 return (error); 3686} 3687#undef senderr 3688 3689int 3690nd6_need_cache(struct ifnet *ifp) 3691{ 3692 /* 3693 * XXX: we currently do not make neighbor cache on any interface 3694 * other than ARCnet, Ethernet, FDDI and GIF. 3695 * 3696 * RFC2893 says: 3697 * - unidirectional tunnels needs no ND 3698 */ 3699 switch (ifp->if_type) { 3700 case IFT_ARCNET: 3701 case IFT_ETHER: 3702 case IFT_FDDI: 3703 case IFT_IEEE1394: 3704 case IFT_L2VLAN: 3705 case IFT_IEEE8023ADLAG: 3706#if IFT_IEEE80211 3707 case IFT_IEEE80211: 3708#endif 3709 case IFT_GIF: /* XXX need more cases? 
*/ 3710 case IFT_PPP: 3711#if IFT_TUNNEL 3712 case IFT_TUNNEL: 3713#endif 3714 case IFT_BRIDGE: 3715 case IFT_CELLULAR: 3716 return (1); 3717 default: 3718 return (0); 3719 } 3720} 3721 3722int 3723nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, 3724 struct sockaddr *dst, u_char *desten) 3725{ 3726 int i; 3727 struct sockaddr_dl *sdl; 3728 3729 if (m->m_flags & M_MCAST) { 3730 switch (ifp->if_type) { 3731 case IFT_ETHER: 3732 case IFT_FDDI: 3733 case IFT_L2VLAN: 3734 case IFT_IEEE8023ADLAG: 3735#if IFT_IEEE80211 3736 case IFT_IEEE80211: 3737#endif 3738 case IFT_BRIDGE: 3739 ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten); 3740 return (1); 3741 case IFT_IEEE1394: 3742 for (i = 0; i < ifp->if_addrlen; i++) 3743 desten[i] = ~0; 3744 return (1); 3745 case IFT_ARCNET: 3746 *desten = 0; 3747 return (1); 3748 default: 3749 return (0); /* caller will free mbuf */ 3750 } 3751 } 3752 3753 if (rt == NULL) { 3754 /* this could happen, if we could not allocate memory */ 3755 return (0); /* caller will free mbuf */ 3756 } 3757 RT_LOCK(rt); 3758 if (rt->rt_gateway->sa_family != AF_LINK) { 3759 printf("nd6_storelladdr: something odd happens\n"); 3760 RT_UNLOCK(rt); 3761 return (0); /* caller will free mbuf */ 3762 } 3763 sdl = SDL(rt->rt_gateway); 3764 if (sdl->sdl_alen == 0) { 3765 /* this should be impossible, but we bark here for debugging */ 3766 printf("nd6_storelladdr: sdl_alen == 0\n"); 3767 RT_UNLOCK(rt); 3768 return (0); /* caller will free mbuf */ 3769 } 3770 3771 bcopy(LLADDR(sdl), desten, sdl->sdl_alen); 3772 RT_UNLOCK(rt); 3773 return (1); 3774} 3775 3776/* 3777 * This is the ND pre-output routine; care must be taken to ensure that 3778 * the "hint" route never gets freed via rtfree(), since the caller may 3779 * have stored it inside a struct route with a reference held for that 3780 * placeholder. 
 */
/*
 * Fill "ll_dest" with the link-layer (AF_LINK) address for "ip6_dest",
 * resolving through "hint"'s gateway route when one is supplied.
 * Multicast destinations are mapped via dlil_resolve_multi().  Returns 0
 * on success or an errno; never frees "hint" itself (see comment above).
 */
errno_t
nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest,
    struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
    mbuf_t packet)
{
	route_t route = hint;
	errno_t result = 0;
	struct sockaddr_dl *sdl = NULL;
	size_t copy_len;

	if (ip6_dest->sin6_family != AF_INET6)
		return (EAFNOSUPPORT);

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
		return (ENETDOWN);

	if (hint != NULL) {
		/*
		 * Callee holds a reference on the route and returns
		 * with the route entry locked, upon success.
		 */
		result = route_to_gwroute((const struct sockaddr *)ip6_dest,
		    hint, &route);
		if (result != 0)
			return (result);
		if (route != NULL)
			RT_LOCK_ASSERT_HELD(route);
	}

	if ((packet->m_flags & M_MCAST) != 0) {
		/* Drop the route lock across the DLIL call, then retake it */
		if (route != NULL)
			RT_UNLOCK(route);
		result = dlil_resolve_multi(ifp,
		    (const struct sockaddr *)ip6_dest,
		    SA(ll_dest), ll_dest_len);
		if (route != NULL)
			RT_LOCK(route);
		goto release;
	}

	if (route == NULL) {
		/*
		 * This could happen, if we could not allocate memory or
		 * if route_to_gwroute() didn't return a route.
		 */
		result = ENOBUFS;
		goto release;
	}

	if (route->rt_gateway->sa_family != AF_LINK) {
		printf("%s: route %s on %s%d gateway address not AF_LINK\n",
		    __func__, ip6_sprintf(&ip6_dest->sin6_addr),
		    route->rt_ifp->if_name, route->rt_ifp->if_unit);
		result = EADDRNOTAVAIL;
		goto release;
	}

	sdl = SDL(route->rt_gateway);
	if (sdl->sdl_alen == 0) {
		/* this should be impossible, but we bark here for debugging */
		printf("%s: route %s on %s%d sdl_alen == 0\n", __func__,
		    ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name,
		    route->rt_ifp->if_unit);
		result = EHOSTUNREACH;
		goto release;
	}

	/* Copy at most the caller's buffer size (whole sockaddr_dl) */
	copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
	bcopy(sdl, ll_dest, copy_len);

release:
	if (route != NULL) {
		if (route == hint) {
			/* Never rtfree() the caller's hint; just drop our ref */
			RT_REMREF_LOCKED(route);
			RT_UNLOCK(route);
		} else {
			RT_UNLOCK(route);
			rtfree(route);
		}
	}
	return (result);
}

/*
 * Apply a change of ND6 interface flags: transitions of
 * ND6_IFF_IFDISABLED and ND6_IFF_PROXY_PREFIXES between "before" and
 * "after" are forwarded to nd6_if_disable() / nd6_if_prproxy().
 * Returns 0 or the first error from either handler.
 */
int
nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
{
	uint32_t b, a;
	int err = 0;

	/*
	 * Handle ND6_IFF_IFDISABLED
	 */
	if ((before & ND6_IFF_IFDISABLED) ||
	    (after & ND6_IFF_IFDISABLED)) {
		b = (before & ND6_IFF_IFDISABLED);
		a = (after & ND6_IFF_IFDISABLED);

		/* (int32_t)(a - b) > 0 means the flag was just turned on */
		if (b != a && (err = nd6_if_disable(ifp,
		    ((int32_t)(a - b) > 0))) != 0)
			goto done;
	}

	/*
	 * Handle ND6_IFF_PROXY_PREFIXES
	 */
	if ((before & ND6_IFF_PROXY_PREFIXES) ||
	    (after & ND6_IFF_PROXY_PREFIXES)) {
		b = (before & ND6_IFF_PROXY_PREFIXES);
		a = (after & ND6_IFF_PROXY_PREFIXES);

		if (b != a && (err = nd6_if_prproxy(ifp,
		    ((int32_t)(a - b) > 0))) != 0)
			goto done;
	}
done:
	return (err);
}

/*
 * Enable/disable IPv6 on an interface, called as part of
 * setting/clearing ND6_IFF_IFDISABLED, or during DAD failure.
 */
/*
 * NOTE(review): "enable" == TRUE here *sets* IFEF_IPV6_DISABLED (i.e. it
 * enables the disabled state); the parameter name is inverted relative to
 * "enable IPv6" — confirm against callers before renaming.
 */
int
nd6_if_disable(struct ifnet *ifp, boolean_t enable)
{
	ifnet_lock_shared(ifp);
	if (enable)
		ifp->if_eflags |= IFEF_IPV6_DISABLED;
	else
		ifp->if_eflags &= ~IFEF_IPV6_DISABLED;
	ifnet_lock_done(ifp);

	return (0);
}

/*
 * Read-only sysctl handler: copy the default router list to userland
 * under nd6_mutex, using the 32- or 64-bit record layout that matches the
 * requesting process.
 */
static int
nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	char pbuf[MAX_IPv6_STR_LEN];
	struct nd_defrouter *dr;
	int error = 0;

	/* This node is read-only; reject any attempt to write */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	lck_mtx_lock(nd6_mutex);
	if (proc_is64bit(req->p)) {
		struct in6_defrouter_64 d;

		bzero(&d, sizeof (d));
		d.rtaddr.sin6_family = AF_INET6;
		d.rtaddr.sin6_len = sizeof (d.rtaddr);

		TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
			d.rtaddr.sin6_addr = dr->rtaddr;
			if (in6_recoverscope(&d.rtaddr,
			    &dr->rtaddr, dr->ifp) != 0)
				log(LOG_ERR, "scope error in default router "
				    "list (%s)\n", inet_ntop(AF_INET6,
				    &dr->rtaddr, pbuf, sizeof (pbuf)));
			d.flags = dr->flags;
			d.stateflags = dr->stateflags;
			/* presumably kernel-internal marker; not exported — verify */
			d.stateflags &= ~NDDRF_PROCESSED;
			d.rtlifetime = dr->rtlifetime;
			d.expire = nddr_getexpire(dr);
			d.if_index = dr->ifp->if_index;
			error = SYSCTL_OUT(req, &d, sizeof (d));
			if (error != 0)
				break;
		}
	} else {
		struct in6_defrouter_32 d;

		bzero(&d, sizeof (d));
		d.rtaddr.sin6_family = AF_INET6;
		d.rtaddr.sin6_len = sizeof (d.rtaddr);

		TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
			d.rtaddr.sin6_addr = dr->rtaddr;
			if (in6_recoverscope(&d.rtaddr,
			    &dr->rtaddr, dr->ifp) != 0)
				log(LOG_ERR, "scope error in default router "
				    "list (%s)\n", inet_ntop(AF_INET6,
				    &dr->rtaddr, pbuf, sizeof (pbuf)));
			d.flags = dr->flags;
			d.stateflags = dr->stateflags;
			/* presumably kernel-internal marker; not exported — verify */
			d.stateflags &= ~NDDRF_PROCESSED;
			d.rtlifetime = dr->rtlifetime;
			d.expire = nddr_getexpire(dr);
			d.if_index = dr->ifp->if_index;
			error = SYSCTL_OUT(req, &d, sizeof (d));
			if (error != 0)
				break;
		}
	}
	lck_mtx_unlock(nd6_mutex);
	return (error);
}

/*
 * Read-only sysctl handler: copy the prefix list to userland under
 * nd6_mutex.  Each prefix record (32- or 64-bit layout per requesting
 * process) is followed by one sockaddr_in6 per advertising router.
 */
static int
nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	char pbuf[MAX_IPv6_STR_LEN];
	struct nd_pfxrouter *pfr;
	struct sockaddr_in6 s6;
	struct nd_prefix *pr;
	int error = 0;

	/* This node is read-only; reject any attempt to write */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	bzero(&s6, sizeof (s6));
	s6.sin6_family = AF_INET6;
	s6.sin6_len = sizeof (s6);

	lck_mtx_lock(nd6_mutex);
	if (proc_is64bit(req->p)) {
		struct in6_prefix_64 p;

		bzero(&p, sizeof (p));
		p.origin = PR_ORIG_RA;

		LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
			NDPR_LOCK(pr);
			p.prefix = pr->ndpr_prefix;
			if (in6_recoverscope(&p.prefix,
			    &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
				log(LOG_ERR, "scope error in "
				    "prefix list (%s)\n", inet_ntop(AF_INET6,
				    &p.prefix.sin6_addr, pbuf, sizeof (pbuf)));
			p.raflags = pr->ndpr_raf;
			p.prefixlen = pr->ndpr_plen;
			p.vltime = pr->ndpr_vltime;
			p.pltime = pr->ndpr_pltime;
			p.if_index = pr->ndpr_ifp->if_index;
			p.expire = ndpr_getexpire(pr);
			p.refcnt = pr->ndpr_addrcnt;
			p.flags = pr->ndpr_stateflags;
			/* Count advertising routers before emitting the record */
			p.advrtrs = 0;
			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
				p.advrtrs++;
			error = SYSCTL_OUT(req, &p, sizeof (p));
			if (error != 0) {
				NDPR_UNLOCK(pr);
				break;
			}
			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
				s6.sin6_addr = pfr->router->rtaddr;
				if (in6_recoverscope(&s6, &pfr->router->rtaddr,
				    pfr->router->ifp) != 0)
					log(LOG_ERR,
					    "scope error in prefix list (%s)\n",
					    inet_ntop(AF_INET6, &s6.sin6_addr,
					    pbuf, sizeof (pbuf)));
				error = SYSCTL_OUT(req, &s6, sizeof (s6));
				if (error != 0)
					break;
			}
			NDPR_UNLOCK(pr);
			if (error != 0)
				break;
		}
	} else {
		struct in6_prefix_32 p;

		bzero(&p, sizeof (p));
		p.origin = PR_ORIG_RA;

		LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
			NDPR_LOCK(pr);
			p.prefix = pr->ndpr_prefix;
			if (in6_recoverscope(&p.prefix,
			    &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
				log(LOG_ERR,
				    "scope error in prefix list (%s)\n",
				    inet_ntop(AF_INET6, &p.prefix.sin6_addr,
				    pbuf, sizeof (pbuf)));
			p.raflags = pr->ndpr_raf;
			p.prefixlen = pr->ndpr_plen;
			p.vltime = pr->ndpr_vltime;
			p.pltime = pr->ndpr_pltime;
			p.if_index = pr->ndpr_ifp->if_index;
			p.expire = ndpr_getexpire(pr);
			p.refcnt = pr->ndpr_addrcnt;
			p.flags = pr->ndpr_stateflags;
			/* Count advertising routers before emitting the record */
			p.advrtrs = 0;
			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
				p.advrtrs++;
			error = SYSCTL_OUT(req, &p, sizeof (p));
			if (error != 0) {
				NDPR_UNLOCK(pr);
				break;
			}
			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
				s6.sin6_addr = pfr->router->rtaddr;
				if (in6_recoverscope(&s6, &pfr->router->rtaddr,
				    pfr->router->ifp) != 0)
					log(LOG_ERR,
					    "scope error in prefix list (%s)\n",
					    inet_ntop(AF_INET6, &s6.sin6_addr,
					    pbuf, sizeof (pbuf)));
				error = SYSCTL_OUT(req, &s6, sizeof (s6));
				if (error != 0)
					break;
			}
			NDPR_UNLOCK(pr);
			if (error != 0)
				break;
		}
	}
	lck_mtx_unlock(nd6_mutex);

	return (error);
}