1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
32 */
33
34#include <sys/cdefs.h>
35#include "opt_inet.h"
36#include "opt_inet6.h"
37#include "opt_route.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/eventhandler.h>
42#include <sys/callout.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/mutex.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/time.h>
50#include <sys/kernel.h>
51#include <sys/protosw.h>
52#include <sys/errno.h>
53#include <sys/syslog.h>
54#include <sys/rwlock.h>
55#include <sys/queue.h>
56#include <sys/sdt.h>
57#include <sys/sysctl.h>
58
59#include <net/if.h>
60#include <net/if_var.h>
61#include <net/if_dl.h>
62#include <net/if_private.h>
63#include <net/if_types.h>
64#include <net/route.h>
65#include <net/route/route_ctl.h>
66#include <net/route/nhop.h>
67#include <net/vnet.h>
68
69#include <netinet/in.h>
70#include <netinet/in_kdtrace.h>
71#include <net/if_llatbl.h>
72#include <netinet/if_ether.h>
73#include <netinet6/in6_fib.h>
74#include <netinet6/in6_var.h>
75#include <netinet/ip6.h>
76#include <netinet6/ip6_var.h>
77#include <netinet6/scope6_var.h>
78#include <netinet6/nd6.h>
79#include <netinet6/in6_ifattach.h>
80#include <netinet/icmp6.h>
81#include <netinet6/send.h>
82
83#include <sys/limits.h>
84
85#include <security/mac/mac_framework.h>
86
87#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
88#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
89
90#define SIN6(s) ((const struct sockaddr_in6 *)(s))
91
92MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
93
94VNET_DEFINE_STATIC(int, nd6_prune) = 1;
95#define	V_nd6_prune	VNET(nd6_prune)
96SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune,
97    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0,
98    "Frequency in seconds of checks for expired prefixes and routers");
99
100VNET_DEFINE_STATIC(int, nd6_delay) = 5;
101#define	V_nd6_delay	VNET(nd6_delay)
102SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay,
103    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0,
104    "Delay in seconds before probing for reachability");
105
106VNET_DEFINE_STATIC(int, nd6_umaxtries) = 3;
107#define	V_nd6_umaxtries	VNET(nd6_umaxtries)
108SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
109    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0,
110    "Number of ICMPv6 NS messages sent during reachability detection");
111
112VNET_DEFINE(int, nd6_mmaxtries) = 3;
113#define	V_nd6_mmaxtries	VNET(nd6_mmaxtries)
114SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
115    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0,
116    "Number of ICMPv6 NS messages sent during address resolution");
117
118VNET_DEFINE_STATIC(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage
119							* collection timer */
120#define	V_nd6_gctimer	VNET(nd6_gctimer)
121
122/* preventing too many loops in ND option parsing */
123VNET_DEFINE_STATIC(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
124
125VNET_DEFINE_STATIC(int, nd6_maxqueuelen) = 16; /* max pkts cached in unresolved
126					 * ND entries */
127#define	V_nd6_maxndopt			VNET(nd6_maxndopt)
128#define	V_nd6_maxqueuelen		VNET(nd6_maxqueuelen)
129
130#ifdef ND6_DEBUG
131VNET_DEFINE(int, nd6_debug) = 1;
132#else
133VNET_DEFINE(int, nd6_debug) = 0;
134#endif
135#define	V_nd6_debug	VNET(nd6_debug)
136SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug,
137    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0,
138    "Log NDP debug messages");
139
140static eventhandler_tag lle_event_eh, iflladdr_event_eh, ifnet_link_event_eh;
141
142VNET_DEFINE(struct nd_prhead, nd_prefix);
143VNET_DEFINE(struct rwlock, nd6_lock);
144VNET_DEFINE(uint64_t, nd6_list_genid);
145VNET_DEFINE(struct mtx, nd6_onlink_mtx);
146
147VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
148#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
149
150int	(*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
151
152static bool nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
153	struct ifnet *);
154static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
155static void nd6_slowtimo(void *);
156static int regen_tmpaddr(struct in6_ifaddr *);
157static void nd6_free(struct llentry **, int);
158static void nd6_free_redirect(const struct llentry *);
159static void nd6_llinfo_timer(void *);
160static void nd6_llinfo_settimer_locked(struct llentry *, long);
161static int nd6_resolve_slow(struct ifnet *, int, int, struct mbuf *,
162    const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
163static int nd6_need_cache(struct ifnet *);
164
165VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch);
166#define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)
167
168VNET_DEFINE_STATIC(struct callout, nd6_timer_ch);
169#define	V_nd6_timer_ch			VNET(nd6_timer_ch)
170
171static void
172nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
173{
174	struct rt_addrinfo rtinfo;
175	struct sockaddr_in6 dst;
176	struct sockaddr_dl gw;
177	struct ifnet *ifp;
178	int type;
179	int fibnum;
180
181	LLE_WLOCK_ASSERT(lle);
182
183	if (lltable_get_af(lle->lle_tbl) != AF_INET6)
184		return;
185
186	switch (evt) {
187	case LLENTRY_RESOLVED:
188		type = RTM_ADD;
189		KASSERT(lle->la_flags & LLE_VALID,
190		    ("%s: %p resolved but not valid?", __func__, lle));
191		break;
192	case LLENTRY_EXPIRED:
193		type = RTM_DELETE;
194		break;
195	default:
196		return;
197	}
198
199	ifp = lltable_get_ifp(lle->lle_tbl);
200
201	bzero(&dst, sizeof(dst));
202	bzero(&gw, sizeof(gw));
203	bzero(&rtinfo, sizeof(rtinfo));
204	lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
205	dst.sin6_scope_id = in6_getscopezone(ifp,
206	    in6_addrscope(&dst.sin6_addr));
207	gw.sdl_len = sizeof(struct sockaddr_dl);
208	gw.sdl_family = AF_LINK;
209	gw.sdl_alen = ifp->if_addrlen;
210	gw.sdl_index = ifp->if_index;
211	gw.sdl_type = ifp->if_type;
212	if (evt == LLENTRY_RESOLVED)
213		bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
214	rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
215	rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
216	rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
217	fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ifp->if_fib;
218	rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
219	    type == RTM_ADD ? RTF_UP: 0), 0, fibnum);
220}
221
222/*
223 * A handler for interface link layer address change event.
224 */
225static void
226nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
227{
228	if (ifp->if_afdata[AF_INET6] == NULL)
229		return;
230
231	lltable_update_ifaddr(LLTABLE6(ifp));
232}
233
234void
235nd6_init(void)
236{
237
238	mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF);
239	rw_init(&V_nd6_lock, "nd6 list");
240
241	LIST_INIT(&V_nd_prefix);
242	nd6_defrouter_init();
243
244	/* Start timers. */
245	callout_init(&V_nd6_slowtimo_ch, 1);
246	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
247	    nd6_slowtimo, curvnet);
248
249	callout_init(&V_nd6_timer_ch, 1);
250	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
251
252	nd6_dad_init();
253	if (IS_DEFAULT_VNET(curvnet)) {
254		lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
255		    NULL, EVENTHANDLER_PRI_ANY);
256		iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
257		    nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
258		ifnet_link_event_eh = EVENTHANDLER_REGISTER(ifnet_link_event,
259		    nd6_ifnet_link_event, NULL, EVENTHANDLER_PRI_ANY);
260	}
261}
262
#ifdef VIMAGE
/*
 * Undo nd6_init() for the current vnet: drain the two periodic callouts,
 * deregister the global event handlers when the default vnet goes away,
 * and destroy the locks.
 */
void
nd6_destroy(void)
{
	callout_drain(&V_nd6_slowtimo_ch);
	callout_drain(&V_nd6_timer_ch);

	/* The handlers were registered once, by the default vnet. */
	if (IS_DEFAULT_VNET(curvnet)) {
		EVENTHANDLER_DEREGISTER(ifnet_link_event, ifnet_link_event_eh);
		EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
	}

	rw_destroy(&V_nd6_lock);
	mtx_destroy(&V_nd6_onlink_mtx);
}
#endif /* VIMAGE */
279
280struct nd_ifinfo *
281nd6_ifattach(struct ifnet *ifp)
282{
283	struct nd_ifinfo *nd;
284
285	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
286	nd->initialized = 1;
287
288	nd->chlim = IPV6_DEFHLIM;
289	nd->basereachable = REACHABLE_TIME;
290	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
291	nd->retrans = RETRANS_TIMER;
292
293	nd->flags = ND6_IFF_PERFORMNUD;
294
295	/* Set IPv6 disabled on all interfaces but loopback by default. */
296	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
297		nd->flags |= ND6_IFF_IFDISABLED;
298
299	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
300	 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
301	 * default regardless of the V_ip6_auto_linklocal configuration to
302	 * give a reasonable default behavior.
303	 */
304	if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE &&
305	    ifp->if_type != IFT_WIREGUARD) || (ifp->if_flags & IFF_LOOPBACK))
306		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
307	/*
308	 * A loopback interface does not need to accept RTADV.
309	 * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
310	 * default regardless of the V_ip6_accept_rtadv configuration to
311	 * prevent the interface from accepting RA messages arrived
312	 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
313	 */
314	if (V_ip6_accept_rtadv &&
315	    !(ifp->if_flags & IFF_LOOPBACK) &&
316	    (ifp->if_type != IFT_BRIDGE)) {
317			nd->flags |= ND6_IFF_ACCEPT_RTADV;
318			/* If we globally accept rtadv, assume IPv6 on. */
319			nd->flags &= ~ND6_IFF_IFDISABLED;
320	}
321	if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
322		nd->flags |= ND6_IFF_NO_RADR;
323
324	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
325	nd6_setmtu0(ifp, nd);
326
327	return nd;
328}
329
330void
331nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
332{
333	struct epoch_tracker et;
334	struct ifaddr *ifa, *next;
335
336	NET_EPOCH_ENTER(et);
337	CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
338		if (ifa->ifa_addr->sa_family != AF_INET6)
339			continue;
340
341		/* stop DAD processing */
342		nd6_dad_stop(ifa);
343	}
344	NET_EPOCH_EXIT(et);
345
346	free(nd, M_IP6NDP);
347}
348
349/*
350 * Reset ND level link MTU. This function is called when the physical MTU
351 * changes, which means we might have to adjust the ND level MTU.
352 */
353void
354nd6_setmtu(struct ifnet *ifp)
355{
356	if (ifp->if_afdata[AF_INET6] == NULL)
357		return;
358
359	nd6_setmtu0(ifp, ND_IFINFO(ifp));
360}
361
362/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
363void
364nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
365{
366	u_int32_t omaxmtu;
367
368	omaxmtu = ndi->maxmtu;
369	ndi->maxmtu = ifp->if_mtu;
370
371	/*
372	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
373	 * undesirable situation.  We thus notify the operator of the change
374	 * explicitly.  The check for omaxmtu is necessary to restrict the
375	 * log to the case of changing the MTU, not initializing it.
376	 */
377	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
378		log(LOG_NOTICE, "nd6_setmtu0: "
379		    "new link MTU on %s (%lu) is too small for IPv6\n",
380		    if_name(ifp), (unsigned long)ndi->maxmtu);
381	}
382
383	if (ndi->maxmtu > V_in6_maxmtu)
384		in6_setmaxmtu(); /* check all interfaces just in case */
385
386}
387
388void
389nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
390{
391
392	bzero(ndopts, sizeof(*ndopts));
393	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
394	ndopts->nd_opts_last
395		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
396
397	if (icmp6len == 0) {
398		ndopts->nd_opts_done = 1;
399		ndopts->nd_opts_search = NULL;
400	}
401}
402
403/*
404 * Take one ND option.
405 */
406struct nd_opt_hdr *
407nd6_option(union nd_opts *ndopts)
408{
409	struct nd_opt_hdr *nd_opt;
410	int olen;
411
412	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
413	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
414	    __func__));
415	if (ndopts->nd_opts_search == NULL)
416		return NULL;
417	if (ndopts->nd_opts_done)
418		return NULL;
419
420	nd_opt = ndopts->nd_opts_search;
421
422	/* make sure nd_opt_len is inside the buffer */
423	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
424		bzero(ndopts, sizeof(*ndopts));
425		return NULL;
426	}
427
428	olen = nd_opt->nd_opt_len << 3;
429	if (olen == 0) {
430		/*
431		 * Message validation requires that all included
432		 * options have a length that is greater than zero.
433		 */
434		bzero(ndopts, sizeof(*ndopts));
435		return NULL;
436	}
437
438	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
439	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
440		/* option overruns the end of buffer, invalid */
441		bzero(ndopts, sizeof(*ndopts));
442		return NULL;
443	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
444		/* reached the end of options chain */
445		ndopts->nd_opts_done = 1;
446		ndopts->nd_opts_search = NULL;
447	}
448	return nd_opt;
449}
450
451/*
452 * Parse multiple ND options.
453 * This function is much easier to use, for ND routines that do not need
454 * multiple options of the same type.
455 */
456int
457nd6_options(union nd_opts *ndopts)
458{
459	struct nd_opt_hdr *nd_opt;
460	int i = 0;
461
462	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
463	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
464	    __func__));
465	if (ndopts->nd_opts_search == NULL)
466		return 0;
467
468	while (1) {
469		nd_opt = nd6_option(ndopts);
470		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
471			/*
472			 * Message validation requires that all included
473			 * options have a length that is greater than zero.
474			 */
475			ICMP6STAT_INC(icp6s_nd_badopt);
476			bzero(ndopts, sizeof(*ndopts));
477			return -1;
478		}
479
480		if (nd_opt == NULL)
481			goto skip1;
482
483		switch (nd_opt->nd_opt_type) {
484		case ND_OPT_SOURCE_LINKADDR:
485		case ND_OPT_TARGET_LINKADDR:
486		case ND_OPT_MTU:
487		case ND_OPT_REDIRECTED_HEADER:
488		case ND_OPT_NONCE:
489			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
490				nd6log((LOG_INFO,
491				    "duplicated ND6 option found (type=%d)\n",
492				    nd_opt->nd_opt_type));
493				/* XXX bark? */
494			} else {
495				ndopts->nd_opt_array[nd_opt->nd_opt_type]
496					= nd_opt;
497			}
498			break;
499		case ND_OPT_PREFIX_INFORMATION:
500			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
501				ndopts->nd_opt_array[nd_opt->nd_opt_type]
502					= nd_opt;
503			}
504			ndopts->nd_opts_pi_end =
505				(struct nd_opt_prefix_info *)nd_opt;
506			break;
507		/* What about ND_OPT_ROUTE_INFO? RFC 4191 */
508		case ND_OPT_RDNSS:	/* RFC 6106 */
509		case ND_OPT_DNSSL:	/* RFC 6106 */
510			/*
511			 * Silently ignore options we know and do not care about
512			 * in the kernel.
513			 */
514			break;
515		default:
516			/*
517			 * Unknown options must be silently ignored,
518			 * to accommodate future extension to the protocol.
519			 */
520			nd6log((LOG_DEBUG,
521			    "nd6_options: unsupported option %d - "
522			    "option ignored\n", nd_opt->nd_opt_type));
523		}
524
525skip1:
526		i++;
527		if (i > V_nd6_maxndopt) {
528			ICMP6STAT_INC(icp6s_nd_toomanyopt);
529			nd6log((LOG_INFO, "too many loop in nd opt\n"));
530			break;
531		}
532
533		if (ndopts->nd_opts_done)
534			break;
535	}
536
537	return 0;
538}
539
540/*
541 * ND6 timer routine to handle ND6 entries
542 */
543static void
544nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
545{
546	int canceled;
547
548	LLE_WLOCK_ASSERT(ln);
549
550	/* Do not schedule timers for child LLEs. */
551	if (ln->la_flags & LLE_CHILD)
552		return;
553
554	if (tick < 0) {
555		ln->la_expire = 0;
556		ln->ln_ntick = 0;
557		canceled = callout_stop(&ln->lle_timer);
558	} else {
559		ln->la_expire = time_uptime + tick / hz;
560		LLE_ADDREF(ln);
561		if (tick > INT_MAX) {
562			ln->ln_ntick = tick - INT_MAX;
563			canceled = callout_reset(&ln->lle_timer, INT_MAX,
564			    nd6_llinfo_timer, ln);
565		} else {
566			ln->ln_ntick = 0;
567			canceled = callout_reset(&ln->lle_timer, tick,
568			    nd6_llinfo_timer, ln);
569		}
570	}
571	if (canceled > 0)
572		LLE_REMREF(ln);
573}
574
575/*
576 * Gets source address of the first packet in hold queue
577 * and stores it in @src.
578 * Returns pointer to @src (if hold queue is not empty) or NULL.
579 *
580 * Set noinline to be dtrace-friendly
581 */
582static __noinline struct in6_addr *
583nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
584{
585	struct ip6_hdr hdr;
586	struct mbuf *m;
587
588	if (ln->la_hold == NULL)
589		return (NULL);
590
591	/*
592	 * assume every packet in la_hold has the same IP header
593	 */
594	m = ln->la_hold;
595	if (sizeof(hdr) > m->m_len)
596		return (NULL);
597
598	m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
599	*src = hdr.ip6_src;
600
601	return (src);
602}
603
604/*
605 * Checks if we need to switch from STALE state.
606 *
607 * RFC 4861 requires switching from STALE to DELAY state
608 * on first packet matching entry, waiting V_nd6_delay and
609 * transition to PROBE state (if upper layer confirmation was
610 * not received).
611 *
612 * This code performs a bit differently:
613 * On packet hit we don't change state (but desired state
614 * can be guessed by control plane). However, after V_nd6_delay
615 * seconds code will transition to PROBE state (so DELAY state
616 * is kinda skipped in most situations).
617 *
618 * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
619 * we perform the following upon entering STALE state:
620 *
621 * 1) Arm timer to run each V_nd6_delay seconds to make sure that
622 * if packet was transmitted at the start of given interval, we
623 * would be able to switch to PROBE state in V_nd6_delay seconds
624 * as user expects.
625 *
626 * 2) Reschedule timer until original V_nd6_gctimer expires keeping
627 * lle in STALE state (remaining timer value stored in lle_remtime).
628 *
629 * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
630 * seconds ago.
631 *
632 * Returns non-zero value if the entry is still STALE (storing
633 * the next timer interval in @pdelay).
634 *
635 * Returns zero value if original timer expired or we need to switch to
636 * PROBE (store that in @do_switch variable).
637 */
638static int
639nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
640{
641	int nd_delay, nd_gctimer;
642	time_t lle_hittime;
643	long delay;
644
645	*do_switch = 0;
646	nd_gctimer = V_nd6_gctimer;
647	nd_delay = V_nd6_delay;
648
649	lle_hittime = llentry_get_hittime(lle);
650
651	if (lle_hittime == 0) {
652		/*
653		 * Datapath feedback has been requested upon entering
654		 * STALE state. No packets has been passed using this lle.
655		 * Ask for the timer reschedule and keep STALE state.
656		 */
657		delay = (long)(MIN(nd_gctimer, nd_delay));
658		delay *= hz;
659		if (lle->lle_remtime > delay)
660			lle->lle_remtime -= delay;
661		else {
662			delay = lle->lle_remtime;
663			lle->lle_remtime = 0;
664		}
665
666		if (delay == 0) {
667			/*
668			 * The original ng6_gctime timeout ended,
669			 * no more rescheduling.
670			 */
671			return (0);
672		}
673
674		*pdelay = delay;
675		return (1);
676	}
677
678	/*
679	 * Packet received. Verify timestamp
680	 */
681	delay = (long)(time_uptime - lle_hittime);
682	if (delay < nd_delay) {
683		/*
684		 * V_nd6_delay still not passed since the first
685		 * hit in STALE state.
686		 * Reschedule timer and return.
687		 */
688		*pdelay = (long)(nd_delay - delay) * hz;
689		return (1);
690	}
691
692	/* Request switching to probe */
693	*do_switch = 1;
694	return (0);
695}
696
697/*
698 * Switch @lle state to new state optionally arming timers.
699 *
700 * Set noinline to be dtrace-friendly
701 */
702__noinline void
703nd6_llinfo_setstate(struct llentry *lle, int newstate)
704{
705	struct ifnet *ifp;
706	int nd_gctimer, nd_delay;
707	long delay, remtime;
708
709	delay = 0;
710	remtime = 0;
711
712	switch (newstate) {
713	case ND6_LLINFO_INCOMPLETE:
714		ifp = lle->lle_tbl->llt_ifp;
715		delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
716		break;
717	case ND6_LLINFO_REACHABLE:
718		if (!ND6_LLINFO_PERMANENT(lle)) {
719			ifp = lle->lle_tbl->llt_ifp;
720			delay = (long)ND_IFINFO(ifp)->reachable * hz;
721		}
722		break;
723	case ND6_LLINFO_STALE:
724
725		llentry_request_feedback(lle);
726		nd_delay = V_nd6_delay;
727		nd_gctimer = V_nd6_gctimer;
728
729		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
730		remtime = (long)nd_gctimer * hz - delay;
731		break;
732	case ND6_LLINFO_DELAY:
733		lle->la_asked = 0;
734		delay = (long)V_nd6_delay * hz;
735		break;
736	}
737
738	if (delay > 0)
739		nd6_llinfo_settimer_locked(lle, delay);
740
741	lle->lle_remtime = remtime;
742	lle->ln_state = newstate;
743}
744
745/*
746 * Timer-dependent part of nd state machine.
747 *
748 * Set noinline to be dtrace-friendly
749 */
750static __noinline void
751nd6_llinfo_timer(void *arg)
752{
753	struct epoch_tracker et;
754	struct llentry *ln;
755	struct in6_addr *dst, *pdst, *psrc, src;
756	struct ifnet *ifp;
757	struct nd_ifinfo *ndi;
758	int do_switch, send_ns;
759	long delay;
760
761	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
762	ln = (struct llentry *)arg;
763	ifp = lltable_get_ifp(ln->lle_tbl);
764	CURVNET_SET(ifp->if_vnet);
765
766	ND6_RLOCK();
767	LLE_WLOCK(ln);
768	if (callout_pending(&ln->lle_timer)) {
769		/*
770		 * Here we are a bit odd here in the treatment of
771		 * active/pending. If the pending bit is set, it got
772		 * rescheduled before I ran. The active
773		 * bit we ignore, since if it was stopped
774		 * in ll_tablefree() and was currently running
775		 * it would have return 0 so the code would
776		 * not have deleted it since the callout could
777		 * not be stopped so we want to go through
778		 * with the delete here now. If the callout
779		 * was restarted, the pending bit will be back on and
780		 * we just want to bail since the callout_reset would
781		 * return 1 and our reference would have been removed
782		 * by nd6_llinfo_settimer_locked above since canceled
783		 * would have been 1.
784		 */
785		LLE_WUNLOCK(ln);
786		ND6_RUNLOCK();
787		CURVNET_RESTORE();
788		return;
789	}
790	NET_EPOCH_ENTER(et);
791	ndi = ND_IFINFO(ifp);
792	send_ns = 0;
793	dst = &ln->r_l3addr.addr6;
794	pdst = dst;
795
796	if (ln->ln_ntick > 0) {
797		if (ln->ln_ntick > INT_MAX) {
798			ln->ln_ntick -= INT_MAX;
799			nd6_llinfo_settimer_locked(ln, INT_MAX);
800		} else {
801			ln->ln_ntick = 0;
802			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
803		}
804		goto done;
805	}
806
807	if (ln->la_flags & LLE_STATIC) {
808		goto done;
809	}
810
811	if (ln->la_flags & LLE_DELETED) {
812		nd6_free(&ln, 0);
813		goto done;
814	}
815
816	switch (ln->ln_state) {
817	case ND6_LLINFO_INCOMPLETE:
818		if (ln->la_asked < V_nd6_mmaxtries) {
819			ln->la_asked++;
820			send_ns = 1;
821			/* Send NS to multicast address */
822			pdst = NULL;
823		} else {
824			struct mbuf *m;
825
826			ICMP6STAT_ADD(icp6s_dropped, ln->la_numheld);
827
828			m = ln->la_hold;
829			if (m != NULL) {
830				/*
831				 * assuming every packet in la_hold has the
832				 * same IP header.  Send error after unlock.
833				 */
834				ln->la_hold = m->m_nextpkt;
835				m->m_nextpkt = NULL;
836				ln->la_numheld--;
837			}
838			nd6_free(&ln, 0);
839			if (m != NULL) {
840				struct mbuf *n = m;
841
842				/*
843				 * if there are any ummapped mbufs, we
844				 * must free them, rather than using
845				 * them for an ICMP, as they cannot be
846				 * checksummed.
847				 */
848				while ((n = n->m_next) != NULL) {
849					if (n->m_flags & M_EXTPG)
850						break;
851				}
852				if (n != NULL) {
853					m_freem(m);
854					m = NULL;
855				} else {
856					icmp6_error2(m, ICMP6_DST_UNREACH,
857					    ICMP6_DST_UNREACH_ADDR, 0, ifp);
858				}
859			}
860		}
861		break;
862	case ND6_LLINFO_REACHABLE:
863		if (!ND6_LLINFO_PERMANENT(ln))
864			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
865		break;
866
867	case ND6_LLINFO_STALE:
868		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
869			/*
870			 * No packet has used this entry and GC timeout
871			 * has not been passed. Reschedule timer and
872			 * return.
873			 */
874			nd6_llinfo_settimer_locked(ln, delay);
875			break;
876		}
877
878		if (do_switch == 0) {
879			/*
880			 * GC timer has ended and entry hasn't been used.
881			 * Run Garbage collector (RFC 4861, 5.3)
882			 */
883			if (!ND6_LLINFO_PERMANENT(ln))
884				nd6_free(&ln, 1);
885			break;
886		}
887
888		/* Entry has been used AND delay timer has ended. */
889
890		/* FALLTHROUGH */
891
892	case ND6_LLINFO_DELAY:
893		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
894			/* We need NUD */
895			ln->la_asked = 1;
896			nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
897			send_ns = 1;
898		} else
899			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
900		break;
901	case ND6_LLINFO_PROBE:
902		if (ln->la_asked < V_nd6_umaxtries) {
903			ln->la_asked++;
904			send_ns = 1;
905		} else {
906			nd6_free(&ln, 0);
907		}
908		break;
909	default:
910		panic("%s: paths in a dark night can be confusing: %d",
911		    __func__, ln->ln_state);
912	}
913done:
914	if (ln != NULL)
915		ND6_RUNLOCK();
916	if (send_ns != 0) {
917		nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
918		psrc = nd6_llinfo_get_holdsrc(ln, &src);
919		LLE_FREE_LOCKED(ln);
920		ln = NULL;
921		nd6_ns_output(ifp, psrc, pdst, dst, NULL);
922	}
923
924	if (ln != NULL)
925		LLE_FREE_LOCKED(ln);
926	NET_EPOCH_EXIT(et);
927	CURVNET_RESTORE();
928}
929
930/*
931 * ND6 timer routine to expire default route list and prefix list
932 */
933void
934nd6_timer(void *arg)
935{
936	CURVNET_SET((struct vnet *) arg);
937	struct epoch_tracker et;
938	struct nd_prhead prl;
939	struct nd_prefix *pr, *npr;
940	struct ifnet *ifp;
941	struct in6_ifaddr *ia6, *nia6;
942	uint64_t genid;
943
944	LIST_INIT(&prl);
945
946	NET_EPOCH_ENTER(et);
947	nd6_defrouter_timer();
948
949	/*
950	 * expire interface addresses.
951	 * in the past the loop was inside prefix expiry processing.
952	 * However, from a stricter speci-confrmance standpoint, we should
953	 * rather separate address lifetimes and prefix lifetimes.
954	 *
955	 * XXXRW: in6_ifaddrhead locking.
956	 */
957  addrloop:
958	CK_STAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
959		/* check address lifetime */
960		if (IFA6_IS_INVALID(ia6)) {
961			int regen = 0;
962
963			/*
964			 * If the expiring address is temporary, try
965			 * regenerating a new one.  This would be useful when
966			 * we suspended a laptop PC, then turned it on after a
967			 * period that could invalidate all temporary
968			 * addresses.  Although we may have to restart the
969			 * loop (see below), it must be after purging the
970			 * address.  Otherwise, we'd see an infinite loop of
971			 * regeneration.
972			 */
973			if (V_ip6_use_tempaddr &&
974			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
975				if (regen_tmpaddr(ia6) == 0)
976					regen = 1;
977			}
978
979			in6_purgeaddr(&ia6->ia_ifa);
980
981			if (regen)
982				goto addrloop; /* XXX: see below */
983		} else if (IFA6_IS_DEPRECATED(ia6)) {
984			int oldflags = ia6->ia6_flags;
985
986			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
987
988			/*
989			 * If a temporary address has just become deprecated,
990			 * regenerate a new one if possible.
991			 */
992			if (V_ip6_use_tempaddr &&
993			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
994			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
995				if (regen_tmpaddr(ia6) == 0) {
996					/*
997					 * A new temporary address is
998					 * generated.
999					 * XXX: this means the address chain
1000					 * has changed while we are still in
1001					 * the loop.  Although the change
1002					 * would not cause disaster (because
1003					 * it's not a deletion, but an
1004					 * addition,) we'd rather restart the
1005					 * loop just for safety.  Or does this
1006					 * significantly reduce performance??
1007					 */
1008					goto addrloop;
1009				}
1010			}
1011		} else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
1012			/*
1013			 * Schedule DAD for a tentative address.  This happens
1014			 * if the interface was down or not running
1015			 * when the address was configured.
1016			 */
1017			int delay;
1018
1019			delay = arc4random() %
1020			    (MAX_RTR_SOLICITATION_DELAY * hz);
1021			nd6_dad_start((struct ifaddr *)ia6, delay);
1022		} else {
1023			/*
1024			 * Check status of the interface.  If it is down,
1025			 * mark the address as tentative for future DAD.
1026			 */
1027			ifp = ia6->ia_ifp;
1028			if ((ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0 &&
1029			    ((ifp->if_flags & IFF_UP) == 0 ||
1030			    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1031			    (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)){
1032				ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
1033				ia6->ia6_flags |= IN6_IFF_TENTATIVE;
1034			}
1035
1036			/*
1037			 * A new RA might have made a deprecated address
1038			 * preferred.
1039			 */
1040			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1041		}
1042	}
1043	NET_EPOCH_EXIT(et);
1044
1045	ND6_WLOCK();
1046restart:
1047	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
1048		/*
1049		 * Expire prefixes. Since the pltime is only used for
1050		 * autoconfigured addresses, pltime processing for prefixes is
1051		 * not necessary.
1052		 *
1053		 * Only unlink after all derived addresses have expired. This
1054		 * may not occur until two hours after the prefix has expired
1055		 * per RFC 4862. If the prefix expires before its derived
1056		 * addresses, mark it off-link. This will be done automatically
1057		 * after unlinking if no address references remain.
1058		 */
1059		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME ||
1060		    time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime)
1061			continue;
1062
1063		if (pr->ndpr_addrcnt == 0) {
1064			nd6_prefix_unlink(pr, &prl);
1065			continue;
1066		}
1067		if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1068			genid = V_nd6_list_genid;
1069			nd6_prefix_ref(pr);
1070			ND6_WUNLOCK();
1071			ND6_ONLINK_LOCK();
1072			(void)nd6_prefix_offlink(pr);
1073			ND6_ONLINK_UNLOCK();
1074			ND6_WLOCK();
1075			nd6_prefix_rele(pr);
1076			if (genid != V_nd6_list_genid)
1077				goto restart;
1078		}
1079	}
1080	ND6_WUNLOCK();
1081
1082	while ((pr = LIST_FIRST(&prl)) != NULL) {
1083		LIST_REMOVE(pr, ndpr_entry);
1084		nd6_prefix_del(pr);
1085	}
1086
1087	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
1088	    nd6_timer, curvnet);
1089
1090	CURVNET_RESTORE();
1091}
1092
1093/*
1094 * ia6 - deprecated/invalidated temporary address
1095 */
1096static int
1097regen_tmpaddr(struct in6_ifaddr *ia6)
1098{
1099	struct ifaddr *ifa;
1100	struct ifnet *ifp;
1101	struct in6_ifaddr *public_ifa6 = NULL;
1102
1103	NET_EPOCH_ASSERT();
1104
1105	ifp = ia6->ia_ifa.ifa_ifp;
1106	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1107		struct in6_ifaddr *it6;
1108
1109		if (ifa->ifa_addr->sa_family != AF_INET6)
1110			continue;
1111
1112		it6 = (struct in6_ifaddr *)ifa;
1113
1114		/* ignore no autoconf addresses. */
1115		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1116			continue;
1117
1118		/* ignore autoconf addresses with different prefixes. */
1119		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
1120			continue;
1121
1122		/*
1123		 * Now we are looking at an autoconf address with the same
1124		 * prefix as ours.  If the address is temporary and is still
1125		 * preferred, do not create another one.  It would be rare, but
1126		 * could happen, for example, when we resume a laptop PC after
1127		 * a long period.
1128		 */
1129		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1130		    !IFA6_IS_DEPRECATED(it6)) {
1131			public_ifa6 = NULL;
1132			break;
1133		}
1134
1135		/*
1136		 * This is a public autoconf address that has the same prefix
1137		 * as ours.  If it is preferred, keep it.  We can't break the
1138		 * loop here, because there may be a still-preferred temporary
1139		 * address with the prefix.
1140		 */
1141		if (!IFA6_IS_DEPRECATED(it6))
1142			public_ifa6 = it6;
1143	}
1144	if (public_ifa6 != NULL)
1145		ifa_ref(&public_ifa6->ia_ifa);
1146
1147	if (public_ifa6 != NULL) {
1148		int e;
1149
1150		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
1151			ifa_free(&public_ifa6->ia_ifa);
1152			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1153			    " tmp addr,errno=%d\n", e);
1154			return (-1);
1155		}
1156		ifa_free(&public_ifa6->ia_ifa);
1157		return (0);
1158	}
1159
1160	return (-1);
1161}
1162
1163/*
1164 * Remove prefix and default router list entries corresponding to ifp. Neighbor
1165 * cache entries are freed in in6_domifdetach().
1166 */
1167void
1168nd6_purge(struct ifnet *ifp)
1169{
1170	struct nd_prhead prl;
1171	struct nd_prefix *pr, *npr;
1172
1173	LIST_INIT(&prl);
1174
1175	/* Purge default router list entries toward ifp. */
1176	nd6_defrouter_purge(ifp);
1177
1178	ND6_WLOCK();
1179	/*
1180	 * Remove prefixes on ifp. We should have already removed addresses on
1181	 * this interface, so no addresses should be referencing these prefixes.
1182	 */
1183	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
1184		if (pr->ndpr_ifp == ifp)
1185			nd6_prefix_unlink(pr, &prl);
1186	}
1187	ND6_WUNLOCK();
1188
1189	/* Delete the unlinked prefix objects. */
1190	while ((pr = LIST_FIRST(&prl)) != NULL) {
1191		LIST_REMOVE(pr, ndpr_entry);
1192		nd6_prefix_del(pr);
1193	}
1194
1195	/* cancel default outgoing interface setting */
1196	if (V_nd6_defifindex == ifp->if_index)
1197		nd6_setdefaultiface(0);
1198
1199	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
1200		/* Refresh default router list. */
1201		defrouter_select_fib(ifp->if_fib);
1202	}
1203}
1204
1205/*
1206 * the caller acquires and releases the lock on the lltbls
1207 * Returns the llentry locked
1208 */
1209struct llentry *
1210nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
1211{
1212	struct sockaddr_in6 sin6;
1213	struct llentry *ln;
1214
1215	bzero(&sin6, sizeof(sin6));
1216	sin6.sin6_len = sizeof(struct sockaddr_in6);
1217	sin6.sin6_family = AF_INET6;
1218	sin6.sin6_addr = *addr6;
1219
1220	IF_AFDATA_LOCK_ASSERT(ifp);
1221
1222	ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
1223
1224	return (ln);
1225}
1226
1227static struct llentry *
1228nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
1229{
1230	struct sockaddr_in6 sin6;
1231	struct llentry *ln;
1232
1233	bzero(&sin6, sizeof(sin6));
1234	sin6.sin6_len = sizeof(struct sockaddr_in6);
1235	sin6.sin6_family = AF_INET6;
1236	sin6.sin6_addr = *addr6;
1237
1238	ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
1239	if (ln != NULL)
1240		ln->ln_state = ND6_LLINFO_NOSTATE;
1241
1242	return (ln);
1243}
1244
1245/*
1246 * Test whether a given IPv6 address can be a neighbor.
1247 */
1248static bool
1249nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
1250{
1251
1252	/*
1253	 * A link-local address is always a neighbor.
1254	 * XXX: a link does not necessarily specify a single interface.
1255	 */
1256	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
1257		struct sockaddr_in6 sin6_copy;
1258		u_int32_t zone;
1259
1260		/*
1261		 * We need sin6_copy since sa6_recoverscope() may modify the
1262		 * content (XXX).
1263		 */
1264		sin6_copy = *addr;
1265		if (sa6_recoverscope(&sin6_copy))
1266			return (0); /* XXX: should be impossible */
1267		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
1268			return (0);
1269		if (sin6_copy.sin6_scope_id == zone)
1270			return (1);
1271		else
1272			return (0);
1273	}
1274	/* Checking global unicast */
1275
1276	/* If an address is directly reachable, it is a neigbor */
1277	struct nhop_object *nh;
1278	nh = fib6_lookup(ifp->if_fib, &addr->sin6_addr, 0, NHR_NONE, 0);
1279	if (nh != NULL && nh->nh_aifp == ifp && (nh->nh_flags & NHF_GATEWAY) == 0)
1280		return (true);
1281
1282	/*
1283	 * Check prefixes with desired on-link state, as some may be not
1284	 * installed in the routing table.
1285	 */
1286	bool matched = false;
1287	struct nd_prefix *pr;
1288	ND6_RLOCK();
1289	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1290		if (pr->ndpr_ifp != ifp)
1291			continue;
1292		if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1293			continue;
1294		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1295		    &addr->sin6_addr, &pr->ndpr_mask)) {
1296			matched = true;
1297			break;
1298		}
1299	}
1300	ND6_RUNLOCK();
1301	if (matched)
1302		return (true);
1303
1304	/*
1305	 * If the address is assigned on the node of the other side of
1306	 * a p2p interface, the address should be a neighbor.
1307	 */
1308	if (ifp->if_flags & IFF_POINTOPOINT) {
1309		struct ifaddr *ifa;
1310
1311		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1312			if (ifa->ifa_addr->sa_family != addr->sin6_family)
1313				continue;
1314			if (ifa->ifa_dstaddr != NULL &&
1315			    sa_equal(addr, ifa->ifa_dstaddr)) {
1316				return (true);
1317			}
1318		}
1319	}
1320
1321	/*
1322	 * If the default router list is empty, all addresses are regarded
1323	 * as on-link, and thus, as a neighbor.
1324	 */
1325	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
1326	    nd6_defrouter_list_empty() &&
1327	    V_nd6_defifindex == ifp->if_index) {
1328		return (1);
1329	}
1330
1331	return (0);
1332}
1333
1334/*
1335 * Detect if a given IPv6 address identifies a neighbor on a given link.
1336 * XXX: should take care of the destination of a p2p link?
1337 */
1338int
1339nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
1340{
1341	struct llentry *lle;
1342	int rc = 0;
1343
1344	NET_EPOCH_ASSERT();
1345	IF_AFDATA_UNLOCK_ASSERT(ifp);
1346	if (nd6_is_new_addr_neighbor(addr, ifp))
1347		return (1);
1348
1349	/*
1350	 * Even if the address matches none of our addresses, it might be
1351	 * in the neighbor cache.
1352	 */
1353	if ((lle = nd6_lookup(&addr->sin6_addr, LLE_SF(AF_INET6, 0), ifp)) != NULL) {
1354		LLE_RUNLOCK(lle);
1355		rc = 1;
1356	}
1357	return (rc);
1358}
1359
1360static __noinline void
1361nd6_free_children(struct llentry *lle)
1362{
1363	struct llentry *child_lle;
1364
1365	NET_EPOCH_ASSERT();
1366	LLE_WLOCK_ASSERT(lle);
1367
1368	while ((child_lle = CK_SLIST_FIRST(&lle->lle_children)) != NULL) {
1369		LLE_WLOCK(child_lle);
1370		lltable_unlink_child_entry(child_lle);
1371		llentry_free(child_lle);
1372	}
1373}
1374
1375/*
1376 * Tries to update @lle address/prepend data with new @lladdr.
1377 *
1378 * Returns true on success.
1379 * In any case, @lle is returned wlocked.
1380 */
1381static __noinline bool
1382nd6_try_set_entry_addr_locked(struct ifnet *ifp, struct llentry *lle, char *lladdr)
1383{
1384	u_char buf[LLE_MAX_LINKHDR];
1385	int fam, off;
1386	size_t sz;
1387
1388	sz = sizeof(buf);
1389	if (lltable_calc_llheader(ifp, AF_INET6, lladdr, buf, &sz, &off) != 0)
1390		return (false);
1391
1392	/* Update data */
1393	lltable_set_entry_addr(ifp, lle, buf, sz, off);
1394
1395	struct llentry *child_lle;
1396	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
1397		LLE_WLOCK(child_lle);
1398		fam = child_lle->r_family;
1399		sz = sizeof(buf);
1400		if (lltable_calc_llheader(ifp, fam, lladdr, buf, &sz, &off) == 0) {
1401			/* success */
1402			lltable_set_entry_addr(ifp, child_lle, buf, sz, off);
1403			child_lle->ln_state = ND6_LLINFO_REACHABLE;
1404		}
1405		LLE_WUNLOCK(child_lle);
1406	}
1407
1408	return (true);
1409}
1410
/*
 * Record @lladdr in @lle (and its children) on behalf of a caller that
 * holds only the entry's write lock.
 *
 * lltable_acquire_wlock() is used to obtain the interface's AF data write
 * lock, which is released again before returning.  Returns false if that
 * acquisition fails or if the update itself fails, true on success.
 * Must be called within the network epoch with @lle write-locked.
 */
bool
nd6_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, char *lladdr)
{
	bool updated;

	NET_EPOCH_ASSERT();
	LLE_WLOCK_ASSERT(lle);

	if (!lltable_acquire_wlock(ifp, lle))
		return (false);

	updated = nd6_try_set_entry_addr_locked(ifp, lle, lladdr);
	IF_AFDATA_WUNLOCK(ifp);

	return (updated);
}
1424
/*
 * Free an nd6 llinfo entry.
 * Since the function would cause significant changes in the kernel, DO NOT
 * make it global, unless you have a strong reason for the change, and are sure
 * that the change is safe.
 *
 * lnp - address of the caller's entry pointer; it is set to NULL here so the
 *       caller cannot use the entry afterwards.
 * gc  - non-zero when the entry is being removed merely as garbage
 *       collection.  In that case a STALE entry that belongs to a default
 *       router with a non-zero expiry is kept and its timer re-armed.
 *
 * Locking: entered with the entry write-locked and the ND6 read lock held.
 * The ND6 read lock is released here.  The entry lock is released on every
 * path before returning.
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline void
nd6_free(struct llentry **lnp, int gc)
{
	struct ifnet *ifp;
	struct llentry *ln;
	struct nd_defrouter *dr;

	/* Take the entry away from the caller. */
	ln = *lnp;
	*lnp = NULL;

	LLE_WLOCK_ASSERT(ln);
	ND6_RLOCK_ASSERT();

	KASSERT((ln->la_flags & LLE_CHILD) == 0, ("child lle"));

	/*
	 * Find out whether this neighbor is a default router while the ND6
	 * lock is still held.  A non-NULL dr is released with
	 * defrouter_rele() on every exit path below.
	 */
	ifp = lltable_get_ifp(ln->lle_tbl);
	if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
		dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
	else
		dr = NULL;
	ND6_RUNLOCK();

	/* Announce the expiry unless the entry was already marked deleted. */
	if ((ln->la_flags & LLE_DELETED) == 0)
		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);

	/*
	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
	 * even though it is not harmful, it was not really necessary.
	 */

	/* cancel timer */
	nd6_llinfo_settimer_locked(ln, -1);

	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		if (dr != NULL && dr->expire &&
		    ln->ln_state == ND6_LLINFO_STALE && gc) {
			/*
			 * If the reason for the deletion is just garbage
			 * collection, and the neighbor is an active default
			 * router, do not delete it.  Instead, reset the GC
			 * timer using the router's lifetime.
			 * Simply deleting the entry would affect default
			 * router selection, which is not necessarily a good
			 * thing, especially when we're using router preference
			 * values.
			 * XXX: the check for ln_state would be redundant,
			 *      but we intentionally keep it just in case.
			 */
			if (dr->expire > time_uptime)
				nd6_llinfo_settimer_locked(ln,
				    (dr->expire - time_uptime) * hz);
			else
				nd6_llinfo_settimer_locked(ln,
				    (long)V_nd6_gctimer * hz);

			/* The entry survives: drop our ref, lock and dr. */
			LLE_REMREF(ln);
			LLE_WUNLOCK(ln);
			defrouter_rele(dr);
			return;
		}

		if (dr) {
			/*
			 * Unreachability of a router might affect the default
			 * router selection and on-link detection of advertised
			 * prefixes.
			 */

			/*
			 * Temporarily fake the state to choose a new default
			 * router and to perform on-link determination of
			 * prefixes correctly.
			 * Below the state will be set correctly,
			 * or the entry itself will be deleted.
			 */
			ln->ln_state = ND6_LLINFO_INCOMPLETE;
		}

		if (ln->ln_router || dr) {
			/*
			 * We need to unlock to avoid a LOR with rt6_flush() with the
			 * rnh and for the calls to pfxlist_onlink_check() and
			 * defrouter_select_fib() in the block further down for calls
			 * into nd6_lookup().  We still hold a ref.
			 */
			LLE_WUNLOCK(ln);

			/*
			 * rt6_flush must be called whether or not the neighbor
			 * is in the Default Router List.
			 * See a corresponding comment in nd6_na_input().
			 */
			rt6_flush(&ln->r_l3addr.addr6, ifp);
		}

		if (dr) {
			/*
			 * Since defrouter_select_fib() does not affect the
			 * on-link determination and MIP6 needs the check
			 * before the default router selection, we perform
			 * the check now.
			 */
			pfxlist_onlink_check();

			/*
			 * Refresh default router list.
			 */
			defrouter_select_fib(dr->ifp->if_fib);
		}

		/*
		 * If this entry was added by an on-link redirect, remove the
		 * corresponding host route.
		 */
		if (ln->la_flags & LLE_REDIRECT)
			nd6_free_redirect(ln);

		/* Re-take the entry lock dropped above for rt6_flush(). */
		if (ln->ln_router || dr)
			LLE_WLOCK(ln);
	}

	/*
	 * Safe to unlock. We still hold an extra reference and will not
	 * free(9) in llentry_free() if someone else holds one as well.
	 * The entry lock is dropped and re-taken so that the interface's
	 * AF data lock is acquired before it.
	 */
	LLE_WUNLOCK(ln);
	IF_AFDATA_LOCK(ifp);
	LLE_WLOCK(ln);
	/* Guard against race with other llentry_free(). */
	if (ln->la_flags & LLE_LINKED) {
		/* Remove callout reference */
		LLE_REMREF(ln);
		lltable_unlink_entry(ln->lle_tbl, ln);
	}
	IF_AFDATA_UNLOCK(ifp);

	/* Children go first, then the entry itself. */
	nd6_free_children(ln);

	llentry_free(ln);
	if (dr != NULL)
		defrouter_rele(dr);
}
1575
1576static int
1577nd6_isdynrte(const struct rtentry *rt, const struct nhop_object *nh, void *xap)
1578{
1579
1580	if (nh->nh_flags & NHF_REDIRECT)
1581		return (1);
1582
1583	return (0);
1584}
1585
1586/*
1587 * Remove the rtentry for the given llentry,
1588 * both of which were installed by a redirect.
1589 */
1590static void
1591nd6_free_redirect(const struct llentry *ln)
1592{
1593	int fibnum;
1594	struct sockaddr_in6 sin6;
1595	struct rib_cmd_info rc;
1596	struct epoch_tracker et;
1597
1598	lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
1599
1600	NET_EPOCH_ENTER(et);
1601	for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
1602		rib_del_route_px(fibnum, (struct sockaddr *)&sin6, 128,
1603		    nd6_isdynrte, NULL, 0, &rc);
1604	NET_EPOCH_EXIT(et);
1605}
1606
1607/*
1608 * Updates status of the default router route.
1609 */
1610static void
1611check_release_defrouter(const struct rib_cmd_info *rc, void *_cbdata)
1612{
1613	struct nd_defrouter *dr;
1614	struct nhop_object *nh;
1615
1616	nh = rc->rc_nh_old;
1617
1618	if ((nh != NULL) && (nh->nh_flags & NHF_DEFAULT)) {
1619		dr = defrouter_lookup(&nh->gw6_sa.sin6_addr, nh->nh_ifp);
1620		if (dr != NULL) {
1621			dr->installed = 0;
1622			defrouter_rele(dr);
1623		}
1624	}
1625}
1626
/*
 * Routing table change callback: forwards the change described by `rc' to
 * check_release_defrouter().  With ROUTE_MPATH the notification is first run
 * through rib_decompose_notification(), which is given
 * check_release_defrouter() as its callback.  `rnh' and `arg' are unused.
 */
void
nd6_subscription_cb(struct rib_head *rnh, struct rib_cmd_info *rc, void *arg)
{

#ifdef ROUTE_MPATH
	rib_decompose_notification(rc, check_release_defrouter, NULL);
#else
	check_release_defrouter(rc, NULL);
#endif
}
1637
1638int
1639nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
1640{
1641	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1642	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1643	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1644	struct epoch_tracker et;
1645	int error = 0;
1646
1647	if (ifp->if_afdata[AF_INET6] == NULL)
1648		return (EPFNOSUPPORT);
1649	switch (cmd) {
1650	case OSIOCGIFINFO_IN6:
1651#define ND	ndi->ndi
1652		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
1653		bzero(&ND, sizeof(ND));
1654		ND.linkmtu = IN6_LINKMTU(ifp);
1655		ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
1656		ND.basereachable = ND_IFINFO(ifp)->basereachable;
1657		ND.reachable = ND_IFINFO(ifp)->reachable;
1658		ND.retrans = ND_IFINFO(ifp)->retrans;
1659		ND.flags = ND_IFINFO(ifp)->flags;
1660		ND.recalctm = ND_IFINFO(ifp)->recalctm;
1661		ND.chlim = ND_IFINFO(ifp)->chlim;
1662		break;
1663	case SIOCGIFINFO_IN6:
1664		ND = *ND_IFINFO(ifp);
1665		break;
1666	case SIOCSIFINFO_IN6:
1667		/*
1668		 * used to change host variables from userland.
1669		 * intended for a use on router to reflect RA configurations.
1670		 */
1671		/* 0 means 'unspecified' */
1672		if (ND.linkmtu != 0) {
1673			if (ND.linkmtu < IPV6_MMTU ||
1674			    ND.linkmtu > IN6_LINKMTU(ifp)) {
1675				error = EINVAL;
1676				break;
1677			}
1678			ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
1679		}
1680
1681		if (ND.basereachable != 0) {
1682			int obasereachable = ND_IFINFO(ifp)->basereachable;
1683
1684			ND_IFINFO(ifp)->basereachable = ND.basereachable;
1685			if (ND.basereachable != obasereachable)
1686				ND_IFINFO(ifp)->reachable =
1687				    ND_COMPUTE_RTIME(ND.basereachable);
1688		}
1689		if (ND.retrans != 0)
1690			ND_IFINFO(ifp)->retrans = ND.retrans;
1691		if (ND.chlim != 0)
1692			ND_IFINFO(ifp)->chlim = ND.chlim;
1693		/* FALLTHROUGH */
1694	case SIOCSIFINFO_FLAGS:
1695	{
1696		struct ifaddr *ifa;
1697		struct in6_ifaddr *ia;
1698
1699		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
1700		    !(ND.flags & ND6_IFF_IFDISABLED)) {
1701			/* ifdisabled 1->0 transision */
1702
1703			/*
1704			 * If the interface is marked as ND6_IFF_IFDISABLED and
1705			 * has an link-local address with IN6_IFF_DUPLICATED,
1706			 * do not clear ND6_IFF_IFDISABLED.
1707			 * See RFC 4862, Section 5.4.5.
1708			 */
1709			NET_EPOCH_ENTER(et);
1710			CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1711				if (ifa->ifa_addr->sa_family != AF_INET6)
1712					continue;
1713				ia = (struct in6_ifaddr *)ifa;
1714				if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
1715				    IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
1716					break;
1717			}
1718			NET_EPOCH_EXIT(et);
1719
1720			if (ifa != NULL) {
1721				/* LLA is duplicated. */
1722				ND.flags |= ND6_IFF_IFDISABLED;
1723				log(LOG_ERR, "Cannot enable an interface"
1724				    " with a link-local address marked"
1725				    " duplicate.\n");
1726			} else {
1727				ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED;
1728				if (ifp->if_flags & IFF_UP)
1729					in6_if_up(ifp);
1730			}
1731		} else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
1732			    (ND.flags & ND6_IFF_IFDISABLED)) {
1733			/* ifdisabled 0->1 transision */
1734			/* Mark all IPv6 address as tentative. */
1735
1736			ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
1737			if (V_ip6_dad_count > 0 &&
1738			    (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
1739				NET_EPOCH_ENTER(et);
1740				CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead,
1741				    ifa_link) {
1742					if (ifa->ifa_addr->sa_family !=
1743					    AF_INET6)
1744						continue;
1745					ia = (struct in6_ifaddr *)ifa;
1746					ia->ia6_flags |= IN6_IFF_TENTATIVE;
1747				}
1748				NET_EPOCH_EXIT(et);
1749			}
1750		}
1751
1752		if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
1753			if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) {
1754				/* auto_linklocal 0->1 transision */
1755
1756				/* If no link-local address on ifp, configure */
1757				ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
1758				in6_ifattach(ifp, NULL);
1759			} else if (!(ND.flags & ND6_IFF_IFDISABLED) &&
1760			    ifp->if_flags & IFF_UP) {
1761				/*
1762				 * When the IF already has
1763				 * ND6_IFF_AUTO_LINKLOCAL, no link-local
1764				 * address is assigned, and IFF_UP, try to
1765				 * assign one.
1766				 */
1767				NET_EPOCH_ENTER(et);
1768				CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead,
1769				    ifa_link) {
1770					if (ifa->ifa_addr->sa_family !=
1771					    AF_INET6)
1772						continue;
1773					ia = (struct in6_ifaddr *)ifa;
1774					if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
1775						break;
1776				}
1777				NET_EPOCH_EXIT(et);
1778				if (ifa != NULL)
1779					/* No LLA is configured. */
1780					in6_ifattach(ifp, NULL);
1781			}
1782		}
1783		ND_IFINFO(ifp)->flags = ND.flags;
1784		break;
1785	}
1786#undef ND
1787	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1788		/* sync kernel routing table with the default router list */
1789		defrouter_reset();
1790		defrouter_select_fib(RT_ALL_FIBS);
1791		break;
1792	case SIOCSPFXFLUSH_IN6:
1793	{
1794		/* flush all the prefix advertised by routers */
1795		struct in6_ifaddr *ia, *ia_next;
1796		struct nd_prefix *pr, *next;
1797		struct nd_prhead prl;
1798
1799		LIST_INIT(&prl);
1800
1801		ND6_WLOCK();
1802		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
1803			if (pr->ndpr_raf_ra_derived)
1804				nd6_prefix_unlink(pr, &prl);
1805		}
1806		ND6_WUNLOCK();
1807
1808		while ((pr = LIST_FIRST(&prl)) != NULL) {
1809			LIST_REMOVE(pr, ndpr_entry);
1810			/* XXXRW: in6_ifaddrhead locking. */
1811			CK_STAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
1812			    ia_next) {
1813				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1814					continue;
1815
1816				if (ia->ia6_ndpr == pr)
1817					in6_purgeaddr(&ia->ia_ifa);
1818			}
1819			nd6_prefix_del(pr);
1820		}
1821		break;
1822	}
1823	case SIOCSRTRFLUSH_IN6:
1824	{
1825		/* flush all the default routers */
1826
1827		defrouter_reset();
1828		nd6_defrouter_flush_all();
1829		defrouter_select_fib(RT_ALL_FIBS);
1830		break;
1831	}
1832	case SIOCGNBRINFO_IN6:
1833	{
1834		struct llentry *ln;
1835		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1836
1837		if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
1838			return (error);
1839
1840		NET_EPOCH_ENTER(et);
1841		ln = nd6_lookup(&nb_addr, LLE_SF(AF_INET6, 0), ifp);
1842		NET_EPOCH_EXIT(et);
1843
1844		if (ln == NULL) {
1845			error = EINVAL;
1846			break;
1847		}
1848		nbi->state = ln->ln_state;
1849		nbi->asked = ln->la_asked;
1850		nbi->isrouter = ln->ln_router;
1851		if (ln->la_expire == 0)
1852			nbi->expire = 0;
1853		else
1854			nbi->expire = ln->la_expire + ln->lle_remtime / hz +
1855			    (time_second - time_uptime);
1856		LLE_RUNLOCK(ln);
1857		break;
1858	}
1859	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1860		ndif->ifindex = V_nd6_defifindex;
1861		break;
1862	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1863		return (nd6_setdefaultiface(ndif->ifindex));
1864	}
1865	return (error);
1866}
1867
/*
 * Calculates new isRouter value based on provided parameters and
 * returns it.
 *
 * type      - ICMP6 type of the received message (only the low 8 bits are
 *             examined)
 * code      - ICMP6 code; only consulted for redirects
 * is_new    - non-zero if the neighbor cache entry was just created
 * old_addr  - non-zero if the entry already had a link-layer address
 * new_addr  - non-zero if the message carried a link-layer address
 * ln_router - current isRouter value; returned unchanged whenever no rule
 *             below applies
 *
 * Rules per message type (c = clear, s = set, - = leave unchanged):
 *
 *   is_new old_addr new_addr       NS  RS  RA  redir(host)  redir(router)
 *	0	n	n	(1)  -   c   -       -            s
 *	0	y	n	(2)  -   c   s       -            s
 *	0	n	y	(3)  -   c   s       -            s
 *	0	y	y	(4)  -   c   s       -            s
 *	0	y	y	(5)  -   c   s       -            s
 *	1	--	n	(6)  c   c   -       c            s
 *	1	--	y	(7)  c   c   s       c            s
 *
 * RA case, (1):
 * The spec says that IsRouter must be set if a lladdr exists (1-5) or if the
 * entry is an old one (7).  Case (1) has no lladdr in the neighbor cache,
 * which makes it similar to (6); it is rare, but we figured that we MUST NOT
 * set IsRouter there.
 */
static int
nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr,
    int ln_router)
{
	const int icmp6_type = type & 0xff;

	/* RS: the sender is never a router. */
	if (icmp6_type == ND_ROUTER_SOLICIT)
		return (0);

	/* NS: a new entry must have the flag cleared.  (6-7) */
	if (icmp6_type == ND_NEIGHBOR_SOLICIT)
		return (is_new ? 0 : ln_router);

	/*
	 * Redirect: a redirect to a better router always sets the flag.
	 * Otherwise the flag is cleared only for a newly created entry.
	 * [RFC 2461, sec 8.3]
	 */
	if (icmp6_type == ND_REDIRECT) {
		if (code == ND_REDIRECT_ROUTER)
			return (1);
		return (is_new ? 0 : ln_router);	/* (6-7) */
	}

	/* RA: mark an entry that has (or gets) a lladdr as a router. */
	if (icmp6_type == ND_ROUTER_ADVERT) {
		/* (7) for a new entry, (2-5) for an existing one */
		if (is_new ? new_addr : (old_addr || new_addr))
			return (1);
		return (ln_router);
	}

	return (ln_router);
}
1946
/*
 * Create neighbor cache entry and cache link-layer address,
 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
 *
 * ifp       - interface the packet was received on
 * from      - address of the neighbor; nothing is done for the unspecified
 *             address
 * lladdr    - link-layer address carried by the packet, or NULL if none
 * lladdrlen - not used in this function; length validation is left to the
 *             caller (see below)
 * type - ICMP6 type
 * code - type dependent information
 *
 * Must be called within the network epoch and without the interface's
 * AF data lock held.
 */
void
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
    int lladdrlen, int type, int code)
{
	struct llentry *ln = NULL, *ln_tmp;
	int is_newentry;
	int do_update;
	int olladdr;
	int llchange;
	int flags;
	uint16_t router = 0;
	struct mbuf *chain = NULL;
	u_char linkhdr[LLE_MAX_LINKHDR];
	size_t linkhdrsize;
	int lladdr_off;

	NET_EPOCH_ASSERT();
	IF_AFDATA_UNLOCK_ASSERT(ifp);

	KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__));
	KASSERT(from != NULL, ("%s: from == NULL", __func__));

	/* nothing must be updated for unspecified address */
	if (IN6_IS_ADDR_UNSPECIFIED(from))
		return;

	/*
	 * Validation about ifp->if_addrlen and lladdrlen must be done in
	 * the caller.
	 *
	 * XXX If the link does not have link-layer address, what should
	 * we do? (ifp->if_addrlen == 0)
	 * Spec says nothing in sections for RA, RS and NA.  There's small
	 * description on it in NS section (RFC 2461 7.2.3).
	 */
	/*
	 * `flags' records which lock mode the entry is held in, so that the
	 * matching unlock can be issued later: exclusive when there is an
	 * address to record or when the entry is created here.
	 */
	flags = lladdr ? LLE_EXCLUSIVE : 0;
	ln = nd6_lookup(from, LLE_SF(AF_INET6, flags), ifp);
	is_newentry = 0;
	if (ln == NULL) {
		flags |= LLE_EXCLUSIVE;
		ln = nd6_alloc(from, 0, ifp);
		if (ln == NULL)
			return;

		/*
		 * Since we already know all the data for the new entry,
		 * fill it before insertion.
		 */
		if (lladdr != NULL) {
			linkhdrsize = sizeof(linkhdr);
			if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
			    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
				lltable_free_entry(LLTABLE6(ifp), ln);
				return;
			}
			lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
			    lladdr_off);
		}

		/*
		 * Look the address up once more under the table write lock,
		 * and link the new entry only if there is still none.
		 */
		IF_AFDATA_WLOCK(ifp);
		LLE_WLOCK(ln);
		/* Prefer any existing lle over newly-created one */
		ln_tmp = nd6_lookup(from, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
		if (ln_tmp == NULL)
			lltable_link_entry(LLTABLE6(ifp), ln);
		IF_AFDATA_WUNLOCK(ifp);
		if (ln_tmp == NULL) {
			/* No existing lle, mark as new entry (6,7) */
			is_newentry = 1;
			if (lladdr != NULL) {	/* (7) */
				nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
				EVENTHANDLER_INVOKE(lle_event, ln,
				    LLENTRY_RESOLVED);
			}
		} else {
			/* Discard our entry and continue with the existing one. */
			lltable_free_entry(LLTABLE6(ifp), ln);
			ln = ln_tmp;
			ln_tmp = NULL;
		}
	}
	/* do nothing if static ndp is set */
	if ((ln->la_flags & LLE_STATIC)) {
		if (flags & LLE_EXCLUSIVE)
			LLE_WUNLOCK(ln);
		else
			LLE_RUNLOCK(ln);
		return;
	}

	/*
	 * olladdr:  the entry already holds a valid link-layer address.
	 * llchange: the packet's address differs from what the entry holds
	 *           (always true if the entry held none).
	 */
	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
	if (olladdr && lladdr) {
		llchange = bcmp(lladdr, ln->ll_addr,
		    ifp->if_addrlen);
	} else if (!olladdr && lladdr)
		llchange = 1;
	else
		llchange = 0;

	/*
	 * newentry olladdr  lladdr  llchange	(*=record)
	 *	0	n	n	--	(1)
	 *	0	y	n	--	(2)
	 *	0	n	y	y	(3) * STALE
	 *	0	y	y	n	(4) *
	 *	0	y	y	y	(5) * STALE
	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
	 *	1	--	y	--	(7) * STALE
	 */

	do_update = 0;
	if (is_newentry == 0 && llchange != 0) {
		do_update = 1;	/* (3,5) */

		/*
		 * Record source link-layer address
		 * XXX is it dependent to ifp->if_type?
		 */
		if (!nd6_try_set_entry_addr(ifp, ln, lladdr)) {
			/* Entry was deleted */
			/*
			 * llchange != 0 implies lladdr != NULL, so the entry
			 * is held exclusively here.
			 */
			LLE_WUNLOCK(ln);
			return;
		}

		nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);

		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);

		/* Take over any held packets; they are flushed below. */
		if (ln->la_hold != NULL)
			chain = nd6_grab_holdchain(ln);
	}

	/* Calculates new router status */
	router = nd6_is_router(type, code, is_newentry, olladdr,
	    lladdr != NULL ? 1 : 0, ln->ln_router);

	ln->ln_router = router;
	/* Mark non-router redirects with special flag */
	if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
		ln->la_flags |= LLE_REDIRECT;

	/* Release the entry in the mode it was acquired in. */
	if (flags & LLE_EXCLUSIVE)
		LLE_WUNLOCK(ln);
	else
		LLE_RUNLOCK(ln);

	/* Flush held packets only after the entry lock is dropped. */
	if (chain != NULL)
		nd6_flush_holdchain(ifp, ln, chain);
	if (do_update)
		nd6_flush_children_holdchain(ifp, ln);

	/*
	 * When the link-layer address of a router changes, select the
	 * best router again.  In particular, when the neighbor entry is newly
	 * created, it might affect the selection policy.
	 * Question: can we restrict the first condition to the "is_newentry"
	 * case?
	 * XXX: when we hear an RA from a new router with the link-layer
	 * address option, defrouter_select_fib() is called twice, since
	 * defrtrlist_update called the function as well.  However, I believe
	 * we can compromise the overhead, since it only happens the first
	 * time.
	 * XXX: although defrouter_select_fib() should not have a bad effect
	 * for those are not autoconfigured hosts, we explicitly avoid such
	 * cases for safety.
	 */
	if ((do_update || is_newentry) && router &&
	    ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		/*
		 * guaranteed recursion
		 */
		defrouter_select_fib(ifp->if_fib);
	}
}
2128
2129static void
2130nd6_slowtimo(void *arg)
2131{
2132	struct epoch_tracker et;
2133	CURVNET_SET((struct vnet *) arg);
2134	struct nd_ifinfo *nd6if;
2135	struct ifnet *ifp;
2136
2137	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
2138	    nd6_slowtimo, curvnet);
2139	NET_EPOCH_ENTER(et);
2140	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2141		if (ifp->if_afdata[AF_INET6] == NULL)
2142			continue;
2143		nd6if = ND_IFINFO(ifp);
2144		if (nd6if->basereachable && /* already initialized */
2145		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
2146			/*
2147			 * Since reachable time rarely changes by router
2148			 * advertisements, we SHOULD insure that a new random
2149			 * value gets recomputed at least once every few hours.
2150			 * (RFC 2461, 6.3.4)
2151			 */
2152			nd6if->recalctm = V_nd6_recalc_reachtm_interval;
2153			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
2154		}
2155	}
2156	NET_EPOCH_EXIT(et);
2157	CURVNET_RESTORE();
2158}
2159
2160struct mbuf *
2161nd6_grab_holdchain(struct llentry *ln)
2162{
2163	struct mbuf *chain;
2164
2165	LLE_WLOCK_ASSERT(ln);
2166
2167	chain = ln->la_hold;
2168	ln->la_hold = NULL;
2169	ln->la_numheld = 0;
2170
2171	if (ln->ln_state == ND6_LLINFO_STALE) {
2172		/*
2173		 * The first time we send a packet to a
2174		 * neighbor whose entry is STALE, we have
2175		 * to change the state to DELAY and a sets
2176		 * a timer to expire in DELAY_FIRST_PROBE_TIME
2177		 * seconds to ensure do neighbor unreachability
2178		 * detection on expiration.
2179		 * (RFC 2461 7.3.3)
2180		 */
2181		nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
2182	}
2183
2184	return (chain);
2185}
2186
2187int
2188nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
2189    struct sockaddr_in6 *dst, struct route *ro)
2190{
2191	int error;
2192	int ip6len;
2193	struct ip6_hdr *ip6;
2194	struct m_tag *mtag;
2195
2196#ifdef MAC
2197	mac_netinet6_nd6_send(ifp, m);
2198#endif
2199
2200	/*
2201	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
2202	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
2203	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
2204	 * to be diverted to user space.  When re-injected into the kernel,
2205	 * send_output() will directly dispatch them to the outgoing interface.
2206	 */
2207	if (send_sendso_input_hook != NULL) {
2208		mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
2209		if (mtag != NULL) {
2210			ip6 = mtod(m, struct ip6_hdr *);
2211			ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
2212			/* Use the SEND socket */
2213			error = send_sendso_input_hook(m, ifp, SND_OUT,
2214			    ip6len);
2215			/* -1 == no app on SEND socket */
2216			if (error == 0 || error != -1)
2217			    return (error);
2218		}
2219	}
2220
2221	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
2222	IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
2223	    mtod(m, struct ip6_hdr *));
2224
2225	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
2226		origifp = ifp;
2227
2228	error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
2229	return (error);
2230}
2231
2232/*
2233 * Lookup link headerfor @sa_dst address. Stores found
2234 * data in @desten buffer. Copy of lle ln_flags can be also
2235 * saved in @pflags if @pflags is non-NULL.
2236 *
2237 * If destination LLE does not exists or lle state modification
2238 * is required, call "slow" version.
2239 *
2240 * Return values:
2241 * - 0 on success (address copied to buffer).
2242 * - EWOULDBLOCK (no local error, but address is still unresolved)
2243 * - other errors (alloc failure, etc)
2244 */
2245int
2246nd6_resolve(struct ifnet *ifp, int gw_flags, struct mbuf *m,
2247    const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
2248    struct llentry **plle)
2249{
2250	struct llentry *ln = NULL;
2251	const struct sockaddr_in6 *dst6;
2252
2253	NET_EPOCH_ASSERT();
2254
2255	if (pflags != NULL)
2256		*pflags = 0;
2257
2258	dst6 = (const struct sockaddr_in6 *)sa_dst;
2259
2260	/* discard the packet if IPv6 operation is disabled on the interface */
2261	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
2262		m_freem(m);
2263		return (ENETDOWN); /* better error? */
2264	}
2265
2266	if (m != NULL && m->m_flags & M_MCAST) {
2267		switch (ifp->if_type) {
2268		case IFT_ETHER:
2269		case IFT_L2VLAN:
2270		case IFT_BRIDGE:
2271			ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
2272						 desten);
2273			return (0);
2274		default:
2275			m_freem(m);
2276			return (EAFNOSUPPORT);
2277		}
2278	}
2279
2280	int family = gw_flags >> 16;
2281	int lookup_flags = plle ? LLE_EXCLUSIVE : LLE_UNLOCKED;
2282	ln = nd6_lookup(&dst6->sin6_addr, LLE_SF(family, lookup_flags), ifp);
2283	if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
2284		/* Entry found, let's copy lle info */
2285		bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
2286		if (pflags != NULL)
2287			*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
2288		llentry_provide_feedback(ln);
2289		if (plle) {
2290			LLE_ADDREF(ln);
2291			*plle = ln;
2292			LLE_WUNLOCK(ln);
2293		}
2294		return (0);
2295	} else if (plle && ln)
2296		LLE_WUNLOCK(ln);
2297
2298	return (nd6_resolve_slow(ifp, family, 0, m, dst6, desten, pflags, plle));
2299}
2300
2301/*
2302 * Finds or creates a new llentry for @addr and @family.
2303 * Returns wlocked llentry or NULL.
2304 *
2305 *
2306 * Child LLEs.
2307 *
2308 * Do not have their own state machine (gets marked as static)
2309 *  settimer bails out for child LLEs just in case.
2310 *
2311 * Locking order: parent lle gets locked first, chen goes the child.
2312 */
2313static __noinline struct llentry *
2314nd6_get_llentry(struct ifnet *ifp, const struct in6_addr *addr, int family)
2315{
2316	struct llentry *child_lle = NULL;
2317	struct llentry *lle, *lle_tmp;
2318
2319	lle = nd6_alloc(addr, 0, ifp);
2320	if (lle != NULL && family != AF_INET6) {
2321		child_lle = nd6_alloc(addr, 0, ifp);
2322		if (child_lle == NULL) {
2323			lltable_free_entry(LLTABLE6(ifp), lle);
2324			return (NULL);
2325		}
2326		child_lle->r_family = family;
2327		child_lle->la_flags |= LLE_CHILD | LLE_STATIC;
2328		child_lle->ln_state = ND6_LLINFO_INCOMPLETE;
2329	}
2330
2331	if (lle == NULL) {
2332		char ip6buf[INET6_ADDRSTRLEN];
2333		log(LOG_DEBUG,
2334		    "nd6_get_llentry: can't allocate llinfo for %s "
2335		    "(ln=%p)\n",
2336		    ip6_sprintf(ip6buf, addr), lle);
2337		return (NULL);
2338	}
2339
2340	IF_AFDATA_WLOCK(ifp);
2341	LLE_WLOCK(lle);
2342	/* Prefer any existing entry over newly-created one */
2343	lle_tmp = nd6_lookup(addr, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
2344	if (lle_tmp == NULL)
2345		lltable_link_entry(LLTABLE6(ifp), lle);
2346	else {
2347		lltable_free_entry(LLTABLE6(ifp), lle);
2348		lle = lle_tmp;
2349	}
2350	if (child_lle != NULL) {
2351		/* Check if child lle for the same family exists */
2352		lle_tmp = llentry_lookup_family(lle, child_lle->r_family);
2353		LLE_WLOCK(child_lle);
2354		if (lle_tmp == NULL) {
2355			/* Attach */
2356			lltable_link_child_entry(lle, child_lle);
2357		} else {
2358			/* child lle already exists, free newly-created one */
2359			lltable_free_entry(LLTABLE6(ifp), child_lle);
2360			child_lle = lle_tmp;
2361		}
2362		LLE_WUNLOCK(lle);
2363		lle = child_lle;
2364	}
2365	IF_AFDATA_WUNLOCK(ifp);
2366	return (lle);
2367}
2368
/*
 * Do L2 address resolution for @dst address. Stores found
 * address in @desten buffer. Copy of lle la_flags can be also
 * saved in @pflags if @pflags is non-NULL.
 *
 * Heavy version.
 * Function assumes that destination LLE does not exist,
 * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired.
 *
 * @family selects which entry (IPv6 parent or per-family child) is looked
 * up or created.  With LLE_ADDRONLY set in @flags only the link-layer
 * address (ifp->if_addrlen bytes) is copied to @desten; otherwise the
 * prebuilt link header (r_hdrlen bytes) is copied.  If @plle is non-NULL
 * and the address is resolved, the entry is returned in *@plle with an
 * extra reference.
 *
 * @m may be NULL.  If it is not, it is either freed (no entry available)
 * or appended to the entry's hold queue (address not resolved yet).
 *
 * Return values:
 * - 0 on success (address copied to buffer).
 * - EWOULDBLOCK (address is still unresolved; a solicitation is sent if
 *   none has been asked for yet)
 * - ENOBUFS (no entry was found and none could be created)
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline int
nd6_resolve_slow(struct ifnet *ifp, int family, int flags, struct mbuf *m,
    const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
    struct llentry **plle)
{
	struct llentry *lle = NULL;
	struct in6_addr *psrc, src;
	int send_ns, ll_len;
	char *lladdr;

	NET_EPOCH_ASSERT();

	/*
	 * Address resolution or Neighbor Unreachability Detection
	 * for the next hop.
	 * At this point, the destination of the packet must be a unicast
	 * or an anycast address(i.e. not a multicast).
	 */
	lle = nd6_lookup(&dst->sin6_addr, LLE_SF(family, LLE_EXCLUSIVE), ifp);
	if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
		/*
		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
		 * the condition below is not very efficient.  But we believe
		 * it is tolerable, because this should be a rare case.
		 */
		lle = nd6_get_llentry(ifp, &dst->sin6_addr, family);
	}

	/* No entry and none could be created: drop the packet. */
	if (lle == NULL) {
		m_freem(m);
		return (ENOBUFS);
	}

	/* Both the lookup and nd6_get_llentry() must hand back a wlocked lle. */
	LLE_WLOCK_ASSERT(lle);

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure we do
	 * neighbor unreachability detection on expiration.
	 * (RFC 2461 7.3.3)
	 * Child entries are skipped here.
	 */
	if ((!(lle->la_flags & LLE_CHILD)) && (lle->ln_state == ND6_LLINFO_STALE))
		nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
		/* Pick the bare address or the full prebuilt link header. */
		if (flags & LLE_ADDRONLY) {
			lladdr = lle->ll_addr;
			ll_len = ifp->if_addrlen;
		} else {
			lladdr = lle->r_linkdata;
			ll_len = lle->r_hdrlen;
		}
		bcopy(lladdr, desten, ll_len);
		if (pflags != NULL)
			*pflags = lle->la_flags;
		/* Hand the entry back with an extra reference on request. */
		if (plle) {
			LLE_ADDREF(lle);
			*plle = lle;
		}
		LLE_WUNLOCK(lle);
		return (0);
	}

	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet.  Append this latest packet to the end of the
	 * packet queue in the mbuf.  When it exceeds nd6_maxqueuelen,
	 * the oldest packet in the queue will be removed.
	 */
	if (m != NULL) {
		size_t dropped;

		dropped = lltable_append_entry_queue(lle, m, V_nd6_maxqueuelen);
		ICMP6STAT_ADD(icp6s_dropped, dropped);
	}

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 * Note that for newly-created lle la_asked will be 0,
	 * so we will transition from ND6_LLINFO_NOSTATE to
	 * ND6_LLINFO_INCOMPLETE state here.
	 */
	psrc = NULL;
	send_ns = 0;

	/*
	 * If we have child lle, switch to the parent to send NS.
	 * The child lock is released before the parent lock is taken.
	 */
	if (lle->la_flags & LLE_CHILD) {
		struct llentry *lle_parent = lle->lle_parent;
		LLE_WUNLOCK(lle);
		lle = lle_parent;
		LLE_WLOCK(lle);
	}
	if (lle->la_asked == 0) {
		lle->la_asked++;
		send_ns = 1;
		psrc = nd6_llinfo_get_holdsrc(lle, &src);

		nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
	}
	/* The solicitation itself is sent with the entry unlocked. */
	LLE_WUNLOCK(lle);
	if (send_ns != 0)
		nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);

	return (EWOULDBLOCK);
}
2492
2493/*
2494 * Do L2 address resolution for @sa_dst address. Stores found
2495 * address in @desten buffer. Copy of lle ln_flags can be also
2496 * saved in @pflags if @pflags is non-NULL.
2497 *
2498 * Return values:
2499 * - 0 on success (address copied to buffer).
2500 * - EWOULDBLOCK (no local error, but address is still unresolved)
2501 * - other errors (alloc failure, etc)
2502 */
2503int
2504nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
2505    char *desten, uint32_t *pflags)
2506{
2507	int error;
2508
2509	flags |= LLE_ADDRONLY;
2510	error = nd6_resolve_slow(ifp, AF_INET6, flags, NULL,
2511	    (const struct sockaddr_in6 *)dst, desten, pflags, NULL);
2512	return (error);
2513}
2514
2515int
2516nd6_flush_holdchain(struct ifnet *ifp, struct llentry *lle, struct mbuf *chain)
2517{
2518	struct mbuf *m, *m_head;
2519	struct sockaddr_in6 dst6;
2520	int error = 0;
2521
2522	NET_EPOCH_ASSERT();
2523
2524	struct route_in6 ro = {
2525		.ro_prepend = lle->r_linkdata,
2526		.ro_plen = lle->r_hdrlen,
2527	};
2528
2529	lltable_fill_sa_entry(lle, (struct sockaddr *)&dst6);
2530	m_head = chain;
2531
2532	while (m_head) {
2533		m = m_head;
2534		m_head = m_head->m_nextpkt;
2535		m->m_nextpkt = NULL;
2536		error = nd6_output_ifp(ifp, ifp, m, &dst6, (struct route *)&ro);
2537	}
2538
2539	/*
2540	 * XXX
2541	 * note that intermediate errors are blindly ignored
2542	 */
2543	return (error);
2544}
2545
2546__noinline void
2547nd6_flush_children_holdchain(struct ifnet *ifp, struct llentry *lle)
2548{
2549	struct llentry *child_lle;
2550	struct mbuf *chain;
2551
2552	NET_EPOCH_ASSERT();
2553
2554	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
2555		LLE_WLOCK(child_lle);
2556		chain = nd6_grab_holdchain(child_lle);
2557		LLE_WUNLOCK(child_lle);
2558		nd6_flush_holdchain(ifp, child_lle, chain);
2559	}
2560}
2561
2562static int
2563nd6_need_cache(struct ifnet *ifp)
2564{
2565	/*
2566	 * XXX: we currently do not make neighbor cache on any interface
2567	 * other than Ethernet and GIF.
2568	 *
2569	 * RFC2893 says:
2570	 * - unidirectional tunnels needs no ND
2571	 */
2572	switch (ifp->if_type) {
2573	case IFT_ETHER:
2574	case IFT_IEEE1394:
2575	case IFT_L2VLAN:
2576	case IFT_INFINIBAND:
2577	case IFT_BRIDGE:
2578	case IFT_PROPVIRTUAL:
2579		return (1);
2580	default:
2581		return (0);
2582	}
2583}
2584
2585/*
2586 * Add pernament ND6 link-layer record for given
2587 * interface address.
2588 *
2589 * Very similar to IPv4 arp_ifinit(), but:
2590 * 1) IPv6 DAD is performed in different place
2591 * 2) It is called by IPv6 protocol stack in contrast to
2592 * arp_ifinit() which is typically called in SIOCSIFADDR
2593 * driver ioctl handler.
2594 *
2595 */
2596int
2597nd6_add_ifa_lle(struct in6_ifaddr *ia)
2598{
2599	struct ifnet *ifp;
2600	struct llentry *ln, *ln_tmp;
2601	struct sockaddr *dst;
2602
2603	ifp = ia->ia_ifa.ifa_ifp;
2604	if (nd6_need_cache(ifp) == 0)
2605		return (0);
2606
2607	dst = (struct sockaddr *)&ia->ia_addr;
2608	ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
2609	if (ln == NULL)
2610		return (ENOBUFS);
2611
2612	IF_AFDATA_WLOCK(ifp);
2613	LLE_WLOCK(ln);
2614	/* Unlink any entry if exists */
2615	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_SF(AF_INET6, LLE_EXCLUSIVE), dst);
2616	if (ln_tmp != NULL)
2617		lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
2618	lltable_link_entry(LLTABLE6(ifp), ln);
2619	IF_AFDATA_WUNLOCK(ifp);
2620
2621	if (ln_tmp != NULL)
2622		EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
2623	EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
2624
2625	LLE_WUNLOCK(ln);
2626	if (ln_tmp != NULL)
2627		llentry_free(ln_tmp);
2628
2629	return (0);
2630}
2631
2632/*
2633 * Removes either all lle entries for given @ia, or lle
2634 * corresponding to @ia address.
2635 */
2636void
2637nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
2638{
2639	struct sockaddr_in6 mask, addr;
2640	struct sockaddr *saddr, *smask;
2641	struct ifnet *ifp;
2642
2643	ifp = ia->ia_ifa.ifa_ifp;
2644	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
2645	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
2646	saddr = (struct sockaddr *)&addr;
2647	smask = (struct sockaddr *)&mask;
2648
2649	if (all != 0)
2650		lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC);
2651	else
2652		lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
2653}
2654
2655static int
2656nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2657{
2658	struct in6_prefix p;
2659	struct sockaddr_in6 s6;
2660	struct nd_prefix *pr;
2661	struct nd_pfxrouter *pfr;
2662	time_t maxexpire;
2663	int error;
2664	char ip6buf[INET6_ADDRSTRLEN];
2665
2666	if (req->newptr)
2667		return (EPERM);
2668
2669	error = sysctl_wire_old_buffer(req, 0);
2670	if (error != 0)
2671		return (error);
2672
2673	bzero(&p, sizeof(p));
2674	p.origin = PR_ORIG_RA;
2675	bzero(&s6, sizeof(s6));
2676	s6.sin6_family = AF_INET6;
2677	s6.sin6_len = sizeof(s6);
2678
2679	ND6_RLOCK();
2680	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
2681		if (!pr->ndpr_raf_ra_derived)
2682			continue;
2683		p.prefix = pr->ndpr_prefix;
2684		if (sa6_recoverscope(&p.prefix)) {
2685			log(LOG_ERR, "scope error in prefix list (%s)\n",
2686			    ip6_sprintf(ip6buf, &p.prefix.sin6_addr));
2687			/* XXX: press on... */
2688		}
2689		p.raflags = pr->ndpr_raf;
2690		p.prefixlen = pr->ndpr_plen;
2691		p.vltime = pr->ndpr_vltime;
2692		p.pltime = pr->ndpr_pltime;
2693		p.if_index = pr->ndpr_ifp->if_index;
2694		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2695			p.expire = 0;
2696		else {
2697			/* XXX: we assume time_t is signed. */
2698			maxexpire = (-1) &
2699			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
2700			if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
2701				p.expire = pr->ndpr_lastupdate +
2702				    pr->ndpr_vltime +
2703				    (time_second - time_uptime);
2704			else
2705				p.expire = maxexpire;
2706		}
2707		p.refcnt = pr->ndpr_addrcnt;
2708		p.flags = pr->ndpr_stateflags;
2709		p.advrtrs = 0;
2710		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
2711			p.advrtrs++;
2712		error = SYSCTL_OUT(req, &p, sizeof(p));
2713		if (error != 0)
2714			break;
2715		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
2716			s6.sin6_addr = pfr->router->rtaddr;
2717			if (sa6_recoverscope(&s6))
2718				log(LOG_ERR,
2719				    "scope error in prefix list (%s)\n",
2720				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
2721			error = SYSCTL_OUT(req, &s6, sizeof(s6));
2722			if (error != 0)
2723				goto out;
2724		}
2725	}
2726out:
2727	ND6_RUNLOCK();
2728	return (error);
2729}
/*
 * net.inet6.icmp6 sysctl nodes for neighbor discovery.
 */
/* Read-only opaque node served by nd6_sysctl_prlist(). */
SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
	NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
	"NDP prefix list");
/* Read/write integer backed by the per-vnet nd6_maxqueuelen variable. */
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
/* Read/write integer backed by the per-vnet nd6_gctimer variable. */
SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
2738