1/*	$OpenBSD: nd6.c,v 1.280 2023/05/13 16:27:59 bluhm Exp $	*/
2/*	$KAME: nd6.c,v 1.280 2002/06/08 19:52:07 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/timeout.h>
36#include <sys/malloc.h>
37#include <sys/mbuf.h>
38#include <sys/socket.h>
39#include <sys/sockio.h>
40#include <sys/time.h>
41#include <sys/kernel.h>
42#include <sys/pool.h>
43#include <sys/errno.h>
44#include <sys/ioctl.h>
45#include <sys/syslog.h>
46#include <sys/queue.h>
47#include <sys/stdint.h>
48#include <sys/task.h>
49
50#include <net/if.h>
51#include <net/if_dl.h>
52#include <net/if_types.h>
53#include <net/route.h>
54
55#include <netinet/in.h>
56#include <netinet/if_ether.h>
57#include <netinet/ip_ipsp.h>
58
59#include <netinet6/in6_var.h>
60#include <netinet/ip6.h>
61#include <netinet6/ip6_var.h>
62#include <netinet6/nd6.h>
63#include <netinet/icmp6.h>
64
65/*
66 * Locks used to protect struct members in this file:
67 *	a	atomic operations
68 *	I	immutable after creation
69 *	K	kernel lock
70 *	m	nd6 mutex, needed when net lock is shared
71 *	N	net lock
72 */
73
/*
 * ND6_SLOWTIMER_INTERVAL is the delay, in seconds, with which nd6_init()
 * arms nd6_slowtimo_ch.
 */
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

/*
 * timer values
 *
 * NOTE(review): nd6_timer_next is an int, yet nd6_llinfo_settimer() and
 * nd6_timer() store and compare time_t uptime values in it; consider
 * whether it should be time_t like nd6_expire_next.
 */
int	nd6_timer_next	= -1;	/* at which uptime nd6_timer runs */
time_t	nd6_expire_next	= -1;	/* at which uptime nd6_expire runs */
int	nd6_delay	= 5;	/* delay before the first probe: 5 seconds */
int	nd6_umaxtries	= 3;	/* maximum number of unicast queries */
int	nd6_mmaxtries	= 3;	/* maximum number of multicast queries */
int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */

/* preventing too many loops in ND option parsing */
int nd6_maxndopt = 10;	/* max # of ND options allowed */

/*
 * With the default of 0, nd6_nud_hint() never refreshes an entry, since it
 * increments ln_byhint before comparing it against this limit.
 */
int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */

/* nd6_debug defaults to on only when the kernel is built with ND6_DEBUG. */
#ifdef ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/* llinfo_nd6 lifetime, rt_llinfo and RTF_LLINFO are protected by nd6_mtx */
struct mutex nd6_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list =
    TAILQ_HEAD_INITIALIZER(nd6_list);	/* [mN] list of llinfo_nd6 structures */
struct	pool nd6_pool;		/* [I] pool for llinfo_nd6 structures */
int	nd6_inuse;		/* [m] limit neighbor discovery routes */
unsigned int	ln_hold_total;	/* [a] packets currently in the nd6 queue */

/* Forward declarations. */
void nd6_timer(void *);
void nd6_slowtimo(void *);
void nd6_expire(void *);
void nd6_expire_timer(void *);
void nd6_invalidate(struct rtentry *);
void nd6_free(struct rtentry *);
int nd6_llinfo_timer(struct rtentry *);

/*
 * Timeouts and task wired up in nd6_init():
 *	nd6_timer_to		runs nd6_timer()
 *	nd6_slowtimo_ch		runs nd6_slowtimo()
 *	nd6_expire_timeout	runs nd6_expire_timer(), which queues
 *				nd6_expire_task (nd6_expire())
 */
struct timeout nd6_timer_to;
struct timeout nd6_slowtimo_ch;
struct timeout nd6_expire_timeout;
struct task nd6_expire_task;
117
118void
119nd6_init(void)
120{
121	pool_init(&nd6_pool, sizeof(struct llinfo_nd6), 0,
122	    IPL_SOFTNET, 0, "nd6", NULL);
123
124	task_set(&nd6_expire_task, nd6_expire, NULL);
125
126	/* start timer */
127	timeout_set_proc(&nd6_timer_to, nd6_timer, NULL);
128	timeout_set_proc(&nd6_slowtimo_ch, nd6_slowtimo, NULL);
129	timeout_add_sec(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL);
130	timeout_set(&nd6_expire_timeout, nd6_expire_timer, NULL);
131}
132
133void
134nd6_ifattach(struct ifnet *ifp)
135{
136	struct nd_ifinfo *nd;
137
138	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
139
140	nd->reachable = ND_COMPUTE_RTIME(REACHABLE_TIME);
141
142	ifp->if_nd = nd;
143}
144
145void
146nd6_ifdetach(struct ifnet *ifp)
147{
148	struct nd_ifinfo *nd = ifp->if_nd;
149
150	free(nd, M_IP6NDP, sizeof(*nd));
151}
152
153/*
154 * Parse multiple ND options.
155 * This function is much easier to use, for ND routines that do not need
156 * multiple options of the same type.
157 */
158int
159nd6_options(void *opt, int icmp6len, struct nd_opts *ndopts)
160{
161	struct nd_opt_hdr *nd_opt, *next_opt, *last_opt;
162	int i = 0;
163
164	bzero(ndopts, sizeof(*ndopts));
165
166	if (icmp6len == 0)
167		return 0;
168
169	next_opt = opt;
170	last_opt = (struct nd_opt_hdr *)((u_char *)opt + icmp6len);
171
172	while (next_opt != NULL) {
173		int olen;
174
175		nd_opt = next_opt;
176
177		/* make sure nd_opt_len is inside the buffer */
178		if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)last_opt)
179			goto invalid;
180
181		/* every option must have a length greater than zero */
182		olen = nd_opt->nd_opt_len << 3;
183		if (olen == 0)
184			goto invalid;
185
186		next_opt = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
187		if (next_opt > last_opt) {
188			/* option overruns the end of buffer */
189			goto invalid;
190		} else if (next_opt == last_opt) {
191			/* reached the end of options chain */
192			next_opt = NULL;
193		}
194
195		switch (nd_opt->nd_opt_type) {
196		case ND_OPT_SOURCE_LINKADDR:
197			if (ndopts->nd_opts_src_lladdr != NULL)
198				nd6log((LOG_INFO, "duplicated ND6 option found "
199				    "(type=%d)\n", nd_opt->nd_opt_type));
200			else
201				ndopts->nd_opts_src_lladdr = nd_opt;
202			break;
203		case ND_OPT_TARGET_LINKADDR:
204			if (ndopts->nd_opts_tgt_lladdr != NULL)
205				nd6log((LOG_INFO, "duplicated ND6 option found "
206				    "(type=%d)\n", nd_opt->nd_opt_type));
207			else
208				ndopts->nd_opts_tgt_lladdr = nd_opt;
209			break;
210		case ND_OPT_MTU:
211		case ND_OPT_REDIRECTED_HEADER:
212		case ND_OPT_PREFIX_INFORMATION:
213		case ND_OPT_DNSSL:
214		case ND_OPT_RDNSS:
215			/* Don't warn, not used by kernel */
216			break;
217		default:
218			/*
219			 * Unknown options must be silently ignored,
220			 * to accommodate future extension to the protocol.
221			 */
222			nd6log((LOG_DEBUG,
223			    "nd6_options: unsupported option %d - "
224			    "option ignored\n", nd_opt->nd_opt_type));
225			break;
226		}
227
228		i++;
229		if (i > nd6_maxndopt) {
230			icmp6stat_inc(icp6s_nd_toomanyopt);
231			nd6log((LOG_INFO, "too many loop in nd opt\n"));
232			break;
233		}
234	}
235
236	return 0;
237
238invalid:
239	bzero(ndopts, sizeof(*ndopts));
240	icmp6stat_inc(icp6s_nd_badopt);
241	return -1;
242}
243
244/*
245 * ND6 timer routine to handle ND6 entries
246 */
247void
248nd6_llinfo_settimer(const struct llinfo_nd6 *ln, unsigned int secs)
249{
250	time_t expire = getuptime() + secs;
251
252	NET_ASSERT_LOCKED();
253	KASSERT(!ISSET(ln->ln_rt->rt_flags, RTF_LOCAL));
254
255	ln->ln_rt->rt_expire = expire;
256	if (!timeout_pending(&nd6_timer_to) || expire < nd6_timer_next) {
257		nd6_timer_next = expire;
258		timeout_add_sec(&nd6_timer_to, secs);
259	}
260}
261
262void
263nd6_timer(void *unused)
264{
265	struct llinfo_nd6 *ln, *nln;
266	time_t uptime, expire;
267	int secs;
268
269	NET_LOCK();
270
271	uptime = getuptime();
272	expire = uptime + nd6_gctimer;
273
274	/* Net lock is exclusive, no nd6 mutex needed for nd6_list here. */
275	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
276		struct rtentry *rt = ln->ln_rt;
277
278		if (rt->rt_expire && rt->rt_expire <= uptime)
279			if (nd6_llinfo_timer(rt))
280				continue;
281
282		if (rt->rt_expire && rt->rt_expire < expire)
283			expire = rt->rt_expire;
284	}
285
286	secs = expire - uptime;
287	if (secs < 0)
288		secs = 0;
289	if (!TAILQ_EMPTY(&nd6_list)) {
290		nd6_timer_next = uptime + secs;
291		timeout_add_sec(&nd6_timer_to, secs);
292	}
293
294	NET_UNLOCK();
295}
296
/*
 * ND timer state handling.
 *
 * Called by nd6_timer() for a neighbor cache route whose rt_expire has
 * passed; advances the entry's state machine according to ln_state.
 * Requires the exclusive net lock.
 *
 * Returns 1 if `rt' should no longer be used, 0 otherwise.  That is the
 * case when the interface of the route is gone or when the entry has been
 * handed to nd6_free().
 */
int
nd6_llinfo_timer(struct rtentry *rt)
{
	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	struct sockaddr_in6 *dst = satosin6(rt_key(rt));
	struct ifnet *ifp;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	/* Without the interface nothing can be sent; tell caller to skip. */
	if ((ifp = if_get(rt->rt_ifidx)) == NULL)
		return 1;

	switch (ln->ln_state) {
	case ND6_LLINFO_INCOMPLETE:
		if (ln->ln_asked < nd6_mmaxtries) {
			/*
			 * Solicit again.  The second argument is NULL here,
			 * unlike the DELAY/PROBE cases below.
			 * NOTE(review): presumably selects a multicast
			 * solicitation; confirm against nd6_ns_output().
			 */
			ln->ln_asked++;
			nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000);
			nd6_ns_output(ifp, NULL, &dst->sin6_addr,
			    &ln->ln_saddr6, 0);
		} else {
			struct mbuf_list ml;
			struct mbuf *m;
			unsigned int len;

			/*
			 * Out of retries: answer every held packet with an
			 * ICMPv6 address-unreachable error, then drop the
			 * entry.
			 */
			mq_delist(&ln->ln_mq, &ml);
			len = ml_len(&ml);
			while ((m = ml_dequeue(&ml)) != NULL) {
				/*
				 * Fake rcvif to make the ICMP error
				 * more helpful in diagnosing for the
				 * receiver.
				 * XXX: should we consider older rcvif?
				 */
				m->m_pkthdr.ph_ifidx = rt->rt_ifidx;

				icmp6_error(m, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_ADDR, 0);
			}

			/*
			 * Keep ln_hold_total in sync: subtract the packets
			 * taken off the queue above plus any that reappeared
			 * on it in the meantime.
			 */
			/* XXXSMP we also discard if other CPU enqueues */
			if (mq_len(&ln->ln_mq) > 0) {
				/* mbuf is back in queue. Discard. */
				atomic_sub_int(&ln_hold_total,
				    len + mq_purge(&ln->ln_mq));
			} else
				atomic_sub_int(&ln_hold_total, len);

			nd6_free(rt);
			ln = NULL;	/* makes the function return 1 */
		}
		break;

	case ND6_LLINFO_REACHABLE:
		/* Reachability expired: demote and start the gc timer. */
		if (!ND6_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND6_LLINFO_STALE;
			nd6_llinfo_settimer(ln, nd6_gctimer);
		}
		break;

	case ND6_LLINFO_STALE:
	case ND6_LLINFO_PURGE:
		/* Garbage Collection(RFC 2461 5.3) */
		if (!ND6_LLINFO_PERMANENT(ln)) {
			nd6_free(rt);
			ln = NULL;
		}
		break;

	case ND6_LLINFO_DELAY:
		/* We need NUD */
		/* First probe: solicit the neighbor and move to PROBE. */
		ln->ln_asked = 1;
		ln->ln_state = ND6_LLINFO_PROBE;
		nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000);
		nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr,
		    &ln->ln_saddr6, 0);
		break;

	case ND6_LLINFO_PROBE:
		/* Keep probing up to nd6_umaxtries times, then give up. */
		if (ln->ln_asked < nd6_umaxtries) {
			ln->ln_asked++;
			nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000);
			nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr,
			    &ln->ln_saddr6, 0);
		} else {
			nd6_free(rt);
			ln = NULL;
		}
		break;
	}

	if_put(ifp);

	/* ln was cleared on every path that called nd6_free(). */
	return (ln == NULL);
}
396
397void
398nd6_expire_timer_update(struct in6_ifaddr *ia6)
399{
400	time_t expire_time = INT64_MAX;
401
402	if (ia6->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME)
403		expire_time = ia6->ia6_lifetime.ia6t_expire;
404
405	if (!(ia6->ia6_flags & IN6_IFF_DEPRECATED) &&
406	    ia6->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME &&
407	    expire_time > ia6->ia6_lifetime.ia6t_preferred)
408		expire_time = ia6->ia6_lifetime.ia6t_preferred;
409
410	if (expire_time == INT64_MAX)
411		return;
412
413	/*
414	 * IFA6_IS_INVALID() and IFA6_IS_DEPRECATED() check for uptime
415	 * greater than ia6t_expire or ia6t_preferred, not greater or equal.
416	 * Schedule timeout one second later so that either IFA6_IS_INVALID()
417	 * or IFA6_IS_DEPRECATED() is true.
418	 */
419	expire_time++;
420
421	if (!timeout_pending(&nd6_expire_timeout) ||
422	    nd6_expire_next > expire_time) {
423		int secs;
424
425		secs = expire_time - getuptime();
426		if (secs < 0)
427			secs = 0;
428
429		timeout_add_sec(&nd6_expire_timeout, secs);
430		nd6_expire_next = expire_time;
431	}
432}
433
434/*
435 * Expire interface addresses.
436 */
437void
438nd6_expire(void *unused)
439{
440	struct ifnet *ifp;
441
442	NET_LOCK();
443
444	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
445		struct ifaddr *ifa, *nifa;
446		struct in6_ifaddr *ia6;
447
448		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrlist, ifa_list, nifa) {
449			if (ifa->ifa_addr->sa_family != AF_INET6)
450				continue;
451			ia6 = ifatoia6(ifa);
452			/* check address lifetime */
453			if (IFA6_IS_INVALID(ia6)) {
454				in6_purgeaddr(&ia6->ia_ifa);
455			} else {
456				if (IFA6_IS_DEPRECATED(ia6))
457					ia6->ia6_flags |= IN6_IFF_DEPRECATED;
458				nd6_expire_timer_update(ia6);
459			}
460		}
461	}
462
463	NET_UNLOCK();
464}
465
/*
 * Handler of nd6_expire_timeout.  It does no work itself; it only queues
 * nd6_expire_task (bound to nd6_expire() in nd6_init()) on net_tq(0).
 */
void
nd6_expire_timer(void *unused)
{
	task_add(net_tq(0), &nd6_expire_task);
}
471
472/*
473 * Nuke neighbor cache/prefix/default router management table, right before
474 * ifp goes away.
475 */
476void
477nd6_purge(struct ifnet *ifp)
478{
479	struct llinfo_nd6 *ln, *nln;
480
481	NET_ASSERT_LOCKED_EXCLUSIVE();
482
483	/*
484	 * Nuke neighbor cache entries for the ifp.
485	 */
486	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
487		struct rtentry *rt;
488		struct sockaddr_dl *sdl;
489
490		rt = ln->ln_rt;
491		if (rt != NULL && rt->rt_gateway != NULL &&
492		    rt->rt_gateway->sa_family == AF_LINK) {
493			sdl = satosdl(rt->rt_gateway);
494			if (sdl->sdl_index == ifp->if_index)
495				nd6_free(rt);
496		}
497	}
498}
499
500struct rtentry *
501nd6_lookup(const struct in6_addr *addr6, int create, struct ifnet *ifp,
502    u_int rtableid)
503{
504	struct rtentry *rt;
505	struct sockaddr_in6 sin6;
506	int flags;
507
508	bzero(&sin6, sizeof(sin6));
509	sin6.sin6_len = sizeof(struct sockaddr_in6);
510	sin6.sin6_family = AF_INET6;
511	sin6.sin6_addr = *addr6;
512	flags = (create) ? RT_RESOLVE : 0;
513
514	rt = rtalloc(sin6tosa(&sin6), flags, rtableid);
515	if (rt != NULL && (rt->rt_flags & RTF_LLINFO) == 0) {
516		/*
517		 * This is the case for the default route.
518		 * If we want to create a neighbor cache for the address, we
519		 * should free the route for the destination and allocate an
520		 * interface route.
521		 */
522		if (create) {
523			rtfree(rt);
524			rt = NULL;
525		}
526	}
527	if (rt == NULL) {
528		if (create && ifp) {
529			struct rt_addrinfo info;
530			struct llinfo_nd6 *ln;
531			struct ifaddr *ifa;
532			int error;
533
534			/*
535			 * If no route is available and create is set,
536			 * we allocate a host route for the destination
537			 * and treat it like an interface route.
538			 * This hack is necessary for a neighbor which can't
539			 * be covered by our own prefix.
540			 */
541			ifa = ifaof_ifpforaddr(sin6tosa(&sin6), ifp);
542			if (ifa == NULL)
543				return (NULL);
544
545			/*
546			 * Create a new route.  RTF_LLINFO is necessary
547			 * to create a Neighbor Cache entry for the
548			 * destination in nd6_rtrequest which will be
549			 * called in rtrequest.
550			 */
551			bzero(&info, sizeof(info));
552			info.rti_ifa = ifa;
553			info.rti_flags = RTF_HOST | RTF_LLINFO;
554			info.rti_info[RTAX_DST] = sin6tosa(&sin6);
555			info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
556			error = rtrequest(RTM_ADD, &info, RTP_CONNECTED, &rt,
557			    rtableid);
558			if (error)
559				return (NULL);
560			mtx_enter(&nd6_mtx);
561			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
562			if (ln != NULL)
563				ln->ln_state = ND6_LLINFO_NOSTATE;
564			mtx_leave(&nd6_mtx);
565		} else
566			return (NULL);
567	}
568	/*
569	 * Validation for the entry.
570	 * Note that the check for rt_llinfo is necessary because a cloned
571	 * route from a parent route that has the L flag (e.g. the default
572	 * route to a p2p interface) may have the flag, too, while the
573	 * destination is not actually a neighbor.
574	 */
575	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
576	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
577	    (ifp != NULL && rt->rt_ifidx != ifp->if_index)) {
578		if (create) {
579			char addr[INET6_ADDRSTRLEN];
580			nd6log((LOG_DEBUG, "%s: failed to lookup %s (if=%s)\n",
581			    __func__,
582			    inet_ntop(AF_INET6, addr6, addr, sizeof(addr)),
583			    ifp ? ifp->if_xname : "unspec"));
584		}
585		rtfree(rt);
586		return (NULL);
587	}
588	return (rt);
589}
590
591/*
592 * Detect if a given IPv6 address identifies a neighbor on a given link.
593 * XXX: should take care of the destination of a p2p link?
594 */
595int
596nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
597{
598	struct in6_ifaddr *ia6;
599	struct ifaddr *ifa;
600	struct rtentry *rt;
601
602	/*
603	 * A link-local address is always a neighbor.
604	 * XXX: we should use the sin6_scope_id field rather than the embedded
605	 * interface index.
606	 * XXX: a link does not necessarily specify a single interface.
607	 */
608	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
609	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
610		return (1);
611
612	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
613		if (ifa->ifa_addr->sa_family != AF_INET6)
614			continue;
615
616		ia6 = ifatoia6(ifa);
617
618		/* Prefix check down below. */
619		if (ia6->ia6_flags & IN6_IFF_AUTOCONF)
620			continue;
621
622		if (IN6_ARE_MASKED_ADDR_EQUAL(&addr->sin6_addr,
623		    &ia6->ia_addr.sin6_addr,
624		    &ia6->ia_prefixmask.sin6_addr))
625			return (1);
626	}
627
628	/*
629	 * Even if the address matches none of our addresses, it might be
630	 * in the neighbor cache.
631	 */
632	rt = nd6_lookup(&addr->sin6_addr, 0, ifp, ifp->if_rdomain);
633	if (rt != NULL) {
634		rtfree(rt);
635		return (1);
636	}
637
638	return (0);
639}
640
641void
642nd6_invalidate(struct rtentry *rt)
643{
644	struct llinfo_nd6 *ln;
645	struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
646
647	mtx_enter(&nd6_mtx);
648	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
649	if (ln == NULL) {
650		mtx_leave(&nd6_mtx);
651		return;
652	}
653	atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
654	sdl->sdl_alen = 0;
655	ln->ln_state = ND6_LLINFO_INCOMPLETE;
656	ln->ln_asked = 0;
657	mtx_leave(&nd6_mtx);
658}
659
660/*
661 * Free an nd6 llinfo entry.
662 */
663void
664nd6_free(struct rtentry *rt)
665{
666	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
667	struct in6_addr in6 = satosin6(rt_key(rt))->sin6_addr;
668	struct ifnet *ifp;
669
670	NET_ASSERT_LOCKED_EXCLUSIVE();
671
672	ifp = if_get(rt->rt_ifidx);
673
674	if (!ip6_forwarding) {
675		if (ln->ln_router) {
676			/*
677			 * rt6_flush must be called whether or not the neighbor
678			 * is in the Default Router List.
679			 * See a corresponding comment in nd6_na_input().
680			 */
681			rt6_flush(&in6, ifp);
682		}
683	}
684
685	KASSERT(!ISSET(rt->rt_flags, RTF_LOCAL));
686	nd6_invalidate(rt);
687
688	/*
689	 * Detach the route from the routing tree and the list of neighbor
690	 * caches, and disable the route entry not to be used in already
691	 * cached routes.
692	 */
693	if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
694		rtdeletemsg(rt, ifp, ifp->if_rdomain);
695
696	if_put(ifp);
697}
698
699/*
700 * Upper-layer reachability hint for Neighbor Unreachability Detection.
701 *
702 * XXX cost-effective methods?
703 */
704void
705nd6_nud_hint(struct rtentry *rt)
706{
707	struct llinfo_nd6 *ln;
708	struct ifnet *ifp;
709
710	NET_ASSERT_LOCKED_EXCLUSIVE();
711
712	ifp = if_get(rt->rt_ifidx);
713	if (ifp == NULL)
714		return;
715
716	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
717	    (rt->rt_flags & RTF_LLINFO) == 0 ||
718	    rt->rt_llinfo == NULL || rt->rt_gateway == NULL ||
719	    rt->rt_gateway->sa_family != AF_LINK) {
720		/* This is not a host route. */
721		goto out;
722	}
723
724	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
725	if (ln->ln_state < ND6_LLINFO_REACHABLE)
726		goto out;
727
728	/*
729	 * if we get upper-layer reachability confirmation many times,
730	 * it is possible we have false information.
731	 */
732	ln->ln_byhint++;
733	if (ln->ln_byhint > nd6_maxnudhint)
734		goto out;
735
736	ln->ln_state = ND6_LLINFO_REACHABLE;
737	if (!ND6_LLINFO_PERMANENT(ln))
738		nd6_llinfo_settimer(ln, ifp->if_nd->reachable);
739out:
740	if_put(ifp);
741}
742
/*
 * Routing request hook for neighbor discovery.
 *
 * Keeps the llinfo_nd6 attached to `rt' in step with routing table
 * changes on interface `ifp':
 *	RTM_ADD, RTM_RESOLVE	allocate the entry, link it into nd6_list
 *				and set its initial state
 *	RTM_DELETE		unlink and free the entry
 *	RTM_INVALIDATE		reset the entry via nd6_invalidate()
 * Gateway, multicast and MPLS routes are ignored.  For RTF_ANNOUNCE
 * (proxy ND) routes the solicited-node multicast group is joined on
 * creation and left on deletion.
 */
void
nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_nd6 *ln;
	struct ifaddr *ifa;
	struct in6_ifaddr *ifa6;

	if (ISSET(rt->rt_flags, RTF_GATEWAY|RTF_MULTICAST|RTF_MPLS))
		return;

	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
		/*
		 * This is probably an interface direct route for a link
		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
		 * We do not need special treatment below for such a route.
		 * Moreover, the RTF_LLINFO flag which would be set below
		 * would annoy the ndp(8) command.
		 */
		return;
	}

	if (req == RTM_RESOLVE && nd6_need_cache(ifp) == 0) {
		/*
		 * For routing daemons like ospf6d we allow neighbor discovery
		 * based on the cloning route only.  This allows us to send
		 * packets directly into a network without having an address
		 * with matching prefix on the interface.  If the cloning
		 * route is used for an 6to4 interface, we would mistakenly
		 * make a neighbor cache for the host route, and would see
		 * strange neighbor solicitation for the corresponding
		 * destination.  In order to avoid confusion, we check if the
		 * interface is suitable for neighbor discovery, and stop the
		 * process if not.  Additionally, we remove the LLINFO flag
		 * so that ndp(8) will not try to get the neighbor information
		 * of the destination.
		 */
		rt->rt_flags &= ~RTF_LLINFO;
		return;
	}

	switch (req) {
	case RTM_ADD:
		/* Cloning routes never expire and get no llinfo. */
		if (rt->rt_flags & RTF_CLONING) {
			rt->rt_expire = 0;
			break;
		}
		if ((rt->rt_flags & RTF_LOCAL) && rt->rt_llinfo == NULL)
			rt->rt_expire = 0;
		/* FALLTHROUGH */
	case RTM_RESOLVE:
		if (gate->sa_family != AF_LINK ||
		    gate->sa_len < sizeof(struct sockaddr_dl)) {
			log(LOG_DEBUG, "%s: bad gateway value: %s\n",
			    __func__, ifp->if_xname);
			break;
		}
		/* Stamp the gateway with the interface's type and index. */
		satosdl(gate)->sdl_type = ifp->if_type;
		satosdl(gate)->sdl_index = ifp->if_index;
		/*
		 * Case 2: This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		/* Allocate before taking nd6_mtx; PR_NOWAIT may fail. */
		ln = pool_get(&nd6_pool, PR_NOWAIT | PR_ZERO);
		if (ln == NULL) {
			log(LOG_DEBUG, "%s: pool get failed\n", __func__);
			break;
		}

		mtx_enter(&nd6_mtx);
		if (rt->rt_llinfo != NULL) {
			/* we lost the race, another thread has entered it */
			mtx_leave(&nd6_mtx);
			pool_put(&nd6_pool, ln);
			break;
		}
		nd6_inuse++;
		mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
		rt->rt_llinfo = (caddr_t)ln;
		ln->ln_rt = rt;
		rt->rt_flags |= RTF_LLINFO;
		TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
		/* this is required for "ndp" command. - shin */
		if (req == RTM_ADD) {
			/*
			 * gate should have some valid AF_LINK entry,
			 * and ln expire should have some lifetime
			 * which is specified by ndp command.
			 */
			ln->ln_state = ND6_LLINFO_REACHABLE;
			ln->ln_byhint = 0;
		} else {
			/*
			 * When req == RTM_RESOLVE, rt is created and
			 * initialized in rtrequest(), so rt_expire is 0.
			 */
			ln->ln_state = ND6_LLINFO_NOSTATE;
			nd6_llinfo_settimer(ln, 0);
		}

		/*
		 * If we have too many cache entries, initiate immediate
		 * purging for some "less recently used" entries.  Note that
		 * we cannot directly call nd6_free() here because it would
		 * cause re-entering rtable related routines triggering
		 * lock-order-reversal problems.
		 */
		if (ip6_neighborgcthresh >= 0 &&
		    nd6_inuse >= ip6_neighborgcthresh) {
			int i;

			/* Look at no more than 10 entries from the tail. */
			for (i = 0; i < 10; i++) {
				struct llinfo_nd6 *ln_end;

				ln_end = TAILQ_LAST(&nd6_list, llinfo_nd6_head);
				/* Stop once the new entry is the tail. */
				if (ln_end == ln)
					break;

				/* Move this entry to the head */
				TAILQ_REMOVE(&nd6_list, ln_end, ln_list);
				TAILQ_INSERT_HEAD(&nd6_list, ln_end, ln_list);

				if (ND6_LLINFO_PERMANENT(ln_end))
					continue;

				/*
				 * Expire the entry right away; STALE and
				 * PURGE are both collected by
				 * nd6_llinfo_timer().
				 */
				if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
					ln_end->ln_state = ND6_LLINFO_STALE;
				else
					ln_end->ln_state = ND6_LLINFO_PURGE;
				nd6_llinfo_settimer(ln_end, 0);
			}
		}

		/*
		 * check if rt_key(rt) is one of my address assigned
		 * to the interface.
		 */
		ifa6 = in6ifa_ifpwithaddr(ifp,
		    &satosin6(rt_key(rt))->sin6_addr);
		ifa = ifa6 ? &ifa6->ia_ifa : NULL;
		/* Own and proxied addresses are REACHABLE and never expire. */
		if (ifa != NULL ||
		    (rt->rt_flags & RTF_ANNOUNCE)) {
			ln->ln_state = ND6_LLINFO_REACHABLE;
			ln->ln_byhint = 0;
			rt->rt_expire = 0;
		}
		mtx_leave(&nd6_mtx);

		/* join solicited node multicast for proxy ND */
		if (ifa == NULL &&
		    (rt->rt_flags & RTF_ANNOUNCE) &&
		    (ifp->if_flags & IFF_MULTICAST)) {
			struct in6_addr llsol;
			int error;

			/*
			 * Derive the group from the route key: prefix
			 * ff02, the interface index in the second 16 bit
			 * word, 0:0:1:ffXX:XXXX with the low 24 bits of
			 * the address kept.
			 */
			llsol = satosin6(rt_key(rt))->sin6_addr;
			llsol.s6_addr16[0] = htons(0xff02);
			llsol.s6_addr16[1] = htons(ifp->if_index);
			llsol.s6_addr32[1] = 0;
			llsol.s6_addr32[2] = htonl(1);
			llsol.s6_addr8[12] = 0xff;

			KERNEL_LOCK();
			if (in6_addmulti(&llsol, ifp, &error)) {
				char addr[INET6_ADDRSTRLEN];
				nd6log((LOG_ERR, "%s: failed to join "
				    "%s (errno=%d)\n", ifp->if_xname,
				    inet_ntop(AF_INET6, &llsol,
					addr, sizeof(addr)),
				    error));
			}
			KERNEL_UNLOCK();
		}
		break;

	case RTM_DELETE:
		mtx_enter(&nd6_mtx);
		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
		if (ln == NULL) {
			/* we lost the race, another thread has removed it */
			mtx_leave(&nd6_mtx);
			break;
		}
		/* Unlink under the mutex, release the memory after it. */
		nd6_inuse--;
		TAILQ_REMOVE(&nd6_list, ln, ln_list);
		rt->rt_expire = 0;
		rt->rt_llinfo = NULL;
		rt->rt_flags &= ~RTF_LLINFO;
		atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
		mtx_leave(&nd6_mtx);

		pool_put(&nd6_pool, ln);

		/* leave from solicited node multicast for proxy ND */
		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
		    (ifp->if_flags & IFF_MULTICAST) != 0) {
			struct in6_addr llsol;
			struct in6_multi *in6m;

			/* Same group address as computed on creation. */
			llsol = satosin6(rt_key(rt))->sin6_addr;
			llsol.s6_addr16[0] = htons(0xff02);
			llsol.s6_addr16[1] = htons(ifp->if_index);
			llsol.s6_addr32[1] = 0;
			llsol.s6_addr32[2] = htonl(1);
			llsol.s6_addr8[12] = 0xff;

			KERNEL_LOCK();
			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
			if (in6m)
				in6_delmulti(in6m);
			KERNEL_UNLOCK();
		}
		break;

	case RTM_INVALIDATE:
		/* Local routes keep their entry untouched. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			nd6_invalidate(rt);
		break;
	}
}
963
964int
965nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
966{
967	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
968	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
969	struct rtentry *rt;
970
971	switch (cmd) {
972	case SIOCGIFINFO_IN6:
973		NET_LOCK_SHARED();
974		ndi->ndi = *ifp->if_nd;
975		NET_UNLOCK_SHARED();
976		return (0);
977	case SIOCGNBRINFO_IN6:
978	{
979		struct llinfo_nd6 *ln;
980		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
981		time_t expire;
982
983		NET_LOCK_SHARED();
984		/*
985		 * XXX: KAME specific hack for scoped addresses
986		 *      XXXX: for other scopes than link-local?
987		 */
988		if (IN6_IS_ADDR_LINKLOCAL(&nb_addr) ||
989		    IN6_IS_ADDR_MC_LINKLOCAL(&nb_addr)) {
990			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
991
992			if (*idp == 0)
993				*idp = htons(ifp->if_index);
994		}
995
996		rt = nd6_lookup(&nb_addr, 0, ifp, ifp->if_rdomain);
997		mtx_enter(&nd6_mtx);
998		if (rt == NULL ||
999		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
1000			mtx_leave(&nd6_mtx);
1001			rtfree(rt);
1002			NET_UNLOCK_SHARED();
1003			return (EINVAL);
1004		}
1005		expire = ln->ln_rt->rt_expire;
1006		if (expire != 0) {
1007			expire -= getuptime();
1008			expire += gettime();
1009		}
1010
1011		nbi->state = ln->ln_state;
1012		nbi->asked = ln->ln_asked;
1013		nbi->isrouter = ln->ln_router;
1014		nbi->expire = expire;
1015		mtx_leave(&nd6_mtx);
1016
1017		rtfree(rt);
1018		NET_UNLOCK_SHARED();
1019		return (0);
1020	}
1021	}
1022	return (0);
1023}
1024
1025/*
1026 * Create neighbor cache entry and cache link-layer address,
1027 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
1028 *
1029 * type - ICMP6 type
1030 * code - type dependent information
1031 */
1032void
1033nd6_cache_lladdr(struct ifnet *ifp, const struct in6_addr *from, char *lladdr,
1034    int lladdrlen, int type, int code)
1035{
1036	struct rtentry *rt;
1037	struct llinfo_nd6 *ln;
1038	int is_newentry;
1039	struct sockaddr_dl *sdl;
1040	int do_update;
1041	int olladdr;
1042	int llchange;
1043	int newstate = 0;
1044
1045	NET_ASSERT_LOCKED_EXCLUSIVE();
1046
1047	if (!ifp)
1048		panic("%s: ifp == NULL", __func__);
1049	if (!from)
1050		panic("%s: from == NULL", __func__);
1051
1052	/* nothing must be updated for unspecified address */
1053	if (IN6_IS_ADDR_UNSPECIFIED(from))
1054		return;
1055
1056	/*
1057	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1058	 * the caller.
1059	 *
1060	 * XXX If the link does not have link-layer address, what should
1061	 * we do? (ifp->if_addrlen == 0)
1062	 * Spec says nothing in sections for RA, RS and NA.  There's small
1063	 * description on it in NS section (RFC 2461 7.2.3).
1064	 */
1065
1066	rt = nd6_lookup(from, 0, ifp, ifp->if_rdomain);
1067	if (rt == NULL) {
1068		rt = nd6_lookup(from, 1, ifp, ifp->if_rdomain);
1069		is_newentry = 1;
1070	} else {
1071		/* do not overwrite local or static entry */
1072		if (ISSET(rt->rt_flags, RTF_STATIC|RTF_LOCAL)) {
1073			rtfree(rt);
1074			return;
1075		}
1076		is_newentry = 0;
1077	}
1078
1079	if (!rt)
1080		return;
1081	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1082fail:
1083		nd6_free(rt);
1084		rtfree(rt);
1085		return;
1086	}
1087	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1088	if (ln == NULL)
1089		goto fail;
1090	if (rt->rt_gateway == NULL)
1091		goto fail;
1092	if (rt->rt_gateway->sa_family != AF_LINK)
1093		goto fail;
1094	sdl = satosdl(rt->rt_gateway);
1095
1096	olladdr = (sdl->sdl_alen) ? 1 : 0;
1097	if (olladdr && lladdr) {
1098		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1099			llchange = 1;
1100		else
1101			llchange = 0;
1102	} else
1103		llchange = 0;
1104
1105	/*
1106	 * newentry olladdr  lladdr  llchange	(*=record)
1107	 *	0	n	n	--	(1)
1108	 *	0	y	n	--	(2)
1109	 *	0	n	y	--	(3) * STALE
1110	 *	0	y	y	n	(4) *
1111	 *	0	y	y	y	(5) * STALE
1112	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1113	 *	1	--	y	--	(7) * STALE
1114	 */
1115
1116	if (llchange) {
1117		char addr[INET6_ADDRSTRLEN];
1118		log(LOG_INFO, "ndp info overwritten for %s by %s on %s\n",
1119		    inet_ntop(AF_INET6, from, addr, sizeof(addr)),
1120		    ether_sprintf(lladdr), ifp->if_xname);
1121	}
1122	if (lladdr) {		/* (3-5) and (7) */
1123		/*
1124		 * Record source link-layer address
1125		 * XXX is it dependent to ifp->if_type?
1126		 */
1127		sdl->sdl_alen = ifp->if_addrlen;
1128		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1129	}
1130
1131	if (!is_newentry) {
1132		if ((!olladdr && lladdr) ||		/* (3) */
1133		    (olladdr && lladdr && llchange)) {	/* (5) */
1134			do_update = 1;
1135			newstate = ND6_LLINFO_STALE;
1136		} else					/* (1-2,4) */
1137			do_update = 0;
1138	} else {
1139		do_update = 1;
1140		if (!lladdr)				/* (6) */
1141			newstate = ND6_LLINFO_NOSTATE;
1142		else					/* (7) */
1143			newstate = ND6_LLINFO_STALE;
1144	}
1145
1146	if (do_update) {
1147		/*
1148		 * Update the state of the neighbor cache.
1149		 */
1150		ln->ln_state = newstate;
1151
1152		if (ln->ln_state == ND6_LLINFO_STALE) {
1153			/*
1154			 * Since nd6_resolve() in ifp->if_output() will cause
1155			 * state transition to DELAY and reset the timer,
1156			 * we must set the timer now, although it is actually
1157			 * meaningless.
1158			 */
1159			nd6_llinfo_settimer(ln, nd6_gctimer);
1160			if_output_mq(ifp, &ln->ln_mq, &ln_hold_total,
1161			    rt_key(rt), rt);
1162		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1163			/* probe right away */
1164			nd6_llinfo_settimer(ln, 0);
1165		}
1166	}
1167
1168	/*
1169	 * ICMP6 type dependent behavior.
1170	 *
1171	 * NS: clear IsRouter if new entry
1172	 * RS: clear IsRouter
1173	 * RA: set IsRouter if there's lladdr
1174	 * redir: clear IsRouter if new entry
1175	 *
1176	 * RA case, (1):
1177	 * The spec says that we must set IsRouter in the following cases:
1178	 * - If lladdr exist, set IsRouter.  This means (1-5).
1179	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1180	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1181	 * A question arises for (1) case.  (1) case has no lladdr in the
1182	 * neighbor cache, this is similar to (6).
1183	 * This case is rare but we figured that we MUST NOT set IsRouter.
1184	 *
1185	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1186	 *							D R
1187	 *	0	n	n	--	(1)	c   ?     s
1188	 *	0	y	n	--	(2)	c   s     s
1189	 *	0	n	y	--	(3)	c   s     s
1190	 *	0	y	y	n	(4)	c   s     s
1191	 *	0	y	y	y	(5)	c   s     s
1192	 *	1	--	n	--	(6) c	c	c s
1193	 *	1	--	y	--	(7) c	c   s	c s
1194	 *
1195	 *					(c=clear s=set)
1196	 */
1197	switch (type & 0xff) {
1198	case ND_NEIGHBOR_SOLICIT:
1199		/*
1200		 * New entry must have is_router flag cleared.
1201		 */
1202		if (is_newentry)	/* (6-7) */
1203			ln->ln_router = 0;
1204		break;
1205	case ND_REDIRECT:
1206		/*
1207		 * If the icmp is a redirect to a better router, always set the
1208		 * is_router flag.  Otherwise, if the entry is newly created,
1209		 * clear the flag.  [RFC 2461, sec 8.3]
1210		 */
1211		if (code == ND_REDIRECT_ROUTER)
1212			ln->ln_router = 1;
1213		else if (is_newentry) /* (6-7) */
1214			ln->ln_router = 0;
1215		break;
1216	case ND_ROUTER_SOLICIT:
1217		/*
1218		 * is_router flag must always be cleared.
1219		 */
1220		ln->ln_router = 0;
1221		break;
1222	case ND_ROUTER_ADVERT:
1223		/*
1224		 * Mark an entry with lladdr as a router.
1225		 */
1226		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
1227		    (is_newentry && lladdr)) {			/* (7) */
1228			ln->ln_router = 1;
1229		}
1230		break;
1231	}
1232
1233	rtfree(rt);
1234}
1235
1236void
1237nd6_slowtimo(void *ignored_arg)
1238{
1239	struct nd_ifinfo *nd6if;
1240	struct ifnet *ifp;
1241
1242	NET_LOCK();
1243
1244	timeout_add_sec(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL);
1245
1246	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1247		nd6if = ifp->if_nd;
1248		if ((nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1249			/*
1250			 * Since reachable time rarely changes by router
1251			 * advertisements, we SHOULD insure that a new random
1252			 * value gets recomputed at least once every few hours.
1253			 * (RFC 2461, 6.3.4)
1254			 */
1255			nd6if->recalctm = ND6_RECALC_REACHTM_INTERVAL;
1256			nd6if->reachable = ND_COMPUTE_RTIME(REACHABLE_TIME);
1257		}
1258	}
1259	NET_UNLOCK();
1260}
1261
1262int
1263nd6_resolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
1264    struct sockaddr *dst, u_char *desten)
1265{
1266	struct sockaddr_dl *sdl;
1267	struct rtentry *rt;
1268	struct llinfo_nd6 *ln;
1269	struct in6_addr saddr6;
1270	time_t uptime;
1271	int solicit = 0;
1272
1273	if (m->m_flags & M_MCAST) {
1274		ETHER_MAP_IPV6_MULTICAST(&satosin6(dst)->sin6_addr, desten);
1275		return (0);
1276	}
1277
1278	uptime = getuptime();
1279	rt = rt_getll(rt0);
1280
1281	if (ISSET(rt->rt_flags, RTF_REJECT) &&
1282	    (rt->rt_expire == 0 || rt->rt_expire > uptime)) {
1283		m_freem(m);
1284		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
1285	}
1286
1287	/*
1288	 * Address resolution or Neighbor Unreachability Detection
1289	 * for the next hop.
1290	 * At this point, the destination of the packet must be a unicast
1291	 * or an anycast address(i.e. not a multicast).
1292	 */
1293	if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
1294		char addr[INET6_ADDRSTRLEN];
1295		log(LOG_DEBUG, "%s: %s: route contains no ND information\n",
1296		    __func__, inet_ntop(AF_INET6,
1297		    &satosin6(rt_key(rt))->sin6_addr, addr, sizeof(addr)));
1298		goto bad;
1299	}
1300
1301	if (rt->rt_gateway->sa_family != AF_LINK) {
1302		printf("%s: something odd happens\n", __func__);
1303		goto bad;
1304	}
1305
1306	mtx_enter(&nd6_mtx);
1307	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1308	if (ln == NULL) {
1309		mtx_leave(&nd6_mtx);
1310		goto bad;
1311	}
1312
1313	/*
1314	 * Move this entry to the head of the queue so that it is less likely
1315	 * for this entry to be a target of forced garbage collection (see
1316	 * nd6_rtrequest()).
1317	 */
1318	TAILQ_REMOVE(&nd6_list, ln, ln_list);
1319	TAILQ_INSERT_HEAD(&nd6_list, ln, ln_list);
1320
1321	/*
1322	 * The first time we send a packet to a neighbor whose entry is
1323	 * STALE, we have to change the state to DELAY and set a timer to
1324	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure we do
1325	 * neighbor unreachability detection on expiration.
1326	 * (RFC 2461 7.3.3)
1327	 */
1328	if (ln->ln_state == ND6_LLINFO_STALE) {
1329		ln->ln_asked = 0;
1330		ln->ln_state = ND6_LLINFO_DELAY;
1331		nd6_llinfo_settimer(ln, nd6_delay);
1332	}
1333
1334	/*
1335	 * If the neighbor cache entry has a state other than INCOMPLETE
1336	 * (i.e. its link-layer address is already resolved), just
1337	 * send the packet.
1338	 */
1339	if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
1340		mtx_leave(&nd6_mtx);
1341
1342		sdl = satosdl(rt->rt_gateway);
1343		if (sdl->sdl_alen != ETHER_ADDR_LEN) {
1344			char addr[INET6_ADDRSTRLEN];
1345			log(LOG_DEBUG, "%s: %s: incorrect nd6 information\n",
1346			    __func__,
1347			    inet_ntop(AF_INET6, &satosin6(dst)->sin6_addr,
1348				addr, sizeof(addr)));
1349			goto bad;
1350		}
1351
1352		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
1353		return (0);
1354	}
1355
1356	/*
1357	 * There is a neighbor cache entry, but no ethernet address
1358	 * response yet.  Insert mbuf in hold queue if below limit.
1359	 * If above the limit free the queue without queuing the new packet.
1360	 */
1361	if (ln->ln_state == ND6_LLINFO_NOSTATE)
1362		ln->ln_state = ND6_LLINFO_INCOMPLETE;
1363	/* source address of prompting packet is needed by nd6_ns_output() */
1364	if (m->m_len >= sizeof(struct ip6_hdr)) {
1365		memcpy(&ln->ln_saddr6, &mtod(m, struct ip6_hdr *)->ip6_src,
1366		    sizeof(ln->ln_saddr6));
1367	}
1368	if (atomic_inc_int_nv(&ln_hold_total) <= LN_HOLD_TOTAL) {
1369		if (mq_push(&ln->ln_mq, m) != 0)
1370			atomic_dec_int(&ln_hold_total);
1371	} else {
1372		atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq) + 1);
1373		m_freem(m);
1374	}
1375
1376	/*
1377	 * If there has been no NS for the neighbor after entering the
1378	 * INCOMPLETE state, send the first solicitation.
1379	 */
1380	if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
1381		ln->ln_asked++;
1382		nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000);
1383		saddr6 = ln->ln_saddr6;
1384		solicit = 1;
1385	}
1386	mtx_leave(&nd6_mtx);
1387
1388	if (solicit)
1389		nd6_ns_output(ifp, NULL, &satosin6(dst)->sin6_addr, &saddr6, 0);
1390	return (EAGAIN);
1391
1392bad:
1393	m_freem(m);
1394	return (EINVAL);
1395}
1396
1397int
1398nd6_need_cache(struct ifnet *ifp)
1399{
1400	/*
1401	 * RFC2893 says:
1402	 * - unidirectional tunnels needs no ND
1403	 */
1404	switch (ifp->if_type) {
1405	case IFT_ETHER:
1406	case IFT_IEEE80211:
1407	case IFT_CARP:
1408		return (1);
1409	default:
1410		return (0);
1411	}
1412}
1413