1/*	$NetBSD: nd6.c,v 1.141.8.2 2013/12/17 20:47:49 bouyer Exp $	*/
2/*	$KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.141.8.2 2013/12/17 20:47:49 bouyer Exp $");
35
36#include "opt_ipsec.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/callout.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/sockio.h>
46#include <sys/time.h>
47#include <sys/kernel.h>
48#include <sys/protosw.h>
49#include <sys/errno.h>
50#include <sys/ioctl.h>
51#include <sys/syslog.h>
52#include <sys/queue.h>
53#include <sys/cprng.h>
54
55#include <net/if.h>
56#include <net/if_dl.h>
57#include <net/if_types.h>
58#include <net/route.h>
59#include <net/if_ether.h>
60#include <net/if_fddi.h>
61#include <net/if_arc.h>
62
63#include <netinet/in.h>
64#include <netinet6/in6_var.h>
65#include <netinet/ip6.h>
66#include <netinet6/ip6_var.h>
67#include <netinet6/scope6_var.h>
68#include <netinet6/nd6.h>
69#include <netinet/icmp6.h>
70#include <netinet6/icmp6_private.h>
71
72#ifdef KAME_IPSEC
73#include <netinet6/ipsec.h>
74#endif
75
76#include <net/net_osdep.h>
77
/* Delay, in seconds, used by nd6_init() when arming the nd6_slowtimo callout. */
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
/* Initial value of nd6_recalc_reachtm_interval below. */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

/* timer values */
int	nd6_prune	= 1;	/* nd6_timer() re-arms itself every nd6_prune seconds */
int	nd6_delay	= 5;	/* delay before the first probe: 5 seconds */
int	nd6_umaxtries	= 3;	/* maximum unicast queries (PROBE state) */
int	nd6_mmaxtries	= 3;	/* maximum multicast queries (INCOMPLETE state) */
int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer (seconds) */

/*
 * Prevent too many loops in ND option parsing: nd6_options() stops
 * iterating once it has gone round more than this many times.
 */
int nd6_maxndopt = 10;	/* max # of ND options allowed */

/*
 * nd6_nud_hint() ignores a non-forced hint once the entry's hint counter
 * exceeds this value; with 0, every non-forced hint is ignored.
 */
int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */

int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */

/* Debug logging is on by default only when built with ND6_DEBUG. */
#ifdef ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/* for debugging? */
static int nd6_inuse, nd6_allocated;

/*
 * Sentinel of the circular, doubly linked list of neighbor cache entries.
 * An empty list is the sentinel pointing at itself in both directions;
 * see LN_INSERTHEAD()/LN_DEQUEUE().
 */
struct llinfo_nd6 llinfo_nd6 = {
	.ln_prev = &llinfo_nd6,
	.ln_next = &llinfo_nd6,
};
/* Default router list; initialised with TAILQ_INIT() in nd6_init(). */
struct nd_drhead nd_defrouter;
/* Prefix list, walked with the LIST_* macros. */
struct nd_prhead nd_prefix = { 0 };

int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
/*
 * AF_INET6 sockaddr whose address is all ones.  nd6_lookup() passes it to
 * rtrequest() when it adds a host route for a neighbor (presumably as the
 * netmask argument -- confirm against the rtrequest() prototype).
 */
static const struct sockaddr_in6 all1_sa = {
	  .sin6_family = AF_INET6
	, .sin6_len = sizeof(struct sockaddr_in6)
	, .sin6_addr = {.s6_addr = {0xff, 0xff, 0xff, 0xff,
				    0xff, 0xff, 0xff, 0xff,
				    0xff, 0xff, 0xff, 0xff,
				    0xff, 0xff, 0xff, 0xff}}
};

/* Forward declarations of file-local helpers. */
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
static struct llinfo_nd6 *nd6_free(struct rtentry *, int);
static void nd6_llinfo_timer(void *);
static void clear_llinfo_pqueue(struct llinfo_nd6 *);

/* Callouts for nd6_slowtimo() and nd6_timer(); both are set up in nd6_init(). */
callout_t nd6_slowtimo_ch;
callout_t nd6_timer_ch;
extern callout_t in6_tmpaddrtimer_ch;

static int fill_drlist(void *, size_t *, size_t);
static int fill_prlist(void *, size_t *, size_t);

/* malloc(9) type used for the ND allocations made in this file. */
MALLOC_DEFINE(M_IP6NDP, "NDP", "IPv6 Neighbour Discovery");
137
/*
 * Unlink (ln) from the circular neighbor cache list by joining its two
 * neighbours.  (ln)'s own ln_next/ln_prev pointers are left untouched.
 */
#define LN_DEQUEUE(ln) do { \
	(ln)->ln_next->ln_prev = (ln)->ln_prev; \
	(ln)->ln_prev->ln_next = (ln)->ln_next; \
	} while (/*CONSTCOND*/0)
/*
 * Insert (ln) immediately after the llinfo_nd6 sentinel, i.e. at the head
 * of the list.  The old first entry is captured in (ln)->ln_next before the
 * sentinel is repointed, so the statement order matters.
 */
#define LN_INSERTHEAD(ln) do { \
	(ln)->ln_next = llinfo_nd6.ln_next; \
	llinfo_nd6.ln_next = (ln); \
	(ln)->ln_prev = &llinfo_nd6; \
	(ln)->ln_next->ln_prev = (ln); \
	} while (/*CONSTCOND*/0)
148
149void
150nd6_init(void)
151{
152	static int nd6_init_done = 0;
153
154	if (nd6_init_done) {
155		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
156		return;
157	}
158
159	/* initialization of the default router list */
160	TAILQ_INIT(&nd_defrouter);
161
162	nd6_init_done = 1;
163
164	callout_init(&nd6_slowtimo_ch, CALLOUT_MPSAFE);
165	callout_init(&nd6_timer_ch, CALLOUT_MPSAFE);
166
167	/* start timer */
168	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
169	    nd6_slowtimo, NULL);
170}
171
172struct nd_ifinfo *
173nd6_ifattach(struct ifnet *ifp)
174{
175	struct nd_ifinfo *nd;
176
177	nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
178
179	nd->initialized = 1;
180
181	nd->chlim = IPV6_DEFHLIM;
182	nd->basereachable = REACHABLE_TIME;
183	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
184	nd->retrans = RETRANS_TIMER;
185	/*
186	 * Note that the default value of ip6_accept_rtadv is 0.
187	 * Because we do not set ND6_IFF_OVERRIDE_RTADV here, we won't
188	 * accept RAs by default.
189	 */
190	nd->flags = ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV;
191
192	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
193	nd6_setmtu0(ifp, nd);
194
195	return nd;
196}
197
198void
199nd6_ifdetach(struct nd_ifinfo *nd)
200{
201
202	free(nd, M_IP6NDP);
203}
204
/*
 * Recompute the ND link MTU of ifp, using the ND state already attached
 * to the interface.
 */
void
nd6_setmtu(struct ifnet *ifp)
{
	struct nd_ifinfo *ndi = ND_IFINFO(ifp);

	nd6_setmtu0(ifp, ndi);
}
210
211void
212nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
213{
214	u_int32_t omaxmtu;
215
216	omaxmtu = ndi->maxmtu;
217
218	switch (ifp->if_type) {
219	case IFT_ARCNET:
220		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
221		break;
222	case IFT_FDDI:
223		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
224		break;
225	default:
226		ndi->maxmtu = ifp->if_mtu;
227		break;
228	}
229
230	/*
231	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
232	 * undesirable situation.  We thus notify the operator of the change
233	 * explicitly.  The check for omaxmtu is necessary to restrict the
234	 * log to the case of changing the MTU, not initializing it.
235	 */
236	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
237		log(LOG_NOTICE, "nd6_setmtu0: new link MTU on %s (%lu) is too"
238		    " small for IPv6 which needs %lu\n",
239		    if_name(ifp), (unsigned long)ndi->maxmtu, (unsigned long)
240		    IPV6_MMTU);
241	}
242
243	if (ndi->maxmtu > in6_maxmtu)
244		in6_setmaxmtu(); /* check all interfaces just in case */
245}
246
247void
248nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
249{
250
251	memset(ndopts, 0, sizeof(*ndopts));
252	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
253	ndopts->nd_opts_last
254		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
255
256	if (icmp6len == 0) {
257		ndopts->nd_opts_done = 1;
258		ndopts->nd_opts_search = NULL;
259	}
260}
261
262/*
263 * Take one ND option.
264 */
265struct nd_opt_hdr *
266nd6_option(union nd_opts *ndopts)
267{
268	struct nd_opt_hdr *nd_opt;
269	int olen;
270
271	if (ndopts == NULL)
272		panic("ndopts == NULL in nd6_option");
273	if (ndopts->nd_opts_last == NULL)
274		panic("uninitialized ndopts in nd6_option");
275	if (ndopts->nd_opts_search == NULL)
276		return NULL;
277	if (ndopts->nd_opts_done)
278		return NULL;
279
280	nd_opt = ndopts->nd_opts_search;
281
282	/* make sure nd_opt_len is inside the buffer */
283	if ((void *)&nd_opt->nd_opt_len >= (void *)ndopts->nd_opts_last) {
284		memset(ndopts, 0, sizeof(*ndopts));
285		return NULL;
286	}
287
288	olen = nd_opt->nd_opt_len << 3;
289	if (olen == 0) {
290		/*
291		 * Message validation requires that all included
292		 * options have a length that is greater than zero.
293		 */
294		memset(ndopts, 0, sizeof(*ndopts));
295		return NULL;
296	}
297
298	ndopts->nd_opts_search = (struct nd_opt_hdr *)((char *)nd_opt + olen);
299	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
300		/* option overruns the end of buffer, invalid */
301		memset(ndopts, 0, sizeof(*ndopts));
302		return NULL;
303	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
304		/* reached the end of options chain */
305		ndopts->nd_opts_done = 1;
306		ndopts->nd_opts_search = NULL;
307	}
308	return nd_opt;
309}
310
311/*
312 * Parse multiple ND options.
313 * This function is much easier to use, for ND routines that do not need
314 * multiple options of the same type.
315 */
316int
317nd6_options(union nd_opts *ndopts)
318{
319	struct nd_opt_hdr *nd_opt;
320	int i = 0;
321
322	if (ndopts == NULL)
323		panic("ndopts == NULL in nd6_options");
324	if (ndopts->nd_opts_last == NULL)
325		panic("uninitialized ndopts in nd6_options");
326	if (ndopts->nd_opts_search == NULL)
327		return 0;
328
329	while (1) {
330		nd_opt = nd6_option(ndopts);
331		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
332			/*
333			 * Message validation requires that all included
334			 * options have a length that is greater than zero.
335			 */
336			ICMP6_STATINC(ICMP6_STAT_ND_BADOPT);
337			memset(ndopts, 0, sizeof(*ndopts));
338			return -1;
339		}
340
341		if (nd_opt == NULL)
342			goto skip1;
343
344		switch (nd_opt->nd_opt_type) {
345		case ND_OPT_SOURCE_LINKADDR:
346		case ND_OPT_TARGET_LINKADDR:
347		case ND_OPT_MTU:
348		case ND_OPT_REDIRECTED_HEADER:
349			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
350				nd6log((LOG_INFO,
351				    "duplicated ND6 option found (type=%d)\n",
352				    nd_opt->nd_opt_type));
353				/* XXX bark? */
354			} else {
355				ndopts->nd_opt_array[nd_opt->nd_opt_type]
356					= nd_opt;
357			}
358			break;
359		case ND_OPT_PREFIX_INFORMATION:
360			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
361				ndopts->nd_opt_array[nd_opt->nd_opt_type]
362					= nd_opt;
363			}
364			ndopts->nd_opts_pi_end =
365				(struct nd_opt_prefix_info *)nd_opt;
366			break;
367		default:
368			/*
369			 * Unknown options must be silently ignored,
370			 * to accommodate future extension to the protocol.
371			 */
372			nd6log((LOG_DEBUG,
373			    "nd6_options: unsupported option %d - "
374			    "option ignored\n", nd_opt->nd_opt_type));
375		}
376
377skip1:
378		i++;
379		if (i > nd6_maxndopt) {
380			ICMP6_STATINC(ICMP6_STAT_ND_TOOMANYOPT);
381			nd6log((LOG_INFO, "too many loop in nd opt\n"));
382			break;
383		}
384
385		if (ndopts->nd_opts_done)
386			break;
387	}
388
389	return 0;
390}
391
392/*
393 * ND6 timer routine to handle ND6 entries
394 */
395void
396nd6_llinfo_settimer(struct llinfo_nd6 *ln, long xtick)
397{
398	int s;
399
400	s = splsoftnet();
401
402	if (xtick < 0) {
403		ln->ln_expire = 0;
404		ln->ln_ntick = 0;
405		callout_stop(&ln->ln_timer_ch);
406	} else {
407		ln->ln_expire = time_second + xtick / hz;
408		if (xtick > INT_MAX) {
409			ln->ln_ntick = xtick - INT_MAX;
410			callout_reset(&ln->ln_timer_ch, INT_MAX,
411			    nd6_llinfo_timer, ln);
412		} else {
413			ln->ln_ntick = 0;
414			callout_reset(&ln->ln_timer_ch, xtick,
415			    nd6_llinfo_timer, ln);
416		}
417	}
418
419	splx(s);
420}
421
/*
 * Callout handler for the per-entry timer armed by nd6_llinfo_settimer().
 * arg is the struct llinfo_nd6 whose timer fired.
 *
 * Runs with softnet_lock and the kernel lock held.  If part of a long
 * timeout is still outstanding (ln_ntick > 0) the timer is simply re-armed
 * for the remainder.  Otherwise the entry's state decides what happens:
 *   INCOMPLETE   - send another neighbor solicitation, or after
 *                  nd6_mmaxtries attempts report the first held packet as
 *                  unreachable, drop the rest and free the entry;
 *   REACHABLE    - demote a non-permanent entry to STALE and start the
 *                  garbage collection timer;
 *   PURGE, STALE - garbage collect a non-permanent entry;
 *   DELAY        - move to PROBE and send a solicitation when NUD is enabled
 *                  on the interface, else fall back to STALE;
 *   PROBE        - send another solicitation, or free the entry after
 *                  nd6_umaxtries attempts.
 */
static void
nd6_llinfo_timer(void *arg)
{
	struct llinfo_nd6 *ln;
	struct rtentry *rt;
	const struct sockaddr_in6 *dst;
	struct ifnet *ifp;
	struct nd_ifinfo *ndi = NULL;

	mutex_enter(softnet_lock);
	KERNEL_LOCK(1, NULL);

	ln = (struct llinfo_nd6 *)arg;

	/*
	 * nd6_llinfo_settimer() schedules at most INT_MAX ticks at a time;
	 * schedule what is left of the requested timeout and come back later.
	 */
	if (ln->ln_ntick > 0) {
		nd6_llinfo_settimer(ln, ln->ln_ntick);
		KERNEL_UNLOCK_ONE(NULL);
		mutex_exit(softnet_lock);
		return;
	}

	if ((rt = ln->ln_rt) == NULL)
		panic("ln->ln_rt == NULL");
	if ((ifp = rt->rt_ifp) == NULL)
		panic("ln->ln_rt->rt_ifp == NULL");
	ndi = ND_IFINFO(ifp);
	dst = satocsin6(rt_getkey(rt));

	/* sanity check */
	if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
		panic("rt_llinfo(%p) is not equal to ln(%p)",
		      rt->rt_llinfo, ln);
	if (!dst)
		panic("dst=0 in nd6_timer(ln=%p)", ln);

	switch (ln->ln_state) {
	case ND6_LLINFO_INCOMPLETE:
		if (ln->ln_asked < nd6_mmaxtries) {
			/* retransmit; ndi->retrans is in milliseconds */
			ln->ln_asked++;
			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
		} else {
			struct mbuf *m = ln->ln_hold;
			if (m) {
				struct mbuf *m0;

				/*
				 * assuming every packet in ln_hold has
				 * the same IP header
				 */
				/* detach the first held packet for the error */
				m0 = m->m_nextpkt;
				m->m_nextpkt = NULL;
				icmp6_error2(m, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_ADDR, 0, rt->rt_ifp);

				/* the remaining held packets are discarded */
				ln->ln_hold = m0;
				clear_llinfo_pqueue(ln);
 			}
			/*
			 * nd6_free() deletes the route; ln is cleared here,
			 * presumably because the entry is gone with it.
			 */
			(void)nd6_free(rt, 0);
			ln = NULL;
		}
		break;
	case ND6_LLINFO_REACHABLE:
		if (!ND6_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND6_LLINFO_STALE;
			nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz);
		}
		break;
	case ND6_LLINFO_PURGE:
	case ND6_LLINFO_STALE:
		/* Garbage Collection(RFC 2461 5.3) */
		if (!ND6_LLINFO_PERMANENT(ln)) {
			/* gc=1: nd6_free() may keep an active default router */
			(void)nd6_free(rt, 1);
			ln = NULL;
		}
		break;

	case ND6_LLINFO_DELAY:
		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
			/* We need NUD */
			ln->ln_asked = 1;
			ln->ln_state = ND6_LLINFO_PROBE;
			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
			nd6_ns_output(ifp, &dst->sin6_addr,
			    &dst->sin6_addr, ln, 0);
		} else {
			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
			nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz);
		}
		break;
	case ND6_LLINFO_PROBE:
		if (ln->ln_asked < nd6_umaxtries) {
			ln->ln_asked++;
			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
			nd6_ns_output(ifp, &dst->sin6_addr,
			    &dst->sin6_addr, ln, 0);
		} else {
			/* no answer to any probe: give the entry up */
			(void)nd6_free(rt, 0);
			ln = NULL;
		}
		break;
	}

	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}
528
/*
 * ND6 timer routine to expire the default router list, interface addresses
 * and the prefix list.
 *
 * The routine first re-arms its own callout to fire again in nd6_prune
 * seconds, then does the expiry work with softnet_lock and the kernel lock
 * held.  The argument is unused.
 */
void
nd6_timer(void *ignored_arg)
{
	struct nd_defrouter *next_dr, *dr;
	struct nd_prefix *next_pr, *pr;
	struct in6_ifaddr *ia6, *nia6;

	callout_reset(&nd6_timer_ch, nd6_prune * hz,
	    nd6_timer, NULL);

	mutex_enter(softnet_lock);
	KERNEL_LOCK(1, NULL);

	/* expire default router list */

	/* an expire value of 0 means the router never expires here */
	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, next_dr) {
		if (dr->expire && dr->expire < time_second) {
			defrtrlist_del(dr);
		}
	}

	/*
	 * expire interface addresses.
	 * in the past the loop was inside prefix expiry processing.
	 * However, from a stricter spec-conformance standpoint, we should
	 * rather separate address lifetimes and prefix lifetimes.
	 */
  addrloop:
	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
		/* fetch the successor first: ia6 may be purged below */
		nia6 = ia6->ia_next;
		/* check address lifetime */
		if (IFA6_IS_INVALID(ia6)) {
			int regen = 0;

			/*
			 * If the expiring address is temporary, try
			 * regenerating a new one.  This would be useful when
			 * we suspended a laptop PC, then turned it on after a
			 * period that could invalidate all temporary
			 * addresses.  Although we may have to restart the
			 * loop (see below), it must be after purging the
			 * address.  Otherwise, we'd see an infinite loop of
			 * regeneration.
			 */
			if (ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
				if (regen_tmpaddr(ia6) == 0)
					regen = 1;
			}

 			in6_purgeaddr(&ia6->ia_ifa);

			if (regen)
				goto addrloop; /* XXX: see below */
		} else if (IFA6_IS_DEPRECATED(ia6)) {
			/* remember whether the flag was already set */
			int oldflags = ia6->ia6_flags;

 			ia6->ia6_flags |= IN6_IFF_DEPRECATED;

			/*
			 * If a temporary address has just become deprecated,
			 * regenerate a new one if possible.
			 */
			if (ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (oldflags & IN6_IFF_DEPRECATED) == 0) {

				if (regen_tmpaddr(ia6) == 0) {
					/*
					 * A new temporary address is
					 * generated.
					 * XXX: this means the address chain
					 * has changed while we are still in
					 * the loop.  Although the change
					 * would not cause disaster (because
					 * it's not a deletion, but an
					 * addition,) we'd rather restart the
					 * loop just for safety.  Or does this
					 * significantly reduce performance??
					 */
					goto addrloop;
				}
			}
		} else {
			/*
			 * A new RA might have made a deprecated address
			 * preferred.
			 */
			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
		}
	}

	/* expire prefix list */
	LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, next_pr) {
		/*
		 * check prefix lifetime.
		 * since pltime is just for autoconf, pltime processing for
		 * prefix is not necessary.
		 */
		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
		    time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {

			/*
			 * address expiration and prefix expiration are
			 * separate.  NEVER perform in6_purgeaddr here.
			 */

			prelist_remove(pr);
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}
646
647/* ia6: deprecated/invalidated temporary address */
648static int
649regen_tmpaddr(struct in6_ifaddr *ia6)
650{
651	struct ifaddr *ifa;
652	struct ifnet *ifp;
653	struct in6_ifaddr *public_ifa6 = NULL;
654
655	ifp = ia6->ia_ifa.ifa_ifp;
656	IFADDR_FOREACH(ifa, ifp) {
657		struct in6_ifaddr *it6;
658
659		if (ifa->ifa_addr->sa_family != AF_INET6)
660			continue;
661
662		it6 = (struct in6_ifaddr *)ifa;
663
664		/* ignore no autoconf addresses. */
665		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
666			continue;
667
668		/* ignore autoconf addresses with different prefixes. */
669		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
670			continue;
671
672		/*
673		 * Now we are looking at an autoconf address with the same
674		 * prefix as ours.  If the address is temporary and is still
675		 * preferred, do not create another one.  It would be rare, but
676		 * could happen, for example, when we resume a laptop PC after
677		 * a long period.
678		 */
679		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
680		    !IFA6_IS_DEPRECATED(it6)) {
681			public_ifa6 = NULL;
682			break;
683		}
684
685		/*
686		 * This is a public autoconf address that has the same prefix
687		 * as ours.  If it is preferred, keep it.  We can't break the
688		 * loop here, because there may be a still-preferred temporary
689		 * address with the prefix.
690		 */
691		if (!IFA6_IS_DEPRECATED(it6))
692		    public_ifa6 = it6;
693	}
694
695	if (public_ifa6 != NULL) {
696		int e;
697
698		/*
699		 * Random factor is introduced in the preferred lifetime, so
700		 * we do not need additional delay (3rd arg to in6_tmpifadd).
701		 */
702		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
703			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
704			    " tmp addr, errno=%d\n", e);
705			return -1;
706		}
707		return 0;
708	}
709
710	return -1;
711}
712
713bool
714nd6_accepts_rtadv(const struct nd_ifinfo *ndi)
715{
716	switch (ndi->flags & (ND6_IFF_ACCEPT_RTADV|ND6_IFF_OVERRIDE_RTADV)) {
717	case ND6_IFF_OVERRIDE_RTADV|ND6_IFF_ACCEPT_RTADV:
718		return true;
719	case ND6_IFF_ACCEPT_RTADV:
720		return ip6_accept_rtadv != 0;
721	case ND6_IFF_OVERRIDE_RTADV:
722	case 0:
723	default:
724		return false;
725	}
726}
727
728/*
729 * Nuke neighbor cache/prefix/default router management table, right before
730 * ifp goes away.
731 */
732void
733nd6_purge(struct ifnet *ifp)
734{
735	struct llinfo_nd6 *ln, *nln;
736	struct nd_defrouter *dr, *ndr;
737	struct nd_prefix *pr, *npr;
738
739	/*
740	 * Nuke default router list entries toward ifp.
741	 * We defer removal of default router list entries that is installed
742	 * in the routing table, in order to keep additional side effects as
743	 * small as possible.
744	 */
745	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
746		if (dr->installed)
747			continue;
748
749		if (dr->ifp == ifp)
750			defrtrlist_del(dr);
751	}
752
753	TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
754		if (!dr->installed)
755			continue;
756
757		if (dr->ifp == ifp)
758			defrtrlist_del(dr);
759	}
760
761	/* Nuke prefix list entries toward ifp */
762	LIST_FOREACH_SAFE(pr, &nd_prefix, ndpr_entry, npr) {
763		if (pr->ndpr_ifp == ifp) {
764			/*
765			 * Because if_detach() does *not* release prefixes
766			 * while purging addresses the reference count will
767			 * still be above zero. We therefore reset it to
768			 * make sure that the prefix really gets purged.
769			 */
770			pr->ndpr_refcnt = 0;
771			/*
772			 * Previously, pr->ndpr_addr is removed as well,
773			 * but I strongly believe we don't have to do it.
774			 * nd6_purge() is only called from in6_ifdetach(),
775			 * which removes all the associated interface addresses
776			 * by itself.
777			 * (jinmei@kame.net 20010129)
778			 */
779			prelist_remove(pr);
780		}
781	}
782
783	/* cancel default outgoing interface setting */
784	if (nd6_defifindex == ifp->if_index)
785		nd6_setdefaultiface(0);
786
787	/* XXX: too restrictive? */
788	if (!ip6_forwarding && ifp->if_afdata[AF_INET6]) {
789		struct nd_ifinfo *ndi = ND_IFINFO(ifp);
790		if (ndi && nd6_accepts_rtadv(ndi)) {
791			/* refresh default router list */
792			defrouter_select();
793		}
794	}
795
796	/*
797	 * Nuke neighbor cache entries for the ifp.
798	 * Note that rt->rt_ifp may not be the same as ifp,
799	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
800	 * nd6_rtrequest(), and ip6_input().
801	 */
802	ln = llinfo_nd6.ln_next;
803	while (ln != NULL && ln != &llinfo_nd6) {
804		struct rtentry *rt;
805		const struct sockaddr_dl *sdl;
806
807		nln = ln->ln_next;
808		rt = ln->ln_rt;
809		if (rt && rt->rt_gateway &&
810		    rt->rt_gateway->sa_family == AF_LINK) {
811			sdl = satocsdl(rt->rt_gateway);
812			if (sdl->sdl_index == ifp->if_index)
813				nln = nd6_free(rt, 0);
814		}
815		ln = nln;
816	}
817}
818
819struct rtentry *
820nd6_lookup(const struct in6_addr *addr6, int create, struct ifnet *ifp)
821{
822	struct rtentry *rt;
823	struct sockaddr_in6 sin6;
824
825	sockaddr_in6_init(&sin6, addr6, 0, 0, 0);
826	rt = rtalloc1((struct sockaddr *)&sin6, create);
827	if (rt != NULL && (rt->rt_flags & RTF_LLINFO) == 0) {
828		/*
829		 * This is the case for the default route.
830		 * If we want to create a neighbor cache for the address, we
831		 * should free the route for the destination and allocate an
832		 * interface route.
833		 */
834		if (create) {
835			RTFREE(rt);
836			rt = NULL;
837		}
838	}
839	if (rt != NULL)
840		;
841	else if (create && ifp) {
842		int e;
843
844		/*
845		 * If no route is available and create is set,
846		 * we allocate a host route for the destination
847		 * and treat it like an interface route.
848		 * This hack is necessary for a neighbor which can't
849		 * be covered by our own prefix.
850		 */
851		struct ifaddr *ifa =
852		    ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
853		if (ifa == NULL)
854			return NULL;
855
856		/*
857		 * Create a new route.  RTF_LLINFO is necessary
858		 * to create a Neighbor Cache entry for the
859		 * destination in nd6_rtrequest which will be
860		 * called in rtrequest via ifa->ifa_rtrequest.
861		 */
862		if ((e = rtrequest(RTM_ADD, (const struct sockaddr *)&sin6,
863		    ifa->ifa_addr, (const struct sockaddr *)&all1_sa,
864		    (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) &
865		    ~RTF_CLONING, &rt)) != 0) {
866#if 0
867			log(LOG_ERR,
868			    "nd6_lookup: failed to add route for a "
869			    "neighbor(%s), errno=%d\n",
870			    ip6_sprintf(addr6), e);
871#endif
872			return NULL;
873		}
874		if (rt == NULL)
875			return NULL;
876		if (rt->rt_llinfo) {
877			struct llinfo_nd6 *ln =
878			    (struct llinfo_nd6 *)rt->rt_llinfo;
879			ln->ln_state = ND6_LLINFO_NOSTATE;
880		}
881	} else
882		return NULL;
883	rt->rt_refcnt--;
884	/*
885	 * Validation for the entry.
886	 * Note that the check for rt_llinfo is necessary because a cloned
887	 * route from a parent route that has the L flag (e.g. the default
888	 * route to a p2p interface) may have the flag, too, while the
889	 * destination is not actually a neighbor.
890	 * XXX: we can't use rt->rt_ifp to check for the interface, since
891	 *      it might be the loopback interface if the entry is for our
892	 *      own address on a non-loopback interface. Instead, we should
893	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
894	 *	interface.
895	 * Note also that ifa_ifp and ifp may differ when we connect two
896	 * interfaces to a same link, install a link prefix to an interface,
897	 * and try to install a neighbor cache on an interface that does not
898	 * have a route to the prefix.
899	 */
900	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
901	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
902	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
903		if (create) {
904			nd6log((LOG_DEBUG,
905			    "nd6_lookup: failed to lookup %s (if = %s)\n",
906			    ip6_sprintf(addr6),
907			    ifp ? if_name(ifp) : "unspec"));
908		}
909		return NULL;
910	}
911	return rt;
912}
913
914/*
915 * Detect if a given IPv6 address identifies a neighbor on a given link.
916 * XXX: should take care of the destination of a p2p link?
917 */
918int
919nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
920{
921	struct nd_prefix *pr;
922
923	/*
924	 * A link-local address is always a neighbor.
925	 * XXX: a link does not necessarily specify a single interface.
926	 */
927	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
928		struct sockaddr_in6 sin6_copy;
929		u_int32_t zone;
930
931		/*
932		 * We need sin6_copy since sa6_recoverscope() may modify the
933		 * content (XXX).
934		 */
935		sin6_copy = *addr;
936		if (sa6_recoverscope(&sin6_copy))
937			return 0; /* XXX: should be impossible */
938		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
939			return 0;
940		if (sin6_copy.sin6_scope_id == zone)
941			return 1;
942		else
943			return 0;
944	}
945
946	/*
947	 * If the address matches one of our on-link prefixes, it should be a
948	 * neighbor.
949	 */
950	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
951		if (pr->ndpr_ifp != ifp)
952			continue;
953
954		if (!(pr->ndpr_stateflags & NDPRF_ONLINK))
955			continue;
956
957		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
958		    &addr->sin6_addr, &pr->ndpr_mask))
959			return 1;
960	}
961
962	/*
963	 * If the default router list is empty, all addresses are regarded
964	 * as on-link, and thus, as a neighbor.
965	 * XXX: we restrict the condition to hosts, because routers usually do
966	 * not have the "default router list".
967	 */
968	if (!ip6_forwarding && TAILQ_FIRST(&nd_defrouter) == NULL &&
969	    nd6_defifindex == ifp->if_index) {
970		return 1;
971	}
972
973	/*
974	 * Even if the address matches none of our addresses, it might be
975	 * in the neighbor cache.
976	 */
977	if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL)
978		return 1;
979
980	return 0;
981}
982
/*
 * Free an nd6 llinfo entry.
 * Since the function would cause significant changes in the kernel, DO NOT
 * make it global, unless you have a strong reason for the change, and are sure
 * that the change is safe.
 *
 * rt is the route owning the entry.  A non-zero gc says the caller is
 * merely garbage collecting: in that case a STALE entry that belongs to a
 * default router with a non-zero expiry is kept and only gets its timer
 * re-armed.
 *
 * Returns the entry that followed this one on the neighbor cache list, so
 * that a caller walking the list can continue from there.
 */
static struct llinfo_nd6 *
nd6_free(struct rtentry *rt, int gc)
{
	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
	/* copy of the destination address, taken before the route is deleted */
	struct in6_addr in6 = satocsin6(rt_getkey(rt))->sin6_addr;
	struct nd_defrouter *dr;

	/*
	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
	 * even though it is not harmful, it was not really necessary.
	 */

	/* cancel timer */
	nd6_llinfo_settimer(ln, -1);

	/* the default router handling below applies to hosts only */
	if (!ip6_forwarding) {
		int s;
		s = splsoftnet();
		dr = defrouter_lookup(&satocsin6(rt_getkey(rt))->sin6_addr,
		    rt->rt_ifp);

		if (dr != NULL && dr->expire &&
		    ln->ln_state == ND6_LLINFO_STALE && gc) {
			/*
			 * If the reason for the deletion is just garbage
			 * collection, and the neighbor is an active default
			 * router, do not delete it.  Instead, reset the GC
			 * timer using the router's lifetime.
			 * Simply deleting the entry would affect default
			 * router selection, which is not necessarily a good
			 * thing, especially when we're using router preference
			 * values.
			 * XXX: the check for ln_state would be redundant,
			 *      but we intentionally keep it just in case.
			 */
			if (dr->expire > time_second)
				nd6_llinfo_settimer(ln,
				    (dr->expire - time_second) * hz);
			else
				nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz);
			splx(s);
			/* the entry stays on the list */
			return ln->ln_next;
		}

		if (ln->ln_router || dr) {
			/*
			 * rt6_flush must be called whether or not the neighbor
			 * is in the Default Router List.
			 * See a corresponding comment in nd6_na_input().
			 */
			rt6_flush(&in6, rt->rt_ifp);
		}

		if (dr) {
			/*
			 * Unreachability of a router might affect the default
			 * router selection and on-link detection of advertised
			 * prefixes.
			 */

			/*
			 * Temporarily fake the state to choose a new default
			 * router and to perform on-link determination of
			 * prefixes correctly.
			 * Below the state will be set correctly,
			 * or the entry itself will be deleted.
			 */
			ln->ln_state = ND6_LLINFO_INCOMPLETE;

			/*
			 * Since defrouter_select() does not affect the
			 * on-link determination and MIP6 needs the check
			 * before the default router selection, we perform
			 * the check now.
			 */
			pfxlist_onlink_check();

			/*
			 * refresh default router list
			 */
			defrouter_select();
		}
		splx(s);
	}

	/*
	 * Before deleting the entry, remember the next entry as the
	 * return value.  We need this because pfxlist_onlink_check() above
	 * might have freed other entries (particularly the old next entry) as
	 * a side effect (XXX).
	 */
	next = ln->ln_next;

	/*
	 * Detach the route from the routing tree and the list of neighbor
	 * caches, and disable the route entry not to be used in already
	 * cached routes.
	 */
	rtrequest(RTM_DELETE, rt_getkey(rt), NULL, rt_mask(rt), 0, NULL);

	return next;
}
1091
1092/*
1093 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1094 *
1095 * XXX cost-effective methods?
1096 */
1097void
1098nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
1099{
1100	struct llinfo_nd6 *ln;
1101
1102	/*
1103	 * If the caller specified "rt", use that.  Otherwise, resolve the
1104	 * routing table by supplied "dst6".
1105	 */
1106	if (rt == NULL) {
1107		if (dst6 == NULL)
1108			return;
1109		if ((rt = nd6_lookup(dst6, 0, NULL)) == NULL)
1110			return;
1111	}
1112
1113	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1114	    (rt->rt_flags & RTF_LLINFO) == 0 ||
1115	    !rt->rt_llinfo || !rt->rt_gateway ||
1116	    rt->rt_gateway->sa_family != AF_LINK) {
1117		/* This is not a host route. */
1118		return;
1119	}
1120
1121	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1122	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1123		return;
1124
1125	/*
1126	 * if we get upper-layer reachability confirmation many times,
1127	 * it is possible we have false information.
1128	 */
1129	if (!force) {
1130		ln->ln_byhint++;
1131		if (ln->ln_byhint > nd6_maxnudhint)
1132			return;
1133	}
1134
1135	ln->ln_state = ND6_LLINFO_REACHABLE;
1136	if (!ND6_LLINFO_PERMANENT(ln)) {
1137		nd6_llinfo_settimer(ln,
1138		    (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
1139	}
1140}
1141
1142void
1143nd6_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info)
1144{
1145	struct sockaddr *gate = rt->rt_gateway;
1146	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1147	struct ifnet *ifp = rt->rt_ifp;
1148	uint8_t namelen = strlen(ifp->if_xname), addrlen = ifp->if_addrlen;
1149	struct ifaddr *ifa;
1150
1151	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1152
1153	if (req == RTM_LLINFO_UPD) {
1154		int rc;
1155		struct in6_addr *in6;
1156		struct in6_addr in6_all;
1157		int anycast;
1158
1159		if ((ifa = info->rti_ifa) == NULL)
1160			return;
1161
1162		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1163		anycast = ifatoia6(ifa)->ia6_flags & IN6_IFF_ANYCAST;
1164
1165		in6_all = in6addr_linklocal_allnodes;
1166		if ((rc = in6_setscope(&in6_all, ifa->ifa_ifp, NULL)) != 0) {
1167			log(LOG_ERR, "%s: failed to set scope %s "
1168			    "(errno=%d)\n", __func__, if_name(ifp), rc);
1169			return;
1170		}
1171
1172		/* XXX don't set Override for proxy addresses */
1173		nd6_na_output(ifa->ifa_ifp, &in6_all, in6,
1174		    (anycast ? 0 : ND_NA_FLAG_OVERRIDE)
1175#if 0
1176		    | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0)
1177#endif
1178		    , 1, NULL);
1179		return;
1180	}
1181
1182	if ((rt->rt_flags & RTF_GATEWAY) != 0)
1183		return;
1184
1185	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1186		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1187		/*
1188		 * This is probably an interface direct route for a link
1189		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1190		 * We do not need special treatment below for such a route.
1191		 * Moreover, the RTF_LLINFO flag which would be set below
1192		 * would annoy the ndp(8) command.
1193		 */
1194		return;
1195	}
1196
1197	if (req == RTM_RESOLVE &&
1198	    (nd6_need_cache(ifp) == 0 || /* stf case */
1199	     !nd6_is_addr_neighbor(satocsin6(rt_getkey(rt)), ifp))) {
1200		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1201		/*
1202		 * FreeBSD and BSD/OS often make a cloned host route based
1203		 * on a less-specific route (e.g. the default route).
1204		 * If the less specific route does not have a "gateway"
1205		 * (this is the case when the route just goes to a p2p or an
1206		 * stf interface), we'll mistakenly make a neighbor cache for
1207		 * the host route, and will see strange neighbor solicitation
1208		 * for the corresponding destination.  In order to avoid the
1209		 * confusion, we check if the destination of the route is
1210		 * a neighbor in terms of neighbor discovery, and stop the
1211		 * process if not.  Additionally, we remove the LLINFO flag
1212		 * so that ndp(8) will not try to get the neighbor information
1213		 * of the destination.
1214		 */
1215		rt->rt_flags &= ~RTF_LLINFO;
1216		return;
1217	}
1218
1219	switch (req) {
1220	case RTM_ADD:
1221		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1222		/*
1223		 * There is no backward compatibility :)
1224		 *
1225		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1226		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1227		 *	   rt->rt_flags |= RTF_CLONING;
1228		 */
1229		if ((rt->rt_flags & RTF_CLONING) ||
1230		    ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
1231			union {
1232				struct sockaddr sa;
1233				struct sockaddr_dl sdl;
1234				struct sockaddr_storage ss;
1235			} u;
1236			/*
1237			 * Case 1: This route should come from a route to
1238			 * interface (RTF_CLONING case) or the route should be
1239			 * treated as on-link but is currently not
1240			 * (RTF_LLINFO && ln == NULL case).
1241			 */
1242			if (sockaddr_dl_init(&u.sdl, sizeof(u.ss),
1243			    ifp->if_index, ifp->if_type,
1244			    NULL, namelen, NULL, addrlen) == NULL) {
1245				printf("%s.%d: sockaddr_dl_init(, %zu, ) "
1246				    "failed on %s\n", __func__, __LINE__,
1247				    sizeof(u.ss), if_name(ifp));
1248			}
1249			rt_setgate(rt, &u.sa);
1250			gate = rt->rt_gateway;
1251			RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1252			if (ln != NULL)
1253				nd6_llinfo_settimer(ln, 0);
1254			RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1255			if ((rt->rt_flags & RTF_CLONING) != 0)
1256				break;
1257		}
1258		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1259		/*
1260		 * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
1261		 * We don't do that here since llinfo is not ready yet.
1262		 *
1263		 * There are also couple of other things to be discussed:
1264		 * - unsolicited NA code needs improvement beforehand
1265		 * - RFC2461 says we MAY send multicast unsolicited NA
1266		 *   (7.2.6 paragraph 4), however, it also says that we
1267		 *   SHOULD provide a mechanism to prevent multicast NA storm.
1268		 *   we don't have anything like it right now.
1269		 *   note that the mechanism needs a mutual agreement
1270		 *   between proxies, which means that we need to implement
1271		 *   a new protocol, or a new kludge.
1272		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1273		 *   we need to check ip6forwarding before sending it.
1274		 *   (or should we allow proxy ND configuration only for
1275		 *   routers?  there's no mention about proxy ND from hosts)
1276		 */
1277#if 0
1278		/* XXX it does not work */
1279		if (rt->rt_flags & RTF_ANNOUNCE)
1280			nd6_na_output(ifp,
1281			      &satocsin6(rt_getkey(rt))->sin6_addr,
1282			      &satocsin6(rt_getkey(rt))->sin6_addr,
1283			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
1284			      1, NULL);
1285#endif
1286		/* FALLTHROUGH */
1287	case RTM_RESOLVE:
1288		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1289			RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1290			/*
1291			 * Address resolution isn't necessary for a point to
1292			 * point link, so we can skip this test for a p2p link.
1293			 */
1294			if (gate->sa_family != AF_LINK ||
1295			    gate->sa_len <
1296			    sockaddr_dl_measure(namelen, addrlen)) {
1297				log(LOG_DEBUG,
1298				    "nd6_rtrequest: bad gateway value: %s\n",
1299				    if_name(ifp));
1300				break;
1301			}
1302			satosdl(gate)->sdl_type = ifp->if_type;
1303			satosdl(gate)->sdl_index = ifp->if_index;
1304			RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1305		}
1306		if (ln != NULL)
1307			break;	/* This happens on a route change */
1308		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1309		/*
1310		 * Case 2: This route may come from cloning, or a manual route
1311		 * add with a LL address.
1312		 */
1313		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
1314		rt->rt_llinfo = ln;
1315		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1316		if (ln == NULL) {
1317			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1318			break;
1319		}
1320		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1321		nd6_inuse++;
1322		nd6_allocated++;
1323		memset(ln, 0, sizeof(*ln));
1324		ln->ln_rt = rt;
1325		callout_init(&ln->ln_timer_ch, CALLOUT_MPSAFE);
1326		/* this is required for "ndp" command. - shin */
1327		if (req == RTM_ADD) {
1328		        /*
1329			 * gate should have some valid AF_LINK entry,
1330			 * and ln->ln_expire should have some lifetime
1331			 * which is specified by ndp command.
1332			 */
1333			ln->ln_state = ND6_LLINFO_REACHABLE;
1334			ln->ln_byhint = 0;
1335		} else {
1336		        /*
1337			 * When req == RTM_RESOLVE, rt is created and
1338			 * initialized in rtrequest(), so rt_expire is 0.
1339			 */
1340			ln->ln_state = ND6_LLINFO_NOSTATE;
1341			nd6_llinfo_settimer(ln, 0);
1342		}
1343		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1344		rt->rt_flags |= RTF_LLINFO;
1345		ln->ln_next = llinfo_nd6.ln_next;
1346		llinfo_nd6.ln_next = ln;
1347		ln->ln_prev = &llinfo_nd6;
1348		ln->ln_next->ln_prev = ln;
1349
1350		/*
1351		 * If we have too many cache entries, initiate immediate
1352		 * purging for some "less recently used" entries.  Note that
1353		 * we cannot directly call nd6_free() here because it would
1354		 * cause re-entering rtable related routines triggering an LOR
1355		 * problem for FreeBSD.
1356		 */
1357		if (ip6_neighborgcthresh >= 0 &&
1358		    nd6_inuse >= ip6_neighborgcthresh) {
1359			int i;
1360
1361			for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
1362				struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
1363
1364				/* Move this entry to the head */
1365				LN_DEQUEUE(ln_end);
1366				LN_INSERTHEAD(ln_end);
1367
1368				if (ND6_LLINFO_PERMANENT(ln_end))
1369					continue;
1370
1371				if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
1372					ln_end->ln_state = ND6_LLINFO_STALE;
1373				else
1374					ln_end->ln_state = ND6_LLINFO_PURGE;
1375				nd6_llinfo_settimer(ln_end, 0);
1376			}
1377		}
1378
1379		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1380		/*
1381		 * check if rt_getkey(rt) is an address assigned
1382		 * to the interface.
1383		 */
1384		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
1385		    &satocsin6(rt_getkey(rt))->sin6_addr);
1386		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1387		if (ifa != NULL) {
1388			const void *mac;
1389			nd6_llinfo_settimer(ln, -1);
1390			ln->ln_state = ND6_LLINFO_REACHABLE;
1391			ln->ln_byhint = 0;
1392			if ((mac = nd6_ifptomac(ifp)) != NULL) {
1393				/* XXX check for error */
1394				if (sockaddr_dl_setaddr(satosdl(gate),
1395				    gate->sa_len, mac,
1396				    ifp->if_addrlen) == NULL) {
1397					printf("%s.%d: "
1398					    "sockaddr_dl_setaddr(, %d, ) "
1399					    "failed on %s\n", __func__,
1400					    __LINE__, gate->sa_len,
1401					    if_name(ifp));
1402				}
1403			}
1404			if (nd6_useloopback) {
1405				ifp = rt->rt_ifp = lo0ifp;	/* XXX */
1406				/*
1407				 * Make sure rt_ifa be equal to the ifaddr
1408				 * corresponding to the address.
1409				 * We need this because when we refer
1410				 * rt_ifa->ia6_flags in ip6_input, we assume
1411				 * that the rt_ifa points to the address instead
1412				 * of the loopback address.
1413				 */
1414				if (ifa != rt->rt_ifa)
1415					rt_replace_ifa(rt, ifa);
1416				rt->rt_flags &= ~RTF_CLONED;
1417			}
1418		} else if (rt->rt_flags & RTF_ANNOUNCE) {
1419			nd6_llinfo_settimer(ln, -1);
1420			ln->ln_state = ND6_LLINFO_REACHABLE;
1421			ln->ln_byhint = 0;
1422
1423			/* join solicited node multicast for proxy ND */
1424			if (ifp->if_flags & IFF_MULTICAST) {
1425				struct in6_addr llsol;
1426				int error;
1427
1428				llsol = satocsin6(rt_getkey(rt))->sin6_addr;
1429				llsol.s6_addr32[0] = htonl(0xff020000);
1430				llsol.s6_addr32[1] = 0;
1431				llsol.s6_addr32[2] = htonl(1);
1432				llsol.s6_addr8[12] = 0xff;
1433				if (in6_setscope(&llsol, ifp, NULL))
1434					break;
1435				if (!in6_addmulti(&llsol, ifp, &error, 0)) {
1436					nd6log((LOG_ERR, "%s: failed to join "
1437					    "%s (errno=%d)\n", if_name(ifp),
1438					    ip6_sprintf(&llsol), error));
1439				}
1440			}
1441		}
1442		break;
1443
1444	case RTM_DELETE:
1445		if (ln == NULL)
1446			break;
1447		/* leave from solicited node multicast for proxy ND */
1448		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1449		    (ifp->if_flags & IFF_MULTICAST) != 0) {
1450			struct in6_addr llsol;
1451			struct in6_multi *in6m;
1452
1453			llsol = satocsin6(rt_getkey(rt))->sin6_addr;
1454			llsol.s6_addr32[0] = htonl(0xff020000);
1455			llsol.s6_addr32[1] = 0;
1456			llsol.s6_addr32[2] = htonl(1);
1457			llsol.s6_addr8[12] = 0xff;
1458			if (in6_setscope(&llsol, ifp, NULL) == 0) {
1459				IN6_LOOKUP_MULTI(llsol, ifp, in6m);
1460				if (in6m)
1461					in6_delmulti(in6m);
1462			}
1463		}
1464		nd6_inuse--;
1465		ln->ln_next->ln_prev = ln->ln_prev;
1466		ln->ln_prev->ln_next = ln->ln_next;
1467		ln->ln_prev = NULL;
1468		nd6_llinfo_settimer(ln, -1);
1469		rt->rt_llinfo = 0;
1470		rt->rt_flags &= ~RTF_LLINFO;
1471		clear_llinfo_pqueue(ln);
1472		Free(ln);
1473	}
1474}
1475
1476int
1477nd6_ioctl(u_long cmd, void *data, struct ifnet *ifp)
1478{
1479	struct in6_drlist *drl = (struct in6_drlist *)data;
1480	struct in6_oprlist *oprl = (struct in6_oprlist *)data;
1481	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1482	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1483	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1484	struct nd_defrouter *dr;
1485	struct nd_prefix *pr;
1486	struct rtentry *rt;
1487	int i = 0, error = 0;
1488	int s;
1489
1490	switch (cmd) {
1491	case SIOCGDRLST_IN6:
1492		/*
1493		 * obsolete API, use sysctl under net.inet6.icmp6
1494		 */
1495		memset(drl, 0, sizeof(*drl));
1496		s = splsoftnet();
1497		TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
1498			if (i >= DRLSTSIZ)
1499				break;
1500			drl->defrouter[i].rtaddr = dr->rtaddr;
1501			in6_clearscope(&drl->defrouter[i].rtaddr);
1502
1503			drl->defrouter[i].flags = dr->flags;
1504			drl->defrouter[i].rtlifetime = dr->rtlifetime;
1505			drl->defrouter[i].expire = dr->expire;
1506			drl->defrouter[i].if_index = dr->ifp->if_index;
1507			i++;
1508		}
1509		splx(s);
1510		break;
1511	case SIOCGPRLST_IN6:
1512		/*
1513		 * obsolete API, use sysctl under net.inet6.icmp6
1514		 *
1515		 * XXX the structure in6_prlist was changed in backward-
1516		 * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
1517		 * in6_prlist is used for nd6_sysctl() - fill_prlist().
1518		 */
1519		/*
1520		 * XXX meaning of fields, especialy "raflags", is very
1521		 * differnet between RA prefix list and RR/static prefix list.
1522		 * how about separating ioctls into two?
1523		 */
1524		memset(oprl, 0, sizeof(*oprl));
1525		s = splsoftnet();
1526		LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1527			struct nd_pfxrouter *pfr;
1528			int j;
1529
1530			if (i >= PRLSTSIZ)
1531				break;
1532			oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
1533			oprl->prefix[i].raflags = pr->ndpr_raf;
1534			oprl->prefix[i].prefixlen = pr->ndpr_plen;
1535			oprl->prefix[i].vltime = pr->ndpr_vltime;
1536			oprl->prefix[i].pltime = pr->ndpr_pltime;
1537			oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
1538			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
1539				oprl->prefix[i].expire = 0;
1540			else {
1541				time_t maxexpire;
1542
1543				/* XXX: we assume time_t is signed. */
1544				maxexpire = (-1) &
1545				    ~((time_t)1 <<
1546				    ((sizeof(maxexpire) * 8) - 1));
1547				if (pr->ndpr_vltime <
1548				    maxexpire - pr->ndpr_lastupdate) {
1549					oprl->prefix[i].expire =
1550						 pr->ndpr_lastupdate +
1551						pr->ndpr_vltime;
1552				} else
1553					oprl->prefix[i].expire = maxexpire;
1554			}
1555
1556			j = 0;
1557			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
1558				if (j < DRLSTSIZ) {
1559#define RTRADDR oprl->prefix[i].advrtr[j]
1560					RTRADDR = pfr->router->rtaddr;
1561					in6_clearscope(&RTRADDR);
1562#undef RTRADDR
1563				}
1564				j++;
1565			}
1566			oprl->prefix[i].advrtrs = j;
1567			oprl->prefix[i].origin = PR_ORIG_RA;
1568
1569			i++;
1570		}
1571		splx(s);
1572
1573		break;
1574	case OSIOCGIFINFO_IN6:
1575#define ND	ndi->ndi
1576		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
1577		memset(&ND, 0, sizeof(ND));
1578		ND.linkmtu = IN6_LINKMTU(ifp);
1579		ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
1580		ND.basereachable = ND_IFINFO(ifp)->basereachable;
1581		ND.reachable = ND_IFINFO(ifp)->reachable;
1582		ND.retrans = ND_IFINFO(ifp)->retrans;
1583		ND.flags = ND_IFINFO(ifp)->flags;
1584		ND.recalctm = ND_IFINFO(ifp)->recalctm;
1585		ND.chlim = ND_IFINFO(ifp)->chlim;
1586		break;
1587	case SIOCGIFINFO_IN6:
1588		ND = *ND_IFINFO(ifp);
1589		break;
1590	case SIOCSIFINFO_IN6:
1591		/*
1592		 * used to change host variables from userland.
1593		 * intented for a use on router to reflect RA configurations.
1594		 */
1595		/* 0 means 'unspecified' */
1596		if (ND.linkmtu != 0) {
1597			if (ND.linkmtu < IPV6_MMTU ||
1598			    ND.linkmtu > IN6_LINKMTU(ifp)) {
1599				error = EINVAL;
1600				break;
1601			}
1602			ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
1603		}
1604
1605		if (ND.basereachable != 0) {
1606			int obasereachable = ND_IFINFO(ifp)->basereachable;
1607
1608			ND_IFINFO(ifp)->basereachable = ND.basereachable;
1609			if (ND.basereachable != obasereachable)
1610				ND_IFINFO(ifp)->reachable =
1611				    ND_COMPUTE_RTIME(ND.basereachable);
1612		}
1613		if (ND.retrans != 0)
1614			ND_IFINFO(ifp)->retrans = ND.retrans;
1615		if (ND.chlim != 0)
1616			ND_IFINFO(ifp)->chlim = ND.chlim;
1617		/* FALLTHROUGH */
1618	case SIOCSIFINFO_FLAGS:
1619		ND_IFINFO(ifp)->flags = ND.flags;
1620		break;
1621#undef ND
1622	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1623		/* sync kernel routing table with the default router list */
1624		defrouter_reset();
1625		defrouter_select();
1626		break;
1627	case SIOCSPFXFLUSH_IN6:
1628	{
1629		/* flush all the prefix advertised by routers */
1630		struct nd_prefix *pfx, *next;
1631
1632		s = splsoftnet();
1633		LIST_FOREACH_SAFE(pfx, &nd_prefix, ndpr_entry, next) {
1634			struct in6_ifaddr *ia, *ia_next;
1635
1636			if (IN6_IS_ADDR_LINKLOCAL(&pfx->ndpr_prefix.sin6_addr))
1637				continue; /* XXX */
1638
1639			/* do we really have to remove addresses as well? */
1640			for (ia = in6_ifaddr; ia; ia = ia_next) {
1641				/* ia might be removed.  keep the next ptr. */
1642				ia_next = ia->ia_next;
1643
1644				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1645					continue;
1646
1647				if (ia->ia6_ndpr == pfx)
1648					in6_purgeaddr(&ia->ia_ifa);
1649			}
1650			prelist_remove(pfx);
1651		}
1652		splx(s);
1653		break;
1654	}
1655	case SIOCSRTRFLUSH_IN6:
1656	{
1657		/* flush all the default routers */
1658		struct nd_defrouter *drtr, *next;
1659
1660		s = splsoftnet();
1661		defrouter_reset();
1662		TAILQ_FOREACH_SAFE(drtr, &nd_defrouter, dr_entry, next) {
1663			defrtrlist_del(drtr);
1664		}
1665		defrouter_select();
1666		splx(s);
1667		break;
1668	}
1669	case SIOCGNBRINFO_IN6:
1670	{
1671		struct llinfo_nd6 *ln;
1672		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1673
1674		if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
1675			return error;
1676
1677		s = splsoftnet();
1678		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL ||
1679		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
1680			error = EINVAL;
1681			splx(s);
1682			break;
1683		}
1684		nbi->state = ln->ln_state;
1685		nbi->asked = ln->ln_asked;
1686		nbi->isrouter = ln->ln_router;
1687		nbi->expire = ln->ln_expire;
1688		splx(s);
1689
1690		break;
1691	}
1692	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1693		ndif->ifindex = nd6_defifindex;
1694		break;
1695	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1696		return nd6_setdefaultiface(ndif->ifindex);
1697	}
1698	return error;
1699}
1700
1701void
1702nd6_llinfo_release_pkts(struct llinfo_nd6 *ln, struct ifnet *ifp,
1703    struct rtentry *rt)
1704{
1705	struct mbuf *m_hold, *m_hold_next;
1706
1707	for (m_hold = ln->ln_hold, ln->ln_hold = NULL;
1708	     m_hold != NULL;
1709	     m_hold = m_hold_next) {
1710		m_hold_next = m_hold->m_nextpkt;
1711		m_hold->m_nextpkt = NULL;
1712
1713		/*
1714		 * we assume ifp is not a p2p here, so
1715		 * just set the 2nd argument as the
1716		 * 1st one.
1717		 */
1718		nd6_output(ifp, ifp, m_hold, satocsin6(rt_getkey(rt)), rt);
1719	}
1720}
1721
1722/*
1723 * Create neighbor cache entry and cache link-layer address,
1724 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
1725 */
1726struct rtentry *
1727nd6_cache_lladdr(
1728    struct ifnet *ifp,
1729    struct in6_addr *from,
1730    char *lladdr,
1731    int lladdrlen,
1732    int type,	/* ICMP6 type */
1733    int code	/* type dependent information */
1734)
1735{
1736	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
1737	struct rtentry *rt = NULL;
1738	struct llinfo_nd6 *ln = NULL;
1739	int is_newentry;
1740	struct sockaddr_dl *sdl = NULL;
1741	int do_update;
1742	int olladdr;
1743	int llchange;
1744	int newstate = 0;
1745
1746	if (ifp == NULL)
1747		panic("ifp == NULL in nd6_cache_lladdr");
1748	if (from == NULL)
1749		panic("from == NULL in nd6_cache_lladdr");
1750
1751	/* nothing must be updated for unspecified address */
1752	if (IN6_IS_ADDR_UNSPECIFIED(from))
1753		return NULL;
1754
1755	/*
1756	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1757	 * the caller.
1758	 *
1759	 * XXX If the link does not have link-layer adderss, what should
1760	 * we do? (ifp->if_addrlen == 0)
1761	 * Spec says nothing in sections for RA, RS and NA.  There's small
1762	 * description on it in NS section (RFC 2461 7.2.3).
1763	 */
1764
1765	rt = nd6_lookup(from, 0, ifp);
1766	if (rt == NULL) {
1767#if 0
1768		/* nothing must be done if there's no lladdr */
1769		if (!lladdr || !lladdrlen)
1770			return NULL;
1771#endif
1772
1773		rt = nd6_lookup(from, 1, ifp);
1774		is_newentry = 1;
1775	} else {
1776		/* do nothing if static ndp is set */
1777		if (rt->rt_flags & RTF_STATIC)
1778			return NULL;
1779		is_newentry = 0;
1780	}
1781
1782	if (rt == NULL)
1783		return NULL;
1784	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1785fail:
1786		(void)nd6_free(rt, 0);
1787		return NULL;
1788	}
1789	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1790	if (ln == NULL)
1791		goto fail;
1792	if (rt->rt_gateway == NULL)
1793		goto fail;
1794	if (rt->rt_gateway->sa_family != AF_LINK)
1795		goto fail;
1796	sdl = satosdl(rt->rt_gateway);
1797
1798	olladdr = (sdl->sdl_alen) ? 1 : 0;
1799	if (olladdr && lladdr) {
1800		if (memcmp(lladdr, CLLADDR(sdl), ifp->if_addrlen))
1801			llchange = 1;
1802		else
1803			llchange = 0;
1804	} else
1805		llchange = 0;
1806
1807	/*
1808	 * newentry olladdr  lladdr  llchange	(*=record)
1809	 *	0	n	n	--	(1)
1810	 *	0	y	n	--	(2)
1811	 *	0	n	y	--	(3) * STALE
1812	 *	0	y	y	n	(4) *
1813	 *	0	y	y	y	(5) * STALE
1814	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1815	 *	1	--	y	--	(7) * STALE
1816	 */
1817
1818	if (lladdr) {		/* (3-5) and (7) */
1819		/*
1820		 * Record source link-layer address
1821		 * XXX is it dependent to ifp->if_type?
1822		 */
1823		/* XXX check for error */
1824		if (sockaddr_dl_setaddr(sdl, sdl->sdl_len, lladdr,
1825		    ifp->if_addrlen) == NULL) {
1826			printf("%s.%d: sockaddr_dl_setaddr(, %d, ) "
1827			    "failed on %s\n", __func__, __LINE__,
1828			    sdl->sdl_len, if_name(ifp));
1829		}
1830	}
1831
1832	if (!is_newentry) {
1833		if ((!olladdr && lladdr) ||		/* (3) */
1834		    (olladdr && lladdr && llchange)) {	/* (5) */
1835			do_update = 1;
1836			newstate = ND6_LLINFO_STALE;
1837		} else					/* (1-2,4) */
1838			do_update = 0;
1839	} else {
1840		do_update = 1;
1841		if (lladdr == NULL)			/* (6) */
1842			newstate = ND6_LLINFO_NOSTATE;
1843		else					/* (7) */
1844			newstate = ND6_LLINFO_STALE;
1845	}
1846
1847	if (do_update) {
1848		/*
1849		 * Update the state of the neighbor cache.
1850		 */
1851		ln->ln_state = newstate;
1852
1853		if (ln->ln_state == ND6_LLINFO_STALE) {
1854			/*
1855			 * XXX: since nd6_output() below will cause
1856			 * state tansition to DELAY and reset the timer,
1857			 * we must set the timer now, although it is actually
1858			 * meaningless.
1859			 */
1860			nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz);
1861
1862			nd6_llinfo_release_pkts(ln, ifp, rt);
1863		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1864			/* probe right away */
1865			nd6_llinfo_settimer((void *)ln, 0);
1866		}
1867	}
1868
1869	/*
1870	 * ICMP6 type dependent behavior.
1871	 *
1872	 * NS: clear IsRouter if new entry
1873	 * RS: clear IsRouter
1874	 * RA: set IsRouter if there's lladdr
1875	 * redir: clear IsRouter if new entry
1876	 *
1877	 * RA case, (1):
1878	 * The spec says that we must set IsRouter in the following cases:
1879	 * - If lladdr exist, set IsRouter.  This means (1-5).
1880	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1881	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1882	 * A quetion arises for (1) case.  (1) case has no lladdr in the
1883	 * neighbor cache, this is similar to (6).
1884	 * This case is rare but we figured that we MUST NOT set IsRouter.
1885	 *
1886	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1887	 *							D R
1888	 *	0	n	n	--	(1)	c   ?     s
1889	 *	0	y	n	--	(2)	c   s     s
1890	 *	0	n	y	--	(3)	c   s     s
1891	 *	0	y	y	n	(4)	c   s     s
1892	 *	0	y	y	y	(5)	c   s     s
1893	 *	1	--	n	--	(6) c	c 	c s
1894	 *	1	--	y	--	(7) c	c   s	c s
1895	 *
1896	 *					(c=clear s=set)
1897	 */
1898	switch (type & 0xff) {
1899	case ND_NEIGHBOR_SOLICIT:
1900		/*
1901		 * New entry must have is_router flag cleared.
1902		 */
1903		if (is_newentry)	/* (6-7) */
1904			ln->ln_router = 0;
1905		break;
1906	case ND_REDIRECT:
1907		/*
1908		 * If the icmp is a redirect to a better router, always set the
1909		 * is_router flag.  Otherwise, if the entry is newly created,
1910		 * clear the flag.  [RFC 2461, sec 8.3]
1911		 */
1912		if (code == ND_REDIRECT_ROUTER)
1913			ln->ln_router = 1;
1914		else if (is_newentry) /* (6-7) */
1915			ln->ln_router = 0;
1916		break;
1917	case ND_ROUTER_SOLICIT:
1918		/*
1919		 * is_router flag must always be cleared.
1920		 */
1921		ln->ln_router = 0;
1922		break;
1923	case ND_ROUTER_ADVERT:
1924		/*
1925		 * Mark an entry with lladdr as a router.
1926		 */
1927		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
1928		    (is_newentry && lladdr)) {			/* (7) */
1929			ln->ln_router = 1;
1930		}
1931		break;
1932	}
1933
1934	/*
1935	 * When the link-layer address of a router changes, select the
1936	 * best router again.  In particular, when the neighbor entry is newly
1937	 * created, it might affect the selection policy.
1938	 * Question: can we restrict the first condition to the "is_newentry"
1939	 * case?
1940	 * XXX: when we hear an RA from a new router with the link-layer
1941	 * address option, defrouter_select() is called twice, since
1942	 * defrtrlist_update called the function as well.  However, I believe
1943	 * we can compromise the overhead, since it only happens the first
1944	 * time.
1945	 * XXX: although defrouter_select() should not have a bad effect
1946	 * for those are not autoconfigured hosts, we explicitly avoid such
1947	 * cases for safety.
1948	 */
1949	if (do_update && ln->ln_router && !ip6_forwarding &&
1950	    nd6_accepts_rtadv(ndi))
1951		defrouter_select();
1952
1953	return rt;
1954}
1955
1956static void
1957nd6_slowtimo(void *ignored_arg)
1958{
1959	struct nd_ifinfo *nd6if;
1960	struct ifnet *ifp;
1961
1962	mutex_enter(softnet_lock);
1963	KERNEL_LOCK(1, NULL);
1964      	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1965	    nd6_slowtimo, NULL);
1966	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1967		nd6if = ND_IFINFO(ifp);
1968		if (nd6if->basereachable && /* already initialized */
1969		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1970			/*
1971			 * Since reachable time rarely changes by router
1972			 * advertisements, we SHOULD insure that a new random
1973			 * value gets recomputed at least once every few hours.
1974			 * (RFC 2461, 6.3.4)
1975			 */
1976			nd6if->recalctm = nd6_recalc_reachtm_interval;
1977			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1978		}
1979	}
1980	KERNEL_UNLOCK_ONE(NULL);
1981	mutex_exit(softnet_lock);
1982}
1983
1984#define senderr(e) { error = (e); goto bad;}
1985int
1986nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
1987    const struct sockaddr_in6 *dst, struct rtentry *rt0)
1988{
1989	struct mbuf *m = m0;
1990	struct rtentry *rt = rt0;
1991	struct sockaddr_in6 *gw6 = NULL;
1992	struct llinfo_nd6 *ln = NULL;
1993	int error = 0;
1994
1995	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
1996		goto sendpkt;
1997
1998	if (nd6_need_cache(ifp) == 0)
1999		goto sendpkt;
2000
2001	/*
2002	 * next hop determination.  This routine is derived from ether_output.
2003	 */
2004	if (rt) {
2005		if ((rt->rt_flags & RTF_UP) == 0) {
2006			if ((rt0 = rt = rtalloc1(sin6tocsa(dst), 1)) != NULL) {
2007				rt->rt_refcnt--;
2008				if (rt->rt_ifp != ifp)
2009					senderr(EHOSTUNREACH);
2010			} else
2011				senderr(EHOSTUNREACH);
2012		}
2013
2014		if (rt->rt_flags & RTF_GATEWAY) {
2015			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;
2016
2017			/*
2018			 * We skip link-layer address resolution and NUD
2019			 * if the gateway is not a neighbor from ND point
2020			 * of view, regardless of the value of nd_ifinfo.flags.
2021			 * The second condition is a bit tricky; we skip
2022			 * if the gateway is our own address, which is
2023			 * sometimes used to install a route to a p2p link.
2024			 */
2025			if (!nd6_is_addr_neighbor(gw6, ifp) ||
2026			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
2027				/*
2028				 * We allow this kind of tricky route only
2029				 * when the outgoing interface is p2p.
2030				 * XXX: we may need a more generic rule here.
2031				 */
2032				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
2033					senderr(EHOSTUNREACH);
2034
2035				goto sendpkt;
2036			}
2037
2038			if (rt->rt_gwroute == NULL)
2039				goto lookup;
2040			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
2041				rtfree(rt); rt = rt0;
2042			lookup:
2043				rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1);
2044				if ((rt = rt->rt_gwroute) == NULL)
2045					senderr(EHOSTUNREACH);
2046				/* the "G" test below also prevents rt == rt0 */
2047				if ((rt->rt_flags & RTF_GATEWAY) ||
2048				    (rt->rt_ifp != ifp)) {
2049					rt->rt_refcnt--;
2050					rt0->rt_gwroute = NULL;
2051					senderr(EHOSTUNREACH);
2052				}
2053			}
2054		}
2055	}
2056
2057	/*
2058	 * Address resolution or Neighbor Unreachability Detection
2059	 * for the next hop.
2060	 * At this point, the destination of the packet must be a unicast
2061	 * or an anycast address(i.e. not a multicast).
2062	 */
2063
2064	/* Look up the neighbor cache for the nexthop */
2065	if (rt != NULL && (rt->rt_flags & RTF_LLINFO) != 0)
2066		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
2067	else {
2068		/*
2069		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
2070		 * the condition below is not very efficient.  But we believe
2071		 * it is tolerable, because this should be a rare case.
2072		 */
2073		if (nd6_is_addr_neighbor(dst, ifp) &&
2074		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
2075			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
2076	}
2077	if (ln == NULL || rt == NULL) {
2078		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
2079		    !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
2080			log(LOG_DEBUG,
2081			    "nd6_output: can't allocate llinfo for %s "
2082			    "(ln=%p, rt=%p)\n",
2083			    ip6_sprintf(&dst->sin6_addr), ln, rt);
2084			senderr(EIO);	/* XXX: good error? */
2085		}
2086
2087		goto sendpkt;	/* send anyway */
2088	}
2089
2090	/*
2091	 * Move this entry to the head of the queue so that it is less likely
2092	 * for this entry to be a target of forced garbage collection (see
2093	 * nd6_rtrequest()).
2094	 */
2095	LN_DEQUEUE(ln);
2096	LN_INSERTHEAD(ln);
2097
2098	/* We don't have to do link-layer address resolution on a p2p link. */
2099	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
2100	    ln->ln_state < ND6_LLINFO_REACHABLE) {
2101		ln->ln_state = ND6_LLINFO_STALE;
2102		nd6_llinfo_settimer(ln, (long)nd6_gctimer * hz);
2103	}
2104
	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds, so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
2112	if (ln->ln_state == ND6_LLINFO_STALE) {
2113		ln->ln_asked = 0;
2114		ln->ln_state = ND6_LLINFO_DELAY;
2115		nd6_llinfo_settimer(ln, (long)nd6_delay * hz);
2116	}
2117
2118	/*
2119	 * If the neighbor cache entry has a state other than INCOMPLETE
2120	 * (i.e. its link-layer address is already resolved), just
2121	 * send the packet.
2122	 */
2123	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
2124		goto sendpkt;
2125
	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet.  Append this latest packet to the end of the
	 * packet queue held in the entry.  When the number of queued
	 * packets then exceeds nd6_maxqueuelen, the oldest packets in
	 * the queue are removed.
	 */
2133	if (ln->ln_state == ND6_LLINFO_NOSTATE)
2134		ln->ln_state = ND6_LLINFO_INCOMPLETE;
2135	if (ln->ln_hold) {
2136		struct mbuf *m_hold;
2137		int i;
2138
2139		i = 0;
2140		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
2141			i++;
2142			if (m_hold->m_nextpkt == NULL) {
2143				m_hold->m_nextpkt = m;
2144				break;
2145			}
2146		}
2147		while (i >= nd6_maxqueuelen) {
2148			m_hold = ln->ln_hold;
2149			ln->ln_hold = ln->ln_hold->m_nextpkt;
2150			m_freem(m_hold);
2151			i--;
2152		}
2153	} else {
2154		ln->ln_hold = m;
2155	}
2156
2157	/*
2158	 * If there has been no NS for the neighbor after entering the
2159	 * INCOMPLETE state, send the first solicitation.
2160	 */
2161	if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
2162		ln->ln_asked++;
2163		nd6_llinfo_settimer(ln,
2164		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
2165		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
2166	}
2167	return 0;
2168
2169  sendpkt:
2170	/* discard the packet if IPv6 operation is disabled on the interface */
2171	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
2172		error = ENETDOWN; /* better error? */
2173		goto bad;
2174	}
2175
2176#ifdef KAME_IPSEC
2177	/* clean ipsec history once it goes out of the node */
2178	ipsec_delaux(m);
2179#endif
2180	KERNEL_LOCK(1, NULL);
2181	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
2182		error = (*ifp->if_output)(origifp, m, sin6tocsa(dst), rt);
2183	else
2184		error = (*ifp->if_output)(ifp, m, sin6tocsa(dst), rt);
2185	KERNEL_UNLOCK_ONE(NULL);
2186	return error;
2187
2188  bad:
2189	if (m != NULL)
2190		m_freem(m);
2191	return error;
2192}
2193#undef senderr
2194
2195int
2196nd6_need_cache(struct ifnet *ifp)
2197{
2198	/*
2199	 * XXX: we currently do not make neighbor cache on any interface
2200	 * other than ARCnet, Ethernet, FDDI and GIF.
2201	 *
2202	 * RFC2893 says:
2203	 * - unidirectional tunnels needs no ND
2204	 */
2205	switch (ifp->if_type) {
2206	case IFT_ARCNET:
2207	case IFT_ETHER:
2208	case IFT_FDDI:
2209	case IFT_IEEE1394:
2210	case IFT_CARP:
2211	case IFT_GIF:		/* XXX need more cases? */
2212	case IFT_PPP:
2213	case IFT_TUNNEL:
2214		return 1;
2215	default:
2216		return 0;
2217	}
2218}
2219
2220int
2221nd6_storelladdr(const struct ifnet *ifp, const struct rtentry *rt,
2222    struct mbuf *m, const struct sockaddr *dst, uint8_t *lldst,
2223    size_t dstsize)
2224{
2225	const struct sockaddr_dl *sdl;
2226
2227	if (m->m_flags & M_MCAST) {
2228		switch (ifp->if_type) {
2229		case IFT_ETHER:
2230		case IFT_FDDI:
2231			ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
2232			    lldst);
2233			return 1;
2234		case IFT_IEEE1394:
2235			memcpy(lldst, ifp->if_broadcastaddr,
2236			    MIN(dstsize, ifp->if_addrlen));
2237			return 1;
2238		case IFT_ARCNET:
2239			*lldst = 0;
2240			return 1;
2241		default:
2242			m_freem(m);
2243			return 0;
2244		}
2245	}
2246
2247	if (rt == NULL) {
2248		/* this could happen, if we could not allocate memory */
2249		m_freem(m);
2250		return 0;
2251	}
2252	if (rt->rt_gateway->sa_family != AF_LINK) {
2253		printf("%s: something odd happens\n", __func__);
2254		m_freem(m);
2255		return 0;
2256	}
2257	sdl = satocsdl(rt->rt_gateway);
2258	if (sdl->sdl_alen == 0 || sdl->sdl_alen > dstsize) {
2259		/* this should be impossible, but we bark here for debugging */
2260		printf("%s: sdl_alen == %" PRIu8 ", dst=%s, if=%s\n", __func__,
2261		    sdl->sdl_alen, ip6_sprintf(&satocsin6(dst)->sin6_addr),
2262		    if_name(ifp));
2263		m_freem(m);
2264		return 0;
2265	}
2266
2267	memcpy(lldst, CLLADDR(sdl), MIN(dstsize, sdl->sdl_alen));
2268	return 1;
2269}
2270
2271static void
2272clear_llinfo_pqueue(struct llinfo_nd6 *ln)
2273{
2274	struct mbuf *m_hold, *m_hold_next;
2275
2276	for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) {
2277		m_hold_next = m_hold->m_nextpkt;
2278		m_hold->m_nextpkt = NULL;
2279		m_freem(m_hold);
2280	}
2281
2282	ln->ln_hold = NULL;
2283	return;
2284}
2285
2286int
2287nd6_sysctl(
2288    int name,
2289    void *oldp,	/* syscall arg, need copyout */
2290    size_t *oldlenp,
2291    void *newp,	/* syscall arg, need copyin */
2292    size_t newlen
2293)
2294{
2295	void *p;
2296	size_t ol;
2297	int error;
2298
2299	error = 0;
2300
2301	if (newp)
2302		return EPERM;
2303	if (oldp && !oldlenp)
2304		return EINVAL;
2305	ol = oldlenp ? *oldlenp : 0;
2306
2307	if (oldp) {
2308		p = malloc(*oldlenp, M_TEMP, M_WAITOK);
2309		if (p == NULL)
2310			return ENOMEM;
2311	} else
2312		p = NULL;
2313	switch (name) {
2314	case ICMPV6CTL_ND6_DRLIST:
2315		error = fill_drlist(p, oldlenp, ol);
2316		if (!error && p != NULL && oldp != NULL)
2317			error = copyout(p, oldp, *oldlenp);
2318		break;
2319
2320	case ICMPV6CTL_ND6_PRLIST:
2321		error = fill_prlist(p, oldlenp, ol);
2322		if (!error && p != NULL && oldp != NULL)
2323			error = copyout(p, oldp, *oldlenp);
2324		break;
2325
2326	case ICMPV6CTL_ND6_MAXQLEN:
2327		break;
2328
2329	default:
2330		error = ENOPROTOOPT;
2331		break;
2332	}
2333	if (p)
2334		free(p, M_TEMP);
2335
2336	return error;
2337}
2338
2339static int
2340fill_drlist(void *oldp, size_t *oldlenp, size_t ol)
2341{
2342	int error = 0, s;
2343	struct in6_defrouter *d = NULL, *de = NULL;
2344	struct nd_defrouter *dr;
2345	size_t l;
2346
2347	s = splsoftnet();
2348
2349	if (oldp) {
2350		d = (struct in6_defrouter *)oldp;
2351		de = (struct in6_defrouter *)((char *)oldp + *oldlenp);
2352	}
2353	l = 0;
2354
2355	TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
2356
2357		if (oldp && d + 1 <= de) {
2358			memset(d, 0, sizeof(*d));
2359			sockaddr_in6_init(&d->rtaddr, &dr->rtaddr, 0, 0, 0);
2360			if (sa6_recoverscope(&d->rtaddr)) {
2361				log(LOG_ERR,
2362				    "scope error in router list (%s)\n",
2363				    ip6_sprintf(&d->rtaddr.sin6_addr));
2364				/* XXX: press on... */
2365			}
2366			d->flags = dr->flags;
2367			d->rtlifetime = dr->rtlifetime;
2368			d->expire = dr->expire;
2369			d->if_index = dr->ifp->if_index;
2370		}
2371
2372		l += sizeof(*d);
2373		if (d)
2374			d++;
2375	}
2376
2377	if (oldp) {
2378		if (l > ol)
2379			error = ENOMEM;
2380	}
2381	if (oldlenp)
2382		*oldlenp = l;	/* (void *)d - (void *)oldp */
2383
2384	splx(s);
2385
2386	return error;
2387}
2388
2389static int
2390fill_prlist(void *oldp, size_t *oldlenp, size_t ol)
2391{
2392	int error = 0, s;
2393	struct nd_prefix *pr;
2394	uint8_t *p = NULL, *ps = NULL;
2395	uint8_t *pe = NULL;
2396	size_t l;
2397
2398	s = splsoftnet();
2399
2400	if (oldp) {
2401		ps = p = (uint8_t*)oldp;
2402		pe = (uint8_t*)oldp + *oldlenp;
2403	}
2404	l = 0;
2405
2406	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
2407		u_short advrtrs;
2408		struct sockaddr_in6 sin6;
2409		struct nd_pfxrouter *pfr;
2410		struct in6_prefix pfx;
2411
2412		if (oldp && p + sizeof(struct in6_prefix) <= pe)
2413		{
2414			memset(&pfx, 0, sizeof(pfx));
2415			ps = p;
2416			pfx.prefix = pr->ndpr_prefix;
2417
2418			if (sa6_recoverscope(&pfx.prefix)) {
2419				log(LOG_ERR,
2420				    "scope error in prefix list (%s)\n",
2421				    ip6_sprintf(&pfx.prefix.sin6_addr));
2422				/* XXX: press on... */
2423			}
2424			pfx.raflags = pr->ndpr_raf;
2425			pfx.prefixlen = pr->ndpr_plen;
2426			pfx.vltime = pr->ndpr_vltime;
2427			pfx.pltime = pr->ndpr_pltime;
2428			pfx.if_index = pr->ndpr_ifp->if_index;
2429			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2430				pfx.expire = 0;
2431			else {
2432				time_t maxexpire;
2433
2434				/* XXX: we assume time_t is signed. */
2435				maxexpire = (-1) &
2436				    ~((time_t)1 <<
2437				    ((sizeof(maxexpire) * 8) - 1));
2438				if (pr->ndpr_vltime <
2439				    maxexpire - pr->ndpr_lastupdate) {
2440					pfx.expire = pr->ndpr_lastupdate +
2441						pr->ndpr_vltime;
2442				} else
2443					pfx.expire = maxexpire;
2444			}
2445			pfx.refcnt = pr->ndpr_refcnt;
2446			pfx.flags = pr->ndpr_stateflags;
2447			pfx.origin = PR_ORIG_RA;
2448
2449			p += sizeof(pfx); l += sizeof(pfx);
2450
2451			advrtrs = 0;
2452			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
2453				if (p + sizeof(sin6) > pe) {
2454					advrtrs++;
2455					continue;
2456				}
2457
2458				sockaddr_in6_init(&sin6, &pfr->router->rtaddr,
2459				    0, 0, 0);
2460				if (sa6_recoverscope(&sin6)) {
2461					log(LOG_ERR,
2462					    "scope error in "
2463					    "prefix list (%s)\n",
2464					    ip6_sprintf(&pfr->router->rtaddr));
2465				}
2466				advrtrs++;
2467				memcpy(p, &sin6, sizeof(sin6));
2468				p += sizeof(sin6);
2469				l += sizeof(sin6);
2470			}
2471			pfx.advrtrs = advrtrs;
2472			memcpy(ps, &pfx, sizeof(pfx));
2473		}
2474		else {
2475			l += sizeof(pfx);
2476			advrtrs = 0;
2477			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
2478				advrtrs++;
2479				l += sizeof(sin6);
2480			}
2481		}
2482	}
2483
2484	if (oldp) {
2485		*oldlenp = l;	/* (void *)d - (void *)oldp */
2486		if (l > ol)
2487			error = ENOMEM;
2488	} else
2489		*oldlenp = l;
2490
2491	splx(s);
2492
2493	return error;
2494}
2495