nd6.c revision 91491
1/*	$FreeBSD: head/sys/netinet6/nd6.c 91491 2002-02-28 17:05:46Z ume $	*/
2/*	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * XXX
35 * KAME 970409 note:
36 * BSD/OS version heavily modifies this code, related to llinfo.
37 * Since we don't have BSD/OS version of net/route.c in our hand,
38 * I left the code mostly as it was in 970310.  -- itojun
39 */
40
41#include "opt_inet.h"
42#include "opt_inet6.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/callout.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/time.h>
52#include <sys/kernel.h>
53#include <sys/protosw.h>
54#include <sys/errno.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/sysctl.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_types.h>
62#include <net/if_atm.h>
63#include <net/route.h>
64
65#include <netinet/in.h>
66#include <netinet/if_ether.h>
67#include <netinet/if_fddi.h>
68#include <netinet6/in6_var.h>
69#include <netinet/ip6.h>
70#include <netinet6/ip6_var.h>
71#include <netinet6/nd6.h>
72#include <netinet6/in6_prefix.h>
73#include <netinet/icmp6.h>
74
75#include <net/net_osdep.h>
76
/* Period of the slow timer armed in nd6_init(), in seconds. */
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
/* Initial value of nd6_recalc_reachtm_interval, in seconds. */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

/*
 * Cast helpers for generic socket addresses.  The argument is
 * parenthesized so that an expression argument (e.g. "p + n") is
 * evaluated first and only then cast.
 */
#define SIN6(s) ((struct sockaddr_in6 *)(s))
#define SDL(s) ((struct sockaddr_dl *)(s))
82
/* timer values */
/* nd6_timer() re-arms itself every nd6_prune * hz ticks. */
int	nd6_prune	= 1;	/* walk list every 1 seconds */
int	nd6_delay	= 5;	/* delay first probe time 5 second */
/* Retry limit nd6_timer() applies to entries in the PROBE state. */
int	nd6_umaxtries	= 3;	/* maximum unicast query */
/* Retry limit nd6_timer() applies to entries in the INCOMPLETE state. */
int	nd6_mmaxtries	= 3;	/* maximum multicast query */
int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
/* Lifetime nd6_timer() gives an entry when it moves it to STALE. */
int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */

/* preventing too many loops in ND option parsing */
int nd6_maxndopt = 10;	/* max # of ND options allowed */

/*
 * nd6_nud_hint() ignores a non-forced hint once the per-entry hint
 * counter exceeds this value.
 */
int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */

#ifdef ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/* for debugging? */
static int nd6_inuse, nd6_allocated;

/*
 * Head of the neighbor cache entry list.  It initially points at
 * itself, and the list walkers in this file stop when they get back
 * to &llinfo_nd6.
 */
struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6};
/* Number of entries nd_ifinfo has room for; doubled by nd6_ifattach(). */
static size_t nd_ifinfo_indexlim = 8;
/* Per-interface ND state, indexed by if_index; allocated by nd6_ifattach(). */
struct nd_ifinfo *nd_ifinfo = NULL;
/* Default router list; initialized with TAILQ_INIT in nd6_init(). */
struct nd_drhead nd_defrouter;
/* Prefix list; walked through lh_first/ndpr_next. */
struct nd_prhead nd_prefix = { 0 };

int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
/*
 * All-ones IPv6 sockaddr, filled in by nd6_init() and passed as the
 * netmask when nd6_lookup() adds a host route.
 */
static struct sockaddr_in6 all1_sa;

static void nd6_slowtimo __P((void *));
static int regen_tmpaddr __P((struct in6_ifaddr *));

/* Callout handles for the slow timer and for nd6_timer(). */
struct callout nd6_slowtimo_ch;
struct callout nd6_timer_ch;
extern struct callout in6_tmpaddrtimer_ch;
120
121void
122nd6_init()
123{
124	static int nd6_init_done = 0;
125	int i;
126
127	if (nd6_init_done) {
128		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
129		return;
130	}
131
132	all1_sa.sin6_family = AF_INET6;
133	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
134	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
135		all1_sa.sin6_addr.s6_addr[i] = 0xff;
136
137	/* initialization of the default router list */
138	TAILQ_INIT(&nd_defrouter);
139
140	nd6_init_done = 1;
141
142	/* start timer */
143	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
144	    nd6_slowtimo, NULL);
145}
146
147void
148nd6_ifattach(ifp)
149	struct ifnet *ifp;
150{
151
152	/*
153	 * We have some arrays that should be indexed by if_index.
154	 * since if_index will grow dynamically, they should grow too.
155	 */
156	if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
157		size_t n;
158		caddr_t q;
159
160		while (if_index >= nd_ifinfo_indexlim)
161			nd_ifinfo_indexlim <<= 1;
162
163		/* grow nd_ifinfo */
164		n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
165		q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK);
166		bzero(q, n);
167		if (nd_ifinfo) {
168			bcopy((caddr_t)nd_ifinfo, q, n/2);
169			free((caddr_t)nd_ifinfo, M_IP6NDP);
170		}
171		nd_ifinfo = (struct nd_ifinfo *)q;
172	}
173
174#define ND nd_ifinfo[ifp->if_index]
175
176	/*
177	 * Don't initialize if called twice.
178	 * XXX: to detect this, we should choose a member that is never set
179	 * before initialization of the ND structure itself.  We formaly used
180	 * the linkmtu member, which was not suitable because it could be
181	 * initialized via "ifconfig mtu".
182	 */
183	if (ND.basereachable)
184		return;
185
186	ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu;
187	ND.chlim = IPV6_DEFHLIM;
188	ND.basereachable = REACHABLE_TIME;
189	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
190	ND.retrans = RETRANS_TIMER;
191	ND.receivedra = 0;
192	ND.flags = ND6_IFF_PERFORMNUD;
193	nd6_setmtu(ifp);
194#undef ND
195}
196
197/*
198 * Reset ND level link MTU. This function is called when the physical MTU
199 * changes, which means we might have to adjust the ND level MTU.
200 */
201void
202nd6_setmtu(ifp)
203	struct ifnet *ifp;
204{
205#define MIN(a,b) ((a) < (b) ? (a) : (b))
206	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
207	u_long oldmaxmtu = ndi->maxmtu;
208	u_long oldlinkmtu = ndi->linkmtu;
209
210	switch (ifp->if_type) {
211	case IFT_ARCNET:	/* XXX MTU handling needs more work */
212		ndi->maxmtu = MIN(60480, ifp->if_mtu);
213		break;
214	case IFT_ETHER:
215		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
216		break;
217	case IFT_FDDI:
218		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
219		break;
220	case IFT_ATM:
221		ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu);
222		break;
223	case IFT_IEEE1394:	/* XXX should be IEEE1394MTU(1500) */
224		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
225		break;
226#ifdef IFT_IEEE80211
227	case IFT_IEEE80211:	/* XXX should be IEEE80211MTU(1500) */
228		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
229		break;
230#endif
231	default:
232		ndi->maxmtu = ifp->if_mtu;
233		break;
234	}
235
236	if (oldmaxmtu != ndi->maxmtu) {
237		/*
238		 * If the ND level MTU is not set yet, or if the maxmtu
239		 * is reset to a smaller value than the ND level MTU,
240		 * also reset the ND level MTU.
241		 */
242		if (ndi->linkmtu == 0 ||
243		    ndi->maxmtu < ndi->linkmtu) {
244			ndi->linkmtu = ndi->maxmtu;
245			/* also adjust in6_maxmtu if necessary. */
246			if (oldlinkmtu == 0) {
247				/*
248				 * XXX: the case analysis is grotty, but
249				 * it is not efficient to call in6_setmaxmtu()
250				 * here when we are during the initialization
251				 * procedure.
252				 */
253				if (in6_maxmtu < ndi->linkmtu)
254					in6_maxmtu = ndi->linkmtu;
255			} else
256				in6_setmaxmtu();
257		}
258	}
259#undef MIN
260}
261
262void
263nd6_option_init(opt, icmp6len, ndopts)
264	void *opt;
265	int icmp6len;
266	union nd_opts *ndopts;
267{
268	bzero(ndopts, sizeof(*ndopts));
269	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
270	ndopts->nd_opts_last
271		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
272
273	if (icmp6len == 0) {
274		ndopts->nd_opts_done = 1;
275		ndopts->nd_opts_search = NULL;
276	}
277}
278
279/*
280 * Take one ND option.
281 */
282struct nd_opt_hdr *
283nd6_option(ndopts)
284	union nd_opts *ndopts;
285{
286	struct nd_opt_hdr *nd_opt;
287	int olen;
288
289	if (!ndopts)
290		panic("ndopts == NULL in nd6_option\n");
291	if (!ndopts->nd_opts_last)
292		panic("uninitialized ndopts in nd6_option\n");
293	if (!ndopts->nd_opts_search)
294		return NULL;
295	if (ndopts->nd_opts_done)
296		return NULL;
297
298	nd_opt = ndopts->nd_opts_search;
299
300	/* make sure nd_opt_len is inside the buffer */
301	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
302		bzero(ndopts, sizeof(*ndopts));
303		return NULL;
304	}
305
306	olen = nd_opt->nd_opt_len << 3;
307	if (olen == 0) {
308		/*
309		 * Message validation requires that all included
310		 * options have a length that is greater than zero.
311		 */
312		bzero(ndopts, sizeof(*ndopts));
313		return NULL;
314	}
315
316	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
317	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
318		/* option overruns the end of buffer, invalid */
319		bzero(ndopts, sizeof(*ndopts));
320		return NULL;
321	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
322		/* reached the end of options chain */
323		ndopts->nd_opts_done = 1;
324		ndopts->nd_opts_search = NULL;
325	}
326	return nd_opt;
327}
328
329/*
330 * Parse multiple ND options.
331 * This function is much easier to use, for ND routines that do not need
332 * multiple options of the same type.
333 */
334int
335nd6_options(ndopts)
336	union nd_opts *ndopts;
337{
338	struct nd_opt_hdr *nd_opt;
339	int i = 0;
340
341	if (!ndopts)
342		panic("ndopts == NULL in nd6_options\n");
343	if (!ndopts->nd_opts_last)
344		panic("uninitialized ndopts in nd6_options\n");
345	if (!ndopts->nd_opts_search)
346		return 0;
347
348	while (1) {
349		nd_opt = nd6_option(ndopts);
350		if (!nd_opt && !ndopts->nd_opts_last) {
351			/*
352			 * Message validation requires that all included
353			 * options have a length that is greater than zero.
354			 */
355			icmp6stat.icp6s_nd_badopt++;
356			bzero(ndopts, sizeof(*ndopts));
357			return -1;
358		}
359
360		if (!nd_opt)
361			goto skip1;
362
363		switch (nd_opt->nd_opt_type) {
364		case ND_OPT_SOURCE_LINKADDR:
365		case ND_OPT_TARGET_LINKADDR:
366		case ND_OPT_MTU:
367		case ND_OPT_REDIRECTED_HEADER:
368			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
369				nd6log((LOG_INFO,
370				    "duplicated ND6 option found (type=%d)\n",
371				    nd_opt->nd_opt_type));
372				/* XXX bark? */
373			} else {
374				ndopts->nd_opt_array[nd_opt->nd_opt_type]
375					= nd_opt;
376			}
377			break;
378		case ND_OPT_PREFIX_INFORMATION:
379			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
380				ndopts->nd_opt_array[nd_opt->nd_opt_type]
381					= nd_opt;
382			}
383			ndopts->nd_opts_pi_end =
384				(struct nd_opt_prefix_info *)nd_opt;
385			break;
386		default:
387			/*
388			 * Unknown options must be silently ignored,
389			 * to accomodate future extension to the protocol.
390			 */
391			nd6log((LOG_DEBUG,
392			    "nd6_options: unsupported option %d - "
393			    "option ignored\n", nd_opt->nd_opt_type));
394		}
395
396skip1:
397		i++;
398		if (i > nd6_maxndopt) {
399			icmp6stat.icp6s_nd_toomanyopt++;
400			nd6log((LOG_INFO, "too many loop in nd opt\n"));
401			break;
402		}
403
404		if (ndopts->nd_opts_done)
405			break;
406	}
407
408	return 0;
409}
410
411/*
412 * ND6 timer routine to expire default route list and prefix list
413 */
414void
415nd6_timer(ignored_arg)
416	void	*ignored_arg;
417{
418	int s;
419	struct llinfo_nd6 *ln;
420	struct nd_defrouter *dr;
421	struct nd_prefix *pr;
422	struct ifnet *ifp;
423	struct in6_ifaddr *ia6, *nia6;
424	struct in6_addrlifetime *lt6;
425
426	s = splnet();
427	callout_reset(&nd6_timer_ch, nd6_prune * hz,
428		      nd6_timer, NULL);
429
430	ln = llinfo_nd6.ln_next;
431	/* XXX BSD/OS separates this code -- itojun */
432	while (ln && ln != &llinfo_nd6) {
433		struct rtentry *rt;
434		struct sockaddr_in6 *dst;
435		struct llinfo_nd6 *next = ln->ln_next;
436		/* XXX: used for the DELAY case only: */
437		struct nd_ifinfo *ndi = NULL;
438
439		if ((rt = ln->ln_rt) == NULL) {
440			ln = next;
441			continue;
442		}
443		if ((ifp = rt->rt_ifp) == NULL) {
444			ln = next;
445			continue;
446		}
447		ndi = &nd_ifinfo[ifp->if_index];
448		dst = (struct sockaddr_in6 *)rt_key(rt);
449
450		if (ln->ln_expire > time_second) {
451			ln = next;
452			continue;
453		}
454
455		/* sanity check */
456		if (!rt)
457			panic("rt=0 in nd6_timer(ln=%p)\n", ln);
458		if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
459			panic("rt_llinfo(%p) is not equal to ln(%p)\n",
460			      rt->rt_llinfo, ln);
461		if (!dst)
462			panic("dst=0 in nd6_timer(ln=%p)\n", ln);
463
464		switch (ln->ln_state) {
465		case ND6_LLINFO_INCOMPLETE:
466			if (ln->ln_asked < nd6_mmaxtries) {
467				ln->ln_asked++;
468				ln->ln_expire = time_second +
469					nd_ifinfo[ifp->if_index].retrans / 1000;
470				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
471					ln, 0);
472			} else {
473				struct mbuf *m = ln->ln_hold;
474				if (m) {
475					if (rt->rt_ifp) {
476						/*
477						 * Fake rcvif to make ICMP error
478						 * more helpful in diagnosing
479						 * for the receiver.
480						 * XXX: should we consider
481						 * older rcvif?
482						 */
483						m->m_pkthdr.rcvif = rt->rt_ifp;
484					}
485					icmp6_error(m, ICMP6_DST_UNREACH,
486						    ICMP6_DST_UNREACH_ADDR, 0);
487					ln->ln_hold = NULL;
488				}
489				next = nd6_free(rt);
490			}
491			break;
492		case ND6_LLINFO_REACHABLE:
493			if (ln->ln_expire) {
494				ln->ln_state = ND6_LLINFO_STALE;
495				ln->ln_expire = time_second + nd6_gctimer;
496			}
497			break;
498
499		case ND6_LLINFO_STALE:
500			/* Garbage Collection(RFC 2461 5.3) */
501			if (ln->ln_expire)
502				next = nd6_free(rt);
503			break;
504
505		case ND6_LLINFO_DELAY:
506			if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
507				/* We need NUD */
508				ln->ln_asked = 1;
509				ln->ln_state = ND6_LLINFO_PROBE;
510				ln->ln_expire = time_second +
511					ndi->retrans / 1000;
512				nd6_ns_output(ifp, &dst->sin6_addr,
513					      &dst->sin6_addr,
514					      ln, 0);
515			} else {
516				ln->ln_state = ND6_LLINFO_STALE; /* XXX */
517				ln->ln_expire = time_second + nd6_gctimer;
518			}
519			break;
520		case ND6_LLINFO_PROBE:
521			if (ln->ln_asked < nd6_umaxtries) {
522				ln->ln_asked++;
523				ln->ln_expire = time_second +
524					nd_ifinfo[ifp->if_index].retrans / 1000;
525				nd6_ns_output(ifp, &dst->sin6_addr,
526					       &dst->sin6_addr, ln, 0);
527			} else {
528				next = nd6_free(rt);
529			}
530			break;
531		}
532		ln = next;
533	}
534
535	/* expire default router list */
536	dr = TAILQ_FIRST(&nd_defrouter);
537	while (dr) {
538		if (dr->expire && dr->expire < time_second) {
539			struct nd_defrouter *t;
540			t = TAILQ_NEXT(dr, dr_entry);
541			defrtrlist_del(dr);
542			dr = t;
543		} else {
544			dr = TAILQ_NEXT(dr, dr_entry);
545		}
546	}
547
548	/*
549	 * expire interface addresses.
550	 * in the past the loop was inside prefix expiry processing.
551	 * However, from a stricter speci-confrmance standpoint, we should
552	 * rather separate address lifetimes and prefix lifetimes.
553	 */
554  addrloop:
555	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
556		nia6 = ia6->ia_next;
557		/* check address lifetime */
558		lt6 = &ia6->ia6_lifetime;
559		if (IFA6_IS_INVALID(ia6)) {
560			int regen = 0;
561
562			/*
563			 * If the expiring address is temporary, try
564			 * regenerating a new one.  This would be useful when
565			 * we suspended a laptop PC, then turned on after a
566			 * period that could invalidate all temporary
567			 * addresses.  Although we may have to restart the
568			 * loop (see below), it must be after purging the
569			 * address.  Otherwise, we'd see an infinite loop of
570			 * regeneration.
571			 */
572			if (ip6_use_tempaddr &&
573			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
574				if (regen_tmpaddr(ia6) == 0)
575					regen = 1;
576			}
577
578			in6_purgeaddr(&ia6->ia_ifa);
579
580			if (regen)
581				goto addrloop; /* XXX: see below */
582		} else if (IFA6_IS_DEPRECATED(ia6)) {
583			int oldflags = ia6->ia6_flags;
584
585			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
586
587			/*
588			 * If a temporary address has just become deprecated,
589			 * regenerate a new one if possible.
590			 */
591			if (ip6_use_tempaddr &&
592			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
593			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
594
595				if (regen_tmpaddr(ia6) == 0) {
596					/*
597					 * A new temporary address is
598					 * generated.
599					 * XXX: this means the address chain
600					 * has changed while we are still in
601					 * the loop.  Although the change
602					 * would not cause disaster (because
603					 * it's not an addition, but a
604					 * deletion,) we'd rather restart the
605					 * loop just for safety.  Or does this
606					 * significantly reduce performance??
607					 */
608					goto addrloop;
609				}
610			}
611		} else if (IFA6_IS_DEPRECATED(ia6)) {
612			/*
613			 * A new RA might have made a deprecated address
614			 * preferred.
615			 */
616			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
617		}
618	}
619
620	/* expire prefix list */
621	pr = nd_prefix.lh_first;
622	while (pr) {
623		/*
624		 * check prefix lifetime.
625		 * since pltime is just for autoconf, pltime processing for
626		 * prefix is not necessary.
627		 *
628		 * we offset expire time by NDPR_KEEP_EXPIRE, so that we
629		 * can use the old prefix information to validate the
630		 * next prefix information to come.  See prelist_update()
631		 * for actual validation.
632		 *
633		 * I don't think such an offset is necessary.
634		 * (jinmei@kame.net, 20010130).
635		 */
636		if (pr->ndpr_expire && pr->ndpr_expire < time_second) {
637			struct nd_prefix *t;
638			t = pr->ndpr_next;
639
640			/*
641			 * address expiration and prefix expiration are
642			 * separate.  NEVER perform in6_purgeaddr here.
643			 */
644
645			prelist_remove(pr);
646			pr = t;
647		} else
648			pr = pr->ndpr_next;
649	}
650	splx(s);
651}
652
653static int
654regen_tmpaddr(ia6)
655	struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
656{
657	struct ifaddr *ifa;
658	struct ifnet *ifp;
659	struct in6_ifaddr *public_ifa6 = NULL;
660
661	ifp = ia6->ia_ifa.ifa_ifp;
662	for (ifa = ifp->if_addrlist.tqh_first; ifa;
663	     ifa = ifa->ifa_list.tqe_next)
664	{
665		struct in6_ifaddr *it6;
666
667		if (ifa->ifa_addr->sa_family != AF_INET6)
668			continue;
669
670		it6 = (struct in6_ifaddr *)ifa;
671
672		/* ignore no autoconf addresses. */
673		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
674			continue;
675
676		/* ignore autoconf addresses with different prefixes. */
677		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
678			continue;
679
680		/*
681		 * Now we are looking at an autoconf address with the same
682		 * prefix as ours.  If the address is temporary and is still
683		 * preferred, do not create another one.  It would be rare, but
684		 * could happen, for example, when we resume a laptop PC after
685		 * a long period.
686		 */
687		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
688		    !IFA6_IS_DEPRECATED(it6)) {
689			public_ifa6 = NULL;
690			break;
691		}
692
693		/*
694		 * This is a public autoconf address that has the same prefix
695		 * as ours.  If it is preferred, keep it.  We can't break the
696		 * loop here, because there may be a still-preferred temporary
697		 * address with the prefix.
698		 */
699		if (!IFA6_IS_DEPRECATED(it6))
700		    public_ifa6 = it6;
701	}
702
703	if (public_ifa6 != NULL) {
704		int e;
705
706		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
707			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
708			    " tmp addr,errno=%d\n", e);
709			return(-1);
710		}
711		return(0);
712	}
713
714	return(-1);
715}
716
717/*
718 * Nuke neighbor cache/prefix/default router management table, right before
719 * ifp goes away.
720 */
721void
722nd6_purge(ifp)
723	struct ifnet *ifp;
724{
725	struct llinfo_nd6 *ln, *nln;
726	struct nd_defrouter *dr, *ndr, drany;
727	struct nd_prefix *pr, *npr;
728
729	/* Nuke default router list entries toward ifp */
730	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
731		/*
732		 * The first entry of the list may be stored in
733		 * the routing table, so we'll delete it later.
734		 */
735		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
736			ndr = TAILQ_NEXT(dr, dr_entry);
737			if (dr->ifp == ifp)
738				defrtrlist_del(dr);
739		}
740		dr = TAILQ_FIRST(&nd_defrouter);
741		if (dr->ifp == ifp)
742			defrtrlist_del(dr);
743	}
744
745	/* Nuke prefix list entries toward ifp */
746	for (pr = nd_prefix.lh_first; pr; pr = npr) {
747		npr = pr->ndpr_next;
748		if (pr->ndpr_ifp == ifp) {
749			/*
750			 * Previously, pr->ndpr_addr is removed as well,
751			 * but I strongly believe we don't have to do it.
752			 * nd6_purge() is only called from in6_ifdetach(),
753			 * which removes all the associated interface addresses
754			 * by itself.
755			 * (jinmei@kame.net 20010129)
756			 */
757			prelist_remove(pr);
758		}
759	}
760
761	/* cancel default outgoing interface setting */
762	if (nd6_defifindex == ifp->if_index)
763		nd6_setdefaultiface(0);
764
765	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
766		/* refresh default router list */
767		bzero(&drany, sizeof(drany));
768		defrouter_delreq(&drany, 0);
769		defrouter_select();
770	}
771
772	/*
773	 * Nuke neighbor cache entries for the ifp.
774	 * Note that rt->rt_ifp may not be the same as ifp,
775	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
776	 * nd6_rtrequest(), and ip6_input().
777	 */
778	ln = llinfo_nd6.ln_next;
779	while (ln && ln != &llinfo_nd6) {
780		struct rtentry *rt;
781		struct sockaddr_dl *sdl;
782
783		nln = ln->ln_next;
784		rt = ln->ln_rt;
785		if (rt && rt->rt_gateway &&
786		    rt->rt_gateway->sa_family == AF_LINK) {
787			sdl = (struct sockaddr_dl *)rt->rt_gateway;
788			if (sdl->sdl_index == ifp->if_index)
789				nln = nd6_free(rt);
790		}
791		ln = nln;
792	}
793}
794
795struct rtentry *
796nd6_lookup(addr6, create, ifp)
797	struct in6_addr *addr6;
798	int create;
799	struct ifnet *ifp;
800{
801	struct rtentry *rt;
802	struct sockaddr_in6 sin6;
803
804	bzero(&sin6, sizeof(sin6));
805	sin6.sin6_len = sizeof(struct sockaddr_in6);
806	sin6.sin6_family = AF_INET6;
807	sin6.sin6_addr = *addr6;
808#ifdef SCOPEDROUTING
809	sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
810#endif
811	rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL);
812	if (rt && (rt->rt_flags & RTF_LLINFO) == 0) {
813		/*
814		 * This is the case for the default route.
815		 * If we want to create a neighbor cache for the address, we
816		 * should free the route for the destination and allocate an
817		 * interface route.
818		 */
819		if (create) {
820			RTFREE(rt);
821			rt = 0;
822		}
823	}
824	if (!rt) {
825		if (create && ifp) {
826			int e;
827
828			/*
829			 * If no route is available and create is set,
830			 * we allocate a host route for the destination
831			 * and treat it like an interface route.
832			 * This hack is necessary for a neighbor which can't
833			 * be covered by our own prefix.
834			 */
835			struct ifaddr *ifa =
836				ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
837			if (ifa == NULL)
838				return(NULL);
839
840			/*
841			 * Create a new route. RTF_LLINFO is necessary
842			 * to create a Neighbor Cache entry for the
843			 * destination in nd6_rtrequest which will be
844			 * called in rtequest via ifa->ifa_rtrequest.
845			 */
846			if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6,
847					   ifa->ifa_addr,
848					   (struct sockaddr *)&all1_sa,
849					   (ifa->ifa_flags |
850					    RTF_HOST | RTF_LLINFO) &
851					   ~RTF_CLONING,
852					   &rt)) != 0)
853				log(LOG_ERR,
854				    "nd6_lookup: failed to add route for a "
855				    "neighbor(%s), errno=%d\n",
856				    ip6_sprintf(addr6), e);
857			if (rt == NULL)
858				return(NULL);
859			if (rt->rt_llinfo) {
860				struct llinfo_nd6 *ln =
861					(struct llinfo_nd6 *)rt->rt_llinfo;
862				ln->ln_state = ND6_LLINFO_NOSTATE;
863			}
864		} else
865			return(NULL);
866	}
867	rt->rt_refcnt--;
868	/*
869	 * Validation for the entry.
870	 * XXX: we can't use rt->rt_ifp to check for the interface, since
871	 *      it might be the loopback interface if the entry is for our
872	 *      own address on a non-loopback interface. Instead, we should
873	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL interface.
874	 */
875	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
876	    rt->rt_gateway->sa_family != AF_LINK ||
877	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
878		if (create) {
879			log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n",
880			    ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec");
881			/* xxx more logs... kazu */
882		}
883		return(0);
884	}
885	return(rt);
886}
887
888/*
889 * Detect if a given IPv6 address identifies a neighbor on a given link.
890 * XXX: should take care of the destination of a p2p link?
891 */
892int
893nd6_is_addr_neighbor(addr, ifp)
894	struct sockaddr_in6 *addr;
895	struct ifnet *ifp;
896{
897	struct ifaddr *ifa;
898	int i;
899
900#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
901#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
902
903	/*
904	 * A link-local address is always a neighbor.
905	 * XXX: we should use the sin6_scope_id field rather than the embedded
906	 * interface index.
907	 */
908	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
909	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
910		return(1);
911
912	/*
913	 * If the address matches one of our addresses,
914	 * it should be a neighbor.
915	 */
916	for (ifa = ifp->if_addrlist.tqh_first;
917	     ifa;
918	     ifa = ifa->ifa_list.tqe_next)
919	{
920		if (ifa->ifa_addr->sa_family != AF_INET6)
921			next: continue;
922
923		for (i = 0; i < 4; i++) {
924			if ((IFADDR6(ifa).s6_addr32[i] ^
925			     addr->sin6_addr.s6_addr32[i]) &
926			    IFMASK6(ifa).s6_addr32[i])
927				goto next;
928		}
929		return(1);
930	}
931
932	/*
933	 * Even if the address matches none of our addresses, it might be
934	 * in the neighbor cache.
935	 */
936	if (nd6_lookup(&addr->sin6_addr, 0, ifp))
937		return(1);
938
939	return(0);
940#undef IFADDR6
941#undef IFMASK6
942}
943
944/*
945 * Free an nd6 llinfo entry.
946 */
947struct llinfo_nd6 *
948nd6_free(rt)
949	struct rtentry *rt;
950{
951	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
952	struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
953	struct nd_defrouter *dr;
954
955	/*
956	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
957	 * even though it is not harmful, it was not really necessary.
958	 */
959
960	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
961		int s;
962		s = splnet();
963		dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
964				      rt->rt_ifp);
965
966		if (ln->ln_router || dr) {
967			/*
968			 * rt6_flush must be called whether or not the neighbor
969			 * is in the Default Router List.
970			 * See a corresponding comment in nd6_na_input().
971			 */
972			rt6_flush(&in6, rt->rt_ifp);
973		}
974
975		if (dr) {
976			/*
977			 * Unreachablity of a router might affect the default
978			 * router selection and on-link detection of advertised
979			 * prefixes.
980			 */
981
982			/*
983			 * Temporarily fake the state to choose a new default
984			 * router and to perform on-link determination of
985			 * prefixes coreectly.
986			 * Below the state will be set correctly,
987			 * or the entry itself will be deleted.
988			 */
989			ln->ln_state = ND6_LLINFO_INCOMPLETE;
990
991			/*
992			 * Since defrouter_select() does not affect the
993			 * on-link determination and MIP6 needs the check
994			 * before the default router selection, we perform
995			 * the check now.
996			 */
997			pfxlist_onlink_check();
998
999			if (dr == TAILQ_FIRST(&nd_defrouter)) {
1000				/*
1001				 * It is used as the current default router,
1002				 * so we have to move it to the end of the
1003				 * list and choose a new one.
1004				 * XXX: it is not very efficient if this is
1005				 *      the only router.
1006				 */
1007				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1008				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
1009
1010				defrouter_select();
1011			}
1012		}
1013		splx(s);
1014	}
1015
1016	/*
1017	 * Before deleting the entry, remember the next entry as the
1018	 * return value.  We need this because pfxlist_onlink_check() above
1019	 * might have freed other entries (particularly the old next entry) as
1020	 * a side effect (XXX).
1021	 */
1022	next = ln->ln_next;
1023
1024	/*
1025	 * Detach the route from the routing tree and the list of neighbor
1026	 * caches, and disable the route entry not to be used in already
1027	 * cached routes.
1028	 */
1029	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1030		  rt_mask(rt), 0, (struct rtentry **)0);
1031
1032	return(next);
1033}
1034
1035/*
1036 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1037 *
1038 * XXX cost-effective metods?
1039 */
1040void
1041nd6_nud_hint(rt, dst6, force)
1042	struct rtentry *rt;
1043	struct in6_addr *dst6;
1044	int force;
1045{
1046	struct llinfo_nd6 *ln;
1047
1048	/*
1049	 * If the caller specified "rt", use that.  Otherwise, resolve the
1050	 * routing table by supplied "dst6".
1051	 */
1052	if (!rt) {
1053		if (!dst6)
1054			return;
1055		if (!(rt = nd6_lookup(dst6, 0, NULL)))
1056			return;
1057	}
1058
1059	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1060	    (rt->rt_flags & RTF_LLINFO) == 0 ||
1061	    !rt->rt_llinfo || !rt->rt_gateway ||
1062	    rt->rt_gateway->sa_family != AF_LINK) {
1063		/* This is not a host route. */
1064		return;
1065	}
1066
1067	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1068	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1069		return;
1070
1071	/*
1072	 * if we get upper-layer reachability confirmation many times,
1073	 * it is possible we have false information.
1074	 */
1075	if (!force) {
1076		ln->ln_byhint++;
1077		if (ln->ln_byhint > nd6_maxnudhint)
1078			return;
1079	}
1080
1081	ln->ln_state = ND6_LLINFO_REACHABLE;
1082	if (ln->ln_expire)
1083		ln->ln_expire = time_second +
1084			nd_ifinfo[rt->rt_ifp->if_index].reachable;
1085}
1086
/*
 * Routing request hook for IPv6 neighbor cache routes.
 *
 * "req" is the routing request; the switch below acts on RTM_ADD,
 * RTM_RESOLVE and RTM_DELETE and ignores any other value.  "rt" is the
 * route being operated on.  "info" is not used.
 *
 * RTM_ADD and RTM_RESOLVE attach a struct llinfo_nd6 to the route, link it
 * into the llinfo_nd6 list and set RTF_LLINFO.  RTM_DELETE unlinks and
 * frees that structure again.  Gateway routes are left untouched.
 */
void
nd6_rtrequest(req, rt, info)
	int	req;
	struct rtentry *rt;
	struct rt_addrinfo *info; /* xxx unused */
{
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	/* template AF_LINK gateway installed for routes to an interface */
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
	struct ifnet *ifp = rt->rt_ifp;
	struct ifaddr *ifa;

	/* Nothing to do for routes that go through a gateway. */
	if (rt->rt_flags & RTF_GATEWAY)
		return;

	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
		/*
		 * This is probably an interface direct route for a link
		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
		 * We do not need special treatment below for such a route.
		 * Moreover, the RTF_LLINFO flag which would be set below
		 * would annoy the ndp(8) command.
		 */
		return;
	}

	if (req == RTM_RESOLVE &&
	    (nd6_need_cache(ifp) == 0 || /* stf case */
	     !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) {
		/*
		 * FreeBSD and BSD/OS often make a cloned host route based
		 * on a less-specific route (e.g. the default route).
		 * If the less specific route does not have a "gateway"
		 * (this is the case when the route just goes to a p2p or an
		 * stf interface), we'll mistakenly make a neighbor cache for
		 * the host route, and will see strange neighbor solicitation
		 * for the corresponding destination.  In order to avoid the
		 * confusion, we check if the destination of the route is
		 * a neighbor in terms of neighbor discovery, and stop the
		 * process if not.  Additionally, we remove the LLINFO flag
		 * so that ndp(8) will not try to get the neighbor information
		 * of the destination.
		 */
		rt->rt_flags &= ~RTF_LLINFO;
		return;
	}

	switch (req) {
	case RTM_ADD:
		/*
		 * There is no backward compatibility :)
		 *
		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
		 *	   rt->rt_flags |= RTF_CLONING;
		 */
		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
			/*
			 * Case 1: This route should come from
			 * a route to interface. RTF_LLINFO flag is set
			 * for a host route whose destination should be
			 * treated as on-link.
			 */
			rt_setgate(rt, rt_key(rt),
				   (struct sockaddr *)&null_sdl);
			/* rt_gateway was just replaced; refresh the local copy */
			gate = rt->rt_gateway;
			SDL(gate)->sdl_type = ifp->if_type;
			SDL(gate)->sdl_index = ifp->if_index;
			if (ln)
				ln->ln_expire = time_second;
#if 1
			if (ln && ln->ln_expire == 0) {
				/*
				 * kludge for desktops: time_second was zero,
				 * so use 1 to keep ln_expire non-zero.
				 */
#if 0
				printf("nd6_request: time.tv_sec is zero; "
				       "treat it as 1\n");
#endif
				ln->ln_expire = 1;
			}
#endif
			/* a cloning route itself gets no llinfo */
			if (rt->rt_flags & RTF_CLONING)
				break;
		}
		/*
		 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
		 * We don't do that here since llinfo is not ready yet.
		 *
		 * There are also a couple of other things to be discussed:
		 * - unsolicited NA code needs improvement beforehand
		 * - RFC2461 says we MAY send multicast unsolicited NA
		 *   (7.2.6 paragraph 4), however, it also says that we
		 *   SHOULD provide a mechanism to prevent multicast NA storm.
		 *   we don't have anything like it right now.
		 *   note that the mechanism needs a mutual agreement
		 *   between proxies, which means that we need to implement
		 *   a new protocol, or a new kludge.
		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
		 *   we need to check ip6forwarding before sending it.
		 *   (or should we allow proxy ND configuration only for
		 *   routers?  there's no mention about proxy ND from hosts)
		 */
#if 0
		/* XXX it does not work */
		if (rt->rt_flags & RTF_ANNOUNCE)
			nd6_na_output(ifp,
			      &SIN6(rt_key(rt))->sin6_addr,
			      &SIN6(rt_key(rt))->sin6_addr,
			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
			      1, NULL);
#endif
		/* FALLTHROUGH */
	case RTM_RESOLVE:
		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
			/*
			 * Address resolution isn't necessary for a point to
			 * point link, so we can skip this test for a p2p link.
			 */
			if (gate->sa_family != AF_LINK ||
			    gate->sa_len < sizeof(null_sdl)) {
				log(LOG_DEBUG,
				    "nd6_rtrequest: bad gateway value: %s\n",
				    if_name(ifp));
				break;
			}
			SDL(gate)->sdl_type = ifp->if_type;
			SDL(gate)->sdl_index = ifp->if_index;
		}
		if (ln != NULL)
			break;	/* This happens on a route change */
		/*
		 * Case 2: This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
		/* stored before the NULL check, so a failure leaves rt_llinfo NULL */
		rt->rt_llinfo = (caddr_t)ln;
		if (!ln) {
			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
			break;
		}
		nd6_inuse++;
		nd6_allocated++;
		Bzero(ln, sizeof(*ln));
		ln->ln_rt = rt;
		/* this is required for "ndp" command. - shin */
		if (req == RTM_ADD) {
			/*
			 * gate should have some valid AF_LINK entry,
			 * and ln->ln_expire should have some lifetime
			 * which is specified by ndp command.
			 */
			ln->ln_state = ND6_LLINFO_REACHABLE;
			ln->ln_byhint = 0;
		} else {
			/*
			 * When req == RTM_RESOLVE, rt is created and
			 * initialized in rtrequest(), so rt_expire is 0.
			 */
			ln->ln_state = ND6_LLINFO_NOSTATE;
			ln->ln_expire = time_second;
		}
		rt->rt_flags |= RTF_LLINFO;
		/* insert the new entry right after the llinfo_nd6 list head */
		ln->ln_next = llinfo_nd6.ln_next;
		llinfo_nd6.ln_next = ln;
		ln->ln_prev = &llinfo_nd6;
		ln->ln_next->ln_prev = ln;

		/*
		 * check if rt_key(rt) is one of my address assigned
		 * to the interface.
		 */
		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
					  &SIN6(rt_key(rt))->sin6_addr);
		if (ifa) {
			caddr_t macp = nd6_ifptomac(ifp);
			/* our own address: no expiry, always reachable */
			ln->ln_expire = 0;
			ln->ln_state = ND6_LLINFO_REACHABLE;
			ln->ln_byhint = 0;
			if (macp) {
				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
				SDL(gate)->sdl_alen = ifp->if_addrlen;
			}
			if (nd6_useloopback) {
				rt->rt_ifp = &loif[0];	/*XXX*/
				/*
				 * Make sure rt_ifa be equal to the ifaddr
				 * corresponding to the address.
				 * We need this because when we refer
				 * rt_ifa->ia6_flags in ip6_input, we assume
				 * that the rt_ifa points to the address instead
				 * of the loopback address.
				 */
				if (ifa != rt->rt_ifa) {
					IFAFREE(rt->rt_ifa);
					IFAREF(ifa);
					rt->rt_ifa = ifa;
				}
			}
		} else if (rt->rt_flags & RTF_ANNOUNCE) {
			/* proxy entry: no expiry, always reachable */
			ln->ln_expire = 0;
			ln->ln_state = ND6_LLINFO_REACHABLE;
			ln->ln_byhint = 0;

			/* join solicited node multicast for proxy ND */
			if (ifp->if_flags & IFF_MULTICAST) {
				struct in6_addr llsol;
				int error;

				/*
				 * Build the address from the route key: the
				 * first 16-bit word becomes 0xff02, the second
				 * the interface index, and bytes 4-12 are
				 * overwritten with 00 00 00 00 00 00 00 01 ff,
				 * leaving the last three bytes of the key.
				 */
				llsol = SIN6(rt_key(rt))->sin6_addr;
				llsol.s6_addr16[0] = htons(0xff02);
				llsol.s6_addr16[1] = htons(ifp->if_index);
				llsol.s6_addr32[1] = 0;
				llsol.s6_addr32[2] = htonl(1);
				llsol.s6_addr8[12] = 0xff;

				if (!in6_addmulti(&llsol, ifp, &error)) {
					nd6log((LOG_ERR, "%s: failed to join "
					    "%s (errno=%d)\n", if_name(ifp),
					    ip6_sprintf(&llsol), error));
				}
			}
		}
		break;

	case RTM_DELETE:
		if (!ln)
			break;
		/* leave from solicited node multicast for proxy ND */
		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
		    (ifp->if_flags & IFF_MULTICAST) != 0) {
			struct in6_addr llsol;
			struct in6_multi *in6m;

			/* same address construction as in the join above */
			llsol = SIN6(rt_key(rt))->sin6_addr;
			llsol.s6_addr16[0] = htons(0xff02);
			llsol.s6_addr16[1] = htons(ifp->if_index);
			llsol.s6_addr32[1] = 0;
			llsol.s6_addr32[2] = htonl(1);
			llsol.s6_addr8[12] = 0xff;

			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
			if (in6m)
				in6_delmulti(in6m);
		}
		nd6_inuse--;
		/* unlink from the llinfo_nd6 list */
		ln->ln_next->ln_prev = ln->ln_prev;
		ln->ln_prev->ln_next = ln->ln_next;
		ln->ln_prev = NULL;
		rt->rt_llinfo = 0;
		rt->rt_flags &= ~RTF_LLINFO;
		/* drop the packet that was waiting for address resolution */
		if (ln->ln_hold)
			m_freem(ln->ln_hold);
		Free((caddr_t)ln);
	}
}
1341
1342int
1343nd6_ioctl(cmd, data, ifp)
1344	u_long cmd;
1345	caddr_t	data;
1346	struct ifnet *ifp;
1347{
1348	struct in6_drlist *drl = (struct in6_drlist *)data;
1349	struct in6_prlist *prl = (struct in6_prlist *)data;
1350	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1351	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1352	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1353	struct nd_defrouter *dr, any;
1354	struct nd_prefix *pr;
1355	struct rtentry *rt;
1356	int i = 0, error = 0;
1357	int s;
1358
1359	switch (cmd) {
1360	case SIOCGDRLST_IN6:
1361		/*
1362		 * obsolete API, use sysctl under net.inet6.icmp6
1363		 */
1364		bzero(drl, sizeof(*drl));
1365		s = splnet();
1366		dr = TAILQ_FIRST(&nd_defrouter);
1367		while (dr && i < DRLSTSIZ) {
1368			drl->defrouter[i].rtaddr = dr->rtaddr;
1369			if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) {
1370				/* XXX: need to this hack for KAME stack */
1371				drl->defrouter[i].rtaddr.s6_addr16[1] = 0;
1372			} else
1373				log(LOG_ERR,
1374				    "default router list contains a "
1375				    "non-linklocal address(%s)\n",
1376				    ip6_sprintf(&drl->defrouter[i].rtaddr));
1377
1378			drl->defrouter[i].flags = dr->flags;
1379			drl->defrouter[i].rtlifetime = dr->rtlifetime;
1380			drl->defrouter[i].expire = dr->expire;
1381			drl->defrouter[i].if_index = dr->ifp->if_index;
1382			i++;
1383			dr = TAILQ_NEXT(dr, dr_entry);
1384		}
1385		splx(s);
1386		break;
1387	case SIOCGPRLST_IN6:
1388		/*
1389		 * obsolete API, use sysctl under net.inet6.icmp6
1390		 */
1391		/*
1392		 * XXX meaning of fields, especialy "raflags", is very
1393		 * differnet between RA prefix list and RR/static prefix list.
1394		 * how about separating ioctls into two?
1395		 */
1396		bzero(prl, sizeof(*prl));
1397		s = splnet();
1398		pr = nd_prefix.lh_first;
1399		while (pr && i < PRLSTSIZ) {
1400			struct nd_pfxrouter *pfr;
1401			int j;
1402
1403			(void)in6_embedscope(&prl->prefix[i].prefix,
1404			    &pr->ndpr_prefix, NULL, NULL);
1405			prl->prefix[i].raflags = pr->ndpr_raf;
1406			prl->prefix[i].prefixlen = pr->ndpr_plen;
1407			prl->prefix[i].vltime = pr->ndpr_vltime;
1408			prl->prefix[i].pltime = pr->ndpr_pltime;
1409			prl->prefix[i].if_index = pr->ndpr_ifp->if_index;
1410			prl->prefix[i].expire = pr->ndpr_expire;
1411
1412			pfr = pr->ndpr_advrtrs.lh_first;
1413			j = 0;
1414			while (pfr) {
1415				if (j < DRLSTSIZ) {
1416#define RTRADDR prl->prefix[i].advrtr[j]
1417					RTRADDR = pfr->router->rtaddr;
1418					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1419						/* XXX: hack for KAME */
1420						RTRADDR.s6_addr16[1] = 0;
1421					} else
1422						log(LOG_ERR,
1423						    "a router(%s) advertises "
1424						    "a prefix with "
1425						    "non-link local address\n",
1426						    ip6_sprintf(&RTRADDR));
1427#undef RTRADDR
1428				}
1429				j++;
1430				pfr = pfr->pfr_next;
1431			}
1432			prl->prefix[i].advrtrs = j;
1433			prl->prefix[i].origin = PR_ORIG_RA;
1434
1435			i++;
1436			pr = pr->ndpr_next;
1437		}
1438	      {
1439		struct rr_prefix *rpp;
1440
1441		for (rpp = LIST_FIRST(&rr_prefix); rpp;
1442		     rpp = LIST_NEXT(rpp, rp_entry)) {
1443			if (i >= PRLSTSIZ)
1444				break;
1445			(void)in6_embedscope(&prl->prefix[i].prefix,
1446			    &pr->ndpr_prefix, NULL, NULL);
1447			prl->prefix[i].raflags = rpp->rp_raf;
1448			prl->prefix[i].prefixlen = rpp->rp_plen;
1449			prl->prefix[i].vltime = rpp->rp_vltime;
1450			prl->prefix[i].pltime = rpp->rp_pltime;
1451			prl->prefix[i].if_index = rpp->rp_ifp->if_index;
1452			prl->prefix[i].expire = rpp->rp_expire;
1453			prl->prefix[i].advrtrs = 0;
1454			prl->prefix[i].origin = rpp->rp_origin;
1455			i++;
1456		}
1457	      }
1458		splx(s);
1459
1460		break;
1461	case OSIOCGIFINFO_IN6:
1462		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1463			error = EINVAL;
1464			break;
1465		}
1466		ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu;
1467		ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu;
1468		ndi->ndi.basereachable =
1469		    nd_ifinfo[ifp->if_index].basereachable;
1470		ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable;
1471		ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans;
1472		ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags;
1473		ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm;
1474		ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim;
1475		ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra;
1476		break;
1477	case SIOCGIFINFO_IN6:
1478		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1479			error = EINVAL;
1480			break;
1481		}
1482		ndi->ndi = nd_ifinfo[ifp->if_index];
1483		break;
1484	case SIOCSIFINFO_FLAGS:
1485		/* XXX: almost all other fields of ndi->ndi is unused */
1486		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1487			error = EINVAL;
1488			break;
1489		}
1490		nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags;
1491		break;
1492	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1493		/* flush default router list */
1494		/*
1495		 * xxx sumikawa: should not delete route if default
1496		 * route equals to the top of default router list
1497		 */
1498		bzero(&any, sizeof(any));
1499		defrouter_delreq(&any, 0);
1500		defrouter_select();
1501		/* xxx sumikawa: flush prefix list */
1502		break;
1503	case SIOCSPFXFLUSH_IN6:
1504	    {
1505		/* flush all the prefix advertised by routers */
1506		struct nd_prefix *pr, *next;
1507
1508		s = splnet();
1509		for (pr = nd_prefix.lh_first; pr; pr = next) {
1510			struct in6_ifaddr *ia, *ia_next;
1511
1512			next = pr->ndpr_next;
1513
1514			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1515				continue; /* XXX */
1516
1517			/* do we really have to remove addresses as well? */
1518			for (ia = in6_ifaddr; ia; ia = ia_next) {
1519				/* ia might be removed. keep the next ptr. */
1520				ia_next = ia->ia_next;
1521
1522				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1523					continue;
1524
1525				if (ia->ia6_ndpr == pr)
1526					in6_purgeaddr(&ia->ia_ifa);
1527			}
1528			prelist_remove(pr);
1529		}
1530		splx(s);
1531		break;
1532	    }
1533	case SIOCSRTRFLUSH_IN6:
1534	    {
1535		/* flush all the default routers */
1536		struct nd_defrouter *dr, *next;
1537
1538		s = splnet();
1539		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1540			/*
1541			 * The first entry of the list may be stored in
1542			 * the routing table, so we'll delete it later.
1543			 */
1544			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
1545				next = TAILQ_NEXT(dr, dr_entry);
1546				defrtrlist_del(dr);
1547			}
1548			defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
1549		}
1550		splx(s);
1551		break;
1552	    }
1553	case SIOCGNBRINFO_IN6:
1554	    {
1555		struct llinfo_nd6 *ln;
1556		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1557
1558		/*
1559		 * XXX: KAME specific hack for scoped addresses
1560		 *      XXXX: for other scopes than link-local?
1561		 */
1562		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
1563		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
1564			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
1565
1566			if (*idp == 0)
1567				*idp = htons(ifp->if_index);
1568		}
1569
1570		s = splnet();
1571		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
1572			error = EINVAL;
1573			splx(s);
1574			break;
1575		}
1576		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1577		nbi->state = ln->ln_state;
1578		nbi->asked = ln->ln_asked;
1579		nbi->isrouter = ln->ln_router;
1580		nbi->expire = ln->ln_expire;
1581		splx(s);
1582
1583		break;
1584	    }
1585	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1586		ndif->ifindex = nd6_defifindex;
1587		break;
1588	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1589		return(nd6_setdefaultiface(ndif->ifindex));
1590		break;
1591	}
1592	return(error);
1593}
1594
1595/*
1596 * Create neighbor cache entry and cache link-layer address,
1597 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
1598 */
1599struct rtentry *
1600nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
1601	struct ifnet *ifp;
1602	struct in6_addr *from;
1603	char *lladdr;
1604	int lladdrlen;
1605	int type;	/* ICMP6 type */
1606	int code;	/* type dependent information */
1607{
1608	struct rtentry *rt = NULL;
1609	struct llinfo_nd6 *ln = NULL;
1610	int is_newentry;
1611	struct sockaddr_dl *sdl = NULL;
1612	int do_update;
1613	int olladdr;
1614	int llchange;
1615	int newstate = 0;
1616
1617	if (!ifp)
1618		panic("ifp == NULL in nd6_cache_lladdr");
1619	if (!from)
1620		panic("from == NULL in nd6_cache_lladdr");
1621
1622	/* nothing must be updated for unspecified address */
1623	if (IN6_IS_ADDR_UNSPECIFIED(from))
1624		return NULL;
1625
1626	/*
1627	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1628	 * the caller.
1629	 *
1630	 * XXX If the link does not have link-layer adderss, what should
1631	 * we do? (ifp->if_addrlen == 0)
1632	 * Spec says nothing in sections for RA, RS and NA.  There's small
1633	 * description on it in NS section (RFC 2461 7.2.3).
1634	 */
1635
1636	rt = nd6_lookup(from, 0, ifp);
1637	if (!rt) {
1638#if 0
1639		/* nothing must be done if there's no lladdr */
1640		if (!lladdr || !lladdrlen)
1641			return NULL;
1642#endif
1643
1644		rt = nd6_lookup(from, 1, ifp);
1645		is_newentry = 1;
1646	} else {
1647		/* do nothing if static ndp is set */
1648		if (rt->rt_flags & RTF_STATIC)
1649			return NULL;
1650		is_newentry = 0;
1651	}
1652
1653	if (!rt)
1654		return NULL;
1655	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1656fail:
1657		(void)nd6_free(rt);
1658		return NULL;
1659	}
1660	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1661	if (!ln)
1662		goto fail;
1663	if (!rt->rt_gateway)
1664		goto fail;
1665	if (rt->rt_gateway->sa_family != AF_LINK)
1666		goto fail;
1667	sdl = SDL(rt->rt_gateway);
1668
1669	olladdr = (sdl->sdl_alen) ? 1 : 0;
1670	if (olladdr && lladdr) {
1671		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1672			llchange = 1;
1673		else
1674			llchange = 0;
1675	} else
1676		llchange = 0;
1677
1678	/*
1679	 * newentry olladdr  lladdr  llchange	(*=record)
1680	 *	0	n	n	--	(1)
1681	 *	0	y	n	--	(2)
1682	 *	0	n	y	--	(3) * STALE
1683	 *	0	y	y	n	(4) *
1684	 *	0	y	y	y	(5) * STALE
1685	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1686	 *	1	--	y	--	(7) * STALE
1687	 */
1688
1689	if (lladdr) {		/*(3-5) and (7)*/
1690		/*
1691		 * Record source link-layer address
1692		 * XXX is it dependent to ifp->if_type?
1693		 */
1694		sdl->sdl_alen = ifp->if_addrlen;
1695		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1696	}
1697
1698	if (!is_newentry) {
1699		if ((!olladdr && lladdr)		/*(3)*/
1700		 || (olladdr && lladdr && llchange)) {	/*(5)*/
1701			do_update = 1;
1702			newstate = ND6_LLINFO_STALE;
1703		} else					/*(1-2,4)*/
1704			do_update = 0;
1705	} else {
1706		do_update = 1;
1707		if (!lladdr)				/*(6)*/
1708			newstate = ND6_LLINFO_NOSTATE;
1709		else					/*(7)*/
1710			newstate = ND6_LLINFO_STALE;
1711	}
1712
1713	if (do_update) {
1714		/*
1715		 * Update the state of the neighbor cache.
1716		 */
1717		ln->ln_state = newstate;
1718
1719		if (ln->ln_state == ND6_LLINFO_STALE) {
1720			/*
1721			 * XXX: since nd6_output() below will cause
1722			 * state tansition to DELAY and reset the timer,
1723			 * we must set the timer now, although it is actually
1724			 * meaningless.
1725			 */
1726			ln->ln_expire = time_second + nd6_gctimer;
1727
1728			if (ln->ln_hold) {
1729				/*
1730				 * we assume ifp is not a p2p here, so just
1731				 * set the 2nd argument as the 1st one.
1732				 */
1733				nd6_output(ifp, ifp, ln->ln_hold,
1734					   (struct sockaddr_in6 *)rt_key(rt),
1735					   rt);
1736				ln->ln_hold = NULL;
1737			}
1738		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1739			/* probe right away */
1740			ln->ln_expire = time_second;
1741		}
1742	}
1743
1744	/*
1745	 * ICMP6 type dependent behavior.
1746	 *
1747	 * NS: clear IsRouter if new entry
1748	 * RS: clear IsRouter
1749	 * RA: set IsRouter if there's lladdr
1750	 * redir: clear IsRouter if new entry
1751	 *
1752	 * RA case, (1):
1753	 * The spec says that we must set IsRouter in the following cases:
1754	 * - If lladdr exist, set IsRouter.  This means (1-5).
1755	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1756	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1757	 * A quetion arises for (1) case.  (1) case has no lladdr in the
1758	 * neighbor cache, this is similar to (6).
1759	 * This case is rare but we figured that we MUST NOT set IsRouter.
1760	 *
1761	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1762	 *							D R
1763	 *	0	n	n	--	(1)	c   ?     s
1764	 *	0	y	n	--	(2)	c   s     s
1765	 *	0	n	y	--	(3)	c   s     s
1766	 *	0	y	y	n	(4)	c   s     s
1767	 *	0	y	y	y	(5)	c   s     s
1768	 *	1	--	n	--	(6) c	c 	c s
1769	 *	1	--	y	--	(7) c	c   s	c s
1770	 *
1771	 *					(c=clear s=set)
1772	 */
1773	switch (type & 0xff) {
1774	case ND_NEIGHBOR_SOLICIT:
1775		/*
1776		 * New entry must have is_router flag cleared.
1777		 */
1778		if (is_newentry)	/*(6-7)*/
1779			ln->ln_router = 0;
1780		break;
1781	case ND_REDIRECT:
1782		/*
1783		 * If the icmp is a redirect to a better router, always set the
1784		 * is_router flag. Otherwise, if the entry is newly created,
1785		 * clear the flag. [RFC 2461, sec 8.3]
1786		 */
1787		if (code == ND_REDIRECT_ROUTER)
1788			ln->ln_router = 1;
1789		else if (is_newentry) /*(6-7)*/
1790			ln->ln_router = 0;
1791		break;
1792	case ND_ROUTER_SOLICIT:
1793		/*
1794		 * is_router flag must always be cleared.
1795		 */
1796		ln->ln_router = 0;
1797		break;
1798	case ND_ROUTER_ADVERT:
1799		/*
1800		 * Mark an entry with lladdr as a router.
1801		 */
1802		if ((!is_newentry && (olladdr || lladdr))	/*(2-5)*/
1803		 || (is_newentry && lladdr)) {			/*(7)*/
1804			ln->ln_router = 1;
1805		}
1806		break;
1807	}
1808
1809	/*
1810	 * When the link-layer address of a router changes, select the
1811	 * best router again.  In particular, when the neighbor entry is newly
1812	 * created, it might affect the selection policy.
1813	 * Question: can we restrict the first condition to the "is_newentry"
1814	 * case?
1815	 * XXX: when we hear an RA from a new router with the link-layer
1816	 * address option, defrouter_select() is called twice, since
1817	 * defrtrlist_update called the function as well.  However, I believe
1818	 * we can compromise the overhead, since it only happens the first
1819	 * time.
1820	 * XXX: although defrouter_select() should not have a bad effect
1821	 * for those are not autoconfigured hosts, we explicitly avoid such
1822	 * cases for safety.
1823	 */
1824	if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv)
1825		defrouter_select();
1826
1827	return rt;
1828}
1829
1830static void
1831nd6_slowtimo(ignored_arg)
1832    void *ignored_arg;
1833{
1834	int s = splnet();
1835	int i;
1836	struct nd_ifinfo *nd6if;
1837
1838	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1839	    nd6_slowtimo, NULL);
1840	for (i = 1; i < if_index + 1; i++) {
1841		if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
1842			continue;
1843		nd6if = &nd_ifinfo[i];
1844		if (nd6if->basereachable && /* already initialized */
1845		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1846			/*
1847			 * Since reachable time rarely changes by router
1848			 * advertisements, we SHOULD insure that a new random
1849			 * value gets recomputed at least once every few hours.
1850			 * (RFC 2461, 6.3.4)
1851			 */
1852			nd6if->recalctm = nd6_recalc_reachtm_interval;
1853			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1854		}
1855	}
1856	splx(s);
1857}
1858
/* Record the error code and jump to the common mbuf-freeing exit. */
#define senderr(e) { error = (e); goto bad;}
/*
 * Send the packet "m0" to "dst" through "ifp", performing next-hop
 * determination, link-layer address resolution and the STALE -> DELAY
 * transition of Neighbor Unreachability Detection on the way.
 *
 * "origifp" is handed to if_output instead of "ifp" when "ifp" is a
 * loopback interface.  "rt0" is the route to use and may be NULL.
 *
 * Returns 0 when the packet was queued on the neighbor entry to wait for
 * address resolution, the result of if_output when it was sent, or an
 * errno value (EHOSTUNREACH, EIO) after freeing the mbuf.
 */
int
nd6_output(ifp, origifp, m0, dst, rt0)
	struct ifnet *ifp;
	struct ifnet *origifp;
	struct mbuf *m0;
	struct sockaddr_in6 *dst;
	struct rtentry *rt0;
{
	struct mbuf *m = m0;
	struct rtentry *rt = rt0;
	struct sockaddr_in6 *gw6 = NULL;
	struct llinfo_nd6 *ln = NULL;
	int error = 0;

	/* Multicast destinations need no neighbor cache entry. */
	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
		goto sendpkt;

	/* Neither do interfaces that do not use a neighbor cache. */
	if (nd6_need_cache(ifp) == 0)
		goto sendpkt;

	/*
	 * next hop determination. This routine is derived from ether_output.
	 */
	if (rt) {
		if ((rt->rt_flags & RTF_UP) == 0) {
			/* the route went down; look the destination up again */
			if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) !=
				NULL)
			{
				rt->rt_refcnt--;
				if (rt->rt_ifp != ifp) {
					/* XXX: loop care? */
					return nd6_output(ifp, origifp, m0,
							  dst, rt);
				}
			} else
				senderr(EHOSTUNREACH);
		}

		if (rt->rt_flags & RTF_GATEWAY) {
			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;

			/*
			 * We skip link-layer address resolution and NUD
			 * if the gateway is not a neighbor from ND point
			 * of view, regardless the value of the
			 * nd_ifinfo.flags.
			 * The second condition is a bit tricky: we skip
			 * if the gateway is our own address, which is
			 * sometimes used to install a route to a p2p link.
			 */
			if (!nd6_is_addr_neighbor(gw6, ifp) ||
			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
				/*
				 * We allow this kind of tricky route only
				 * when the outgoing interface is p2p.
				 * XXX: we may need a more generic rule here.
				 */
				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
					senderr(EHOSTUNREACH);

				goto sendpkt;
			}

			/*
			 * Switch to the route to the gateway itself.  When
			 * it is missing, or no longer up, it is looked up
			 * again; note that "lookup" jumps into the body of
			 * the if statement below.
			 */
			if (rt->rt_gwroute == 0)
				goto lookup;
			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
				rtfree(rt); rt = rt0;
			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
				if ((rt = rt->rt_gwroute) == 0)
					senderr(EHOSTUNREACH);
			}
		}
	}

	/*
	 * Address resolution or Neighbor Unreachability Detection
	 * for the next hop.
	 * At this point, the destination of the packet must be a unicast
	 * or an anycast address(i.e. not a multicast).
	 */

	/* Look up the neighbor cache for the nexthop */
	if (rt && (rt->rt_flags & RTF_LLINFO) != 0)
		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	else {
		/*
		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
		 * the condition below is not very efficient. But we believe
		 * it is tolerable, because this should be a rare case.
		 */
		if (nd6_is_addr_neighbor(dst, ifp) &&
		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
	}
	if (!ln || !rt) {
		/*
		 * No neighbor entry is available.  This is an error only
		 * on a non-p2p interface whose ND6_IFF_PERFORMNUD flag is
		 * clear; in every other case the packet is sent anyway.
		 */
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
			log(LOG_DEBUG,
			    "nd6_output: can't allocate llinfo for %s "
			    "(ln=%p, rt=%p)\n",
			    ip6_sprintf(&dst->sin6_addr), ln, rt);
			senderr(EIO);	/* XXX: good error? */
		}

		goto sendpkt;	/* send anyway */
	}

	/* We don't have to do link-layer address resolution on a p2p link. */
	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
	    ln->ln_state < ND6_LLINFO_REACHABLE) {
		ln->ln_state = ND6_LLINFO_STALE;
		ln->ln_expire = time_second + nd6_gctimer;
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure we do
	 * neighbor unreachability detection on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND6_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND6_LLINFO_DELAY;
		ln->ln_expire = time_second + nd6_delay;
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
		goto sendpkt;

	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet. Replace the held mbuf (if any) with this
	 * latest one.
	 *
	 * XXX Does the code conform to rate-limiting rule?
	 * (RFC 2461 7.2.2)
	 */
	if (ln->ln_state == ND6_LLINFO_NOSTATE)
		ln->ln_state = ND6_LLINFO_INCOMPLETE;
	if (ln->ln_hold)
		m_freem(ln->ln_hold);
	ln->ln_hold = m;
	if (ln->ln_expire) {
		/*
		 * Send a neighbor solicitation, unless nd6_mmaxtries of
		 * them were sent already or the timer of the previous one
		 * has not expired yet.
		 */
		if (ln->ln_asked < nd6_mmaxtries &&
		    ln->ln_expire < time_second) {
			ln->ln_asked++;
			/* the division by 1000 suggests retrans is in ms */
			ln->ln_expire = time_second +
				nd_ifinfo[ifp->if_index].retrans / 1000;
			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
		}
	}
	/* the packet stays queued in ln_hold */
	return(0);

  sendpkt:

	/* a loopback interface is given origifp as the interface argument */
	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
		return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
					 rt));
	}
	return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt));

  bad:
	/* error exit used by senderr(): the packet is dropped */
	if (m)
		m_freem(m);
	return (error);
}
#undef senderr
2033
2034int
2035nd6_need_cache(ifp)
2036	struct ifnet *ifp;
2037{
2038	/*
2039	 * XXX: we currently do not make neighbor cache on any interface
2040	 * other than ARCnet, Ethernet, FDDI and GIF.
2041	 *
2042	 * RFC2893 says:
2043	 * - unidirectional tunnels needs no ND
2044	 */
2045	switch (ifp->if_type) {
2046	case IFT_ARCNET:
2047	case IFT_ETHER:
2048	case IFT_FDDI:
2049	case IFT_IEEE1394:
2050#ifdef IFT_L2VLAN
2051	case IFT_L2VLAN:
2052#endif
2053#ifdef IFT_IEEE80211
2054	case IFT_IEEE80211:
2055#endif
2056	case IFT_GIF:		/* XXX need more cases? */
2057		return(1);
2058	default:
2059		return(0);
2060	}
2061}
2062
2063int
2064nd6_storelladdr(ifp, rt, m, dst, desten)
2065	struct ifnet *ifp;
2066	struct rtentry *rt;
2067	struct mbuf *m;
2068	struct sockaddr *dst;
2069	u_char *desten;
2070{
2071	int i;
2072	struct sockaddr_dl *sdl;
2073
2074	if (m->m_flags & M_MCAST) {
2075		switch (ifp->if_type) {
2076		case IFT_ETHER:
2077		case IFT_FDDI:
2078#ifdef IFT_L2VLAN
2079	case IFT_L2VLAN:
2080#endif
2081#ifdef IFT_IEEE80211
2082		case IFT_IEEE80211:
2083#endif
2084			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2085						 desten);
2086			return(1);
2087		case IFT_IEEE1394:
2088			for (i = 0; i < ifp->if_addrlen; i++)
2089				desten[i] = ~0;
2090			return(1);
2091		case IFT_ARCNET:
2092			*desten = 0;
2093			return(1);
2094		default:
2095			m_freem(m);
2096			return(0);
2097		}
2098	}
2099
2100	if (rt == NULL) {
2101		/* this could happen, if we could not allocate memory */
2102		m_freem(m);
2103		return(0);
2104	}
2105	if (rt->rt_gateway->sa_family != AF_LINK) {
2106		printf("nd6_storelladdr: something odd happens\n");
2107		m_freem(m);
2108		return(0);
2109	}
2110	sdl = SDL(rt->rt_gateway);
2111	if (sdl->sdl_alen == 0) {
2112		/* this should be impossible, but we bark here for debugging */
2113		printf("nd6_storelladdr: sdl_alen == 0\n");
2114		m_freem(m);
2115		return(0);
2116	}
2117
2118	bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
2119	return(1);
2120}
2121
/*
 * Handlers for the default router list and prefix list sysctl nodes;
 * both are defined further down in this file.
 */
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet6_icmp6);
#endif
/*
 * Register the nd6_drlist and nd6_prlist nodes under _net_inet6_icmp6.
 * Both are declared with CTLFLAG_RD and served by the handlers above.
 */
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
	CTLFLAG_RD, nd6_sysctl_drlist, "");
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
	CTLFLAG_RD, nd6_sysctl_prlist, "");
2131
2132static int
2133nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2134{
2135	int error;
2136	char buf[1024];
2137	struct in6_defrouter *d, *de;
2138	struct nd_defrouter *dr;
2139
2140	if (req->newptr)
2141		return EPERM;
2142	error = 0;
2143
2144	for (dr = TAILQ_FIRST(&nd_defrouter);
2145	     dr;
2146	     dr = TAILQ_NEXT(dr, dr_entry)) {
2147		d = (struct in6_defrouter *)buf;
2148		de = (struct in6_defrouter *)(buf + sizeof(buf));
2149
2150		if (d + 1 <= de) {
2151			bzero(d, sizeof(*d));
2152			d->rtaddr.sin6_family = AF_INET6;
2153			d->rtaddr.sin6_len = sizeof(d->rtaddr);
2154			if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
2155			    dr->ifp) != 0)
2156				log(LOG_ERR,
2157				    "scope error in "
2158				    "default router list (%s)\n",
2159				    ip6_sprintf(&dr->rtaddr));
2160			d->flags = dr->flags;
2161			d->rtlifetime = dr->rtlifetime;
2162			d->expire = dr->expire;
2163			d->if_index = dr->ifp->if_index;
2164		} else
2165			panic("buffer too short");
2166
2167		error = SYSCTL_OUT(req, buf, sizeof(*d));
2168		if (error)
2169			break;
2170	}
2171	return error;
2172}
2173
2174static int
2175nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2176{
2177	int error;
2178	char buf[1024];
2179	struct in6_prefix *p, *pe;
2180	struct nd_prefix *pr;
2181
2182	if (req->newptr)
2183		return EPERM;
2184	error = 0;
2185
2186	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2187		u_short advrtrs;
2188		size_t advance;
2189		struct sockaddr_in6 *sin6, *s6;
2190		struct nd_pfxrouter *pfr;
2191
2192		p = (struct in6_prefix *)buf;
2193		pe = (struct in6_prefix *)(buf + sizeof(buf));
2194
2195		if (p + 1 <= pe) {
2196			bzero(p, sizeof(*p));
2197			sin6 = (struct sockaddr_in6 *)(p + 1);
2198
2199			p->prefix = pr->ndpr_prefix;
2200			if (in6_recoverscope(&p->prefix,
2201			    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
2202				log(LOG_ERR,
2203				    "scope error in prefix list (%s)\n",
2204				    ip6_sprintf(&p->prefix.sin6_addr));
2205			p->raflags = pr->ndpr_raf;
2206			p->prefixlen = pr->ndpr_plen;
2207			p->vltime = pr->ndpr_vltime;
2208			p->pltime = pr->ndpr_pltime;
2209			p->if_index = pr->ndpr_ifp->if_index;
2210			p->expire = pr->ndpr_expire;
2211			p->refcnt = pr->ndpr_refcnt;
2212			p->flags = pr->ndpr_stateflags;
2213			p->origin = PR_ORIG_RA;
2214			advrtrs = 0;
2215			for (pfr = pr->ndpr_advrtrs.lh_first;
2216			     pfr;
2217			     pfr = pfr->pfr_next) {
2218				if ((void *)&sin6[advrtrs + 1] >
2219				    (void *)pe) {
2220					advrtrs++;
2221					continue;
2222				}
2223				s6 = &sin6[advrtrs];
2224				bzero(s6, sizeof(*s6));
2225				s6->sin6_family = AF_INET6;
2226				s6->sin6_len = sizeof(*sin6);
2227				if (in6_recoverscope(s6,
2228				    &pfr->router->rtaddr,
2229				    pfr->router->ifp) != 0)
2230					log(LOG_ERR,
2231					    "scope error in "
2232					    "prefix list (%s)\n",
2233					    ip6_sprintf(&pfr->router->rtaddr));
2234				advrtrs++;
2235			}
2236			p->advrtrs = advrtrs;
2237		} else
2238			panic("buffer too short");
2239
2240		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
2241		error = SYSCTL_OUT(req, buf, advance);
2242		if (error)
2243			break;
2244	}
2245	return error;
2246}
2247