nd6.c revision 101240
1/*	$FreeBSD: head/sys/netinet6/nd6.c 101240 2002-08-02 20:49:14Z rwatson $	*/
2/*	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * XXX
35 * KAME 970409 note:
36 * BSD/OS version heavily modifies this code, related to llinfo.
37 * Since we don't have BSD/OS version of net/route.c in our hand,
38 * I left the code mostly as it was in 970310.  -- itojun
39 */
40
41#include "opt_inet.h"
42#include "opt_inet6.h"
43#include "opt_mac.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/callout.h>
48#include <sys/mac.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/time.h>
54#include <sys/kernel.h>
55#include <sys/protosw.h>
56#include <sys/errno.h>
57#include <sys/syslog.h>
58#include <sys/queue.h>
59#include <sys/sysctl.h>
60
61#include <net/if.h>
62#include <net/if_dl.h>
63#include <net/if_types.h>
64#include <net/if_atm.h>
65#include <net/fddi.h>
66#include <net/route.h>
67
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet6/in6_var.h>
71#include <netinet/ip6.h>
72#include <netinet6/ip6_var.h>
73#include <netinet6/nd6.h>
74#include <netinet6/in6_prefix.h>
75#include <netinet/icmp6.h>
76
77#include <net/net_osdep.h>
78
79#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour; nd6_init() arms nd6_slowtimo_ch with this (times hz) */
80#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours; initial value of nd6_recalc_reachtm_interval */
81
82#define SIN6(s) ((struct sockaddr_in6 *)s) /* cast a pointer to struct sockaddr_in6 * */
83#define SDL(s) ((struct sockaddr_dl *)s) /* cast a pointer to struct sockaddr_dl * */
84
85/* timer values */
86int	nd6_prune	= 1;	/* nd6_timer() re-arms itself every nd6_prune second(s) */
87int	nd6_delay	= 5;	/* delay before the first probe: 5 seconds */
88int	nd6_umaxtries	= 3;	/* maximum unicast queries (limits ln_asked in the PROBE state) */
89int	nd6_mmaxtries	= 3;	/* maximum multicast queries (limits ln_asked in the INCOMPLETE state) */
90int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
91int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer for STALE entries */
92
93/* preventing too many loops in ND option parsing */
94int nd6_maxndopt = 10;	/* max # of ND options allowed (loop bound in nd6_options()) */
95
96int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints honored by nd6_nud_hint() */
97
98#ifdef ND6_DEBUG
99int nd6_debug = 1;
100#else
101int nd6_debug = 0;
102#endif
103
104/* for debugging? */
105static int nd6_inuse, nd6_allocated;
106
107struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6}; /* list head; initially points at itself, walkers stop on reaching it again */
108static size_t nd_ifinfo_indexlim = 8; /* current entry count of nd_ifinfo; doubled by nd6_ifattach() */
109struct nd_ifinfo *nd_ifinfo = NULL; /* per-interface ND state, indexed by if_index */
110struct nd_drhead nd_defrouter; /* default router list; TAILQ_INIT'ed in nd6_init() */
111struct nd_prhead nd_prefix = { 0 }; /* prefix list */
112
113int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
114static struct sockaddr_in6 all1_sa; /* AF_INET6 sockaddr with all address bytes 0xff; filled in by nd6_init() */
115
116static void nd6_slowtimo __P((void *));
117static int regen_tmpaddr __P((struct in6_ifaddr *));
118
119struct callout nd6_slowtimo_ch; /* slow timer, armed by nd6_init() */
120struct callout nd6_timer_ch; /* periodic timer, re-armed by nd6_timer() */
121extern struct callout in6_tmpaddrtimer_ch;
122
123void
124nd6_init()
125{
126	static int nd6_init_done = 0;
127	int i;
128
129	if (nd6_init_done) {
130		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
131		return;
132	}
133
134	all1_sa.sin6_family = AF_INET6;
135	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
136	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
137		all1_sa.sin6_addr.s6_addr[i] = 0xff;
138
139	/* initialization of the default router list */
140	TAILQ_INIT(&nd_defrouter);
141
142	nd6_init_done = 1;
143
144	/* start timer */
145	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
146	    nd6_slowtimo, NULL);
147}
148
149void
150nd6_ifattach(ifp)
151	struct ifnet *ifp;
152{
153
154	/*
155	 * We have some arrays that should be indexed by if_index.
156	 * since if_index will grow dynamically, they should grow too.
157	 */
158	if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
159		size_t n;
160		caddr_t q;
161
162		while (if_index >= nd_ifinfo_indexlim)
163			nd_ifinfo_indexlim <<= 1;
164
165		/* grow nd_ifinfo */
166		n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
167		q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK);
168		bzero(q, n);
169		if (nd_ifinfo) {
170			bcopy((caddr_t)nd_ifinfo, q, n/2);
171			free((caddr_t)nd_ifinfo, M_IP6NDP);
172		}
173		nd_ifinfo = (struct nd_ifinfo *)q;
174	}
175
176#define ND nd_ifinfo[ifp->if_index]
177
178	/*
179	 * Don't initialize if called twice.
180	 * XXX: to detect this, we should choose a member that is never set
181	 * before initialization of the ND structure itself.  We formaly used
182	 * the linkmtu member, which was not suitable because it could be
183	 * initialized via "ifconfig mtu".
184	 */
185	if (ND.basereachable)
186		return;
187
188	ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu;
189	ND.chlim = IPV6_DEFHLIM;
190	ND.basereachable = REACHABLE_TIME;
191	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
192	ND.retrans = RETRANS_TIMER;
193	ND.receivedra = 0;
194	ND.flags = ND6_IFF_PERFORMNUD;
195	nd6_setmtu(ifp);
196#undef ND
197}
198
199/*
200 * Reset ND level link MTU. This function is called when the physical MTU
201 * changes, which means we might have to adjust the ND level MTU.
202 */
203void
204nd6_setmtu(ifp)
205	struct ifnet *ifp;
206{
207#define MIN(a,b) ((a) < (b) ? (a) : (b))
208	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
209	u_long oldmaxmtu = ndi->maxmtu;
210	u_long oldlinkmtu = ndi->linkmtu;
211
212	switch (ifp->if_type) {
213	case IFT_ARCNET:	/* XXX MTU handling needs more work */
214		ndi->maxmtu = MIN(60480, ifp->if_mtu);
215		break;
216	case IFT_ETHER:
217		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
218		break;
219	case IFT_FDDI:
220		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
221		break;
222	case IFT_ATM:
223		ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu);
224		break;
225	case IFT_IEEE1394:	/* XXX should be IEEE1394MTU(1500) */
226		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
227		break;
228#ifdef IFT_IEEE80211
229	case IFT_IEEE80211:	/* XXX should be IEEE80211MTU(1500) */
230		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
231		break;
232#endif
233	default:
234		ndi->maxmtu = ifp->if_mtu;
235		break;
236	}
237
238	if (oldmaxmtu != ndi->maxmtu) {
239		/*
240		 * If the ND level MTU is not set yet, or if the maxmtu
241		 * is reset to a smaller value than the ND level MTU,
242		 * also reset the ND level MTU.
243		 */
244		if (ndi->linkmtu == 0 ||
245		    ndi->maxmtu < ndi->linkmtu) {
246			ndi->linkmtu = ndi->maxmtu;
247			/* also adjust in6_maxmtu if necessary. */
248			if (oldlinkmtu == 0) {
249				/*
250				 * XXX: the case analysis is grotty, but
251				 * it is not efficient to call in6_setmaxmtu()
252				 * here when we are during the initialization
253				 * procedure.
254				 */
255				if (in6_maxmtu < ndi->linkmtu)
256					in6_maxmtu = ndi->linkmtu;
257			} else
258				in6_setmaxmtu();
259		}
260	}
261#undef MIN
262}
263
264void
265nd6_option_init(opt, icmp6len, ndopts)
266	void *opt;
267	int icmp6len;
268	union nd_opts *ndopts;
269{
270	bzero(ndopts, sizeof(*ndopts));
271	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
272	ndopts->nd_opts_last
273		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
274
275	if (icmp6len == 0) {
276		ndopts->nd_opts_done = 1;
277		ndopts->nd_opts_search = NULL;
278	}
279}
280
281/*
282 * Take one ND option.
283 */
284struct nd_opt_hdr *
285nd6_option(ndopts)
286	union nd_opts *ndopts;
287{
288	struct nd_opt_hdr *nd_opt;
289	int olen;
290
291	if (!ndopts)
292		panic("ndopts == NULL in nd6_option\n");
293	if (!ndopts->nd_opts_last)
294		panic("uninitialized ndopts in nd6_option\n");
295	if (!ndopts->nd_opts_search)
296		return NULL;
297	if (ndopts->nd_opts_done)
298		return NULL;
299
300	nd_opt = ndopts->nd_opts_search;
301
302	/* make sure nd_opt_len is inside the buffer */
303	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
304		bzero(ndopts, sizeof(*ndopts));
305		return NULL;
306	}
307
308	olen = nd_opt->nd_opt_len << 3;
309	if (olen == 0) {
310		/*
311		 * Message validation requires that all included
312		 * options have a length that is greater than zero.
313		 */
314		bzero(ndopts, sizeof(*ndopts));
315		return NULL;
316	}
317
318	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
319	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
320		/* option overruns the end of buffer, invalid */
321		bzero(ndopts, sizeof(*ndopts));
322		return NULL;
323	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
324		/* reached the end of options chain */
325		ndopts->nd_opts_done = 1;
326		ndopts->nd_opts_search = NULL;
327	}
328	return nd_opt;
329}
330
331/*
332 * Parse multiple ND options.
333 * This function is much easier to use, for ND routines that do not need
334 * multiple options of the same type.
335 */
336int
337nd6_options(ndopts)
338	union nd_opts *ndopts;
339{
340	struct nd_opt_hdr *nd_opt;
341	int i = 0;
342
343	if (!ndopts)
344		panic("ndopts == NULL in nd6_options\n");
345	if (!ndopts->nd_opts_last)
346		panic("uninitialized ndopts in nd6_options\n");
347	if (!ndopts->nd_opts_search)
348		return 0;
349
350	while (1) {
351		nd_opt = nd6_option(ndopts);
352		if (!nd_opt && !ndopts->nd_opts_last) {
353			/*
354			 * Message validation requires that all included
355			 * options have a length that is greater than zero.
356			 */
357			icmp6stat.icp6s_nd_badopt++;
358			bzero(ndopts, sizeof(*ndopts));
359			return -1;
360		}
361
362		if (!nd_opt)
363			goto skip1;
364
365		switch (nd_opt->nd_opt_type) {
366		case ND_OPT_SOURCE_LINKADDR:
367		case ND_OPT_TARGET_LINKADDR:
368		case ND_OPT_MTU:
369		case ND_OPT_REDIRECTED_HEADER:
370			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
371				nd6log((LOG_INFO,
372				    "duplicated ND6 option found (type=%d)\n",
373				    nd_opt->nd_opt_type));
374				/* XXX bark? */
375			} else {
376				ndopts->nd_opt_array[nd_opt->nd_opt_type]
377					= nd_opt;
378			}
379			break;
380		case ND_OPT_PREFIX_INFORMATION:
381			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
382				ndopts->nd_opt_array[nd_opt->nd_opt_type]
383					= nd_opt;
384			}
385			ndopts->nd_opts_pi_end =
386				(struct nd_opt_prefix_info *)nd_opt;
387			break;
388		default:
389			/*
390			 * Unknown options must be silently ignored,
391			 * to accomodate future extension to the protocol.
392			 */
393			nd6log((LOG_DEBUG,
394			    "nd6_options: unsupported option %d - "
395			    "option ignored\n", nd_opt->nd_opt_type));
396		}
397
398skip1:
399		i++;
400		if (i > nd6_maxndopt) {
401			icmp6stat.icp6s_nd_toomanyopt++;
402			nd6log((LOG_INFO, "too many loop in nd opt\n"));
403			break;
404		}
405
406		if (ndopts->nd_opts_done)
407			break;
408	}
409
410	return 0;
411}
412
413/*
414 * ND6 timer routine to expire default route list and prefix list
415 */
416void
417nd6_timer(ignored_arg)
418	void	*ignored_arg;
419{
420	int s;
421	struct llinfo_nd6 *ln;
422	struct nd_defrouter *dr;
423	struct nd_prefix *pr;
424	struct ifnet *ifp;
425	struct in6_ifaddr *ia6, *nia6;
426	struct in6_addrlifetime *lt6;
427
428	s = splnet();
429	callout_reset(&nd6_timer_ch, nd6_prune * hz,
430		      nd6_timer, NULL);
431
432	ln = llinfo_nd6.ln_next;
433	while (ln && ln != &llinfo_nd6) {
434		struct rtentry *rt;
435		struct sockaddr_in6 *dst;
436		struct llinfo_nd6 *next = ln->ln_next;
437		/* XXX: used for the DELAY case only: */
438		struct nd_ifinfo *ndi = NULL;
439
440		if ((rt = ln->ln_rt) == NULL) {
441			ln = next;
442			continue;
443		}
444		if ((ifp = rt->rt_ifp) == NULL) {
445			ln = next;
446			continue;
447		}
448		ndi = &nd_ifinfo[ifp->if_index];
449		dst = (struct sockaddr_in6 *)rt_key(rt);
450
451		if (ln->ln_expire > time_second) {
452			ln = next;
453			continue;
454		}
455
456		/* sanity check */
457		if (!rt)
458			panic("rt=0 in nd6_timer(ln=%p)\n", ln);
459		if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
460			panic("rt_llinfo(%p) is not equal to ln(%p)\n",
461			      rt->rt_llinfo, ln);
462		if (!dst)
463			panic("dst=0 in nd6_timer(ln=%p)\n", ln);
464
465		switch (ln->ln_state) {
466		case ND6_LLINFO_INCOMPLETE:
467			if (ln->ln_asked < nd6_mmaxtries) {
468				ln->ln_asked++;
469				ln->ln_expire = time_second +
470					nd_ifinfo[ifp->if_index].retrans / 1000;
471				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
472					ln, 0);
473			} else {
474				struct mbuf *m = ln->ln_hold;
475				if (m) {
476					if (rt->rt_ifp) {
477						/*
478						 * Fake rcvif to make ICMP error
479						 * more helpful in diagnosing
480						 * for the receiver.
481						 * XXX: should we consider
482						 * older rcvif?
483						 */
484						m->m_pkthdr.rcvif = rt->rt_ifp;
485					}
486					icmp6_error(m, ICMP6_DST_UNREACH,
487						    ICMP6_DST_UNREACH_ADDR, 0);
488					ln->ln_hold = NULL;
489				}
490				next = nd6_free(rt);
491			}
492			break;
493		case ND6_LLINFO_REACHABLE:
494			if (ln->ln_expire) {
495				ln->ln_state = ND6_LLINFO_STALE;
496				ln->ln_expire = time_second + nd6_gctimer;
497			}
498			break;
499
500		case ND6_LLINFO_STALE:
501			/* Garbage Collection(RFC 2461 5.3) */
502			if (ln->ln_expire)
503				next = nd6_free(rt);
504			break;
505
506		case ND6_LLINFO_DELAY:
507			if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
508				/* We need NUD */
509				ln->ln_asked = 1;
510				ln->ln_state = ND6_LLINFO_PROBE;
511				ln->ln_expire = time_second +
512					ndi->retrans / 1000;
513				nd6_ns_output(ifp, &dst->sin6_addr,
514					      &dst->sin6_addr,
515					      ln, 0);
516			} else {
517				ln->ln_state = ND6_LLINFO_STALE; /* XXX */
518				ln->ln_expire = time_second + nd6_gctimer;
519			}
520			break;
521		case ND6_LLINFO_PROBE:
522			if (ln->ln_asked < nd6_umaxtries) {
523				ln->ln_asked++;
524				ln->ln_expire = time_second +
525					nd_ifinfo[ifp->if_index].retrans / 1000;
526				nd6_ns_output(ifp, &dst->sin6_addr,
527					       &dst->sin6_addr, ln, 0);
528			} else {
529				next = nd6_free(rt);
530			}
531			break;
532		}
533		ln = next;
534	}
535
536	/* expire default router list */
537	dr = TAILQ_FIRST(&nd_defrouter);
538	while (dr) {
539		if (dr->expire && dr->expire < time_second) {
540			struct nd_defrouter *t;
541			t = TAILQ_NEXT(dr, dr_entry);
542			defrtrlist_del(dr);
543			dr = t;
544		} else {
545			dr = TAILQ_NEXT(dr, dr_entry);
546		}
547	}
548
549	/*
550	 * expire interface addresses.
551	 * in the past the loop was inside prefix expiry processing.
552	 * However, from a stricter speci-confrmance standpoint, we should
553	 * rather separate address lifetimes and prefix lifetimes.
554	 */
555  addrloop:
556	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
557		nia6 = ia6->ia_next;
558		/* check address lifetime */
559		lt6 = &ia6->ia6_lifetime;
560		if (IFA6_IS_INVALID(ia6)) {
561			int regen = 0;
562
563			/*
564			 * If the expiring address is temporary, try
565			 * regenerating a new one.  This would be useful when
566			 * we suspended a laptop PC, then turned it on after a
567			 * period that could invalidate all temporary
568			 * addresses.  Although we may have to restart the
569			 * loop (see below), it must be after purging the
570			 * address.  Otherwise, we'd see an infinite loop of
571			 * regeneration.
572			 */
573			if (ip6_use_tempaddr &&
574			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
575				if (regen_tmpaddr(ia6) == 0)
576					regen = 1;
577			}
578
579			in6_purgeaddr(&ia6->ia_ifa);
580
581			if (regen)
582				goto addrloop; /* XXX: see below */
583		}
584		if (IFA6_IS_DEPRECATED(ia6)) {
585			int oldflags = ia6->ia6_flags;
586
587			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
588
589			/*
590			 * If a temporary address has just become deprecated,
591			 * regenerate a new one if possible.
592			 */
593			if (ip6_use_tempaddr &&
594			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
595			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
596
597				if (regen_tmpaddr(ia6) == 0) {
598					/*
599					 * A new temporary address is
600					 * generated.
601					 * XXX: this means the address chain
602					 * has changed while we are still in
603					 * the loop.  Although the change
604					 * would not cause disaster (because
605					 * it's not a deletion, but an
606					 * addition,) we'd rather restart the
607					 * loop just for safety.  Or does this
608					 * significantly reduce performance??
609					 */
610					goto addrloop;
611				}
612			}
613		} else {
614			/*
615			 * A new RA might have made a deprecated address
616			 * preferred.
617			 */
618			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
619		}
620	}
621
622	/* expire prefix list */
623	pr = nd_prefix.lh_first;
624	while (pr) {
625		/*
626		 * check prefix lifetime.
627		 * since pltime is just for autoconf, pltime processing for
628		 * prefix is not necessary.
629		 */
630		if (pr->ndpr_expire && pr->ndpr_expire < time_second) {
631			struct nd_prefix *t;
632			t = pr->ndpr_next;
633
634			/*
635			 * address expiration and prefix expiration are
636			 * separate.  NEVER perform in6_purgeaddr here.
637			 */
638
639			prelist_remove(pr);
640			pr = t;
641		} else
642			pr = pr->ndpr_next;
643	}
644	splx(s);
645}
646
647static int
648regen_tmpaddr(ia6)
649	struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
650{
651	struct ifaddr *ifa;
652	struct ifnet *ifp;
653	struct in6_ifaddr *public_ifa6 = NULL;
654
655	ifp = ia6->ia_ifa.ifa_ifp;
656	for (ifa = ifp->if_addrlist.tqh_first; ifa;
657	     ifa = ifa->ifa_list.tqe_next)
658	{
659		struct in6_ifaddr *it6;
660
661		if (ifa->ifa_addr->sa_family != AF_INET6)
662			continue;
663
664		it6 = (struct in6_ifaddr *)ifa;
665
666		/* ignore no autoconf addresses. */
667		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
668			continue;
669
670		/* ignore autoconf addresses with different prefixes. */
671		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
672			continue;
673
674		/*
675		 * Now we are looking at an autoconf address with the same
676		 * prefix as ours.  If the address is temporary and is still
677		 * preferred, do not create another one.  It would be rare, but
678		 * could happen, for example, when we resume a laptop PC after
679		 * a long period.
680		 */
681		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
682		    !IFA6_IS_DEPRECATED(it6)) {
683			public_ifa6 = NULL;
684			break;
685		}
686
687		/*
688		 * This is a public autoconf address that has the same prefix
689		 * as ours.  If it is preferred, keep it.  We can't break the
690		 * loop here, because there may be a still-preferred temporary
691		 * address with the prefix.
692		 */
693		if (!IFA6_IS_DEPRECATED(it6))
694		    public_ifa6 = it6;
695	}
696
697	if (public_ifa6 != NULL) {
698		int e;
699
700		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
701			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
702			    " tmp addr,errno=%d\n", e);
703			return(-1);
704		}
705		return(0);
706	}
707
708	return(-1);
709}
710
711/*
712 * Nuke neighbor cache/prefix/default router management table, right before
713 * ifp goes away.
714 */
715void
716nd6_purge(ifp)
717	struct ifnet *ifp;
718{
719	struct llinfo_nd6 *ln, *nln;
720	struct nd_defrouter *dr, *ndr, drany;
721	struct nd_prefix *pr, *npr;
722
723	/* Nuke default router list entries toward ifp */
724	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
725		/*
726		 * The first entry of the list may be stored in
727		 * the routing table, so we'll delete it later.
728		 */
729		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
730			ndr = TAILQ_NEXT(dr, dr_entry);
731			if (dr->ifp == ifp)
732				defrtrlist_del(dr);
733		}
734		dr = TAILQ_FIRST(&nd_defrouter);
735		if (dr->ifp == ifp)
736			defrtrlist_del(dr);
737	}
738
739	/* Nuke prefix list entries toward ifp */
740	for (pr = nd_prefix.lh_first; pr; pr = npr) {
741		npr = pr->ndpr_next;
742		if (pr->ndpr_ifp == ifp) {
743			/*
744			 * Previously, pr->ndpr_addr is removed as well,
745			 * but I strongly believe we don't have to do it.
746			 * nd6_purge() is only called from in6_ifdetach(),
747			 * which removes all the associated interface addresses
748			 * by itself.
749			 * (jinmei@kame.net 20010129)
750			 */
751			prelist_remove(pr);
752		}
753	}
754
755	/* cancel default outgoing interface setting */
756	if (nd6_defifindex == ifp->if_index)
757		nd6_setdefaultiface(0);
758
759	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
760		/* refresh default router list */
761		bzero(&drany, sizeof(drany));
762		defrouter_delreq(&drany, 0);
763		defrouter_select();
764	}
765
766	/*
767	 * Nuke neighbor cache entries for the ifp.
768	 * Note that rt->rt_ifp may not be the same as ifp,
769	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
770	 * nd6_rtrequest(), and ip6_input().
771	 */
772	ln = llinfo_nd6.ln_next;
773	while (ln && ln != &llinfo_nd6) {
774		struct rtentry *rt;
775		struct sockaddr_dl *sdl;
776
777		nln = ln->ln_next;
778		rt = ln->ln_rt;
779		if (rt && rt->rt_gateway &&
780		    rt->rt_gateway->sa_family == AF_LINK) {
781			sdl = (struct sockaddr_dl *)rt->rt_gateway;
782			if (sdl->sdl_index == ifp->if_index)
783				nln = nd6_free(rt);
784		}
785		ln = nln;
786	}
787}
788
789struct rtentry *
790nd6_lookup(addr6, create, ifp)
791	struct in6_addr *addr6;
792	int create;
793	struct ifnet *ifp;
794{
795	struct rtentry *rt;
796	struct sockaddr_in6 sin6;
797
798	bzero(&sin6, sizeof(sin6));
799	sin6.sin6_len = sizeof(struct sockaddr_in6);
800	sin6.sin6_family = AF_INET6;
801	sin6.sin6_addr = *addr6;
802#ifdef SCOPEDROUTING
803	sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
804#endif
805	rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL);
806	if (rt && (rt->rt_flags & RTF_LLINFO) == 0) {
807		/*
808		 * This is the case for the default route.
809		 * If we want to create a neighbor cache for the address, we
810		 * should free the route for the destination and allocate an
811		 * interface route.
812		 */
813		if (create) {
814			RTFREE(rt);
815			rt = 0;
816		}
817	}
818	if (!rt) {
819		if (create && ifp) {
820			int e;
821
822			/*
823			 * If no route is available and create is set,
824			 * we allocate a host route for the destination
825			 * and treat it like an interface route.
826			 * This hack is necessary for a neighbor which can't
827			 * be covered by our own prefix.
828			 */
829			struct ifaddr *ifa =
830				ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
831			if (ifa == NULL)
832				return(NULL);
833
834			/*
835			 * Create a new route.  RTF_LLINFO is necessary
836			 * to create a Neighbor Cache entry for the
837			 * destination in nd6_rtrequest which will be
838			 * called in rtrequest via ifa->ifa_rtrequest.
839			 */
840			if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6,
841					   ifa->ifa_addr,
842					   (struct sockaddr *)&all1_sa,
843					   (ifa->ifa_flags |
844					    RTF_HOST | RTF_LLINFO) &
845					   ~RTF_CLONING,
846					   &rt)) != 0)
847				log(LOG_ERR,
848				    "nd6_lookup: failed to add route for a "
849				    "neighbor(%s), errno=%d\n",
850				    ip6_sprintf(addr6), e);
851			if (rt == NULL)
852				return(NULL);
853			if (rt->rt_llinfo) {
854				struct llinfo_nd6 *ln =
855					(struct llinfo_nd6 *)rt->rt_llinfo;
856				ln->ln_state = ND6_LLINFO_NOSTATE;
857			}
858		} else
859			return(NULL);
860	}
861	rt->rt_refcnt--;
862	/*
863	 * Validation for the entry.
864	 * Note that the check for rt_llinfo is necessary because a cloned
865	 * route from a parent route that has the L flag (e.g. the default
866	 * route to a p2p interface) may have the flag, too, while the
867	 * destination is not actually a neighbor.
868	 * XXX: we can't use rt->rt_ifp to check for the interface, since
869	 *      it might be the loopback interface if the entry is for our
870	 *      own address on a non-loopback interface. Instead, we should
871	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
872	 *      interface.
873	 */
874	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
875	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
876	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
877		if (create) {
878			log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n",
879			    ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec");
880			/* xxx more logs... kazu */
881		}
882		return(NULL);
883	}
884	return(rt);
885}
886
887/*
888 * Detect if a given IPv6 address identifies a neighbor on a given link.
889 * XXX: should take care of the destination of a p2p link?
890 */
891int
892nd6_is_addr_neighbor(addr, ifp)
893	struct sockaddr_in6 *addr;
894	struct ifnet *ifp;
895{
896	struct ifaddr *ifa;
897	int i;
898
899#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
900#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
901
902	/*
903	 * A link-local address is always a neighbor.
904	 * XXX: we should use the sin6_scope_id field rather than the embedded
905	 * interface index.
906	 */
907	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
908	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
909		return(1);
910
911	/*
912	 * If the address matches one of our addresses,
913	 * it should be a neighbor.
914	 */
915	for (ifa = ifp->if_addrlist.tqh_first;
916	     ifa;
917	     ifa = ifa->ifa_list.tqe_next)
918	{
919		if (ifa->ifa_addr->sa_family != AF_INET6)
920			next: continue;
921
922		for (i = 0; i < 4; i++) {
923			if ((IFADDR6(ifa).s6_addr32[i] ^
924			     addr->sin6_addr.s6_addr32[i]) &
925			    IFMASK6(ifa).s6_addr32[i])
926				goto next;
927		}
928		return(1);
929	}
930
931	/*
932	 * Even if the address matches none of our addresses, it might be
933	 * in the neighbor cache.
934	 */
935	if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL)
936		return(1);
937
938	return(0);
939#undef IFADDR6
940#undef IFMASK6
941}
942
943/*
944 * Free an nd6 llinfo entry.
945 */
946struct llinfo_nd6 *
947nd6_free(rt)
948	struct rtentry *rt;
949{
950	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
951	struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
952	struct nd_defrouter *dr;
953
954	/*
955	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
956	 * even though it is not harmful, it was not really necessary.
957	 */
958
959	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
960		int s;
961		s = splnet();
962		dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
963				      rt->rt_ifp);
964
965		if (ln->ln_router || dr) {
966			/*
967			 * rt6_flush must be called whether or not the neighbor
968			 * is in the Default Router List.
969			 * See a corresponding comment in nd6_na_input().
970			 */
971			rt6_flush(&in6, rt->rt_ifp);
972		}
973
974		if (dr) {
975			/*
976			 * Unreachablity of a router might affect the default
977			 * router selection and on-link detection of advertised
978			 * prefixes.
979			 */
980
981			/*
982			 * Temporarily fake the state to choose a new default
983			 * router and to perform on-link determination of
984			 * prefixes correctly.
985			 * Below the state will be set correctly,
986			 * or the entry itself will be deleted.
987			 */
988			ln->ln_state = ND6_LLINFO_INCOMPLETE;
989
990			/*
991			 * Since defrouter_select() does not affect the
992			 * on-link determination and MIP6 needs the check
993			 * before the default router selection, we perform
994			 * the check now.
995			 */
996			pfxlist_onlink_check();
997
998			if (dr == TAILQ_FIRST(&nd_defrouter)) {
999				/*
1000				 * It is used as the current default router,
1001				 * so we have to move it to the end of the
1002				 * list and choose a new one.
1003				 * XXX: it is not very efficient if this is
1004				 *      the only router.
1005				 */
1006				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1007				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
1008
1009				defrouter_select();
1010			}
1011		}
1012		splx(s);
1013	}
1014
1015	/*
1016	 * Before deleting the entry, remember the next entry as the
1017	 * return value.  We need this because pfxlist_onlink_check() above
1018	 * might have freed other entries (particularly the old next entry) as
1019	 * a side effect (XXX).
1020	 */
1021	next = ln->ln_next;
1022
1023	/*
1024	 * Detach the route from the routing tree and the list of neighbor
1025	 * caches, and disable the route entry not to be used in already
1026	 * cached routes.
1027	 */
1028	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1029		  rt_mask(rt), 0, (struct rtentry **)0);
1030
1031	return(next);
1032}
1033
1034/*
1035 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1036 *
1037 * XXX cost-effective metods?
1038 */
1039void
1040nd6_nud_hint(rt, dst6, force)
1041	struct rtentry *rt;
1042	struct in6_addr *dst6;
1043	int force;
1044{
1045	struct llinfo_nd6 *ln;
1046
1047	/*
1048	 * If the caller specified "rt", use that.  Otherwise, resolve the
1049	 * routing table by supplied "dst6".
1050	 */
1051	if (!rt) {
1052		if (!dst6)
1053			return;
1054		if (!(rt = nd6_lookup(dst6, 0, NULL)))
1055			return;
1056	}
1057
1058	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1059	    (rt->rt_flags & RTF_LLINFO) == 0 ||
1060	    !rt->rt_llinfo || !rt->rt_gateway ||
1061	    rt->rt_gateway->sa_family != AF_LINK) {
1062		/* This is not a host route. */
1063		return;
1064	}
1065
1066	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1067	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1068		return;
1069
1070	/*
1071	 * if we get upper-layer reachability confirmation many times,
1072	 * it is possible we have false information.
1073	 */
1074	if (!force) {
1075		ln->ln_byhint++;
1076		if (ln->ln_byhint > nd6_maxnudhint)
1077			return;
1078	}
1079
1080	ln->ln_state = ND6_LLINFO_REACHABLE;
1081	if (ln->ln_expire)
1082		ln->ln_expire = time_second +
1083			nd_ifinfo[rt->rt_ifp->if_index].reachable;
1084}
1085
/*
 * Routing-table hook that keeps the neighbor cache entry (struct llinfo_nd6)
 * attached to a route in step with that route.
 *
 * req	routing request; RTM_ADD, RTM_RESOLVE and RTM_DELETE are acted on,
 *	any other value leaves the switch without doing anything
 * rt	route being added, cloned or deleted
 * info	not used
 *
 * Routes with RTF_GATEWAY set are ignored, as are non-host routes on
 * interfaces for which nd6_need_cache() returns 0.  For RTM_ADD and
 * RTM_RESOLVE a new llinfo_nd6 is allocated (unless the route already has
 * one), linked at the head of the llinfo_nd6 list and RTF_LLINFO is set.
 * For RTM_DELETE the entry is unlinked, any held packet is freed and the
 * entry itself is released.
 */
1086void
1087nd6_rtrequest(req, rt, info)
1088	int	req;
1089	struct rtentry *rt;
1090	struct rt_addrinfo *info; /* xxx unused */
1091{
1092	struct sockaddr *gate = rt->rt_gateway;
1093	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1094	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1095	struct ifnet *ifp = rt->rt_ifp;
1096	struct ifaddr *ifa;
1097
	/* Indirect routes never carry neighbor cache state. */
1098	if ((rt->rt_flags & RTF_GATEWAY))
1099		return;
1100
1101	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1102		/*
1103		 * This is probably an interface direct route for a link
1104		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1105		 * We do not need special treatment below for such a route.
1106		 * Moreover, the RTF_LLINFO flag which would be set below
1107		 * would annoy the ndp(8) command.
1108		 */
1109		return;
1110	}
1111
1112	if (req == RTM_RESOLVE &&
1113	    (nd6_need_cache(ifp) == 0 || /* stf case */
1114	     !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) {
1115		/*
1116		 * FreeBSD and BSD/OS often make a cloned host route based
1117		 * on a less-specific route (e.g. the default route).
1118		 * If the less specific route does not have a "gateway"
1119		 * (this is the case when the route just goes to a p2p or an
1120		 * stf interface), we'll mistakenly make a neighbor cache for
1121		 * the host route, and will see strange neighbor solicitation
1122		 * for the corresponding destination.  In order to avoid the
1123		 * confusion, we check if the destination of the route is
1124		 * a neighbor in terms of neighbor discovery, and stop the
1125		 * process if not.  Additionally, we remove the LLINFO flag
1126		 * so that ndp(8) will not try to get the neighbor information
1127		 * of the destination.
1128		 */
1129		rt->rt_flags &= ~RTF_LLINFO;
1130		return;
1131	}
1132
1133	switch (req) {
1134	case RTM_ADD:
1135		/*
1136		 * There is no backward compatibility :)
1137		 *
1138		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1139		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1140		 *	   rt->rt_flags |= RTF_CLONING;
1141		 */
1142		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
1143			/*
1144			 * Case 1: This route should come from
1145			 * a route to interface.  RTF_LLINFO flag is set
1146			 * for a host route whose destination should be
1147			 * treated as on-link.
1148			 */
			/*
			 * Replace the gateway with an empty AF_LINK address;
			 * rt_setgate() may change rt->rt_gateway, so "gate"
			 * is reloaded right after the call.
			 */
1149			rt_setgate(rt, rt_key(rt),
1150				   (struct sockaddr *)&null_sdl);
1151			gate = rt->rt_gateway;
1152			SDL(gate)->sdl_type = ifp->if_type;
1153			SDL(gate)->sdl_index = ifp->if_index;
1154			if (ln)
1155				ln->ln_expire = time_second;
1156#if 1
1157			if (ln && ln->ln_expire == 0) {
1158				/* kludge for desktops */
1159#if 0
1160				printf("nd6_rtequest: time.tv_sec is zero; "
1161				       "treat it as 1\n");
1162#endif
1163				ln->ln_expire = 1;
1164			}
1165#endif
			/* A cloning (interface) route itself needs no llinfo. */
1166			if ((rt->rt_flags & RTF_CLONING))
1167				break;
1168		}
1169		/*
1170		 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
1171		 * We don't do that here since llinfo is not ready yet.
1172		 *
1173		 * There are also couple of other things to be discussed:
1174		 * - unsolicited NA code needs improvement beforehand
1175		 * - RFC2461 says we MAY send multicast unsolicited NA
1176		 *   (7.2.6 paragraph 4), however, it also says that we
1177		 *   SHOULD provide a mechanism to prevent multicast NA storm.
1178		 *   we don't have anything like it right now.
1179		 *   note that the mechanism needs a mutual agreement
1180		 *   between proxies, which means that we need to implement
1181		 *   a new protocol, or a new kludge.
1182		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1183		 *   we need to check ip6forwarding before sending it.
1184		 *   (or should we allow proxy ND configuration only for
1185		 *   routers?  there's no mention about proxy ND from hosts)
1186		 */
1187#if 0
1188		/* XXX it does not work */
1189		if (rt->rt_flags & RTF_ANNOUNCE)
1190			nd6_na_output(ifp,
1191			      &SIN6(rt_key(rt))->sin6_addr,
1192			      &SIN6(rt_key(rt))->sin6_addr,
1193			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
1194			      1, NULL);
1195#endif
1196		/* FALLTHROUGH */
1197	case RTM_RESOLVE:
1198		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1199			/*
1200			 * Address resolution isn't necessary for a point to
1201			 * point link, so we can skip this test for a p2p link.
1202			 */
1203			if (gate->sa_family != AF_LINK ||
1204			    gate->sa_len < sizeof(null_sdl)) {
1205				log(LOG_DEBUG,
1206				    "nd6_rtrequest: bad gateway value: %s\n",
1207				    if_name(ifp));
1208				break;
1209			}
1210			SDL(gate)->sdl_type = ifp->if_type;
1211			SDL(gate)->sdl_index = ifp->if_index;
1212		}
1213		if (ln != NULL)
1214			break;	/* This happens on a route change */
1215		/*
1216		 * Case 2: This route may come from cloning, or a manual route
1217		 * add with a LL address.
1218		 */
		/*
		 * rt_llinfo is stored before the NULL test, so a failed
		 * allocation simply leaves the route without llinfo.
		 */
1219		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
1220		rt->rt_llinfo = (caddr_t)ln;
1221		if (!ln) {
1222			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1223			break;
1224		}
1225		nd6_inuse++;
1226		nd6_allocated++;
1227		Bzero(ln, sizeof(*ln));
1228		ln->ln_rt = rt;
1229		/* this is required for "ndp" command. - shin */
1230		if (req == RTM_ADD) {
1231		        /*
1232			 * gate should have some valid AF_LINK entry,
1233			 * and ln->ln_expire should have some lifetime
1234			 * which is specified by ndp command.
1235			 */
1236			ln->ln_state = ND6_LLINFO_REACHABLE;
1237			ln->ln_byhint = 0;
1238		} else {
1239		        /*
1240			 * When req == RTM_RESOLVE, rt is created and
1241			 * initialized in rtrequest(), so rt_expire is 0.
1242			 */
1243			ln->ln_state = ND6_LLINFO_NOSTATE;
1244			ln->ln_expire = time_second;
1245		}
1246		rt->rt_flags |= RTF_LLINFO;
		/* Link the new entry right after the llinfo_nd6 list head. */
1247		ln->ln_next = llinfo_nd6.ln_next;
1248		llinfo_nd6.ln_next = ln;
1249		ln->ln_prev = &llinfo_nd6;
1250		ln->ln_next->ln_prev = ln;
1251
1252		/*
1253		 * check if rt_key(rt) is one of my address assigned
1254		 * to the interface.
1255		 */
1256		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
1257					  &SIN6(rt_key(rt))->sin6_addr);
1258		if (ifa) {
			/*
			 * Our own address: the entry never expires, is always
			 * REACHABLE and, when nd6_ifptomac() yields an
			 * address, records it as the link-layer address.
			 */
1259			caddr_t macp = nd6_ifptomac(ifp);
1260			ln->ln_expire = 0;
1261			ln->ln_state = ND6_LLINFO_REACHABLE;
1262			ln->ln_byhint = 0;
1263			if (macp) {
1264				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
1265				SDL(gate)->sdl_alen = ifp->if_addrlen;
1266			}
1267			if (nd6_useloopback) {
1268				rt->rt_ifp = &loif[0];	/* XXX */
1269				/*
1270				 * Make sure rt_ifa be equal to the ifaddr
1271				 * corresponding to the address.
1272				 * We need this because when we refer
1273				 * rt_ifa->ia6_flags in ip6_input, we assume
1274				 * that the rt_ifa points to the address instead
1275				 * of the loopback address.
1276				 */
1277				if (ifa != rt->rt_ifa) {
1278					IFAFREE(rt->rt_ifa);
1279					IFAREF(ifa);
1280					rt->rt_ifa = ifa;
1281				}
1282			}
1283		} else if (rt->rt_flags & RTF_ANNOUNCE) {
1284			ln->ln_expire = 0;
1285			ln->ln_state = ND6_LLINFO_REACHABLE;
1286			ln->ln_byhint = 0;
1287
1288			/* join solicited node multicast for proxy ND */
1289			if (ifp->if_flags & IFF_MULTICAST) {
1290				struct in6_addr llsol;
1291				int error;
1292
				/*
				 * Derive the group from the route key: the
				 * first word becomes ff02, the second holds
				 * the interface index, bytes 4-11 become
				 * 0...01, byte 12 becomes ff and the last
				 * three bytes of the key are kept.
				 */
1293				llsol = SIN6(rt_key(rt))->sin6_addr;
1294				llsol.s6_addr16[0] = htons(0xff02);
1295				llsol.s6_addr16[1] = htons(ifp->if_index);
1296				llsol.s6_addr32[1] = 0;
1297				llsol.s6_addr32[2] = htonl(1);
1298				llsol.s6_addr8[12] = 0xff;
1299
1300				if (!in6_addmulti(&llsol, ifp, &error)) {
1301					nd6log((LOG_ERR, "%s: failed to join "
1302					    "%s (errno=%d)\n", if_name(ifp),
1303					    ip6_sprintf(&llsol), error));
1304				}
1305			}
1306		}
1307		break;
1308
1309	case RTM_DELETE:
1310		if (!ln)
1311			break;
1312		/* leave from solicited node multicast for proxy ND */
1313		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1314		    (ifp->if_flags & IFF_MULTICAST) != 0) {
1315			struct in6_addr llsol;
1316			struct in6_multi *in6m;
1317
			/* Same group address construction as on the add path. */
1318			llsol = SIN6(rt_key(rt))->sin6_addr;
1319			llsol.s6_addr16[0] = htons(0xff02);
1320			llsol.s6_addr16[1] = htons(ifp->if_index);
1321			llsol.s6_addr32[1] = 0;
1322			llsol.s6_addr32[2] = htonl(1);
1323			llsol.s6_addr8[12] = 0xff;
1324
1325			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
1326			if (in6m)
1327				in6_delmulti(in6m);
1328		}
		/* Unlink the entry, detach it from the route, then free it. */
1329		nd6_inuse--;
1330		ln->ln_next->ln_prev = ln->ln_prev;
1331		ln->ln_prev->ln_next = ln->ln_next;
1332		ln->ln_prev = NULL;
1333		rt->rt_llinfo = 0;
1334		rt->rt_flags &= ~RTF_LLINFO;
1335		if (ln->ln_hold)
1336			m_freem(ln->ln_hold);
1337		Free((caddr_t)ln);
1338	}
1339}
1340
1341int
1342nd6_ioctl(cmd, data, ifp)
1343	u_long cmd;
1344	caddr_t	data;
1345	struct ifnet *ifp;
1346{
1347	struct in6_drlist *drl = (struct in6_drlist *)data;
1348	struct in6_prlist *prl = (struct in6_prlist *)data;
1349	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1350	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1351	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1352	struct nd_defrouter *dr, any;
1353	struct nd_prefix *pr;
1354	struct rtentry *rt;
1355	int i = 0, error = 0;
1356	int s;
1357
1358	switch (cmd) {
1359	case SIOCGDRLST_IN6:
1360		/*
1361		 * obsolete API, use sysctl under net.inet6.icmp6
1362		 */
1363		bzero(drl, sizeof(*drl));
1364		s = splnet();
1365		dr = TAILQ_FIRST(&nd_defrouter);
1366		while (dr && i < DRLSTSIZ) {
1367			drl->defrouter[i].rtaddr = dr->rtaddr;
1368			if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) {
1369				/* XXX: need to this hack for KAME stack */
1370				drl->defrouter[i].rtaddr.s6_addr16[1] = 0;
1371			} else
1372				log(LOG_ERR,
1373				    "default router list contains a "
1374				    "non-linklocal address(%s)\n",
1375				    ip6_sprintf(&drl->defrouter[i].rtaddr));
1376
1377			drl->defrouter[i].flags = dr->flags;
1378			drl->defrouter[i].rtlifetime = dr->rtlifetime;
1379			drl->defrouter[i].expire = dr->expire;
1380			drl->defrouter[i].if_index = dr->ifp->if_index;
1381			i++;
1382			dr = TAILQ_NEXT(dr, dr_entry);
1383		}
1384		splx(s);
1385		break;
1386	case SIOCGPRLST_IN6:
1387		/*
1388		 * obsolete API, use sysctl under net.inet6.icmp6
1389		 */
1390		/*
1391		 * XXX meaning of fields, especialy "raflags", is very
1392		 * differnet between RA prefix list and RR/static prefix list.
1393		 * how about separating ioctls into two?
1394		 */
1395		bzero(prl, sizeof(*prl));
1396		s = splnet();
1397		pr = nd_prefix.lh_first;
1398		while (pr && i < PRLSTSIZ) {
1399			struct nd_pfxrouter *pfr;
1400			int j;
1401
1402			(void)in6_embedscope(&prl->prefix[i].prefix,
1403			    &pr->ndpr_prefix, NULL, NULL);
1404			prl->prefix[i].raflags = pr->ndpr_raf;
1405			prl->prefix[i].prefixlen = pr->ndpr_plen;
1406			prl->prefix[i].vltime = pr->ndpr_vltime;
1407			prl->prefix[i].pltime = pr->ndpr_pltime;
1408			prl->prefix[i].if_index = pr->ndpr_ifp->if_index;
1409			prl->prefix[i].expire = pr->ndpr_expire;
1410
1411			pfr = pr->ndpr_advrtrs.lh_first;
1412			j = 0;
1413			while (pfr) {
1414				if (j < DRLSTSIZ) {
1415#define RTRADDR prl->prefix[i].advrtr[j]
1416					RTRADDR = pfr->router->rtaddr;
1417					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1418						/* XXX: hack for KAME */
1419						RTRADDR.s6_addr16[1] = 0;
1420					} else
1421						log(LOG_ERR,
1422						    "a router(%s) advertises "
1423						    "a prefix with "
1424						    "non-link local address\n",
1425						    ip6_sprintf(&RTRADDR));
1426#undef RTRADDR
1427				}
1428				j++;
1429				pfr = pfr->pfr_next;
1430			}
1431			prl->prefix[i].advrtrs = j;
1432			prl->prefix[i].origin = PR_ORIG_RA;
1433
1434			i++;
1435			pr = pr->ndpr_next;
1436		}
1437	      {
1438		struct rr_prefix *rpp;
1439
1440		for (rpp = LIST_FIRST(&rr_prefix); rpp;
1441		     rpp = LIST_NEXT(rpp, rp_entry)) {
1442			if (i >= PRLSTSIZ)
1443				break;
1444			(void)in6_embedscope(&prl->prefix[i].prefix,
1445			    &pr->ndpr_prefix, NULL, NULL);
1446			prl->prefix[i].raflags = rpp->rp_raf;
1447			prl->prefix[i].prefixlen = rpp->rp_plen;
1448			prl->prefix[i].vltime = rpp->rp_vltime;
1449			prl->prefix[i].pltime = rpp->rp_pltime;
1450			prl->prefix[i].if_index = rpp->rp_ifp->if_index;
1451			prl->prefix[i].expire = rpp->rp_expire;
1452			prl->prefix[i].advrtrs = 0;
1453			prl->prefix[i].origin = rpp->rp_origin;
1454			i++;
1455		}
1456	      }
1457		splx(s);
1458
1459		break;
1460	case OSIOCGIFINFO_IN6:
1461		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1462			error = EINVAL;
1463			break;
1464		}
1465		ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu;
1466		ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu;
1467		ndi->ndi.basereachable =
1468		    nd_ifinfo[ifp->if_index].basereachable;
1469		ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable;
1470		ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans;
1471		ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags;
1472		ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm;
1473		ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim;
1474		ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra;
1475		break;
1476	case SIOCGIFINFO_IN6:
1477		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1478			error = EINVAL;
1479			break;
1480		}
1481		ndi->ndi = nd_ifinfo[ifp->if_index];
1482		break;
1483	case SIOCSIFINFO_FLAGS:
1484		/* XXX: almost all other fields of ndi->ndi is unused */
1485		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1486			error = EINVAL;
1487			break;
1488		}
1489		nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags;
1490		break;
1491	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1492		/* flush default router list */
1493		/*
1494		 * xxx sumikawa: should not delete route if default
1495		 * route equals to the top of default router list
1496		 */
1497		bzero(&any, sizeof(any));
1498		defrouter_delreq(&any, 0);
1499		defrouter_select();
1500		/* xxx sumikawa: flush prefix list */
1501		break;
1502	case SIOCSPFXFLUSH_IN6:
1503	    {
1504		/* flush all the prefix advertised by routers */
1505		struct nd_prefix *pr, *next;
1506
1507		s = splnet();
1508		for (pr = nd_prefix.lh_first; pr; pr = next) {
1509			struct in6_ifaddr *ia, *ia_next;
1510
1511			next = pr->ndpr_next;
1512
1513			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1514				continue; /* XXX */
1515
1516			/* do we really have to remove addresses as well? */
1517			for (ia = in6_ifaddr; ia; ia = ia_next) {
1518				/* ia might be removed.  keep the next ptr. */
1519				ia_next = ia->ia_next;
1520
1521				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1522					continue;
1523
1524				if (ia->ia6_ndpr == pr)
1525					in6_purgeaddr(&ia->ia_ifa);
1526			}
1527			prelist_remove(pr);
1528		}
1529		splx(s);
1530		break;
1531	    }
1532	case SIOCSRTRFLUSH_IN6:
1533	    {
1534		/* flush all the default routers */
1535		struct nd_defrouter *dr, *next;
1536
1537		s = splnet();
1538		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1539			/*
1540			 * The first entry of the list may be stored in
1541			 * the routing table, so we'll delete it later.
1542			 */
1543			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
1544				next = TAILQ_NEXT(dr, dr_entry);
1545				defrtrlist_del(dr);
1546			}
1547			defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
1548		}
1549		splx(s);
1550		break;
1551	    }
1552	case SIOCGNBRINFO_IN6:
1553	    {
1554		struct llinfo_nd6 *ln;
1555		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1556
1557		/*
1558		 * XXX: KAME specific hack for scoped addresses
1559		 *      XXXX: for other scopes than link-local?
1560		 */
1561		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
1562		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
1563			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
1564
1565			if (*idp == 0)
1566				*idp = htons(ifp->if_index);
1567		}
1568
1569		s = splnet();
1570		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
1571			error = EINVAL;
1572			splx(s);
1573			break;
1574		}
1575		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1576		nbi->state = ln->ln_state;
1577		nbi->asked = ln->ln_asked;
1578		nbi->isrouter = ln->ln_router;
1579		nbi->expire = ln->ln_expire;
1580		splx(s);
1581
1582		break;
1583	    }
1584	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1585		ndif->ifindex = nd6_defifindex;
1586		break;
1587	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1588		return(nd6_setdefaultiface(ndif->ifindex));
1589		break;
1590	}
1591	return(error);
1592}
1593
1594/*
1595 * Create neighbor cache entry and cache link-layer address,
1596 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
1597 */
1598struct rtentry *
1599nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
1600	struct ifnet *ifp;
1601	struct in6_addr *from;
1602	char *lladdr;
1603	int lladdrlen;
1604	int type;	/* ICMP6 type */
1605	int code;	/* type dependent information */
1606{
1607	struct rtentry *rt = NULL;
1608	struct llinfo_nd6 *ln = NULL;
1609	int is_newentry;
1610	struct sockaddr_dl *sdl = NULL;
1611	int do_update;
1612	int olladdr;
1613	int llchange;
1614	int newstate = 0;
1615
1616	if (!ifp)
1617		panic("ifp == NULL in nd6_cache_lladdr");
1618	if (!from)
1619		panic("from == NULL in nd6_cache_lladdr");
1620
1621	/* nothing must be updated for unspecified address */
1622	if (IN6_IS_ADDR_UNSPECIFIED(from))
1623		return NULL;
1624
1625	/*
1626	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1627	 * the caller.
1628	 *
1629	 * XXX If the link does not have link-layer adderss, what should
1630	 * we do? (ifp->if_addrlen == 0)
1631	 * Spec says nothing in sections for RA, RS and NA.  There's small
1632	 * description on it in NS section (RFC 2461 7.2.3).
1633	 */
1634
1635	rt = nd6_lookup(from, 0, ifp);
1636	if (!rt) {
1637#if 0
1638		/* nothing must be done if there's no lladdr */
1639		if (!lladdr || !lladdrlen)
1640			return NULL;
1641#endif
1642
1643		rt = nd6_lookup(from, 1, ifp);
1644		is_newentry = 1;
1645	} else {
1646		/* do nothing if static ndp is set */
1647		if (rt->rt_flags & RTF_STATIC)
1648			return NULL;
1649		is_newentry = 0;
1650	}
1651
1652	if (!rt)
1653		return NULL;
1654	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1655fail:
1656		(void)nd6_free(rt);
1657		return NULL;
1658	}
1659	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1660	if (!ln)
1661		goto fail;
1662	if (!rt->rt_gateway)
1663		goto fail;
1664	if (rt->rt_gateway->sa_family != AF_LINK)
1665		goto fail;
1666	sdl = SDL(rt->rt_gateway);
1667
1668	olladdr = (sdl->sdl_alen) ? 1 : 0;
1669	if (olladdr && lladdr) {
1670		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1671			llchange = 1;
1672		else
1673			llchange = 0;
1674	} else
1675		llchange = 0;
1676
1677	/*
1678	 * newentry olladdr  lladdr  llchange	(*=record)
1679	 *	0	n	n	--	(1)
1680	 *	0	y	n	--	(2)
1681	 *	0	n	y	--	(3) * STALE
1682	 *	0	y	y	n	(4) *
1683	 *	0	y	y	y	(5) * STALE
1684	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1685	 *	1	--	y	--	(7) * STALE
1686	 */
1687
1688	if (lladdr) {		/* (3-5) and (7) */
1689		/*
1690		 * Record source link-layer address
1691		 * XXX is it dependent to ifp->if_type?
1692		 */
1693		sdl->sdl_alen = ifp->if_addrlen;
1694		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1695	}
1696
1697	if (!is_newentry) {
1698		if ((!olladdr && lladdr)		/* (3) */
1699		 || (olladdr && lladdr && llchange)) {	/* (5) */
1700			do_update = 1;
1701			newstate = ND6_LLINFO_STALE;
1702		} else					/* (1-2,4) */
1703			do_update = 0;
1704	} else {
1705		do_update = 1;
1706		if (!lladdr)				/* (6) */
1707			newstate = ND6_LLINFO_NOSTATE;
1708		else					/* (7) */
1709			newstate = ND6_LLINFO_STALE;
1710	}
1711
1712	if (do_update) {
1713		/*
1714		 * Update the state of the neighbor cache.
1715		 */
1716		ln->ln_state = newstate;
1717
1718		if (ln->ln_state == ND6_LLINFO_STALE) {
1719			/*
1720			 * XXX: since nd6_output() below will cause
1721			 * state tansition to DELAY and reset the timer,
1722			 * we must set the timer now, although it is actually
1723			 * meaningless.
1724			 */
1725			ln->ln_expire = time_second + nd6_gctimer;
1726
1727			if (ln->ln_hold) {
1728				/*
1729				 * we assume ifp is not a p2p here, so just
1730				 * set the 2nd argument as the 1st one.
1731				 */
1732				nd6_output(ifp, ifp, ln->ln_hold,
1733					   (struct sockaddr_in6 *)rt_key(rt),
1734					   rt);
1735				ln->ln_hold = NULL;
1736			}
1737		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1738			/* probe right away */
1739			ln->ln_expire = time_second;
1740		}
1741	}
1742
1743	/*
1744	 * ICMP6 type dependent behavior.
1745	 *
1746	 * NS: clear IsRouter if new entry
1747	 * RS: clear IsRouter
1748	 * RA: set IsRouter if there's lladdr
1749	 * redir: clear IsRouter if new entry
1750	 *
1751	 * RA case, (1):
1752	 * The spec says that we must set IsRouter in the following cases:
1753	 * - If lladdr exist, set IsRouter.  This means (1-5).
1754	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1755	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1756	 * A quetion arises for (1) case.  (1) case has no lladdr in the
1757	 * neighbor cache, this is similar to (6).
1758	 * This case is rare but we figured that we MUST NOT set IsRouter.
1759	 *
1760	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1761	 *							D R
1762	 *	0	n	n	--	(1)	c   ?     s
1763	 *	0	y	n	--	(2)	c   s     s
1764	 *	0	n	y	--	(3)	c   s     s
1765	 *	0	y	y	n	(4)	c   s     s
1766	 *	0	y	y	y	(5)	c   s     s
1767	 *	1	--	n	--	(6) c	c 	c s
1768	 *	1	--	y	--	(7) c	c   s	c s
1769	 *
1770	 *					(c=clear s=set)
1771	 */
1772	switch (type & 0xff) {
1773	case ND_NEIGHBOR_SOLICIT:
1774		/*
1775		 * New entry must have is_router flag cleared.
1776		 */
1777		if (is_newentry)	/* (6-7) */
1778			ln->ln_router = 0;
1779		break;
1780	case ND_REDIRECT:
1781		/*
1782		 * If the icmp is a redirect to a better router, always set the
1783		 * is_router flag. Otherwise, if the entry is newly created,
1784		 * clear the flag. [RFC 2461, sec 8.3]
1785		 */
1786		if (code == ND_REDIRECT_ROUTER)
1787			ln->ln_router = 1;
1788		else if (is_newentry) /* (6-7) */
1789			ln->ln_router = 0;
1790		break;
1791	case ND_ROUTER_SOLICIT:
1792		/*
1793		 * is_router flag must always be cleared.
1794		 */
1795		ln->ln_router = 0;
1796		break;
1797	case ND_ROUTER_ADVERT:
1798		/*
1799		 * Mark an entry with lladdr as a router.
1800		 */
1801		if ((!is_newentry && (olladdr || lladdr))	/* (2-5) */
1802		 || (is_newentry && lladdr)) {			/* (7) */
1803			ln->ln_router = 1;
1804		}
1805		break;
1806	}
1807
1808	/*
1809	 * When the link-layer address of a router changes, select the
1810	 * best router again.  In particular, when the neighbor entry is newly
1811	 * created, it might affect the selection policy.
1812	 * Question: can we restrict the first condition to the "is_newentry"
1813	 * case?
1814	 * XXX: when we hear an RA from a new router with the link-layer
1815	 * address option, defrouter_select() is called twice, since
1816	 * defrtrlist_update called the function as well.  However, I believe
1817	 * we can compromise the overhead, since it only happens the first
1818	 * time.
1819	 * XXX: although defrouter_select() should not have a bad effect
1820	 * for those are not autoconfigured hosts, we explicitly avoid such
1821	 * cases for safety.
1822	 */
1823	if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv)
1824		defrouter_select();
1825
1826	return rt;
1827}
1828
1829static void
1830nd6_slowtimo(ignored_arg)
1831    void *ignored_arg;
1832{
1833	int s = splnet();
1834	int i;
1835	struct nd_ifinfo *nd6if;
1836
1837	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1838	    nd6_slowtimo, NULL);
1839	for (i = 1; i < if_index + 1; i++) {
1840		if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
1841			continue;
1842		nd6if = &nd_ifinfo[i];
1843		if (nd6if->basereachable && /* already initialized */
1844		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1845			/*
1846			 * Since reachable time rarely changes by router
1847			 * advertisements, we SHOULD insure that a new random
1848			 * value gets recomputed at least once every few hours.
1849			 * (RFC 2461, 6.3.4)
1850			 */
1851			nd6if->recalctm = nd6_recalc_reachtm_interval;
1852			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1853		}
1854	}
1855	splx(s);
1856}
1857
/*
 * Send the packet m0 to dst on ifp, doing next-hop determination and, where
 * a neighbor cache is kept, address resolution / unreachability detection.
 *
 * ifp		outgoing interface
 * origifp	interface handed to if_output instead of ifp when ifp is a
 *		loopback interface
 * m0		packet to send
 * dst		destination address
 * rt0		route to use, or NULL
 *
 * Multicast destinations and interfaces for which nd6_need_cache() returns 0
 * are passed straight to the interface.  When the neighbor's link-layer
 * address is not resolved yet, the packet replaces any previously held one
 * in ln->ln_hold and 0 is returned.  Otherwise the result of if_output is
 * returned.  On failure the mbuf is freed and EHOSTUNREACH or EIO is
 * returned.
 */
/* Record the error code and jump to the common cleanup at "bad". */
1858#define senderr(e) { error = (e); goto bad;}
1859int
1860nd6_output(ifp, origifp, m0, dst, rt0)
1861	struct ifnet *ifp;
1862	struct ifnet *origifp;
1863	struct mbuf *m0;
1864	struct sockaddr_in6 *dst;
1865	struct rtentry *rt0;
1866{
1867	struct mbuf *m = m0;
1868	struct rtentry *rt = rt0;
1869	struct sockaddr_in6 *gw6 = NULL;
1870	struct llinfo_nd6 *ln = NULL;
1871	int error = 0;
1872
1873	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
1874		goto sendpkt;
1875
1876	if (nd6_need_cache(ifp) == 0)
1877		goto sendpkt;
1878
1879	/*
1880	 * next hop determination.  This routine is derived from ether_outpout.
1881	 */
1882	if (rt) {
1883		if ((rt->rt_flags & RTF_UP) == 0) {
			/*
			 * The supplied route is down: look the destination up
			 * again.  If the new route leaves through another
			 * interface, start over with that route.
			 */
1884			if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) !=
1885				NULL)
1886			{
1887				rt->rt_refcnt--;
1888				if (rt->rt_ifp != ifp) {
1889					/* XXX: loop care? */
1890					return nd6_output(ifp, origifp, m0,
1891							  dst, rt);
1892				}
1893			} else
1894				senderr(EHOSTUNREACH);
1895		}
1896
1897		if (rt->rt_flags & RTF_GATEWAY) {
1898			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;
1899
1900			/*
1901			 * We skip link-layer address resolution and NUD
1902			 * if the gateway is not a neighbor from ND point
1903			 * of view, regardless of the value of nd_ifinfo.flags.
1904			 * The second condition is a bit tricky; we skip
1905			 * if the gateway is our own address, which is
1906			 * sometimes used to install a route to a p2p link.
1907			 */
1908			if (!nd6_is_addr_neighbor(gw6, ifp) ||
1909			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
1910				/*
1911				 * We allow this kind of tricky route only
1912				 * when the outgoing interface is p2p.
1913				 * XXX: we may need a more generic rule here.
1914				 */
1915				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1916					senderr(EHOSTUNREACH);
1917
1918				goto sendpkt;
1919			}
1920
			/*
			 * Continue with the route to the gateway.  The
			 * "lookup" label sits inside the if-body below so
			 * that a missing cached gateway route and a stale
			 * one share the same re-resolution code.
			 */
1921			if (rt->rt_gwroute == 0)
1922				goto lookup;
1923			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
1924				rtfree(rt); rt = rt0;
1925			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
1926				if ((rt = rt->rt_gwroute) == 0)
1927					senderr(EHOSTUNREACH);
1928			}
1929		}
1930	}
1931
1932	/*
1933	 * Address resolution or Neighbor Unreachability Detection
1934	 * for the next hop.
1935	 * At this point, the destination of the packet must be a unicast
1936	 * or an anycast address(i.e. not a multicast).
1937	 */
1938
1939	/* Look up the neighbor cache for the nexthop */
1940	if (rt && (rt->rt_flags & RTF_LLINFO) != 0)
1941		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1942	else {
1943		/*
1944		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
1945		 * the condition below is not very efficient.  But we believe
1946		 * it is tolerable, because this should be a rare case.
1947		 */
1948		if (nd6_is_addr_neighbor(dst, ifp) &&
1949		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
1950			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1951	}
1952	if (!ln || !rt) {
		/*
		 * No cache entry.  This is an error only on a non-p2p
		 * interface that has ND6_IFF_PERFORMNUD cleared; in every
		 * other case the packet is sent without resolution.
		 */
1953		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
1954		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
1955			log(LOG_DEBUG,
1956			    "nd6_output: can't allocate llinfo for %s "
1957			    "(ln=%p, rt=%p)\n",
1958			    ip6_sprintf(&dst->sin6_addr), ln, rt);
1959			senderr(EIO);	/* XXX: good error? */
1960		}
1961
1962		goto sendpkt;	/* send anyway */
1963	}
1964
1965	/* We don't have to do link-layer address resolution on a p2p link. */
1966	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
1967	    ln->ln_state < ND6_LLINFO_REACHABLE) {
1968		ln->ln_state = ND6_LLINFO_STALE;
1969		ln->ln_expire = time_second + nd6_gctimer;
1970	}
1971
1972	/*
1973	 * The first time we send a packet to a neighbor whose entry is
1974	 * STALE, we have to change the state to DELAY and a sets a timer to
1975	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
1976	 * neighbor unreachability detection on expiration.
1977	 * (RFC 2461 7.3.3)
1978	 */
1979	if (ln->ln_state == ND6_LLINFO_STALE) {
1980		ln->ln_asked = 0;
1981		ln->ln_state = ND6_LLINFO_DELAY;
1982		ln->ln_expire = time_second + nd6_delay;
1983	}
1984
1985	/*
1986	 * If the neighbor cache entry has a state other than INCOMPLETE
1987	 * (i.e. its link-layer address is already resolved), just
1988	 * send the packet.
1989	 */
1990	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
1991		goto sendpkt;
1992
1993	/*
1994	 * There is a neighbor cache entry, but no ethernet address
1995	 * response yet.  Replace the held mbuf (if any) with this
1996	 * latest one.
1997	 *
1998	 * This code conforms to the rate-limiting rule described in Section
1999	 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
2000	 * an NS below.
2001	 */
2002	if (ln->ln_state == ND6_LLINFO_NOSTATE)
2003		ln->ln_state = ND6_LLINFO_INCOMPLETE;
2004	if (ln->ln_hold)
2005		m_freem(ln->ln_hold);
2006	ln->ln_hold = m;
	/*
	 * Solicit only for entries that can expire, only while fewer than
	 * nd6_mmaxtries solicitations have been sent, and only once the
	 * previous retransmission timer has run out.
	 */
2007	if (ln->ln_expire) {
2008		if (ln->ln_asked < nd6_mmaxtries &&
2009		    ln->ln_expire < time_second) {
2010			ln->ln_asked++;
2011			ln->ln_expire = time_second +
2012				nd_ifinfo[ifp->if_index].retrans / 1000;
2013			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
2014		}
2015	}
	/* The packet is now owned by the cache entry; report success. */
2016	return(0);
2017
2018  sendpkt:
2019
2020#ifdef MAC
2021	mac_create_mbuf_linklayer(ifp, m);
2022#endif
	/* A loopback ifp is handed origifp rather than itself. */
2023	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
2024		return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
2025					 rt));
2026	}
2027	return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt));
2028
2029  bad:
2030	if (m)
2031		m_freem(m);
2032	return (error);
2033}
2034#undef senderr
2035
2036int
2037nd6_need_cache(ifp)
2038	struct ifnet *ifp;
2039{
2040	/*
2041	 * XXX: we currently do not make neighbor cache on any interface
2042	 * other than ARCnet, Ethernet, FDDI and GIF.
2043	 *
2044	 * RFC2893 says:
2045	 * - unidirectional tunnels needs no ND
2046	 */
2047	switch (ifp->if_type) {
2048	case IFT_ARCNET:
2049	case IFT_ETHER:
2050	case IFT_FDDI:
2051	case IFT_IEEE1394:
2052#ifdef IFT_L2VLAN
2053	case IFT_L2VLAN:
2054#endif
2055#ifdef IFT_IEEE80211
2056	case IFT_IEEE80211:
2057#endif
2058	case IFT_GIF:		/* XXX need more cases? */
2059		return(1);
2060	default:
2061		return(0);
2062	}
2063}
2064
2065int
2066nd6_storelladdr(ifp, rt, m, dst, desten)
2067	struct ifnet *ifp;
2068	struct rtentry *rt;
2069	struct mbuf *m;
2070	struct sockaddr *dst;
2071	u_char *desten;
2072{
2073	int i;
2074	struct sockaddr_dl *sdl;
2075
2076	if (m->m_flags & M_MCAST) {
2077		switch (ifp->if_type) {
2078		case IFT_ETHER:
2079		case IFT_FDDI:
2080#ifdef IFT_L2VLAN
2081	case IFT_L2VLAN:
2082#endif
2083#ifdef IFT_IEEE80211
2084		case IFT_IEEE80211:
2085#endif
2086			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2087						 desten);
2088			return(1);
2089		case IFT_IEEE1394:
2090			/*
2091			 * netbsd can use if_broadcastaddr, but we don't do so
2092			 * to reduce # of ifdef.
2093			 */
2094			for (i = 0; i < ifp->if_addrlen; i++)
2095				desten[i] = ~0;
2096			return(1);
2097		case IFT_ARCNET:
2098			*desten = 0;
2099			return(1);
2100		default:
2101			m_freem(m);
2102			return(0);
2103		}
2104	}
2105
2106	if (rt == NULL) {
2107		/* this could happen, if we could not allocate memory */
2108		m_freem(m);
2109		return(0);
2110	}
2111	if (rt->rt_gateway->sa_family != AF_LINK) {
2112		printf("nd6_storelladdr: something odd happens\n");
2113		m_freem(m);
2114		return(0);
2115	}
2116	sdl = SDL(rt->rt_gateway);
2117	if (sdl->sdl_alen == 0) {
2118		/* this should be impossible, but we bark here for debugging */
2119		printf("nd6_storelladdr: sdl_alen == 0\n");
2120		m_freem(m);
2121		return(0);
2122	}
2123
2124	bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
2125	return(1);
2126}
2127
/*
 * Handlers for the read-only sysctl nodes declared below.  Both are defined
 * later in this file.
 */
2128static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
2129static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
/* Declare the parent node when the SYSCTL_DECL macro is available. */
2130#ifdef SYSCTL_DECL
2131SYSCTL_DECL(_net_inet6_icmp6);
2132#endif
/*
 * Read-only (CTLFLAG_RD) nodes under _net_inet6_icmp6: nd6_drlist is served
 * by nd6_sysctl_drlist and nd6_prlist by nd6_sysctl_prlist.
 */
2133SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2134	CTLFLAG_RD, nd6_sysctl_drlist, "");
2135SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
2136	CTLFLAG_RD, nd6_sysctl_prlist, "");
2137
2138static int
2139nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2140{
2141	int error;
2142	char buf[1024];
2143	struct in6_defrouter *d, *de;
2144	struct nd_defrouter *dr;
2145
2146	if (req->newptr)
2147		return EPERM;
2148	error = 0;
2149
2150	for (dr = TAILQ_FIRST(&nd_defrouter);
2151	     dr;
2152	     dr = TAILQ_NEXT(dr, dr_entry)) {
2153		d = (struct in6_defrouter *)buf;
2154		de = (struct in6_defrouter *)(buf + sizeof(buf));
2155
2156		if (d + 1 <= de) {
2157			bzero(d, sizeof(*d));
2158			d->rtaddr.sin6_family = AF_INET6;
2159			d->rtaddr.sin6_len = sizeof(d->rtaddr);
2160			if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
2161			    dr->ifp) != 0)
2162				log(LOG_ERR,
2163				    "scope error in "
2164				    "default router list (%s)\n",
2165				    ip6_sprintf(&dr->rtaddr));
2166			d->flags = dr->flags;
2167			d->rtlifetime = dr->rtlifetime;
2168			d->expire = dr->expire;
2169			d->if_index = dr->ifp->if_index;
2170		} else
2171			panic("buffer too short");
2172
2173		error = SYSCTL_OUT(req, buf, sizeof(*d));
2174		if (error)
2175			break;
2176	}
2177	return error;
2178}
2179
2180static int
2181nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2182{
2183	int error;
2184	char buf[1024];
2185	struct in6_prefix *p, *pe;
2186	struct nd_prefix *pr;
2187
2188	if (req->newptr)
2189		return EPERM;
2190	error = 0;
2191
2192	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2193		u_short advrtrs;
2194		size_t advance;
2195		struct sockaddr_in6 *sin6, *s6;
2196		struct nd_pfxrouter *pfr;
2197
2198		p = (struct in6_prefix *)buf;
2199		pe = (struct in6_prefix *)(buf + sizeof(buf));
2200
2201		if (p + 1 <= pe) {
2202			bzero(p, sizeof(*p));
2203			sin6 = (struct sockaddr_in6 *)(p + 1);
2204
2205			p->prefix = pr->ndpr_prefix;
2206			if (in6_recoverscope(&p->prefix,
2207			    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
2208				log(LOG_ERR,
2209				    "scope error in prefix list (%s)\n",
2210				    ip6_sprintf(&p->prefix.sin6_addr));
2211			p->raflags = pr->ndpr_raf;
2212			p->prefixlen = pr->ndpr_plen;
2213			p->vltime = pr->ndpr_vltime;
2214			p->pltime = pr->ndpr_pltime;
2215			p->if_index = pr->ndpr_ifp->if_index;
2216			p->expire = pr->ndpr_expire;
2217			p->refcnt = pr->ndpr_refcnt;
2218			p->flags = pr->ndpr_stateflags;
2219			p->origin = PR_ORIG_RA;
2220			advrtrs = 0;
2221			for (pfr = pr->ndpr_advrtrs.lh_first;
2222			     pfr;
2223			     pfr = pfr->pfr_next) {
2224				if ((void *)&sin6[advrtrs + 1] >
2225				    (void *)pe) {
2226					advrtrs++;
2227					continue;
2228				}
2229				s6 = &sin6[advrtrs];
2230				bzero(s6, sizeof(*s6));
2231				s6->sin6_family = AF_INET6;
2232				s6->sin6_len = sizeof(*sin6);
2233				if (in6_recoverscope(s6,
2234				    &pfr->router->rtaddr,
2235				    pfr->router->ifp) != 0)
2236					log(LOG_ERR,
2237					    "scope error in "
2238					    "prefix list (%s)\n",
2239					    ip6_sprintf(&pfr->router->rtaddr));
2240				advrtrs++;
2241			}
2242			p->advrtrs = advrtrs;
2243		} else
2244			panic("buffer too short");
2245
2246		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
2247		error = SYSCTL_OUT(req, buf, advance);
2248		if (error)
2249			break;
2250	}
2251	return error;
2252}
2253