nd6.c revision 121092
1/*	$FreeBSD: head/sys/netinet6/nd6.c 121092 2003-10-14 18:49:08Z sam $	*/
2/*	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "opt_inet.h"
34#include "opt_inet6.h"
35#include "opt_mac.h"
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/callout.h>
40#include <sys/mac.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/time.h>
46#include <sys/kernel.h>
47#include <sys/protosw.h>
48#include <sys/errno.h>
49#include <sys/syslog.h>
50#include <sys/queue.h>
51#include <sys/sysctl.h>
52
53#include <net/if.h>
54#include <net/if_dl.h>
55#include <net/if_types.h>
56#include <net/if_atm.h>
57#include <net/iso88025.h>
58#include <net/fddi.h>
59#include <net/route.h>
60
61#include <netinet/in.h>
62#include <netinet/if_ether.h>
63#include <netinet6/in6_var.h>
64#include <netinet/ip6.h>
65#include <netinet6/ip6_var.h>
66#include <netinet6/nd6.h>
67#include <netinet6/in6_prefix.h>
68#include <netinet/icmp6.h>
69
70#include <net/net_osdep.h>
71
72#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
73#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
74
75#define SIN6(s) ((struct sockaddr_in6 *)s)
76#define SDL(s) ((struct sockaddr_dl *)s)
77
78/* timer values (in seconds unless noted otherwise) */
79int	nd6_prune	= 1;	/* nd6_timer() re-arms itself every nd6_prune seconds */
80int	nd6_delay	= 5;	/* delay before the first probe: 5 seconds */
81int	nd6_umaxtries	= 3;	/* maximum unicast queries (PROBE state retries) */
82int	nd6_mmaxtries	= 3;	/* maximum multicast queries (INCOMPLETE state retries) */
83int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
84int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer for STALE entries */
85
86/* prevent too many loop iterations in ND option parsing (see nd6_options()) */
87int nd6_maxndopt = 10;	/* max # of ND options allowed */
88
89int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints (see nd6_nud_hint()) */
90
91#ifdef ND6_DEBUG
92int nd6_debug = 1;
93#else
94int nd6_debug = 0;
95#endif
96
97/* for debugging? (not referenced by the code visible in this part of the file) */
98static int nd6_inuse, nd6_allocated;
99
100struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6};	/* list head; initially points at itself */
101static size_t nd_ifinfo_indexlim = 8;	/* # of slots in nd_ifinfo; doubled by nd6_ifattach() */
102struct nd_ifinfo *nd_ifinfo = NULL;	/* per-interface ND state, indexed by if_index */
103struct nd_drhead nd_defrouter;	/* default router list; initialized in nd6_init() */
104struct nd_prhead nd_prefix = { 0 };	/* prefix list */
105
106int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
107static struct sockaddr_in6 all1_sa;	/* all-ones address; filled in by nd6_init() */
108
109static void nd6_slowtimo __P((void *));
110static int regen_tmpaddr __P((struct in6_ifaddr *));
111
112struct callout nd6_slowtimo_ch;	/* armed in nd6_init() */
113struct callout nd6_timer_ch;	/* re-armed by nd6_timer() */
114extern struct callout in6_tmpaddrtimer_ch;
115
116void
117nd6_init()
118{
119	static int nd6_init_done = 0;
120	int i;
121
122	if (nd6_init_done) {
123		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
124		return;
125	}
126
127	all1_sa.sin6_family = AF_INET6;
128	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
129	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
130		all1_sa.sin6_addr.s6_addr[i] = 0xff;
131
132	/* initialization of the default router list */
133	TAILQ_INIT(&nd_defrouter);
134
135	nd6_init_done = 1;
136
137	/* start timer */
138	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
139	    nd6_slowtimo, NULL);
140}
141
142void
143nd6_ifattach(ifp)
144	struct ifnet *ifp;
145{
146
147	/*
148	 * We have some arrays that should be indexed by if_index.
149	 * since if_index will grow dynamically, they should grow too.
150	 */
151	if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
152		size_t n;
153		caddr_t q;
154
155		while (if_index >= nd_ifinfo_indexlim)
156			nd_ifinfo_indexlim <<= 1;
157
158		/* grow nd_ifinfo */
159		n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
160		q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK);
161		bzero(q, n);
162		if (nd_ifinfo) {
163			bcopy((caddr_t)nd_ifinfo, q, n/2);
164			free((caddr_t)nd_ifinfo, M_IP6NDP);
165		}
166		nd_ifinfo = (struct nd_ifinfo *)q;
167	}
168
169#define ND nd_ifinfo[ifp->if_index]
170
171	/*
172	 * Don't initialize if called twice.
173	 * XXX: to detect this, we should choose a member that is never set
174	 * before initialization of the ND structure itself.  We formaly used
175	 * the linkmtu member, which was not suitable because it could be
176	 * initialized via "ifconfig mtu".
177	 */
178	if (ND.basereachable)
179		return;
180
181	ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu;
182	ND.chlim = IPV6_DEFHLIM;
183	ND.basereachable = REACHABLE_TIME;
184	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
185	ND.retrans = RETRANS_TIMER;
186	ND.receivedra = 0;
187	/*
188	 * Note that the default value of ip6_accept_rtadv is 0, which means
189	 * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
190	 * here.
191	 */
192	ND.flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
193	nd6_setmtu(ifp);
194#undef ND
195}
196
197/*
198 * Reset ND level link MTU. This function is called when the physical MTU
199 * changes, which means we might have to adjust the ND level MTU.
200 */
201void
202nd6_setmtu(ifp)
203	struct ifnet *ifp;
204{
205	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
206	u_long oldmaxmtu = ndi->maxmtu;
207	u_long oldlinkmtu = ndi->linkmtu;
208
209	switch (ifp->if_type) {
210	case IFT_ARCNET:	/* XXX MTU handling needs more work */
211		ndi->maxmtu = MIN(60480, ifp->if_mtu);
212		break;
213	case IFT_ETHER:
214		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
215		break;
216	case IFT_FDDI:
217		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
218		break;
219	case IFT_ATM:
220		ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu);
221		break;
222	case IFT_IEEE1394:	/* XXX should be IEEE1394MTU(1500) */
223		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
224		break;
225#ifdef IFT_IEEE80211
226	case IFT_IEEE80211:	/* XXX should be IEEE80211MTU(1500) */
227		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
228		break;
229#endif
230	 case IFT_ISO88025:
231		 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
232		 break;
233	default:
234		ndi->maxmtu = ifp->if_mtu;
235		break;
236	}
237
238	if (oldmaxmtu != ndi->maxmtu) {
239		/*
240		 * If the ND level MTU is not set yet, or if the maxmtu
241		 * is reset to a smaller value than the ND level MTU,
242		 * also reset the ND level MTU.
243		 */
244		if (ndi->linkmtu == 0 ||
245		    ndi->maxmtu < ndi->linkmtu) {
246			ndi->linkmtu = ndi->maxmtu;
247			/* also adjust in6_maxmtu if necessary. */
248			if (oldlinkmtu == 0) {
249				/*
250				 * XXX: the case analysis is grotty, but
251				 * it is not efficient to call in6_setmaxmtu()
252				 * here when we are during the initialization
253				 * procedure.
254				 */
255				if (in6_maxmtu < ndi->linkmtu)
256					in6_maxmtu = ndi->linkmtu;
257			} else
258				in6_setmaxmtu();
259		}
260	}
261#undef MIN
262}
263
264void
265nd6_option_init(opt, icmp6len, ndopts)
266	void *opt;
267	int icmp6len;
268	union nd_opts *ndopts;
269{
270
271	bzero(ndopts, sizeof(*ndopts));
272	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
273	ndopts->nd_opts_last
274		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
275
276	if (icmp6len == 0) {
277		ndopts->nd_opts_done = 1;
278		ndopts->nd_opts_search = NULL;
279	}
280}
281
282/*
283 * Take one ND option.
284 */
285struct nd_opt_hdr *
286nd6_option(ndopts)
287	union nd_opts *ndopts;
288{
289	struct nd_opt_hdr *nd_opt;
290	int olen;
291
292	if (!ndopts)
293		panic("ndopts == NULL in nd6_option");
294	if (!ndopts->nd_opts_last)
295		panic("uninitialized ndopts in nd6_option");
296	if (!ndopts->nd_opts_search)
297		return NULL;
298	if (ndopts->nd_opts_done)
299		return NULL;
300
301	nd_opt = ndopts->nd_opts_search;
302
303	/* make sure nd_opt_len is inside the buffer */
304	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
305		bzero(ndopts, sizeof(*ndopts));
306		return NULL;
307	}
308
309	olen = nd_opt->nd_opt_len << 3;
310	if (olen == 0) {
311		/*
312		 * Message validation requires that all included
313		 * options have a length that is greater than zero.
314		 */
315		bzero(ndopts, sizeof(*ndopts));
316		return NULL;
317	}
318
319	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
320	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
321		/* option overruns the end of buffer, invalid */
322		bzero(ndopts, sizeof(*ndopts));
323		return NULL;
324	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
325		/* reached the end of options chain */
326		ndopts->nd_opts_done = 1;
327		ndopts->nd_opts_search = NULL;
328	}
329	return nd_opt;
330}
331
332/*
333 * Parse multiple ND options.
334 * This function is much easier to use, for ND routines that do not need
335 * multiple options of the same type.
336 */
337int
338nd6_options(ndopts)
339	union nd_opts *ndopts;
340{
341	struct nd_opt_hdr *nd_opt;
342	int i = 0;
343
344	if (!ndopts)
345		panic("ndopts == NULL in nd6_options");
346	if (!ndopts->nd_opts_last)
347		panic("uninitialized ndopts in nd6_options");
348	if (!ndopts->nd_opts_search)
349		return 0;
350
351	while (1) {
352		nd_opt = nd6_option(ndopts);
353		if (!nd_opt && !ndopts->nd_opts_last) {
354			/*
355			 * Message validation requires that all included
356			 * options have a length that is greater than zero.
357			 */
358			icmp6stat.icp6s_nd_badopt++;
359			bzero(ndopts, sizeof(*ndopts));
360			return -1;
361		}
362
363		if (!nd_opt)
364			goto skip1;
365
366		switch (nd_opt->nd_opt_type) {
367		case ND_OPT_SOURCE_LINKADDR:
368		case ND_OPT_TARGET_LINKADDR:
369		case ND_OPT_MTU:
370		case ND_OPT_REDIRECTED_HEADER:
371			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
372				nd6log((LOG_INFO,
373				    "duplicated ND6 option found (type=%d)\n",
374				    nd_opt->nd_opt_type));
375				/* XXX bark? */
376			} else {
377				ndopts->nd_opt_array[nd_opt->nd_opt_type]
378					= nd_opt;
379			}
380			break;
381		case ND_OPT_PREFIX_INFORMATION:
382			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
383				ndopts->nd_opt_array[nd_opt->nd_opt_type]
384					= nd_opt;
385			}
386			ndopts->nd_opts_pi_end =
387				(struct nd_opt_prefix_info *)nd_opt;
388			break;
389		default:
390			/*
391			 * Unknown options must be silently ignored,
392			 * to accomodate future extension to the protocol.
393			 */
394			nd6log((LOG_DEBUG,
395			    "nd6_options: unsupported option %d - "
396			    "option ignored\n", nd_opt->nd_opt_type));
397		}
398
399skip1:
400		i++;
401		if (i > nd6_maxndopt) {
402			icmp6stat.icp6s_nd_toomanyopt++;
403			nd6log((LOG_INFO, "too many loop in nd opt\n"));
404			break;
405		}
406
407		if (ndopts->nd_opts_done)
408			break;
409	}
410
411	return 0;
412}
413
414/*
415 * ND6 timer routine to expire default route list and prefix list
416 */
417void
418nd6_timer(ignored_arg)
419	void	*ignored_arg;
420{
421	int s;
422	struct llinfo_nd6 *ln;
423	struct nd_defrouter *dr;
424	struct nd_prefix *pr;
425	struct ifnet *ifp;
426	struct in6_ifaddr *ia6, *nia6;
427	struct in6_addrlifetime *lt6;
428
429	s = splnet();
430	callout_reset(&nd6_timer_ch, nd6_prune * hz,
431	    nd6_timer, NULL);
432
433	ln = llinfo_nd6.ln_next;
434	while (ln && ln != &llinfo_nd6) {
435		struct rtentry *rt;
436		struct sockaddr_in6 *dst;
437		struct llinfo_nd6 *next = ln->ln_next;
438		/* XXX: used for the DELAY case only: */
439		struct nd_ifinfo *ndi = NULL;
440
441		if ((rt = ln->ln_rt) == NULL) {
442			ln = next;
443			continue;
444		}
445		if ((ifp = rt->rt_ifp) == NULL) {
446			ln = next;
447			continue;
448		}
449		ndi = &nd_ifinfo[ifp->if_index];
450		dst = (struct sockaddr_in6 *)rt_key(rt);
451
452		if (ln->ln_expire > time_second) {
453			ln = next;
454			continue;
455		}
456
457		/* sanity check */
458		if (!rt)
459			panic("rt=0 in nd6_timer(ln=%p)", ln);
460		if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
461			panic("rt_llinfo(%p) is not equal to ln(%p)",
462			      rt->rt_llinfo, ln);
463		if (!dst)
464			panic("dst=0 in nd6_timer(ln=%p)", ln);
465
466		switch (ln->ln_state) {
467		case ND6_LLINFO_INCOMPLETE:
468			if (ln->ln_asked < nd6_mmaxtries) {
469				ln->ln_asked++;
470				ln->ln_expire = time_second +
471					nd_ifinfo[ifp->if_index].retrans / 1000;
472				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
473					ln, 0);
474			} else {
475				struct mbuf *m = ln->ln_hold;
476				if (m) {
477					if (rt->rt_ifp) {
478						/*
479						 * Fake rcvif to make ICMP error
480						 * more helpful in diagnosing
481						 * for the receiver.
482						 * XXX: should we consider
483						 * older rcvif?
484						 */
485						m->m_pkthdr.rcvif = rt->rt_ifp;
486					}
487					icmp6_error(m, ICMP6_DST_UNREACH,
488						    ICMP6_DST_UNREACH_ADDR, 0);
489					ln->ln_hold = NULL;
490				}
491				next = nd6_free(rt);
492			}
493			break;
494		case ND6_LLINFO_REACHABLE:
495			if (ln->ln_expire) {
496				ln->ln_state = ND6_LLINFO_STALE;
497				ln->ln_expire = time_second + nd6_gctimer;
498			}
499			break;
500
501		case ND6_LLINFO_STALE:
502			/* Garbage Collection(RFC 2461 5.3) */
503			if (ln->ln_expire)
504				next = nd6_free(rt);
505			break;
506
507		case ND6_LLINFO_DELAY:
508			if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
509				/* We need NUD */
510				ln->ln_asked = 1;
511				ln->ln_state = ND6_LLINFO_PROBE;
512				ln->ln_expire = time_second +
513					ndi->retrans / 1000;
514				nd6_ns_output(ifp, &dst->sin6_addr,
515					      &dst->sin6_addr,
516					      ln, 0);
517			} else {
518				ln->ln_state = ND6_LLINFO_STALE; /* XXX */
519				ln->ln_expire = time_second + nd6_gctimer;
520			}
521			break;
522		case ND6_LLINFO_PROBE:
523			if (ln->ln_asked < nd6_umaxtries) {
524				ln->ln_asked++;
525				ln->ln_expire = time_second +
526					nd_ifinfo[ifp->if_index].retrans / 1000;
527				nd6_ns_output(ifp, &dst->sin6_addr,
528					       &dst->sin6_addr, ln, 0);
529			} else {
530				next = nd6_free(rt);
531			}
532			break;
533		}
534		ln = next;
535	}
536
537	/* expire default router list */
538	dr = TAILQ_FIRST(&nd_defrouter);
539	while (dr) {
540		if (dr->expire && dr->expire < time_second) {
541			struct nd_defrouter *t;
542			t = TAILQ_NEXT(dr, dr_entry);
543			defrtrlist_del(dr);
544			dr = t;
545		} else {
546			dr = TAILQ_NEXT(dr, dr_entry);
547		}
548	}
549
550	/*
551	 * expire interface addresses.
552	 * in the past the loop was inside prefix expiry processing.
553	 * However, from a stricter speci-confrmance standpoint, we should
554	 * rather separate address lifetimes and prefix lifetimes.
555	 */
556  addrloop:
557	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
558		nia6 = ia6->ia_next;
559		/* check address lifetime */
560		lt6 = &ia6->ia6_lifetime;
561		if (IFA6_IS_INVALID(ia6)) {
562			int regen = 0;
563
564			/*
565			 * If the expiring address is temporary, try
566			 * regenerating a new one.  This would be useful when
567			 * we suspended a laptop PC, then turned it on after a
568			 * period that could invalidate all temporary
569			 * addresses.  Although we may have to restart the
570			 * loop (see below), it must be after purging the
571			 * address.  Otherwise, we'd see an infinite loop of
572			 * regeneration.
573			 */
574			if (ip6_use_tempaddr &&
575			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
576				if (regen_tmpaddr(ia6) == 0)
577					regen = 1;
578			}
579
580			in6_purgeaddr(&ia6->ia_ifa);
581
582			if (regen)
583				goto addrloop; /* XXX: see below */
584		}
585		if (IFA6_IS_DEPRECATED(ia6)) {
586			int oldflags = ia6->ia6_flags;
587
588			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
589
590			/*
591			 * If a temporary address has just become deprecated,
592			 * regenerate a new one if possible.
593			 */
594			if (ip6_use_tempaddr &&
595			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
596			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
597
598				if (regen_tmpaddr(ia6) == 0) {
599					/*
600					 * A new temporary address is
601					 * generated.
602					 * XXX: this means the address chain
603					 * has changed while we are still in
604					 * the loop.  Although the change
605					 * would not cause disaster (because
606					 * it's not a deletion, but an
607					 * addition,) we'd rather restart the
608					 * loop just for safety.  Or does this
609					 * significantly reduce performance??
610					 */
611					goto addrloop;
612				}
613			}
614		} else {
615			/*
616			 * A new RA might have made a deprecated address
617			 * preferred.
618			 */
619			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
620		}
621	}
622
623	/* expire prefix list */
624	pr = nd_prefix.lh_first;
625	while (pr) {
626		/*
627		 * check prefix lifetime.
628		 * since pltime is just for autoconf, pltime processing for
629		 * prefix is not necessary.
630		 */
631		if (pr->ndpr_expire && pr->ndpr_expire < time_second) {
632			struct nd_prefix *t;
633			t = pr->ndpr_next;
634
635			/*
636			 * address expiration and prefix expiration are
637			 * separate.  NEVER perform in6_purgeaddr here.
638			 */
639
640			prelist_remove(pr);
641			pr = t;
642		} else
643			pr = pr->ndpr_next;
644	}
645	splx(s);
646}
647
648static int
649regen_tmpaddr(ia6)
650	struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
651{
652	struct ifaddr *ifa;
653	struct ifnet *ifp;
654	struct in6_ifaddr *public_ifa6 = NULL;
655
656	ifp = ia6->ia_ifa.ifa_ifp;
657	for (ifa = ifp->if_addrlist.tqh_first; ifa;
658	     ifa = ifa->ifa_list.tqe_next) {
659		struct in6_ifaddr *it6;
660
661		if (ifa->ifa_addr->sa_family != AF_INET6)
662			continue;
663
664		it6 = (struct in6_ifaddr *)ifa;
665
666		/* ignore no autoconf addresses. */
667		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
668			continue;
669
670		/* ignore autoconf addresses with different prefixes. */
671		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
672			continue;
673
674		/*
675		 * Now we are looking at an autoconf address with the same
676		 * prefix as ours.  If the address is temporary and is still
677		 * preferred, do not create another one.  It would be rare, but
678		 * could happen, for example, when we resume a laptop PC after
679		 * a long period.
680		 */
681		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
682		    !IFA6_IS_DEPRECATED(it6)) {
683			public_ifa6 = NULL;
684			break;
685		}
686
687		/*
688		 * This is a public autoconf address that has the same prefix
689		 * as ours.  If it is preferred, keep it.  We can't break the
690		 * loop here, because there may be a still-preferred temporary
691		 * address with the prefix.
692		 */
693		if (!IFA6_IS_DEPRECATED(it6))
694		    public_ifa6 = it6;
695	}
696
697	if (public_ifa6 != NULL) {
698		int e;
699
700		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
701			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
702			    " tmp addr,errno=%d\n", e);
703			return (-1);
704		}
705		return (0);
706	}
707
708	return (-1);
709}
710
711/*
712 * Nuke neighbor cache/prefix/default router management table, right before
713 * ifp goes away.
714 */
715void
716nd6_purge(ifp)
717	struct ifnet *ifp;
718{
719	struct llinfo_nd6 *ln, *nln;
720	struct nd_defrouter *dr, *ndr, drany;
721	struct nd_prefix *pr, *npr;
722
723	/* Nuke default router list entries toward ifp */
724	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
725		/*
726		 * The first entry of the list may be stored in
727		 * the routing table, so we'll delete it later.
728		 */
729		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
730			ndr = TAILQ_NEXT(dr, dr_entry);
731			if (dr->ifp == ifp)
732				defrtrlist_del(dr);
733		}
734		dr = TAILQ_FIRST(&nd_defrouter);
735		if (dr->ifp == ifp)
736			defrtrlist_del(dr);
737	}
738
739	/* Nuke prefix list entries toward ifp */
740	for (pr = nd_prefix.lh_first; pr; pr = npr) {
741		npr = pr->ndpr_next;
742		if (pr->ndpr_ifp == ifp) {
743			/*
744			 * Previously, pr->ndpr_addr is removed as well,
745			 * but I strongly believe we don't have to do it.
746			 * nd6_purge() is only called from in6_ifdetach(),
747			 * which removes all the associated interface addresses
748			 * by itself.
749			 * (jinmei@kame.net 20010129)
750			 */
751			prelist_remove(pr);
752		}
753	}
754
755	/* cancel default outgoing interface setting */
756	if (nd6_defifindex == ifp->if_index)
757		nd6_setdefaultiface(0);
758
759	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
760		/* refresh default router list */
761		bzero(&drany, sizeof(drany));
762		defrouter_delreq(&drany, 0);
763		defrouter_select();
764	}
765
766	/*
767	 * Nuke neighbor cache entries for the ifp.
768	 * Note that rt->rt_ifp may not be the same as ifp,
769	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
770	 * nd6_rtrequest(), and ip6_input().
771	 */
772	ln = llinfo_nd6.ln_next;
773	while (ln && ln != &llinfo_nd6) {
774		struct rtentry *rt;
775		struct sockaddr_dl *sdl;
776
777		nln = ln->ln_next;
778		rt = ln->ln_rt;
779		if (rt && rt->rt_gateway &&
780		    rt->rt_gateway->sa_family == AF_LINK) {
781			sdl = (struct sockaddr_dl *)rt->rt_gateway;
782			if (sdl->sdl_index == ifp->if_index)
783				nln = nd6_free(rt);
784		}
785		ln = nln;
786	}
787}
788
789struct rtentry *
790nd6_lookup(addr6, create, ifp)
791	struct in6_addr *addr6;
792	int create;
793	struct ifnet *ifp;
794{
795	struct rtentry *rt;
796	struct sockaddr_in6 sin6;
797
798	bzero(&sin6, sizeof(sin6));
799	sin6.sin6_len = sizeof(struct sockaddr_in6);
800	sin6.sin6_family = AF_INET6;
801	sin6.sin6_addr = *addr6;
802	rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL);
803	if (rt) {
804		if ((rt->rt_flags & RTF_LLINFO) == 0 && create) {
805			/*
806			 * This is the case for the default route.
807			 * If we want to create a neighbor cache for the
808			 * address, we should free the route for the
809			 * destination and allocate an interface route.
810			 */
811			RTFREE_LOCKED(rt);
812			rt = 0;
813		}
814	}
815	if (!rt) {
816		if (create && ifp) {
817			int e;
818
819			/*
820			 * If no route is available and create is set,
821			 * we allocate a host route for the destination
822			 * and treat it like an interface route.
823			 * This hack is necessary for a neighbor which can't
824			 * be covered by our own prefix.
825			 */
826			struct ifaddr *ifa =
827			    ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
828			if (ifa == NULL)
829				return (NULL);
830
831			/*
832			 * Create a new route.  RTF_LLINFO is necessary
833			 * to create a Neighbor Cache entry for the
834			 * destination in nd6_rtrequest which will be
835			 * called in rtrequest via ifa->ifa_rtrequest.
836			 */
837			if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6,
838			    ifa->ifa_addr, (struct sockaddr *)&all1_sa,
839			    (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) &
840			    ~RTF_CLONING, &rt)) != 0) {
841				log(LOG_ERR,
842				    "nd6_lookup: failed to add route for a "
843				    "neighbor(%s), errno=%d\n",
844				    ip6_sprintf(addr6), e);
845			}
846			if (rt == NULL)
847				return (NULL);
848			RT_LOCK(rt);
849			if (rt->rt_llinfo) {
850				struct llinfo_nd6 *ln =
851				    (struct llinfo_nd6 *)rt->rt_llinfo;
852				ln->ln_state = ND6_LLINFO_NOSTATE;
853			}
854		} else
855			return (NULL);
856	}
857	RT_LOCK_ASSERT(rt);
858	rt->rt_refcnt--;
859	/*
860	 * Validation for the entry.
861	 * Note that the check for rt_llinfo is necessary because a cloned
862	 * route from a parent route that has the L flag (e.g. the default
863	 * route to a p2p interface) may have the flag, too, while the
864	 * destination is not actually a neighbor.
865	 * XXX: we can't use rt->rt_ifp to check for the interface, since
866	 *      it might be the loopback interface if the entry is for our
867	 *      own address on a non-loopback interface. Instead, we should
868	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
869	 *	interface.
870	 */
871	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
872	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
873	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
874		if (create) {
875			log(LOG_DEBUG,
876			    "nd6_lookup: failed to lookup %s (if = %s)\n",
877			    ip6_sprintf(addr6),
878			    ifp ? if_name(ifp) : "unspec");
879			/* xxx more logs... kazu */
880		}
881		RT_UNLOCK(rt);
882		return (NULL);
883	}
884	RT_UNLOCK(rt);		/* XXX not ready to return rt locked */
885	return (rt);
886}
887
888/*
889 * Detect if a given IPv6 address identifies a neighbor on a given link.
890 * XXX: should take care of the destination of a p2p link?
891 */
892int
893nd6_is_addr_neighbor(addr, ifp)
894	struct sockaddr_in6 *addr;
895	struct ifnet *ifp;
896{
897	struct ifaddr *ifa;
898	int i;
899
900#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
901#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
902
903	/*
904	 * A link-local address is always a neighbor.
905	 * XXX: we should use the sin6_scope_id field rather than the embedded
906	 * interface index.
907	 */
908	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
909	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
910		return (1);
911
912	/*
913	 * If the address matches one of our addresses,
914	 * it should be a neighbor.
915	 */
916	for (ifa = ifp->if_addrlist.tqh_first; ifa;
917	     ifa = ifa->ifa_list.tqe_next) {
918		if (ifa->ifa_addr->sa_family != AF_INET6)
919			next: continue;
920
921		for (i = 0; i < 4; i++) {
922			if ((IFADDR6(ifa).s6_addr32[i] ^
923			     addr->sin6_addr.s6_addr32[i]) &
924			    IFMASK6(ifa).s6_addr32[i])
925				goto next;
926		}
927		return (1);
928	}
929
930	/*
931	 * Even if the address matches none of our addresses, it might be
932	 * in the neighbor cache.
933	 */
934	if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL)
935		return (1);
936
937	return (0);
938#undef IFADDR6
939#undef IFMASK6
940}
941
942/*
943 * Free an nd6 llinfo entry.
944 */
945struct llinfo_nd6 *
946nd6_free(rt)
947	struct rtentry *rt;
948{
949	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
950	struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
951	struct nd_defrouter *dr;
952
953	/*
954	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
955	 * even though it is not harmful, it was not really necessary.
956	 */
957
958	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
959		int s;
960		s = splnet();
961		dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
962		    rt->rt_ifp);
963
964		if (ln->ln_router || dr) {
965			/*
966			 * rt6_flush must be called whether or not the neighbor
967			 * is in the Default Router List.
968			 * See a corresponding comment in nd6_na_input().
969			 */
970			rt6_flush(&in6, rt->rt_ifp);
971		}
972
973		if (dr) {
974			/*
975			 * Unreachablity of a router might affect the default
976			 * router selection and on-link detection of advertised
977			 * prefixes.
978			 */
979
980			/*
981			 * Temporarily fake the state to choose a new default
982			 * router and to perform on-link determination of
983			 * prefixes correctly.
984			 * Below the state will be set correctly,
985			 * or the entry itself will be deleted.
986			 */
987			ln->ln_state = ND6_LLINFO_INCOMPLETE;
988
989			/*
990			 * Since defrouter_select() does not affect the
991			 * on-link determination and MIP6 needs the check
992			 * before the default router selection, we perform
993			 * the check now.
994			 */
995			pfxlist_onlink_check();
996
997			if (dr == TAILQ_FIRST(&nd_defrouter)) {
998				/*
999				 * It is used as the current default router,
1000				 * so we have to move it to the end of the
1001				 * list and choose a new one.
1002				 * XXX: it is not very efficient if this is
1003				 *      the only router.
1004				 */
1005				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1006				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
1007
1008				defrouter_select();
1009			}
1010		}
1011		splx(s);
1012	}
1013
1014	/*
1015	 * Before deleting the entry, remember the next entry as the
1016	 * return value.  We need this because pfxlist_onlink_check() above
1017	 * might have freed other entries (particularly the old next entry) as
1018	 * a side effect (XXX).
1019	 */
1020	next = ln->ln_next;
1021
1022	/*
1023	 * Detach the route from the routing tree and the list of neighbor
1024	 * caches, and disable the route entry not to be used in already
1025	 * cached routes.
1026	 */
1027	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1028	    rt_mask(rt), 0, (struct rtentry **)0);
1029
1030	return (next);
1031}
1032
1033/*
1034 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1035 *
1036 * XXX cost-effective metods?
1037 */
1038void
1039nd6_nud_hint(rt, dst6, force)
1040	struct rtentry *rt;
1041	struct in6_addr *dst6;
1042	int force;
1043{
1044	struct llinfo_nd6 *ln;
1045
1046	/*
1047	 * If the caller specified "rt", use that.  Otherwise, resolve the
1048	 * routing table by supplied "dst6".
1049	 */
1050	if (!rt) {
1051		if (!dst6)
1052			return;
1053		if (!(rt = nd6_lookup(dst6, 0, NULL)))
1054			return;
1055	}
1056
1057	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1058	    (rt->rt_flags & RTF_LLINFO) == 0 ||
1059	    !rt->rt_llinfo || !rt->rt_gateway ||
1060	    rt->rt_gateway->sa_family != AF_LINK) {
1061		/* This is not a host route. */
1062		return;
1063	}
1064
1065	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1066	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1067		return;
1068
1069	/*
1070	 * if we get upper-layer reachability confirmation many times,
1071	 * it is possible we have false information.
1072	 */
1073	if (!force) {
1074		ln->ln_byhint++;
1075		if (ln->ln_byhint > nd6_maxnudhint)
1076			return;
1077	}
1078
1079	ln->ln_state = ND6_LLINFO_REACHABLE;
1080	if (ln->ln_expire)
1081		ln->ln_expire = time_second +
1082			nd_ifinfo[rt->rt_ifp->if_index].reachable;
1083}
1084
1085void
1086nd6_rtrequest(req, rt, info)
1087	int	req;
1088	struct rtentry *rt;
1089	struct rt_addrinfo *info; /* xxx unused */
1090{
1091	struct sockaddr *gate = rt->rt_gateway;
1092	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1093	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1094	struct ifnet *ifp = rt->rt_ifp;
1095	struct ifaddr *ifa;
1096
1097	RT_LOCK_ASSERT(rt);
1098
1099	if ((rt->rt_flags & RTF_GATEWAY) != 0)
1100		return;
1101
1102	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1103		/*
1104		 * This is probably an interface direct route for a link
1105		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1106		 * We do not need special treatment below for such a route.
1107		 * Moreover, the RTF_LLINFO flag which would be set below
1108		 * would annoy the ndp(8) command.
1109		 */
1110		return;
1111	}
1112
1113	if (req == RTM_RESOLVE &&
1114	    (nd6_need_cache(ifp) == 0 || /* stf case */
1115	     !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) {
1116		/*
1117		 * FreeBSD and BSD/OS often make a cloned host route based
1118		 * on a less-specific route (e.g. the default route).
1119		 * If the less specific route does not have a "gateway"
1120		 * (this is the case when the route just goes to a p2p or an
1121		 * stf interface), we'll mistakenly make a neighbor cache for
1122		 * the host route, and will see strange neighbor solicitation
1123		 * for the corresponding destination.  In order to avoid the
1124		 * confusion, we check if the destination of the route is
1125		 * a neighbor in terms of neighbor discovery, and stop the
1126		 * process if not.  Additionally, we remove the LLINFO flag
1127		 * so that ndp(8) will not try to get the neighbor information
1128		 * of the destination.
1129		 */
1130		rt->rt_flags &= ~RTF_LLINFO;
1131		return;
1132	}
1133
1134	switch (req) {
1135	case RTM_ADD:
1136		/*
1137		 * There is no backward compatibility :)
1138		 *
1139		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1140		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1141		 *	   rt->rt_flags |= RTF_CLONING;
1142		 */
1143		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
1144			/*
1145			 * Case 1: This route should come from
1146			 * a route to interface.  RTF_LLINFO flag is set
1147			 * for a host route whose destination should be
1148			 * treated as on-link.
1149			 */
1150			rt_setgate(rt, rt_key(rt),
1151				   (struct sockaddr *)&null_sdl);
1152			gate = rt->rt_gateway;
1153			SDL(gate)->sdl_type = ifp->if_type;
1154			SDL(gate)->sdl_index = ifp->if_index;
1155			if (ln)
1156				ln->ln_expire = time_second;
1157			if (ln && ln->ln_expire == 0) {
1158				/* kludge for desktops */
1159				ln->ln_expire = 1;
1160			}
1161			if ((rt->rt_flags & RTF_CLONING) != 0)
1162				break;
1163		}
1164		/*
1165		 * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
1166		 * We don't do that here since llinfo is not ready yet.
1167		 *
1168		 * There are also couple of other things to be discussed:
1169		 * - unsolicited NA code needs improvement beforehand
1170		 * - RFC2461 says we MAY send multicast unsolicited NA
1171		 *   (7.2.6 paragraph 4), however, it also says that we
1172		 *   SHOULD provide a mechanism to prevent multicast NA storm.
1173		 *   we don't have anything like it right now.
1174		 *   note that the mechanism needs a mutual agreement
1175		 *   between proxies, which means that we need to implement
1176		 *   a new protocol, or a new kludge.
1177		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1178		 *   we need to check ip6forwarding before sending it.
1179		 *   (or should we allow proxy ND configuration only for
1180		 *   routers?  there's no mention about proxy ND from hosts)
1181		 */
1182#if 0
1183		/* XXX it does not work */
1184		if (rt->rt_flags & RTF_ANNOUNCE)
1185			nd6_na_output(ifp,
1186			      &SIN6(rt_key(rt))->sin6_addr,
1187			      &SIN6(rt_key(rt))->sin6_addr,
1188			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
1189			      1, NULL);
1190#endif
1191		/* FALLTHROUGH */
1192	case RTM_RESOLVE:
1193		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1194			/*
1195			 * Address resolution isn't necessary for a point to
1196			 * point link, so we can skip this test for a p2p link.
1197			 */
1198			if (gate->sa_family != AF_LINK ||
1199			    gate->sa_len < sizeof(null_sdl)) {
1200				log(LOG_DEBUG,
1201				    "nd6_rtrequest: bad gateway value: %s\n",
1202				    if_name(ifp));
1203				break;
1204			}
1205			SDL(gate)->sdl_type = ifp->if_type;
1206			SDL(gate)->sdl_index = ifp->if_index;
1207		}
1208		if (ln != NULL)
1209			break;	/* This happens on a route change */
1210		/*
1211		 * Case 2: This route may come from cloning, or a manual route
1212		 * add with a LL address.
1213		 */
1214		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
1215		rt->rt_llinfo = (caddr_t)ln;
1216		if (!ln) {
1217			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1218			break;
1219		}
1220		nd6_inuse++;
1221		nd6_allocated++;
1222		Bzero(ln, sizeof(*ln));
1223		ln->ln_rt = rt;
1224		/* this is required for "ndp" command. - shin */
1225		if (req == RTM_ADD) {
1226		        /*
1227			 * gate should have some valid AF_LINK entry,
1228			 * and ln->ln_expire should have some lifetime
1229			 * which is specified by ndp command.
1230			 */
1231			ln->ln_state = ND6_LLINFO_REACHABLE;
1232			ln->ln_byhint = 0;
1233		} else {
1234		        /*
1235			 * When req == RTM_RESOLVE, rt is created and
1236			 * initialized in rtrequest(), so rt_expire is 0.
1237			 */
1238			ln->ln_state = ND6_LLINFO_NOSTATE;
1239			ln->ln_expire = time_second;
1240		}
1241		rt->rt_flags |= RTF_LLINFO;
1242		ln->ln_next = llinfo_nd6.ln_next;
1243		llinfo_nd6.ln_next = ln;
1244		ln->ln_prev = &llinfo_nd6;
1245		ln->ln_next->ln_prev = ln;
1246
1247		/*
1248		 * check if rt_key(rt) is one of my address assigned
1249		 * to the interface.
1250		 */
1251		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
1252		    &SIN6(rt_key(rt))->sin6_addr);
1253		if (ifa) {
1254			caddr_t macp = nd6_ifptomac(ifp);
1255			ln->ln_expire = 0;
1256			ln->ln_state = ND6_LLINFO_REACHABLE;
1257			ln->ln_byhint = 0;
1258			if (macp) {
1259				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
1260				SDL(gate)->sdl_alen = ifp->if_addrlen;
1261			}
1262			if (nd6_useloopback) {
1263				rt->rt_ifp = &loif[0];	/* XXX */
1264				/*
1265				 * Make sure rt_ifa be equal to the ifaddr
1266				 * corresponding to the address.
1267				 * We need this because when we refer
1268				 * rt_ifa->ia6_flags in ip6_input, we assume
1269				 * that the rt_ifa points to the address instead
1270				 * of the loopback address.
1271				 */
1272				if (ifa != rt->rt_ifa) {
1273					IFAFREE(rt->rt_ifa);
1274					IFAREF(ifa);
1275					rt->rt_ifa = ifa;
1276				}
1277			}
1278		} else if (rt->rt_flags & RTF_ANNOUNCE) {
1279			ln->ln_expire = 0;
1280			ln->ln_state = ND6_LLINFO_REACHABLE;
1281			ln->ln_byhint = 0;
1282
1283			/* join solicited node multicast for proxy ND */
1284			if (ifp->if_flags & IFF_MULTICAST) {
1285				struct in6_addr llsol;
1286				int error;
1287
1288				llsol = SIN6(rt_key(rt))->sin6_addr;
1289				llsol.s6_addr16[0] = htons(0xff02);
1290				llsol.s6_addr16[1] = htons(ifp->if_index);
1291				llsol.s6_addr32[1] = 0;
1292				llsol.s6_addr32[2] = htonl(1);
1293				llsol.s6_addr8[12] = 0xff;
1294
1295				if (!in6_addmulti(&llsol, ifp, &error)) {
1296					nd6log((LOG_ERR, "%s: failed to join "
1297					    "%s (errno=%d)\n", if_name(ifp),
1298					    ip6_sprintf(&llsol), error));
1299				}
1300			}
1301		}
1302		break;
1303
1304	case RTM_DELETE:
1305		if (!ln)
1306			break;
1307		/* leave from solicited node multicast for proxy ND */
1308		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1309		    (ifp->if_flags & IFF_MULTICAST) != 0) {
1310			struct in6_addr llsol;
1311			struct in6_multi *in6m;
1312
1313			llsol = SIN6(rt_key(rt))->sin6_addr;
1314			llsol.s6_addr16[0] = htons(0xff02);
1315			llsol.s6_addr16[1] = htons(ifp->if_index);
1316			llsol.s6_addr32[1] = 0;
1317			llsol.s6_addr32[2] = htonl(1);
1318			llsol.s6_addr8[12] = 0xff;
1319
1320			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
1321			if (in6m)
1322				in6_delmulti(in6m);
1323		}
1324		nd6_inuse--;
1325		ln->ln_next->ln_prev = ln->ln_prev;
1326		ln->ln_prev->ln_next = ln->ln_next;
1327		ln->ln_prev = NULL;
1328		rt->rt_llinfo = 0;
1329		rt->rt_flags &= ~RTF_LLINFO;
1330		if (ln->ln_hold)
1331			m_freem(ln->ln_hold);
1332		Free((caddr_t)ln);
1333	}
1334}
1335
1336int
1337nd6_ioctl(cmd, data, ifp)
1338	u_long cmd;
1339	caddr_t	data;
1340	struct ifnet *ifp;
1341{
1342	struct in6_drlist *drl = (struct in6_drlist *)data;
1343	struct in6_prlist *prl = (struct in6_prlist *)data;
1344	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1345	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1346	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1347	struct nd_defrouter *dr, any;
1348	struct nd_prefix *pr;
1349	struct rtentry *rt;
1350	int i = 0, error = 0;
1351	int s;
1352
1353	switch (cmd) {
1354	case SIOCGDRLST_IN6:
1355		/*
1356		 * obsolete API, use sysctl under net.inet6.icmp6
1357		 */
1358		bzero(drl, sizeof(*drl));
1359		s = splnet();
1360		dr = TAILQ_FIRST(&nd_defrouter);
1361		while (dr && i < DRLSTSIZ) {
1362			drl->defrouter[i].rtaddr = dr->rtaddr;
1363			if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) {
1364				/* XXX: need to this hack for KAME stack */
1365				drl->defrouter[i].rtaddr.s6_addr16[1] = 0;
1366			} else
1367				log(LOG_ERR,
1368				    "default router list contains a "
1369				    "non-linklocal address(%s)\n",
1370				    ip6_sprintf(&drl->defrouter[i].rtaddr));
1371
1372			drl->defrouter[i].flags = dr->flags;
1373			drl->defrouter[i].rtlifetime = dr->rtlifetime;
1374			drl->defrouter[i].expire = dr->expire;
1375			drl->defrouter[i].if_index = dr->ifp->if_index;
1376			i++;
1377			dr = TAILQ_NEXT(dr, dr_entry);
1378		}
1379		splx(s);
1380		break;
1381	case SIOCGPRLST_IN6:
1382		/*
1383		 * obsolete API, use sysctl under net.inet6.icmp6
1384		 */
1385		/*
1386		 * XXX meaning of fields, especialy "raflags", is very
1387		 * differnet between RA prefix list and RR/static prefix list.
1388		 * how about separating ioctls into two?
1389		 */
1390		bzero(prl, sizeof(*prl));
1391		s = splnet();
1392		pr = nd_prefix.lh_first;
1393		while (pr && i < PRLSTSIZ) {
1394			struct nd_pfxrouter *pfr;
1395			int j;
1396
1397			(void)in6_embedscope(&prl->prefix[i].prefix,
1398			    &pr->ndpr_prefix, NULL, NULL);
1399			prl->prefix[i].raflags = pr->ndpr_raf;
1400			prl->prefix[i].prefixlen = pr->ndpr_plen;
1401			prl->prefix[i].vltime = pr->ndpr_vltime;
1402			prl->prefix[i].pltime = pr->ndpr_pltime;
1403			prl->prefix[i].if_index = pr->ndpr_ifp->if_index;
1404			prl->prefix[i].expire = pr->ndpr_expire;
1405
1406			pfr = pr->ndpr_advrtrs.lh_first;
1407			j = 0;
1408			while (pfr) {
1409				if (j < DRLSTSIZ) {
1410#define RTRADDR prl->prefix[i].advrtr[j]
1411					RTRADDR = pfr->router->rtaddr;
1412					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1413						/* XXX: hack for KAME */
1414						RTRADDR.s6_addr16[1] = 0;
1415					} else
1416						log(LOG_ERR,
1417						    "a router(%s) advertises "
1418						    "a prefix with "
1419						    "non-link local address\n",
1420						    ip6_sprintf(&RTRADDR));
1421#undef RTRADDR
1422				}
1423				j++;
1424				pfr = pfr->pfr_next;
1425			}
1426			prl->prefix[i].advrtrs = j;
1427			prl->prefix[i].origin = PR_ORIG_RA;
1428
1429			i++;
1430			pr = pr->ndpr_next;
1431		}
1432	      {
1433		struct rr_prefix *rpp;
1434
1435		for (rpp = LIST_FIRST(&rr_prefix); rpp;
1436		     rpp = LIST_NEXT(rpp, rp_entry)) {
1437			if (i >= PRLSTSIZ)
1438				break;
1439			(void)in6_embedscope(&prl->prefix[i].prefix,
1440			    &pr->ndpr_prefix, NULL, NULL);
1441			prl->prefix[i].raflags = rpp->rp_raf;
1442			prl->prefix[i].prefixlen = rpp->rp_plen;
1443			prl->prefix[i].vltime = rpp->rp_vltime;
1444			prl->prefix[i].pltime = rpp->rp_pltime;
1445			prl->prefix[i].if_index = rpp->rp_ifp->if_index;
1446			prl->prefix[i].expire = rpp->rp_expire;
1447			prl->prefix[i].advrtrs = 0;
1448			prl->prefix[i].origin = rpp->rp_origin;
1449			i++;
1450		}
1451	      }
1452		splx(s);
1453
1454		break;
1455	case OSIOCGIFINFO_IN6:
1456		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1457			error = EINVAL;
1458			break;
1459		}
1460		ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu;
1461		ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu;
1462		ndi->ndi.basereachable =
1463		    nd_ifinfo[ifp->if_index].basereachable;
1464		ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable;
1465		ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans;
1466		ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags;
1467		ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm;
1468		ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim;
1469		ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra;
1470		break;
1471	case SIOCGIFINFO_IN6:
1472		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1473			error = EINVAL;
1474			break;
1475		}
1476		ndi->ndi = nd_ifinfo[ifp->if_index];
1477		break;
1478	case SIOCSIFINFO_FLAGS:
1479		/* XXX: almost all other fields of ndi->ndi is unused */
1480		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1481			error = EINVAL;
1482			break;
1483		}
1484		nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags;
1485		break;
1486	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1487		/* flush default router list */
1488		/*
1489		 * xxx sumikawa: should not delete route if default
1490		 * route equals to the top of default router list
1491		 */
1492		bzero(&any, sizeof(any));
1493		defrouter_delreq(&any, 0);
1494		defrouter_select();
1495		/* xxx sumikawa: flush prefix list */
1496		break;
1497	case SIOCSPFXFLUSH_IN6:
1498	{
1499		/* flush all the prefix advertised by routers */
1500		struct nd_prefix *pr, *next;
1501
1502		s = splnet();
1503		for (pr = nd_prefix.lh_first; pr; pr = next) {
1504			struct in6_ifaddr *ia, *ia_next;
1505
1506			next = pr->ndpr_next;
1507
1508			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1509				continue; /* XXX */
1510
1511			/* do we really have to remove addresses as well? */
1512			for (ia = in6_ifaddr; ia; ia = ia_next) {
1513				/* ia might be removed.  keep the next ptr. */
1514				ia_next = ia->ia_next;
1515
1516				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1517					continue;
1518
1519				if (ia->ia6_ndpr == pr)
1520					in6_purgeaddr(&ia->ia_ifa);
1521			}
1522			prelist_remove(pr);
1523		}
1524		splx(s);
1525		break;
1526	}
1527	case SIOCSRTRFLUSH_IN6:
1528	{
1529		/* flush all the default routers */
1530		struct nd_defrouter *dr, *next;
1531
1532		s = splnet();
1533		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1534			/*
1535			 * The first entry of the list may be stored in
1536			 * the routing table, so we'll delete it later.
1537			 */
1538			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
1539				next = TAILQ_NEXT(dr, dr_entry);
1540				defrtrlist_del(dr);
1541			}
1542			defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
1543		}
1544		splx(s);
1545		break;
1546	}
1547	case SIOCGNBRINFO_IN6:
1548	{
1549		struct llinfo_nd6 *ln;
1550		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1551
1552		/*
1553		 * XXX: KAME specific hack for scoped addresses
1554		 *      XXXX: for other scopes than link-local?
1555		 */
1556		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
1557		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
1558			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
1559
1560			if (*idp == 0)
1561				*idp = htons(ifp->if_index);
1562		}
1563
1564		s = splnet();
1565		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
1566			error = EINVAL;
1567			splx(s);
1568			break;
1569		}
1570		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1571		nbi->state = ln->ln_state;
1572		nbi->asked = ln->ln_asked;
1573		nbi->isrouter = ln->ln_router;
1574		nbi->expire = ln->ln_expire;
1575		splx(s);
1576
1577		break;
1578	}
1579	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1580		ndif->ifindex = nd6_defifindex;
1581		break;
1582	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1583		return (nd6_setdefaultiface(ndif->ifindex));
1584	}
1585	return (error);
1586}
1587
1588/*
1589 * Create neighbor cache entry and cache link-layer address,
1590 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
1591 */
1592struct rtentry *
1593nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
1594	struct ifnet *ifp;
1595	struct in6_addr *from;
1596	char *lladdr;
1597	int lladdrlen;
1598	int type;	/* ICMP6 type */
1599	int code;	/* type dependent information */
1600{
1601	struct rtentry *rt = NULL;
1602	struct llinfo_nd6 *ln = NULL;
1603	int is_newentry;
1604	struct sockaddr_dl *sdl = NULL;
1605	int do_update;
1606	int olladdr;
1607	int llchange;
1608	int newstate = 0;
1609
1610	if (!ifp)
1611		panic("ifp == NULL in nd6_cache_lladdr");
1612	if (!from)
1613		panic("from == NULL in nd6_cache_lladdr");
1614
1615	/* nothing must be updated for unspecified address */
1616	if (IN6_IS_ADDR_UNSPECIFIED(from))
1617		return NULL;
1618
1619	/*
1620	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1621	 * the caller.
1622	 *
1623	 * XXX If the link does not have link-layer adderss, what should
1624	 * we do? (ifp->if_addrlen == 0)
1625	 * Spec says nothing in sections for RA, RS and NA.  There's small
1626	 * description on it in NS section (RFC 2461 7.2.3).
1627	 */
1628
1629	rt = nd6_lookup(from, 0, ifp);
1630	if (!rt) {
1631#if 0
1632		/* nothing must be done if there's no lladdr */
1633		if (!lladdr || !lladdrlen)
1634			return NULL;
1635#endif
1636
1637		rt = nd6_lookup(from, 1, ifp);
1638		is_newentry = 1;
1639	} else {
1640		/* do nothing if static ndp is set */
1641		if (rt->rt_flags & RTF_STATIC)
1642			return NULL;
1643		is_newentry = 0;
1644	}
1645
1646	if (!rt)
1647		return NULL;
1648	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1649fail:
1650		(void)nd6_free(rt);
1651		return NULL;
1652	}
1653	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1654	if (!ln)
1655		goto fail;
1656	if (!rt->rt_gateway)
1657		goto fail;
1658	if (rt->rt_gateway->sa_family != AF_LINK)
1659		goto fail;
1660	sdl = SDL(rt->rt_gateway);
1661
1662	olladdr = (sdl->sdl_alen) ? 1 : 0;
1663	if (olladdr && lladdr) {
1664		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1665			llchange = 1;
1666		else
1667			llchange = 0;
1668	} else
1669		llchange = 0;
1670
1671	/*
1672	 * newentry olladdr  lladdr  llchange	(*=record)
1673	 *	0	n	n	--	(1)
1674	 *	0	y	n	--	(2)
1675	 *	0	n	y	--	(3) * STALE
1676	 *	0	y	y	n	(4) *
1677	 *	0	y	y	y	(5) * STALE
1678	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1679	 *	1	--	y	--	(7) * STALE
1680	 */
1681
1682	if (lladdr) {		/* (3-5) and (7) */
1683		/*
1684		 * Record source link-layer address
1685		 * XXX is it dependent to ifp->if_type?
1686		 */
1687		sdl->sdl_alen = ifp->if_addrlen;
1688		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1689	}
1690
1691	if (!is_newentry) {
1692		if ((!olladdr && lladdr) ||		/* (3) */
1693		    (olladdr && lladdr && llchange)) {	/* (5) */
1694			do_update = 1;
1695			newstate = ND6_LLINFO_STALE;
1696		} else					/* (1-2,4) */
1697			do_update = 0;
1698	} else {
1699		do_update = 1;
1700		if (!lladdr)				/* (6) */
1701			newstate = ND6_LLINFO_NOSTATE;
1702		else					/* (7) */
1703			newstate = ND6_LLINFO_STALE;
1704	}
1705
1706	if (do_update) {
1707		/*
1708		 * Update the state of the neighbor cache.
1709		 */
1710		ln->ln_state = newstate;
1711
1712		if (ln->ln_state == ND6_LLINFO_STALE) {
1713			/*
1714			 * XXX: since nd6_output() below will cause
1715			 * state tansition to DELAY and reset the timer,
1716			 * we must set the timer now, although it is actually
1717			 * meaningless.
1718			 */
1719			ln->ln_expire = time_second + nd6_gctimer;
1720
1721			if (ln->ln_hold) {
1722				/*
1723				 * we assume ifp is not a p2p here, so just
1724				 * set the 2nd argument as the 1st one.
1725				 */
1726				nd6_output(ifp, ifp, ln->ln_hold,
1727				    (struct sockaddr_in6 *)rt_key(rt), rt);
1728				ln->ln_hold = NULL;
1729			}
1730		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1731			/* probe right away */
1732			ln->ln_expire = time_second;
1733		}
1734	}
1735
1736	/*
1737	 * ICMP6 type dependent behavior.
1738	 *
1739	 * NS: clear IsRouter if new entry
1740	 * RS: clear IsRouter
1741	 * RA: set IsRouter if there's lladdr
1742	 * redir: clear IsRouter if new entry
1743	 *
1744	 * RA case, (1):
1745	 * The spec says that we must set IsRouter in the following cases:
1746	 * - If lladdr exist, set IsRouter.  This means (1-5).
1747	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1748	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1749	 * A quetion arises for (1) case.  (1) case has no lladdr in the
1750	 * neighbor cache, this is similar to (6).
1751	 * This case is rare but we figured that we MUST NOT set IsRouter.
1752	 *
1753	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1754	 *							D R
1755	 *	0	n	n	--	(1)	c   ?     s
1756	 *	0	y	n	--	(2)	c   s     s
1757	 *	0	n	y	--	(3)	c   s     s
1758	 *	0	y	y	n	(4)	c   s     s
1759	 *	0	y	y	y	(5)	c   s     s
1760	 *	1	--	n	--	(6) c	c 	c s
1761	 *	1	--	y	--	(7) c	c   s	c s
1762	 *
1763	 *					(c=clear s=set)
1764	 */
1765	switch (type & 0xff) {
1766	case ND_NEIGHBOR_SOLICIT:
1767		/*
1768		 * New entry must have is_router flag cleared.
1769		 */
1770		if (is_newentry)	/* (6-7) */
1771			ln->ln_router = 0;
1772		break;
1773	case ND_REDIRECT:
1774		/*
1775		 * If the icmp is a redirect to a better router, always set the
1776		 * is_router flag.  Otherwise, if the entry is newly created,
1777		 * clear the flag.  [RFC 2461, sec 8.3]
1778		 */
1779		if (code == ND_REDIRECT_ROUTER)
1780			ln->ln_router = 1;
1781		else if (is_newentry) /* (6-7) */
1782			ln->ln_router = 0;
1783		break;
1784	case ND_ROUTER_SOLICIT:
1785		/*
1786		 * is_router flag must always be cleared.
1787		 */
1788		ln->ln_router = 0;
1789		break;
1790	case ND_ROUTER_ADVERT:
1791		/*
1792		 * Mark an entry with lladdr as a router.
1793		 */
1794		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
1795		    (is_newentry && lladdr)) {			/* (7) */
1796			ln->ln_router = 1;
1797		}
1798		break;
1799	}
1800
1801	/*
1802	 * When the link-layer address of a router changes, select the
1803	 * best router again.  In particular, when the neighbor entry is newly
1804	 * created, it might affect the selection policy.
1805	 * Question: can we restrict the first condition to the "is_newentry"
1806	 * case?
1807	 * XXX: when we hear an RA from a new router with the link-layer
1808	 * address option, defrouter_select() is called twice, since
1809	 * defrtrlist_update called the function as well.  However, I believe
1810	 * we can compromise the overhead, since it only happens the first
1811	 * time.
1812	 * XXX: although defrouter_select() should not have a bad effect
1813	 * for those are not autoconfigured hosts, we explicitly avoid such
1814	 * cases for safety.
1815	 */
1816	if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv)
1817		defrouter_select();
1818
1819	return rt;
1820}
1821
1822static void
1823nd6_slowtimo(ignored_arg)
1824    void *ignored_arg;
1825{
1826	int s = splnet();
1827	int i;
1828	struct nd_ifinfo *nd6if;
1829
1830	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1831	    nd6_slowtimo, NULL);
1832	for (i = 1; i < if_index + 1; i++) {
1833		if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
1834			continue;
1835		nd6if = &nd_ifinfo[i];
1836		if (nd6if->basereachable && /* already initialized */
1837		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1838			/*
1839			 * Since reachable time rarely changes by router
1840			 * advertisements, we SHOULD insure that a new random
1841			 * value gets recomputed at least once every few hours.
1842			 * (RFC 2461, 6.3.4)
1843			 */
1844			nd6if->recalctm = nd6_recalc_reachtm_interval;
1845			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1846		}
1847	}
1848	splx(s);
1849}
1850
1851#define senderr(e) { error = (e); goto bad;}
1852int
1853nd6_output(ifp, origifp, m0, dst, rt0)
1854	struct ifnet *ifp;
1855	struct ifnet *origifp;
1856	struct mbuf *m0;
1857	struct sockaddr_in6 *dst;
1858	struct rtentry *rt0;
1859{
1860	struct mbuf *m = m0;
1861	struct rtentry *rt = rt0;
1862	struct sockaddr_in6 *gw6 = NULL;
1863	struct llinfo_nd6 *ln = NULL;
1864	int error = 0;
1865
1866	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
1867		goto sendpkt;
1868
1869	if (nd6_need_cache(ifp) == 0)
1870		goto sendpkt;
1871
1872	/*
1873	 * next hop determination.  This routine is derived from ether_outpout.
1874	 */
1875	if (rt) {
1876		if ((rt->rt_flags & RTF_UP) == 0) {
1877			rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL);
1878			if (rt != NULL) {
1879				rt->rt_refcnt--;
1880				RT_UNLOCK(rt);
1881				if (rt->rt_ifp != ifp) {
1882					/* XXX: loop care? */
1883					return nd6_output(ifp, origifp, m0,
1884					    dst, rt);
1885				}
1886			} else
1887				senderr(EHOSTUNREACH);
1888		}
1889
1890		if (rt->rt_flags & RTF_GATEWAY) {
1891			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;
1892
1893			/*
1894			 * We skip link-layer address resolution and NUD
1895			 * if the gateway is not a neighbor from ND point
1896			 * of view, regardless of the value of nd_ifinfo.flags.
1897			 * The second condition is a bit tricky; we skip
1898			 * if the gateway is our own address, which is
1899			 * sometimes used to install a route to a p2p link.
1900			 */
1901			if (!nd6_is_addr_neighbor(gw6, ifp) ||
1902			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
1903				/*
1904				 * We allow this kind of tricky route only
1905				 * when the outgoing interface is p2p.
1906				 * XXX: we may need a more generic rule here.
1907				 */
1908				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1909					senderr(EHOSTUNREACH);
1910
1911				goto sendpkt;
1912			}
1913
1914			if (rt->rt_gwroute == 0)
1915				goto lookup;
1916			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
1917				rtfree(rt); rt = rt0;
1918			lookup:
1919				rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
1920				if ((rt = rt->rt_gwroute) == 0)
1921					senderr(EHOSTUNREACH);
1922				RT_UNLOCK(rt);
1923			}
1924		}
1925	}
1926
1927	/*
1928	 * Address resolution or Neighbor Unreachability Detection
1929	 * for the next hop.
1930	 * At this point, the destination of the packet must be a unicast
1931	 * or an anycast address(i.e. not a multicast).
1932	 */
1933
1934	/* Look up the neighbor cache for the nexthop */
1935	if (rt && (rt->rt_flags & RTF_LLINFO) != 0)
1936		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1937	else {
1938		/*
1939		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
1940		 * the condition below is not very efficient.  But we believe
1941		 * it is tolerable, because this should be a rare case.
1942		 */
1943		if (nd6_is_addr_neighbor(dst, ifp) &&
1944		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
1945			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1946	}
1947	if (!ln || !rt) {
1948		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
1949		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
1950			log(LOG_DEBUG,
1951			    "nd6_output: can't allocate llinfo for %s "
1952			    "(ln=%p, rt=%p)\n",
1953			    ip6_sprintf(&dst->sin6_addr), ln, rt);
1954			senderr(EIO);	/* XXX: good error? */
1955		}
1956
1957		goto sendpkt;	/* send anyway */
1958	}
1959
1960	/* We don't have to do link-layer address resolution on a p2p link. */
1961	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
1962	    ln->ln_state < ND6_LLINFO_REACHABLE) {
1963		ln->ln_state = ND6_LLINFO_STALE;
1964		ln->ln_expire = time_second + nd6_gctimer;
1965	}
1966
1967	/*
1968	 * The first time we send a packet to a neighbor whose entry is
1969	 * STALE, we have to change the state to DELAY and a sets a timer to
1970	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
1971	 * neighbor unreachability detection on expiration.
1972	 * (RFC 2461 7.3.3)
1973	 */
1974	if (ln->ln_state == ND6_LLINFO_STALE) {
1975		ln->ln_asked = 0;
1976		ln->ln_state = ND6_LLINFO_DELAY;
1977		ln->ln_expire = time_second + nd6_delay;
1978	}
1979
1980	/*
1981	 * If the neighbor cache entry has a state other than INCOMPLETE
1982	 * (i.e. its link-layer address is already resolved), just
1983	 * send the packet.
1984	 */
1985	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
1986		goto sendpkt;
1987
1988	/*
1989	 * There is a neighbor cache entry, but no ethernet address
1990	 * response yet.  Replace the held mbuf (if any) with this
1991	 * latest one.
1992	 *
1993	 * This code conforms to the rate-limiting rule described in Section
1994	 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
1995	 * an NS below.
1996	 */
1997	if (ln->ln_state == ND6_LLINFO_NOSTATE)
1998		ln->ln_state = ND6_LLINFO_INCOMPLETE;
1999	if (ln->ln_hold)
2000		m_freem(ln->ln_hold);
2001	ln->ln_hold = m;
2002	if (ln->ln_expire) {
2003		if (ln->ln_asked < nd6_mmaxtries &&
2004		    ln->ln_expire < time_second) {
2005			ln->ln_asked++;
2006			ln->ln_expire = time_second +
2007				nd_ifinfo[ifp->if_index].retrans / 1000;
2008			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
2009		}
2010	}
2011	return (0);
2012
2013  sendpkt:
2014
2015#ifdef MAC
2016	mac_create_mbuf_linklayer(ifp, m);
2017#endif
2018	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
2019		return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
2020		    rt));
2021	}
2022	return ((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt));
2023
2024  bad:
2025	if (m)
2026		m_freem(m);
2027	return (error);
2028}
2029#undef senderr
2030
2031int
2032nd6_need_cache(ifp)
2033	struct ifnet *ifp;
2034{
2035	/*
2036	 * XXX: we currently do not make neighbor cache on any interface
2037	 * other than ARCnet, Ethernet, FDDI and GIF.
2038	 *
2039	 * RFC2893 says:
2040	 * - unidirectional tunnels needs no ND
2041	 */
2042	switch (ifp->if_type) {
2043	case IFT_ARCNET:
2044	case IFT_ETHER:
2045	case IFT_FDDI:
2046	case IFT_IEEE1394:
2047#ifdef IFT_L2VLAN
2048	case IFT_L2VLAN:
2049#endif
2050#ifdef IFT_IEEE80211
2051	case IFT_IEEE80211:
2052#endif
2053	case IFT_GIF:		/* XXX need more cases? */
2054		return (1);
2055	default:
2056		return (0);
2057	}
2058}
2059
2060int
2061nd6_storelladdr(ifp, rt, m, dst, desten)
2062	struct ifnet *ifp;
2063	struct rtentry *rt;
2064	struct mbuf *m;
2065	struct sockaddr *dst;
2066	u_char *desten;
2067{
2068	int i;
2069	struct sockaddr_dl *sdl;
2070
2071	if (m->m_flags & M_MCAST) {
2072		switch (ifp->if_type) {
2073		case IFT_ETHER:
2074		case IFT_FDDI:
2075#ifdef IFT_L2VLAN
2076	case IFT_L2VLAN:
2077#endif
2078#ifdef IFT_IEEE80211
2079		case IFT_IEEE80211:
2080#endif
2081		case IFT_ISO88025:
2082			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2083						 desten);
2084			return (1);
2085		case IFT_IEEE1394:
2086			/*
2087			 * netbsd can use if_broadcastaddr, but we don't do so
2088			 * to reduce # of ifdef.
2089			 */
2090			for (i = 0; i < ifp->if_addrlen; i++)
2091				desten[i] = ~0;
2092			return (1);
2093		case IFT_ARCNET:
2094			*desten = 0;
2095			return (1);
2096		default:
2097			m_freem(m);
2098			return (0);
2099		}
2100	}
2101
2102	if (rt == NULL) {
2103		/* this could happen, if we could not allocate memory */
2104		m_freem(m);
2105		return (0);
2106	}
2107	if (rt->rt_gateway->sa_family != AF_LINK) {
2108		printf("nd6_storelladdr: something odd happens\n");
2109		m_freem(m);
2110		return (0);
2111	}
2112	sdl = SDL(rt->rt_gateway);
2113	if (sdl->sdl_alen == 0) {
2114		/* this should be impossible, but we bark here for debugging */
2115		printf("nd6_storelladdr: sdl_alen == 0\n");
2116		m_freem(m);
2117		return (0);
2118	}
2119
2120	bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
2121	return (1);
2122}
2123
2124static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
2125static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
2126#ifdef SYSCTL_DECL
2127SYSCTL_DECL(_net_inet6_icmp6);
2128#endif
2129SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2130	CTLFLAG_RD, nd6_sysctl_drlist, "");
2131SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
2132	CTLFLAG_RD, nd6_sysctl_prlist, "");
2133
2134static int
2135nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2136{
2137	int error;
2138	char buf[1024];
2139	struct in6_defrouter *d, *de;
2140	struct nd_defrouter *dr;
2141
2142	if (req->newptr)
2143		return EPERM;
2144	error = 0;
2145
2146	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
2147	     dr = TAILQ_NEXT(dr, dr_entry)) {
2148		d = (struct in6_defrouter *)buf;
2149		de = (struct in6_defrouter *)(buf + sizeof(buf));
2150
2151		if (d + 1 <= de) {
2152			bzero(d, sizeof(*d));
2153			d->rtaddr.sin6_family = AF_INET6;
2154			d->rtaddr.sin6_len = sizeof(d->rtaddr);
2155			if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
2156			    dr->ifp) != 0)
2157				log(LOG_ERR,
2158				    "scope error in "
2159				    "default router list (%s)\n",
2160				    ip6_sprintf(&dr->rtaddr));
2161			d->flags = dr->flags;
2162			d->rtlifetime = dr->rtlifetime;
2163			d->expire = dr->expire;
2164			d->if_index = dr->ifp->if_index;
2165		} else
2166			panic("buffer too short");
2167
2168		error = SYSCTL_OUT(req, buf, sizeof(*d));
2169		if (error)
2170			break;
2171	}
2172
2173	return (error);
2174}
2175
2176static int
2177nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2178{
2179	int error;
2180	char buf[1024];
2181	struct in6_prefix *p, *pe;
2182	struct nd_prefix *pr;
2183
2184	if (req->newptr)
2185		return EPERM;
2186	error = 0;
2187
2188	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2189		u_short advrtrs;
2190		size_t advance;
2191		struct sockaddr_in6 *sin6, *s6;
2192		struct nd_pfxrouter *pfr;
2193
2194		p = (struct in6_prefix *)buf;
2195		pe = (struct in6_prefix *)(buf + sizeof(buf));
2196
2197		if (p + 1 <= pe) {
2198			bzero(p, sizeof(*p));
2199			sin6 = (struct sockaddr_in6 *)(p + 1);
2200
2201			p->prefix = pr->ndpr_prefix;
2202			if (in6_recoverscope(&p->prefix,
2203			    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
2204				log(LOG_ERR,
2205				    "scope error in prefix list (%s)\n",
2206				    ip6_sprintf(&p->prefix.sin6_addr));
2207			p->raflags = pr->ndpr_raf;
2208			p->prefixlen = pr->ndpr_plen;
2209			p->vltime = pr->ndpr_vltime;
2210			p->pltime = pr->ndpr_pltime;
2211			p->if_index = pr->ndpr_ifp->if_index;
2212			p->expire = pr->ndpr_expire;
2213			p->refcnt = pr->ndpr_refcnt;
2214			p->flags = pr->ndpr_stateflags;
2215			p->origin = PR_ORIG_RA;
2216			advrtrs = 0;
2217			for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
2218			     pfr = pfr->pfr_next) {
2219				if ((void *)&sin6[advrtrs + 1] > (void *)pe) {
2220					advrtrs++;
2221					continue;
2222				}
2223				s6 = &sin6[advrtrs];
2224				bzero(s6, sizeof(*s6));
2225				s6->sin6_family = AF_INET6;
2226				s6->sin6_len = sizeof(*sin6);
2227				if (in6_recoverscope(s6, &pfr->router->rtaddr,
2228				    pfr->router->ifp) != 0)
2229					log(LOG_ERR,
2230					    "scope error in "
2231					    "prefix list (%s)\n",
2232					    ip6_sprintf(&pfr->router->rtaddr));
2233				advrtrs++;
2234			}
2235			p->advrtrs = advrtrs;
2236		} else
2237			panic("buffer too short");
2238
2239		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
2240		error = SYSCTL_OUT(req, buf, advance);
2241		if (error)
2242			break;
2243	}
2244
2245	return (error);
2246}
2247