nd6.c revision 95395
1/*	$FreeBSD: head/sys/netinet6/nd6.c 95395 2002-04-24 19:09:48Z ume $	*/
2/*	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * XXX
35 * KAME 970409 note:
36 * BSD/OS version heavily modifies this code, related to llinfo.
37 * Since we don't have BSD/OS version of net/route.c in our hand,
38 * I left the code mostly as it was in 970310.  -- itojun
39 */
40
41#include "opt_inet.h"
42#include "opt_inet6.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/callout.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/time.h>
52#include <sys/kernel.h>
53#include <sys/protosw.h>
54#include <sys/errno.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/sysctl.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_types.h>
62#include <net/if_atm.h>
63#include <net/fddi.h>
64#include <net/route.h>
65
66#include <netinet/in.h>
67#include <netinet/if_ether.h>
68#include <netinet6/in6_var.h>
69#include <netinet/ip6.h>
70#include <netinet6/ip6_var.h>
71#include <netinet6/nd6.h>
72#include <netinet6/in6_prefix.h>
73#include <netinet/icmp6.h>
74
75#include <net/net_osdep.h>
76
/* Period, in seconds, with which nd6_init() schedules nd6_slowtimo(). */
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

/* Shorthand casts from a generic sockaddr pointer. */
#define SIN6(s) ((struct sockaddr_in6 *)s)
#define SDL(s) ((struct sockaddr_dl *)s)

/* timer values */
int	nd6_prune	= 1;	/* nd6_timer() re-arms itself every nd6_prune seconds */
int	nd6_delay	= 5;	/* delay before the first probe, in seconds */
int	nd6_umaxtries	= 3;	/* maximum unicast query (PROBE state retries) */
int	nd6_mmaxtries	= 3;	/* maximum multicast query (INCOMPLETE state retries) */
int	nd6_useloopback = 1;	/* use loopback interface for local traffic */
int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: lifetime given to STALE entries */

/* preventing too many loops in ND option parsing */
int nd6_maxndopt = 10;	/* max # of ND options allowed */

/*
 * Max # of subsequent upper layer hints.  nd6_nud_hint() ignores a
 * non-forced hint once the per-entry hint counter exceeds this value, so
 * with the default of 0 every non-forced hint is ignored.
 */
int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */

/* Debug logging switch; defaults to on only when built with ND6_DEBUG. */
#ifdef ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/* for debugging? */
static int nd6_inuse, nd6_allocated;

/*
 * Head of the neighbor cache entry list.  It is initialized to point at
 * itself, and list walks in this file stop when they come back to
 * &llinfo_nd6.
 */
struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6};
/* Number of entries nd_ifinfo can hold; doubled on demand by nd6_ifattach(). */
static size_t nd_ifinfo_indexlim = 8;
/* Per-interface ND parameters, indexed by if_index; allocated lazily. */
struct nd_ifinfo *nd_ifinfo = NULL;
/* Default router list (TAILQ, initialized in nd6_init()). */
struct nd_drhead nd_defrouter;
/* Prefix list. */
struct nd_prhead nd_prefix = { 0 };

int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
/* All-ones IPv6 sockaddr, filled in by nd6_init(); used as a host netmask. */
static struct sockaddr_in6 all1_sa;

static void nd6_slowtimo __P((void *));
static int regen_tmpaddr __P((struct in6_ifaddr *));

/* Callout handles for the slow timer and the per-nd6_prune timer. */
struct callout nd6_slowtimo_ch;
struct callout nd6_timer_ch;
extern struct callout in6_tmpaddrtimer_ch;
120
121void
122nd6_init()
123{
124	static int nd6_init_done = 0;
125	int i;
126
127	if (nd6_init_done) {
128		log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
129		return;
130	}
131
132	all1_sa.sin6_family = AF_INET6;
133	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
134	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
135		all1_sa.sin6_addr.s6_addr[i] = 0xff;
136
137	/* initialization of the default router list */
138	TAILQ_INIT(&nd_defrouter);
139
140	nd6_init_done = 1;
141
142	/* start timer */
143	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
144	    nd6_slowtimo, NULL);
145}
146
147void
148nd6_ifattach(ifp)
149	struct ifnet *ifp;
150{
151
152	/*
153	 * We have some arrays that should be indexed by if_index.
154	 * since if_index will grow dynamically, they should grow too.
155	 */
156	if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
157		size_t n;
158		caddr_t q;
159
160		while (if_index >= nd_ifinfo_indexlim)
161			nd_ifinfo_indexlim <<= 1;
162
163		/* grow nd_ifinfo */
164		n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo);
165		q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK);
166		bzero(q, n);
167		if (nd_ifinfo) {
168			bcopy((caddr_t)nd_ifinfo, q, n/2);
169			free((caddr_t)nd_ifinfo, M_IP6NDP);
170		}
171		nd_ifinfo = (struct nd_ifinfo *)q;
172	}
173
174#define ND nd_ifinfo[ifp->if_index]
175
176	/*
177	 * Don't initialize if called twice.
178	 * XXX: to detect this, we should choose a member that is never set
179	 * before initialization of the ND structure itself.  We formaly used
180	 * the linkmtu member, which was not suitable because it could be
181	 * initialized via "ifconfig mtu".
182	 */
183	if (ND.basereachable)
184		return;
185
186	ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu;
187	ND.chlim = IPV6_DEFHLIM;
188	ND.basereachable = REACHABLE_TIME;
189	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
190	ND.retrans = RETRANS_TIMER;
191	ND.receivedra = 0;
192	ND.flags = ND6_IFF_PERFORMNUD;
193	nd6_setmtu(ifp);
194#undef ND
195}
196
197/*
198 * Reset ND level link MTU. This function is called when the physical MTU
199 * changes, which means we might have to adjust the ND level MTU.
200 */
201void
202nd6_setmtu(ifp)
203	struct ifnet *ifp;
204{
205#define MIN(a,b) ((a) < (b) ? (a) : (b))
206	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
207	u_long oldmaxmtu = ndi->maxmtu;
208	u_long oldlinkmtu = ndi->linkmtu;
209
210	switch (ifp->if_type) {
211	case IFT_ARCNET:	/* XXX MTU handling needs more work */
212		ndi->maxmtu = MIN(60480, ifp->if_mtu);
213		break;
214	case IFT_ETHER:
215		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
216		break;
217	case IFT_FDDI:
218		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu);
219		break;
220	case IFT_ATM:
221		ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu);
222		break;
223	case IFT_IEEE1394:	/* XXX should be IEEE1394MTU(1500) */
224		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
225		break;
226#ifdef IFT_IEEE80211
227	case IFT_IEEE80211:	/* XXX should be IEEE80211MTU(1500) */
228		ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu);
229		break;
230#endif
231	default:
232		ndi->maxmtu = ifp->if_mtu;
233		break;
234	}
235
236	if (oldmaxmtu != ndi->maxmtu) {
237		/*
238		 * If the ND level MTU is not set yet, or if the maxmtu
239		 * is reset to a smaller value than the ND level MTU,
240		 * also reset the ND level MTU.
241		 */
242		if (ndi->linkmtu == 0 ||
243		    ndi->maxmtu < ndi->linkmtu) {
244			ndi->linkmtu = ndi->maxmtu;
245			/* also adjust in6_maxmtu if necessary. */
246			if (oldlinkmtu == 0) {
247				/*
248				 * XXX: the case analysis is grotty, but
249				 * it is not efficient to call in6_setmaxmtu()
250				 * here when we are during the initialization
251				 * procedure.
252				 */
253				if (in6_maxmtu < ndi->linkmtu)
254					in6_maxmtu = ndi->linkmtu;
255			} else
256				in6_setmaxmtu();
257		}
258	}
259#undef MIN
260}
261
262void
263nd6_option_init(opt, icmp6len, ndopts)
264	void *opt;
265	int icmp6len;
266	union nd_opts *ndopts;
267{
268	bzero(ndopts, sizeof(*ndopts));
269	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
270	ndopts->nd_opts_last
271		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
272
273	if (icmp6len == 0) {
274		ndopts->nd_opts_done = 1;
275		ndopts->nd_opts_search = NULL;
276	}
277}
278
279/*
280 * Take one ND option.
281 */
282struct nd_opt_hdr *
283nd6_option(ndopts)
284	union nd_opts *ndopts;
285{
286	struct nd_opt_hdr *nd_opt;
287	int olen;
288
289	if (!ndopts)
290		panic("ndopts == NULL in nd6_option\n");
291	if (!ndopts->nd_opts_last)
292		panic("uninitialized ndopts in nd6_option\n");
293	if (!ndopts->nd_opts_search)
294		return NULL;
295	if (ndopts->nd_opts_done)
296		return NULL;
297
298	nd_opt = ndopts->nd_opts_search;
299
300	/* make sure nd_opt_len is inside the buffer */
301	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
302		bzero(ndopts, sizeof(*ndopts));
303		return NULL;
304	}
305
306	olen = nd_opt->nd_opt_len << 3;
307	if (olen == 0) {
308		/*
309		 * Message validation requires that all included
310		 * options have a length that is greater than zero.
311		 */
312		bzero(ndopts, sizeof(*ndopts));
313		return NULL;
314	}
315
316	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
317	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
318		/* option overruns the end of buffer, invalid */
319		bzero(ndopts, sizeof(*ndopts));
320		return NULL;
321	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
322		/* reached the end of options chain */
323		ndopts->nd_opts_done = 1;
324		ndopts->nd_opts_search = NULL;
325	}
326	return nd_opt;
327}
328
329/*
330 * Parse multiple ND options.
331 * This function is much easier to use, for ND routines that do not need
332 * multiple options of the same type.
333 */
334int
335nd6_options(ndopts)
336	union nd_opts *ndopts;
337{
338	struct nd_opt_hdr *nd_opt;
339	int i = 0;
340
341	if (!ndopts)
342		panic("ndopts == NULL in nd6_options\n");
343	if (!ndopts->nd_opts_last)
344		panic("uninitialized ndopts in nd6_options\n");
345	if (!ndopts->nd_opts_search)
346		return 0;
347
348	while (1) {
349		nd_opt = nd6_option(ndopts);
350		if (!nd_opt && !ndopts->nd_opts_last) {
351			/*
352			 * Message validation requires that all included
353			 * options have a length that is greater than zero.
354			 */
355			icmp6stat.icp6s_nd_badopt++;
356			bzero(ndopts, sizeof(*ndopts));
357			return -1;
358		}
359
360		if (!nd_opt)
361			goto skip1;
362
363		switch (nd_opt->nd_opt_type) {
364		case ND_OPT_SOURCE_LINKADDR:
365		case ND_OPT_TARGET_LINKADDR:
366		case ND_OPT_MTU:
367		case ND_OPT_REDIRECTED_HEADER:
368			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
369				nd6log((LOG_INFO,
370				    "duplicated ND6 option found (type=%d)\n",
371				    nd_opt->nd_opt_type));
372				/* XXX bark? */
373			} else {
374				ndopts->nd_opt_array[nd_opt->nd_opt_type]
375					= nd_opt;
376			}
377			break;
378		case ND_OPT_PREFIX_INFORMATION:
379			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
380				ndopts->nd_opt_array[nd_opt->nd_opt_type]
381					= nd_opt;
382			}
383			ndopts->nd_opts_pi_end =
384				(struct nd_opt_prefix_info *)nd_opt;
385			break;
386		default:
387			/*
388			 * Unknown options must be silently ignored,
389			 * to accomodate future extension to the protocol.
390			 */
391			nd6log((LOG_DEBUG,
392			    "nd6_options: unsupported option %d - "
393			    "option ignored\n", nd_opt->nd_opt_type));
394		}
395
396skip1:
397		i++;
398		if (i > nd6_maxndopt) {
399			icmp6stat.icp6s_nd_toomanyopt++;
400			nd6log((LOG_INFO, "too many loop in nd opt\n"));
401			break;
402		}
403
404		if (ndopts->nd_opts_done)
405			break;
406	}
407
408	return 0;
409}
410
411/*
412 * ND6 timer routine to expire default route list and prefix list
413 */
414void
415nd6_timer(ignored_arg)
416	void	*ignored_arg;
417{
418	int s;
419	struct llinfo_nd6 *ln;
420	struct nd_defrouter *dr;
421	struct nd_prefix *pr;
422	struct ifnet *ifp;
423	struct in6_ifaddr *ia6, *nia6;
424	struct in6_addrlifetime *lt6;
425
426	s = splnet();
427	callout_reset(&nd6_timer_ch, nd6_prune * hz,
428		      nd6_timer, NULL);
429
430	ln = llinfo_nd6.ln_next;
431	while (ln && ln != &llinfo_nd6) {
432		struct rtentry *rt;
433		struct sockaddr_in6 *dst;
434		struct llinfo_nd6 *next = ln->ln_next;
435		/* XXX: used for the DELAY case only: */
436		struct nd_ifinfo *ndi = NULL;
437
438		if ((rt = ln->ln_rt) == NULL) {
439			ln = next;
440			continue;
441		}
442		if ((ifp = rt->rt_ifp) == NULL) {
443			ln = next;
444			continue;
445		}
446		ndi = &nd_ifinfo[ifp->if_index];
447		dst = (struct sockaddr_in6 *)rt_key(rt);
448
449		if (ln->ln_expire > time_second) {
450			ln = next;
451			continue;
452		}
453
454		/* sanity check */
455		if (!rt)
456			panic("rt=0 in nd6_timer(ln=%p)\n", ln);
457		if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
458			panic("rt_llinfo(%p) is not equal to ln(%p)\n",
459			      rt->rt_llinfo, ln);
460		if (!dst)
461			panic("dst=0 in nd6_timer(ln=%p)\n", ln);
462
463		switch (ln->ln_state) {
464		case ND6_LLINFO_INCOMPLETE:
465			if (ln->ln_asked < nd6_mmaxtries) {
466				ln->ln_asked++;
467				ln->ln_expire = time_second +
468					nd_ifinfo[ifp->if_index].retrans / 1000;
469				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
470					ln, 0);
471			} else {
472				struct mbuf *m = ln->ln_hold;
473				if (m) {
474					if (rt->rt_ifp) {
475						/*
476						 * Fake rcvif to make ICMP error
477						 * more helpful in diagnosing
478						 * for the receiver.
479						 * XXX: should we consider
480						 * older rcvif?
481						 */
482						m->m_pkthdr.rcvif = rt->rt_ifp;
483					}
484					icmp6_error(m, ICMP6_DST_UNREACH,
485						    ICMP6_DST_UNREACH_ADDR, 0);
486					ln->ln_hold = NULL;
487				}
488				next = nd6_free(rt);
489			}
490			break;
491		case ND6_LLINFO_REACHABLE:
492			if (ln->ln_expire) {
493				ln->ln_state = ND6_LLINFO_STALE;
494				ln->ln_expire = time_second + nd6_gctimer;
495			}
496			break;
497
498		case ND6_LLINFO_STALE:
499			/* Garbage Collection(RFC 2461 5.3) */
500			if (ln->ln_expire)
501				next = nd6_free(rt);
502			break;
503
504		case ND6_LLINFO_DELAY:
505			if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
506				/* We need NUD */
507				ln->ln_asked = 1;
508				ln->ln_state = ND6_LLINFO_PROBE;
509				ln->ln_expire = time_second +
510					ndi->retrans / 1000;
511				nd6_ns_output(ifp, &dst->sin6_addr,
512					      &dst->sin6_addr,
513					      ln, 0);
514			} else {
515				ln->ln_state = ND6_LLINFO_STALE; /* XXX */
516				ln->ln_expire = time_second + nd6_gctimer;
517			}
518			break;
519		case ND6_LLINFO_PROBE:
520			if (ln->ln_asked < nd6_umaxtries) {
521				ln->ln_asked++;
522				ln->ln_expire = time_second +
523					nd_ifinfo[ifp->if_index].retrans / 1000;
524				nd6_ns_output(ifp, &dst->sin6_addr,
525					       &dst->sin6_addr, ln, 0);
526			} else {
527				next = nd6_free(rt);
528			}
529			break;
530		}
531		ln = next;
532	}
533
534	/* expire default router list */
535	dr = TAILQ_FIRST(&nd_defrouter);
536	while (dr) {
537		if (dr->expire && dr->expire < time_second) {
538			struct nd_defrouter *t;
539			t = TAILQ_NEXT(dr, dr_entry);
540			defrtrlist_del(dr);
541			dr = t;
542		} else {
543			dr = TAILQ_NEXT(dr, dr_entry);
544		}
545	}
546
547	/*
548	 * expire interface addresses.
549	 * in the past the loop was inside prefix expiry processing.
550	 * However, from a stricter speci-confrmance standpoint, we should
551	 * rather separate address lifetimes and prefix lifetimes.
552	 */
553  addrloop:
554	for (ia6 = in6_ifaddr; ia6; ia6 = nia6) {
555		nia6 = ia6->ia_next;
556		/* check address lifetime */
557		lt6 = &ia6->ia6_lifetime;
558		if (IFA6_IS_INVALID(ia6)) {
559			int regen = 0;
560
561			/*
562			 * If the expiring address is temporary, try
563			 * regenerating a new one.  This would be useful when
564			 * we suspended a laptop PC, then turned it on after a
565			 * period that could invalidate all temporary
566			 * addresses.  Although we may have to restart the
567			 * loop (see below), it must be after purging the
568			 * address.  Otherwise, we'd see an infinite loop of
569			 * regeneration.
570			 */
571			if (ip6_use_tempaddr &&
572			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
573				if (regen_tmpaddr(ia6) == 0)
574					regen = 1;
575			}
576
577			in6_purgeaddr(&ia6->ia_ifa);
578
579			if (regen)
580				goto addrloop; /* XXX: see below */
581		}
582		if (IFA6_IS_DEPRECATED(ia6)) {
583			int oldflags = ia6->ia6_flags;
584
585			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
586
587			/*
588			 * If a temporary address has just become deprecated,
589			 * regenerate a new one if possible.
590			 */
591			if (ip6_use_tempaddr &&
592			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
593			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
594
595				if (regen_tmpaddr(ia6) == 0) {
596					/*
597					 * A new temporary address is
598					 * generated.
599					 * XXX: this means the address chain
600					 * has changed while we are still in
601					 * the loop.  Although the change
602					 * would not cause disaster (because
603					 * it's not a deletion, but an
604					 * addition,) we'd rather restart the
605					 * loop just for safety.  Or does this
606					 * significantly reduce performance??
607					 */
608					goto addrloop;
609				}
610			}
611		} else {
612			/*
613			 * A new RA might have made a deprecated address
614			 * preferred.
615			 */
616			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
617		}
618	}
619
620	/* expire prefix list */
621	pr = nd_prefix.lh_first;
622	while (pr) {
623		/*
624		 * check prefix lifetime.
625		 * since pltime is just for autoconf, pltime processing for
626		 * prefix is not necessary.
627		 */
628		if (pr->ndpr_expire && pr->ndpr_expire < time_second) {
629			struct nd_prefix *t;
630			t = pr->ndpr_next;
631
632			/*
633			 * address expiration and prefix expiration are
634			 * separate.  NEVER perform in6_purgeaddr here.
635			 */
636
637			prelist_remove(pr);
638			pr = t;
639		} else
640			pr = pr->ndpr_next;
641	}
642	splx(s);
643}
644
645static int
646regen_tmpaddr(ia6)
647	struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
648{
649	struct ifaddr *ifa;
650	struct ifnet *ifp;
651	struct in6_ifaddr *public_ifa6 = NULL;
652
653	ifp = ia6->ia_ifa.ifa_ifp;
654	for (ifa = ifp->if_addrlist.tqh_first; ifa;
655	     ifa = ifa->ifa_list.tqe_next)
656	{
657		struct in6_ifaddr *it6;
658
659		if (ifa->ifa_addr->sa_family != AF_INET6)
660			continue;
661
662		it6 = (struct in6_ifaddr *)ifa;
663
664		/* ignore no autoconf addresses. */
665		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
666			continue;
667
668		/* ignore autoconf addresses with different prefixes. */
669		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
670			continue;
671
672		/*
673		 * Now we are looking at an autoconf address with the same
674		 * prefix as ours.  If the address is temporary and is still
675		 * preferred, do not create another one.  It would be rare, but
676		 * could happen, for example, when we resume a laptop PC after
677		 * a long period.
678		 */
679		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
680		    !IFA6_IS_DEPRECATED(it6)) {
681			public_ifa6 = NULL;
682			break;
683		}
684
685		/*
686		 * This is a public autoconf address that has the same prefix
687		 * as ours.  If it is preferred, keep it.  We can't break the
688		 * loop here, because there may be a still-preferred temporary
689		 * address with the prefix.
690		 */
691		if (!IFA6_IS_DEPRECATED(it6))
692		    public_ifa6 = it6;
693	}
694
695	if (public_ifa6 != NULL) {
696		int e;
697
698		if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
699			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
700			    " tmp addr,errno=%d\n", e);
701			return(-1);
702		}
703		return(0);
704	}
705
706	return(-1);
707}
708
709/*
710 * Nuke neighbor cache/prefix/default router management table, right before
711 * ifp goes away.
712 */
713void
714nd6_purge(ifp)
715	struct ifnet *ifp;
716{
717	struct llinfo_nd6 *ln, *nln;
718	struct nd_defrouter *dr, *ndr, drany;
719	struct nd_prefix *pr, *npr;
720
721	/* Nuke default router list entries toward ifp */
722	if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
723		/*
724		 * The first entry of the list may be stored in
725		 * the routing table, so we'll delete it later.
726		 */
727		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
728			ndr = TAILQ_NEXT(dr, dr_entry);
729			if (dr->ifp == ifp)
730				defrtrlist_del(dr);
731		}
732		dr = TAILQ_FIRST(&nd_defrouter);
733		if (dr->ifp == ifp)
734			defrtrlist_del(dr);
735	}
736
737	/* Nuke prefix list entries toward ifp */
738	for (pr = nd_prefix.lh_first; pr; pr = npr) {
739		npr = pr->ndpr_next;
740		if (pr->ndpr_ifp == ifp) {
741			/*
742			 * Previously, pr->ndpr_addr is removed as well,
743			 * but I strongly believe we don't have to do it.
744			 * nd6_purge() is only called from in6_ifdetach(),
745			 * which removes all the associated interface addresses
746			 * by itself.
747			 * (jinmei@kame.net 20010129)
748			 */
749			prelist_remove(pr);
750		}
751	}
752
753	/* cancel default outgoing interface setting */
754	if (nd6_defifindex == ifp->if_index)
755		nd6_setdefaultiface(0);
756
757	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
758		/* refresh default router list */
759		bzero(&drany, sizeof(drany));
760		defrouter_delreq(&drany, 0);
761		defrouter_select();
762	}
763
764	/*
765	 * Nuke neighbor cache entries for the ifp.
766	 * Note that rt->rt_ifp may not be the same as ifp,
767	 * due to KAME goto ours hack.  See RTM_RESOLVE case in
768	 * nd6_rtrequest(), and ip6_input().
769	 */
770	ln = llinfo_nd6.ln_next;
771	while (ln && ln != &llinfo_nd6) {
772		struct rtentry *rt;
773		struct sockaddr_dl *sdl;
774
775		nln = ln->ln_next;
776		rt = ln->ln_rt;
777		if (rt && rt->rt_gateway &&
778		    rt->rt_gateway->sa_family == AF_LINK) {
779			sdl = (struct sockaddr_dl *)rt->rt_gateway;
780			if (sdl->sdl_index == ifp->if_index)
781				nln = nd6_free(rt);
782		}
783		ln = nln;
784	}
785}
786
787struct rtentry *
788nd6_lookup(addr6, create, ifp)
789	struct in6_addr *addr6;
790	int create;
791	struct ifnet *ifp;
792{
793	struct rtentry *rt;
794	struct sockaddr_in6 sin6;
795
796	bzero(&sin6, sizeof(sin6));
797	sin6.sin6_len = sizeof(struct sockaddr_in6);
798	sin6.sin6_family = AF_INET6;
799	sin6.sin6_addr = *addr6;
800#ifdef SCOPEDROUTING
801	sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
802#endif
803	rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL);
804	if (rt && (rt->rt_flags & RTF_LLINFO) == 0) {
805		/*
806		 * This is the case for the default route.
807		 * If we want to create a neighbor cache for the address, we
808		 * should free the route for the destination and allocate an
809		 * interface route.
810		 */
811		if (create) {
812			RTFREE(rt);
813			rt = 0;
814		}
815	}
816	if (!rt) {
817		if (create && ifp) {
818			int e;
819
820			/*
821			 * If no route is available and create is set,
822			 * we allocate a host route for the destination
823			 * and treat it like an interface route.
824			 * This hack is necessary for a neighbor which can't
825			 * be covered by our own prefix.
826			 */
827			struct ifaddr *ifa =
828				ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
829			if (ifa == NULL)
830				return(NULL);
831
832			/*
833			 * Create a new route.  RTF_LLINFO is necessary
834			 * to create a Neighbor Cache entry for the
835			 * destination in nd6_rtrequest which will be
836			 * called in rtrequest via ifa->ifa_rtrequest.
837			 */
838			if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6,
839					   ifa->ifa_addr,
840					   (struct sockaddr *)&all1_sa,
841					   (ifa->ifa_flags |
842					    RTF_HOST | RTF_LLINFO) &
843					   ~RTF_CLONING,
844					   &rt)) != 0)
845				log(LOG_ERR,
846				    "nd6_lookup: failed to add route for a "
847				    "neighbor(%s), errno=%d\n",
848				    ip6_sprintf(addr6), e);
849			if (rt == NULL)
850				return(NULL);
851			if (rt->rt_llinfo) {
852				struct llinfo_nd6 *ln =
853					(struct llinfo_nd6 *)rt->rt_llinfo;
854				ln->ln_state = ND6_LLINFO_NOSTATE;
855			}
856		} else
857			return(NULL);
858	}
859	rt->rt_refcnt--;
860	/*
861	 * Validation for the entry.
862	 * Note that the check for rt_llinfo is necessary because a cloned
863	 * route from a parent route that has the L flag (e.g. the default
864	 * route to a p2p interface) may have the flag, too, while the
865	 * destination is not actually a neighbor.
866	 * XXX: we can't use rt->rt_ifp to check for the interface, since
867	 *      it might be the loopback interface if the entry is for our
868	 *      own address on a non-loopback interface. Instead, we should
869	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
870	 *      interface.
871	 */
872	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
873	    rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
874	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
875		if (create) {
876			log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n",
877			    ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec");
878			/* xxx more logs... kazu */
879		}
880		return(NULL);
881	}
882	return(rt);
883}
884
885/*
886 * Detect if a given IPv6 address identifies a neighbor on a given link.
887 * XXX: should take care of the destination of a p2p link?
888 */
889int
890nd6_is_addr_neighbor(addr, ifp)
891	struct sockaddr_in6 *addr;
892	struct ifnet *ifp;
893{
894	struct ifaddr *ifa;
895	int i;
896
897#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
898#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
899
900	/*
901	 * A link-local address is always a neighbor.
902	 * XXX: we should use the sin6_scope_id field rather than the embedded
903	 * interface index.
904	 */
905	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
906	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
907		return(1);
908
909	/*
910	 * If the address matches one of our addresses,
911	 * it should be a neighbor.
912	 */
913	for (ifa = ifp->if_addrlist.tqh_first;
914	     ifa;
915	     ifa = ifa->ifa_list.tqe_next)
916	{
917		if (ifa->ifa_addr->sa_family != AF_INET6)
918			next: continue;
919
920		for (i = 0; i < 4; i++) {
921			if ((IFADDR6(ifa).s6_addr32[i] ^
922			     addr->sin6_addr.s6_addr32[i]) &
923			    IFMASK6(ifa).s6_addr32[i])
924				goto next;
925		}
926		return(1);
927	}
928
929	/*
930	 * Even if the address matches none of our addresses, it might be
931	 * in the neighbor cache.
932	 */
933	if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL)
934		return(1);
935
936	return(0);
937#undef IFADDR6
938#undef IFMASK6
939}
940
941/*
942 * Free an nd6 llinfo entry.
943 */
944struct llinfo_nd6 *
945nd6_free(rt)
946	struct rtentry *rt;
947{
948	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
949	struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
950	struct nd_defrouter *dr;
951
952	/*
953	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
954	 * even though it is not harmful, it was not really necessary.
955	 */
956
957	if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */
958		int s;
959		s = splnet();
960		dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
961				      rt->rt_ifp);
962
963		if (ln->ln_router || dr) {
964			/*
965			 * rt6_flush must be called whether or not the neighbor
966			 * is in the Default Router List.
967			 * See a corresponding comment in nd6_na_input().
968			 */
969			rt6_flush(&in6, rt->rt_ifp);
970		}
971
972		if (dr) {
973			/*
974			 * Unreachablity of a router might affect the default
975			 * router selection and on-link detection of advertised
976			 * prefixes.
977			 */
978
979			/*
980			 * Temporarily fake the state to choose a new default
981			 * router and to perform on-link determination of
982			 * prefixes correctly.
983			 * Below the state will be set correctly,
984			 * or the entry itself will be deleted.
985			 */
986			ln->ln_state = ND6_LLINFO_INCOMPLETE;
987
988			/*
989			 * Since defrouter_select() does not affect the
990			 * on-link determination and MIP6 needs the check
991			 * before the default router selection, we perform
992			 * the check now.
993			 */
994			pfxlist_onlink_check();
995
996			if (dr == TAILQ_FIRST(&nd_defrouter)) {
997				/*
998				 * It is used as the current default router,
999				 * so we have to move it to the end of the
1000				 * list and choose a new one.
1001				 * XXX: it is not very efficient if this is
1002				 *      the only router.
1003				 */
1004				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1005				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
1006
1007				defrouter_select();
1008			}
1009		}
1010		splx(s);
1011	}
1012
1013	/*
1014	 * Before deleting the entry, remember the next entry as the
1015	 * return value.  We need this because pfxlist_onlink_check() above
1016	 * might have freed other entries (particularly the old next entry) as
1017	 * a side effect (XXX).
1018	 */
1019	next = ln->ln_next;
1020
1021	/*
1022	 * Detach the route from the routing tree and the list of neighbor
1023	 * caches, and disable the route entry not to be used in already
1024	 * cached routes.
1025	 */
1026	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1027		  rt_mask(rt), 0, (struct rtentry **)0);
1028
1029	return(next);
1030}
1031
1032/*
1033 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1034 *
 * XXX cost-effective methods?
1036 */
1037void
1038nd6_nud_hint(rt, dst6, force)
1039	struct rtentry *rt;
1040	struct in6_addr *dst6;
1041	int force;
1042{
1043	struct llinfo_nd6 *ln;
1044
1045	/*
1046	 * If the caller specified "rt", use that.  Otherwise, resolve the
1047	 * routing table by supplied "dst6".
1048	 */
1049	if (!rt) {
1050		if (!dst6)
1051			return;
1052		if (!(rt = nd6_lookup(dst6, 0, NULL)))
1053			return;
1054	}
1055
1056	if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1057	    (rt->rt_flags & RTF_LLINFO) == 0 ||
1058	    !rt->rt_llinfo || !rt->rt_gateway ||
1059	    rt->rt_gateway->sa_family != AF_LINK) {
1060		/* This is not a host route. */
1061		return;
1062	}
1063
1064	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1065	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1066		return;
1067
1068	/*
1069	 * if we get upper-layer reachability confirmation many times,
1070	 * it is possible we have false information.
1071	 */
1072	if (!force) {
1073		ln->ln_byhint++;
1074		if (ln->ln_byhint > nd6_maxnudhint)
1075			return;
1076	}
1077
1078	ln->ln_state = ND6_LLINFO_REACHABLE;
1079	if (ln->ln_expire)
1080		ln->ln_expire = time_second +
1081			nd_ifinfo[rt->rt_ifp->if_index].reachable;
1082}
1083
1084void
1085nd6_rtrequest(req, rt, info)
1086	int	req;
1087	struct rtentry *rt;
1088	struct rt_addrinfo *info; /* xxx unused */
1089{
1090	struct sockaddr *gate = rt->rt_gateway;
1091	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1092	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1093	struct ifnet *ifp = rt->rt_ifp;
1094	struct ifaddr *ifa;
1095
1096	if ((rt->rt_flags & RTF_GATEWAY))
1097		return;
1098
1099	if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1100		/*
1101		 * This is probably an interface direct route for a link
1102		 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1103		 * We do not need special treatment below for such a route.
1104		 * Moreover, the RTF_LLINFO flag which would be set below
1105		 * would annoy the ndp(8) command.
1106		 */
1107		return;
1108	}
1109
1110	if (req == RTM_RESOLVE &&
1111	    (nd6_need_cache(ifp) == 0 || /* stf case */
1112	     !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) {
1113		/*
1114		 * FreeBSD and BSD/OS often make a cloned host route based
1115		 * on a less-specific route (e.g. the default route).
1116		 * If the less specific route does not have a "gateway"
1117		 * (this is the case when the route just goes to a p2p or an
1118		 * stf interface), we'll mistakenly make a neighbor cache for
1119		 * the host route, and will see strange neighbor solicitation
1120		 * for the corresponding destination.  In order to avoid the
1121		 * confusion, we check if the destination of the route is
1122		 * a neighbor in terms of neighbor discovery, and stop the
1123		 * process if not.  Additionally, we remove the LLINFO flag
1124		 * so that ndp(8) will not try to get the neighbor information
1125		 * of the destination.
1126		 */
1127		rt->rt_flags &= ~RTF_LLINFO;
1128		return;
1129	}
1130
1131	switch (req) {
1132	case RTM_ADD:
1133		/*
1134		 * There is no backward compatibility :)
1135		 *
1136		 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1137		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1138		 *	   rt->rt_flags |= RTF_CLONING;
1139		 */
1140		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
1141			/*
1142			 * Case 1: This route should come from
1143			 * a route to interface.  RTF_LLINFO flag is set
1144			 * for a host route whose destination should be
1145			 * treated as on-link.
1146			 */
1147			rt_setgate(rt, rt_key(rt),
1148				   (struct sockaddr *)&null_sdl);
1149			gate = rt->rt_gateway;
1150			SDL(gate)->sdl_type = ifp->if_type;
1151			SDL(gate)->sdl_index = ifp->if_index;
1152			if (ln)
1153				ln->ln_expire = time_second;
1154#if 1
1155			if (ln && ln->ln_expire == 0) {
1156				/* kludge for desktops */
1157#if 0
1158				printf("nd6_rtequest: time.tv_sec is zero; "
1159				       "treat it as 1\n");
1160#endif
1161				ln->ln_expire = 1;
1162			}
1163#endif
1164			if ((rt->rt_flags & RTF_CLONING))
1165				break;
1166		}
1167		/*
1168		 * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
1169		 * We don't do that here since llinfo is not ready yet.
1170		 *
1171		 * There are also couple of other things to be discussed:
1172		 * - unsolicited NA code needs improvement beforehand
1173		 * - RFC2461 says we MAY send multicast unsolicited NA
1174		 *   (7.2.6 paragraph 4), however, it also says that we
1175		 *   SHOULD provide a mechanism to prevent multicast NA storm.
1176		 *   we don't have anything like it right now.
1177		 *   note that the mechanism needs a mutual agreement
1178		 *   between proxies, which means that we need to implement
1179		 *   a new protocol, or a new kludge.
1180		 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1181		 *   we need to check ip6forwarding before sending it.
1182		 *   (or should we allow proxy ND configuration only for
1183		 *   routers?  there's no mention about proxy ND from hosts)
1184		 */
1185#if 0
1186		/* XXX it does not work */
1187		if (rt->rt_flags & RTF_ANNOUNCE)
1188			nd6_na_output(ifp,
1189			      &SIN6(rt_key(rt))->sin6_addr,
1190			      &SIN6(rt_key(rt))->sin6_addr,
1191			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
1192			      1, NULL);
1193#endif
1194		/* FALLTHROUGH */
1195	case RTM_RESOLVE:
1196		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1197			/*
1198			 * Address resolution isn't necessary for a point to
1199			 * point link, so we can skip this test for a p2p link.
1200			 */
1201			if (gate->sa_family != AF_LINK ||
1202			    gate->sa_len < sizeof(null_sdl)) {
1203				log(LOG_DEBUG,
1204				    "nd6_rtrequest: bad gateway value: %s\n",
1205				    if_name(ifp));
1206				break;
1207			}
1208			SDL(gate)->sdl_type = ifp->if_type;
1209			SDL(gate)->sdl_index = ifp->if_index;
1210		}
1211		if (ln != NULL)
1212			break;	/* This happens on a route change */
1213		/*
1214		 * Case 2: This route may come from cloning, or a manual route
1215		 * add with a LL address.
1216		 */
1217		R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln));
1218		rt->rt_llinfo = (caddr_t)ln;
1219		if (!ln) {
1220			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1221			break;
1222		}
1223		nd6_inuse++;
1224		nd6_allocated++;
1225		Bzero(ln, sizeof(*ln));
1226		ln->ln_rt = rt;
1227		/* this is required for "ndp" command. - shin */
1228		if (req == RTM_ADD) {
1229		        /*
1230			 * gate should have some valid AF_LINK entry,
1231			 * and ln->ln_expire should have some lifetime
1232			 * which is specified by ndp command.
1233			 */
1234			ln->ln_state = ND6_LLINFO_REACHABLE;
1235			ln->ln_byhint = 0;
1236		} else {
1237		        /*
1238			 * When req == RTM_RESOLVE, rt is created and
1239			 * initialized in rtrequest(), so rt_expire is 0.
1240			 */
1241			ln->ln_state = ND6_LLINFO_NOSTATE;
1242			ln->ln_expire = time_second;
1243		}
1244		rt->rt_flags |= RTF_LLINFO;
1245		ln->ln_next = llinfo_nd6.ln_next;
1246		llinfo_nd6.ln_next = ln;
1247		ln->ln_prev = &llinfo_nd6;
1248		ln->ln_next->ln_prev = ln;
1249
1250		/*
1251		 * check if rt_key(rt) is one of my address assigned
1252		 * to the interface.
1253		 */
1254		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
1255					  &SIN6(rt_key(rt))->sin6_addr);
1256		if (ifa) {
1257			caddr_t macp = nd6_ifptomac(ifp);
1258			ln->ln_expire = 0;
1259			ln->ln_state = ND6_LLINFO_REACHABLE;
1260			ln->ln_byhint = 0;
1261			if (macp) {
1262				Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
1263				SDL(gate)->sdl_alen = ifp->if_addrlen;
1264			}
1265			if (nd6_useloopback) {
1266				rt->rt_ifp = &loif[0];	/* XXX */
1267				/*
1268				 * Make sure rt_ifa be equal to the ifaddr
1269				 * corresponding to the address.
1270				 * We need this because when we refer
1271				 * rt_ifa->ia6_flags in ip6_input, we assume
1272				 * that the rt_ifa points to the address instead
1273				 * of the loopback address.
1274				 */
1275				if (ifa != rt->rt_ifa) {
1276					IFAFREE(rt->rt_ifa);
1277					IFAREF(ifa);
1278					rt->rt_ifa = ifa;
1279				}
1280			}
1281		} else if (rt->rt_flags & RTF_ANNOUNCE) {
1282			ln->ln_expire = 0;
1283			ln->ln_state = ND6_LLINFO_REACHABLE;
1284			ln->ln_byhint = 0;
1285
1286			/* join solicited node multicast for proxy ND */
1287			if (ifp->if_flags & IFF_MULTICAST) {
1288				struct in6_addr llsol;
1289				int error;
1290
1291				llsol = SIN6(rt_key(rt))->sin6_addr;
1292				llsol.s6_addr16[0] = htons(0xff02);
1293				llsol.s6_addr16[1] = htons(ifp->if_index);
1294				llsol.s6_addr32[1] = 0;
1295				llsol.s6_addr32[2] = htonl(1);
1296				llsol.s6_addr8[12] = 0xff;
1297
1298				if (!in6_addmulti(&llsol, ifp, &error)) {
1299					nd6log((LOG_ERR, "%s: failed to join "
1300					    "%s (errno=%d)\n", if_name(ifp),
1301					    ip6_sprintf(&llsol), error));
1302				}
1303			}
1304		}
1305		break;
1306
1307	case RTM_DELETE:
1308		if (!ln)
1309			break;
1310		/* leave from solicited node multicast for proxy ND */
1311		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1312		    (ifp->if_flags & IFF_MULTICAST) != 0) {
1313			struct in6_addr llsol;
1314			struct in6_multi *in6m;
1315
1316			llsol = SIN6(rt_key(rt))->sin6_addr;
1317			llsol.s6_addr16[0] = htons(0xff02);
1318			llsol.s6_addr16[1] = htons(ifp->if_index);
1319			llsol.s6_addr32[1] = 0;
1320			llsol.s6_addr32[2] = htonl(1);
1321			llsol.s6_addr8[12] = 0xff;
1322
1323			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
1324			if (in6m)
1325				in6_delmulti(in6m);
1326		}
1327		nd6_inuse--;
1328		ln->ln_next->ln_prev = ln->ln_prev;
1329		ln->ln_prev->ln_next = ln->ln_next;
1330		ln->ln_prev = NULL;
1331		rt->rt_llinfo = 0;
1332		rt->rt_flags &= ~RTF_LLINFO;
1333		if (ln->ln_hold)
1334			m_freem(ln->ln_hold);
1335		Free((caddr_t)ln);
1336	}
1337}
1338
1339int
1340nd6_ioctl(cmd, data, ifp)
1341	u_long cmd;
1342	caddr_t	data;
1343	struct ifnet *ifp;
1344{
1345	struct in6_drlist *drl = (struct in6_drlist *)data;
1346	struct in6_prlist *prl = (struct in6_prlist *)data;
1347	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1348	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1349	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1350	struct nd_defrouter *dr, any;
1351	struct nd_prefix *pr;
1352	struct rtentry *rt;
1353	int i = 0, error = 0;
1354	int s;
1355
1356	switch (cmd) {
1357	case SIOCGDRLST_IN6:
1358		/*
1359		 * obsolete API, use sysctl under net.inet6.icmp6
1360		 */
1361		bzero(drl, sizeof(*drl));
1362		s = splnet();
1363		dr = TAILQ_FIRST(&nd_defrouter);
1364		while (dr && i < DRLSTSIZ) {
1365			drl->defrouter[i].rtaddr = dr->rtaddr;
1366			if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) {
1367				/* XXX: need to this hack for KAME stack */
1368				drl->defrouter[i].rtaddr.s6_addr16[1] = 0;
1369			} else
1370				log(LOG_ERR,
1371				    "default router list contains a "
1372				    "non-linklocal address(%s)\n",
1373				    ip6_sprintf(&drl->defrouter[i].rtaddr));
1374
1375			drl->defrouter[i].flags = dr->flags;
1376			drl->defrouter[i].rtlifetime = dr->rtlifetime;
1377			drl->defrouter[i].expire = dr->expire;
1378			drl->defrouter[i].if_index = dr->ifp->if_index;
1379			i++;
1380			dr = TAILQ_NEXT(dr, dr_entry);
1381		}
1382		splx(s);
1383		break;
1384	case SIOCGPRLST_IN6:
1385		/*
1386		 * obsolete API, use sysctl under net.inet6.icmp6
1387		 */
1388		/*
1389		 * XXX meaning of fields, especialy "raflags", is very
1390		 * differnet between RA prefix list and RR/static prefix list.
1391		 * how about separating ioctls into two?
1392		 */
1393		bzero(prl, sizeof(*prl));
1394		s = splnet();
1395		pr = nd_prefix.lh_first;
1396		while (pr && i < PRLSTSIZ) {
1397			struct nd_pfxrouter *pfr;
1398			int j;
1399
1400			(void)in6_embedscope(&prl->prefix[i].prefix,
1401			    &pr->ndpr_prefix, NULL, NULL);
1402			prl->prefix[i].raflags = pr->ndpr_raf;
1403			prl->prefix[i].prefixlen = pr->ndpr_plen;
1404			prl->prefix[i].vltime = pr->ndpr_vltime;
1405			prl->prefix[i].pltime = pr->ndpr_pltime;
1406			prl->prefix[i].if_index = pr->ndpr_ifp->if_index;
1407			prl->prefix[i].expire = pr->ndpr_expire;
1408
1409			pfr = pr->ndpr_advrtrs.lh_first;
1410			j = 0;
1411			while (pfr) {
1412				if (j < DRLSTSIZ) {
1413#define RTRADDR prl->prefix[i].advrtr[j]
1414					RTRADDR = pfr->router->rtaddr;
1415					if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1416						/* XXX: hack for KAME */
1417						RTRADDR.s6_addr16[1] = 0;
1418					} else
1419						log(LOG_ERR,
1420						    "a router(%s) advertises "
1421						    "a prefix with "
1422						    "non-link local address\n",
1423						    ip6_sprintf(&RTRADDR));
1424#undef RTRADDR
1425				}
1426				j++;
1427				pfr = pfr->pfr_next;
1428			}
1429			prl->prefix[i].advrtrs = j;
1430			prl->prefix[i].origin = PR_ORIG_RA;
1431
1432			i++;
1433			pr = pr->ndpr_next;
1434		}
1435	      {
1436		struct rr_prefix *rpp;
1437
1438		for (rpp = LIST_FIRST(&rr_prefix); rpp;
1439		     rpp = LIST_NEXT(rpp, rp_entry)) {
1440			if (i >= PRLSTSIZ)
1441				break;
1442			(void)in6_embedscope(&prl->prefix[i].prefix,
1443			    &pr->ndpr_prefix, NULL, NULL);
1444			prl->prefix[i].raflags = rpp->rp_raf;
1445			prl->prefix[i].prefixlen = rpp->rp_plen;
1446			prl->prefix[i].vltime = rpp->rp_vltime;
1447			prl->prefix[i].pltime = rpp->rp_pltime;
1448			prl->prefix[i].if_index = rpp->rp_ifp->if_index;
1449			prl->prefix[i].expire = rpp->rp_expire;
1450			prl->prefix[i].advrtrs = 0;
1451			prl->prefix[i].origin = rpp->rp_origin;
1452			i++;
1453		}
1454	      }
1455		splx(s);
1456
1457		break;
1458	case OSIOCGIFINFO_IN6:
1459		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1460			error = EINVAL;
1461			break;
1462		}
1463		ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu;
1464		ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu;
1465		ndi->ndi.basereachable =
1466		    nd_ifinfo[ifp->if_index].basereachable;
1467		ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable;
1468		ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans;
1469		ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags;
1470		ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm;
1471		ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim;
1472		ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra;
1473		break;
1474	case SIOCGIFINFO_IN6:
1475		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1476			error = EINVAL;
1477			break;
1478		}
1479		ndi->ndi = nd_ifinfo[ifp->if_index];
1480		break;
1481	case SIOCSIFINFO_FLAGS:
1482		/* XXX: almost all other fields of ndi->ndi is unused */
1483		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
1484			error = EINVAL;
1485			break;
1486		}
1487		nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags;
1488		break;
1489	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1490		/* flush default router list */
1491		/*
1492		 * xxx sumikawa: should not delete route if default
1493		 * route equals to the top of default router list
1494		 */
1495		bzero(&any, sizeof(any));
1496		defrouter_delreq(&any, 0);
1497		defrouter_select();
1498		/* xxx sumikawa: flush prefix list */
1499		break;
1500	case SIOCSPFXFLUSH_IN6:
1501	    {
1502		/* flush all the prefix advertised by routers */
1503		struct nd_prefix *pr, *next;
1504
1505		s = splnet();
1506		for (pr = nd_prefix.lh_first; pr; pr = next) {
1507			struct in6_ifaddr *ia, *ia_next;
1508
1509			next = pr->ndpr_next;
1510
1511			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1512				continue; /* XXX */
1513
1514			/* do we really have to remove addresses as well? */
1515			for (ia = in6_ifaddr; ia; ia = ia_next) {
1516				/* ia might be removed.  keep the next ptr. */
1517				ia_next = ia->ia_next;
1518
1519				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1520					continue;
1521
1522				if (ia->ia6_ndpr == pr)
1523					in6_purgeaddr(&ia->ia_ifa);
1524			}
1525			prelist_remove(pr);
1526		}
1527		splx(s);
1528		break;
1529	    }
1530	case SIOCSRTRFLUSH_IN6:
1531	    {
1532		/* flush all the default routers */
1533		struct nd_defrouter *dr, *next;
1534
1535		s = splnet();
1536		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1537			/*
1538			 * The first entry of the list may be stored in
1539			 * the routing table, so we'll delete it later.
1540			 */
1541			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
1542				next = TAILQ_NEXT(dr, dr_entry);
1543				defrtrlist_del(dr);
1544			}
1545			defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
1546		}
1547		splx(s);
1548		break;
1549	    }
1550	case SIOCGNBRINFO_IN6:
1551	    {
1552		struct llinfo_nd6 *ln;
1553		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1554
1555		/*
1556		 * XXX: KAME specific hack for scoped addresses
1557		 *      XXXX: for other scopes than link-local?
1558		 */
1559		if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) ||
1560		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) {
1561			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
1562
1563			if (*idp == 0)
1564				*idp = htons(ifp->if_index);
1565		}
1566
1567		s = splnet();
1568		if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
1569			error = EINVAL;
1570			splx(s);
1571			break;
1572		}
1573		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1574		nbi->state = ln->ln_state;
1575		nbi->asked = ln->ln_asked;
1576		nbi->isrouter = ln->ln_router;
1577		nbi->expire = ln->ln_expire;
1578		splx(s);
1579
1580		break;
1581	    }
1582	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1583		ndif->ifindex = nd6_defifindex;
1584		break;
1585	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1586		return(nd6_setdefaultiface(ndif->ifindex));
1587		break;
1588	}
1589	return(error);
1590}
1591
1592/*
1593 * Create neighbor cache entry and cache link-layer address,
1594 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
1595 */
1596struct rtentry *
1597nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
1598	struct ifnet *ifp;
1599	struct in6_addr *from;
1600	char *lladdr;
1601	int lladdrlen;
1602	int type;	/* ICMP6 type */
1603	int code;	/* type dependent information */
1604{
1605	struct rtentry *rt = NULL;
1606	struct llinfo_nd6 *ln = NULL;
1607	int is_newentry;
1608	struct sockaddr_dl *sdl = NULL;
1609	int do_update;
1610	int olladdr;
1611	int llchange;
1612	int newstate = 0;
1613
1614	if (!ifp)
1615		panic("ifp == NULL in nd6_cache_lladdr");
1616	if (!from)
1617		panic("from == NULL in nd6_cache_lladdr");
1618
1619	/* nothing must be updated for unspecified address */
1620	if (IN6_IS_ADDR_UNSPECIFIED(from))
1621		return NULL;
1622
1623	/*
1624	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1625	 * the caller.
1626	 *
1627	 * XXX If the link does not have link-layer adderss, what should
1628	 * we do? (ifp->if_addrlen == 0)
1629	 * Spec says nothing in sections for RA, RS and NA.  There's small
1630	 * description on it in NS section (RFC 2461 7.2.3).
1631	 */
1632
1633	rt = nd6_lookup(from, 0, ifp);
1634	if (!rt) {
1635#if 0
1636		/* nothing must be done if there's no lladdr */
1637		if (!lladdr || !lladdrlen)
1638			return NULL;
1639#endif
1640
1641		rt = nd6_lookup(from, 1, ifp);
1642		is_newentry = 1;
1643	} else {
1644		/* do nothing if static ndp is set */
1645		if (rt->rt_flags & RTF_STATIC)
1646			return NULL;
1647		is_newentry = 0;
1648	}
1649
1650	if (!rt)
1651		return NULL;
1652	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
1653fail:
1654		(void)nd6_free(rt);
1655		return NULL;
1656	}
1657	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1658	if (!ln)
1659		goto fail;
1660	if (!rt->rt_gateway)
1661		goto fail;
1662	if (rt->rt_gateway->sa_family != AF_LINK)
1663		goto fail;
1664	sdl = SDL(rt->rt_gateway);
1665
1666	olladdr = (sdl->sdl_alen) ? 1 : 0;
1667	if (olladdr && lladdr) {
1668		if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
1669			llchange = 1;
1670		else
1671			llchange = 0;
1672	} else
1673		llchange = 0;
1674
1675	/*
1676	 * newentry olladdr  lladdr  llchange	(*=record)
1677	 *	0	n	n	--	(1)
1678	 *	0	y	n	--	(2)
1679	 *	0	n	y	--	(3) * STALE
1680	 *	0	y	y	n	(4) *
1681	 *	0	y	y	y	(5) * STALE
1682	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
1683	 *	1	--	y	--	(7) * STALE
1684	 */
1685
1686	if (lladdr) {		/* (3-5) and (7) */
1687		/*
1688		 * Record source link-layer address
1689		 * XXX is it dependent to ifp->if_type?
1690		 */
1691		sdl->sdl_alen = ifp->if_addrlen;
1692		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
1693	}
1694
1695	if (!is_newentry) {
1696		if ((!olladdr && lladdr)		/* (3) */
1697		 || (olladdr && lladdr && llchange)) {	/* (5) */
1698			do_update = 1;
1699			newstate = ND6_LLINFO_STALE;
1700		} else					/* (1-2,4) */
1701			do_update = 0;
1702	} else {
1703		do_update = 1;
1704		if (!lladdr)				/* (6) */
1705			newstate = ND6_LLINFO_NOSTATE;
1706		else					/* (7) */
1707			newstate = ND6_LLINFO_STALE;
1708	}
1709
1710	if (do_update) {
1711		/*
1712		 * Update the state of the neighbor cache.
1713		 */
1714		ln->ln_state = newstate;
1715
1716		if (ln->ln_state == ND6_LLINFO_STALE) {
1717			/*
1718			 * XXX: since nd6_output() below will cause
1719			 * state tansition to DELAY and reset the timer,
1720			 * we must set the timer now, although it is actually
1721			 * meaningless.
1722			 */
1723			ln->ln_expire = time_second + nd6_gctimer;
1724
1725			if (ln->ln_hold) {
1726				/*
1727				 * we assume ifp is not a p2p here, so just
1728				 * set the 2nd argument as the 1st one.
1729				 */
1730				nd6_output(ifp, ifp, ln->ln_hold,
1731					   (struct sockaddr_in6 *)rt_key(rt),
1732					   rt);
1733				ln->ln_hold = NULL;
1734			}
1735		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
1736			/* probe right away */
1737			ln->ln_expire = time_second;
1738		}
1739	}
1740
1741	/*
1742	 * ICMP6 type dependent behavior.
1743	 *
1744	 * NS: clear IsRouter if new entry
1745	 * RS: clear IsRouter
1746	 * RA: set IsRouter if there's lladdr
1747	 * redir: clear IsRouter if new entry
1748	 *
1749	 * RA case, (1):
1750	 * The spec says that we must set IsRouter in the following cases:
1751	 * - If lladdr exist, set IsRouter.  This means (1-5).
1752	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
1753	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
1754	 * A quetion arises for (1) case.  (1) case has no lladdr in the
1755	 * neighbor cache, this is similar to (6).
1756	 * This case is rare but we figured that we MUST NOT set IsRouter.
1757	 *
1758	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
1759	 *							D R
1760	 *	0	n	n	--	(1)	c   ?     s
1761	 *	0	y	n	--	(2)	c   s     s
1762	 *	0	n	y	--	(3)	c   s     s
1763	 *	0	y	y	n	(4)	c   s     s
1764	 *	0	y	y	y	(5)	c   s     s
1765	 *	1	--	n	--	(6) c	c 	c s
1766	 *	1	--	y	--	(7) c	c   s	c s
1767	 *
1768	 *					(c=clear s=set)
1769	 */
1770	switch (type & 0xff) {
1771	case ND_NEIGHBOR_SOLICIT:
1772		/*
1773		 * New entry must have is_router flag cleared.
1774		 */
1775		if (is_newentry)	/* (6-7) */
1776			ln->ln_router = 0;
1777		break;
1778	case ND_REDIRECT:
1779		/*
1780		 * If the icmp is a redirect to a better router, always set the
1781		 * is_router flag. Otherwise, if the entry is newly created,
1782		 * clear the flag. [RFC 2461, sec 8.3]
1783		 */
1784		if (code == ND_REDIRECT_ROUTER)
1785			ln->ln_router = 1;
1786		else if (is_newentry) /* (6-7) */
1787			ln->ln_router = 0;
1788		break;
1789	case ND_ROUTER_SOLICIT:
1790		/*
1791		 * is_router flag must always be cleared.
1792		 */
1793		ln->ln_router = 0;
1794		break;
1795	case ND_ROUTER_ADVERT:
1796		/*
1797		 * Mark an entry with lladdr as a router.
1798		 */
1799		if ((!is_newentry && (olladdr || lladdr))	/* (2-5) */
1800		 || (is_newentry && lladdr)) {			/* (7) */
1801			ln->ln_router = 1;
1802		}
1803		break;
1804	}
1805
1806	/*
1807	 * When the link-layer address of a router changes, select the
1808	 * best router again.  In particular, when the neighbor entry is newly
1809	 * created, it might affect the selection policy.
1810	 * Question: can we restrict the first condition to the "is_newentry"
1811	 * case?
1812	 * XXX: when we hear an RA from a new router with the link-layer
1813	 * address option, defrouter_select() is called twice, since
1814	 * defrtrlist_update called the function as well.  However, I believe
1815	 * we can compromise the overhead, since it only happens the first
1816	 * time.
1817	 * XXX: although defrouter_select() should not have a bad effect
1818	 * for those are not autoconfigured hosts, we explicitly avoid such
1819	 * cases for safety.
1820	 */
1821	if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv)
1822		defrouter_select();
1823
1824	return rt;
1825}
1826
1827static void
1828nd6_slowtimo(ignored_arg)
1829    void *ignored_arg;
1830{
1831	int s = splnet();
1832	int i;
1833	struct nd_ifinfo *nd6if;
1834
1835	callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1836	    nd6_slowtimo, NULL);
1837	for (i = 1; i < if_index + 1; i++) {
1838		if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
1839			continue;
1840		nd6if = &nd_ifinfo[i];
1841		if (nd6if->basereachable && /* already initialized */
1842		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1843			/*
1844			 * Since reachable time rarely changes by router
1845			 * advertisements, we SHOULD insure that a new random
1846			 * value gets recomputed at least once every few hours.
1847			 * (RFC 2461, 6.3.4)
1848			 */
1849			nd6if->recalctm = nd6_recalc_reachtm_interval;
1850			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1851		}
1852	}
1853	splx(s);
1854}
1855
1856#define senderr(e) { error = (e); goto bad;}
1857int
1858nd6_output(ifp, origifp, m0, dst, rt0)
1859	struct ifnet *ifp;
1860	struct ifnet *origifp;
1861	struct mbuf *m0;
1862	struct sockaddr_in6 *dst;
1863	struct rtentry *rt0;
1864{
1865	struct mbuf *m = m0;
1866	struct rtentry *rt = rt0;
1867	struct sockaddr_in6 *gw6 = NULL;
1868	struct llinfo_nd6 *ln = NULL;
1869	int error = 0;
1870
1871	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
1872		goto sendpkt;
1873
1874	if (nd6_need_cache(ifp) == 0)
1875		goto sendpkt;
1876
1877	/*
1878	 * next hop determination.  This routine is derived from ether_outpout.
1879	 */
1880	if (rt) {
1881		if ((rt->rt_flags & RTF_UP) == 0) {
1882			if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) !=
1883				NULL)
1884			{
1885				rt->rt_refcnt--;
1886				if (rt->rt_ifp != ifp) {
1887					/* XXX: loop care? */
1888					return nd6_output(ifp, origifp, m0,
1889							  dst, rt);
1890				}
1891			} else
1892				senderr(EHOSTUNREACH);
1893		}
1894
1895		if (rt->rt_flags & RTF_GATEWAY) {
1896			gw6 = (struct sockaddr_in6 *)rt->rt_gateway;
1897
1898			/*
1899			 * We skip link-layer address resolution and NUD
1900			 * if the gateway is not a neighbor from ND point
1901			 * of view, regardless of the value of nd_ifinfo.flags.
1902			 * The second condition is a bit tricky; we skip
1903			 * if the gateway is our own address, which is
1904			 * sometimes used to install a route to a p2p link.
1905			 */
1906			if (!nd6_is_addr_neighbor(gw6, ifp) ||
1907			    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
1908				/*
1909				 * We allow this kind of tricky route only
1910				 * when the outgoing interface is p2p.
1911				 * XXX: we may need a more generic rule here.
1912				 */
1913				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1914					senderr(EHOSTUNREACH);
1915
1916				goto sendpkt;
1917			}
1918
1919			if (rt->rt_gwroute == 0)
1920				goto lookup;
1921			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
1922				rtfree(rt); rt = rt0;
1923			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
1924				if ((rt = rt->rt_gwroute) == 0)
1925					senderr(EHOSTUNREACH);
1926			}
1927		}
1928	}
1929
1930	/*
1931	 * Address resolution or Neighbor Unreachability Detection
1932	 * for the next hop.
1933	 * At this point, the destination of the packet must be a unicast
1934	 * or an anycast address(i.e. not a multicast).
1935	 */
1936
1937	/* Look up the neighbor cache for the nexthop */
1938	if (rt && (rt->rt_flags & RTF_LLINFO) != 0)
1939		ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1940	else {
1941		/*
1942		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
1943		 * the condition below is not very efficient.  But we believe
1944		 * it is tolerable, because this should be a rare case.
1945		 */
1946		if (nd6_is_addr_neighbor(dst, ifp) &&
1947		    (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL)
1948			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
1949	}
1950	if (!ln || !rt) {
1951		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
1952		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
1953			log(LOG_DEBUG,
1954			    "nd6_output: can't allocate llinfo for %s "
1955			    "(ln=%p, rt=%p)\n",
1956			    ip6_sprintf(&dst->sin6_addr), ln, rt);
1957			senderr(EIO);	/* XXX: good error? */
1958		}
1959
1960		goto sendpkt;	/* send anyway */
1961	}
1962
1963	/* We don't have to do link-layer address resolution on a p2p link. */
1964	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
1965	    ln->ln_state < ND6_LLINFO_REACHABLE) {
1966		ln->ln_state = ND6_LLINFO_STALE;
1967		ln->ln_expire = time_second + nd6_gctimer;
1968	}
1969
1970	/*
1971	 * The first time we send a packet to a neighbor whose entry is
1972	 * STALE, we have to change the state to DELAY and a sets a timer to
1973	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
1974	 * neighbor unreachability detection on expiration.
1975	 * (RFC 2461 7.3.3)
1976	 */
1977	if (ln->ln_state == ND6_LLINFO_STALE) {
1978		ln->ln_asked = 0;
1979		ln->ln_state = ND6_LLINFO_DELAY;
1980		ln->ln_expire = time_second + nd6_delay;
1981	}
1982
1983	/*
1984	 * If the neighbor cache entry has a state other than INCOMPLETE
1985	 * (i.e. its link-layer address is already resolved), just
1986	 * send the packet.
1987	 */
1988	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
1989		goto sendpkt;
1990
1991	/*
1992	 * There is a neighbor cache entry, but no ethernet address
1993	 * response yet.  Replace the held mbuf (if any) with this
1994	 * latest one.
1995	 *
1996	 * This code conforms to the rate-limiting rule described in Section
1997	 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
1998	 * an NS below.
1999	 */
2000	if (ln->ln_state == ND6_LLINFO_NOSTATE)
2001		ln->ln_state = ND6_LLINFO_INCOMPLETE;
2002	if (ln->ln_hold)
2003		m_freem(ln->ln_hold);
2004	ln->ln_hold = m;
2005	if (ln->ln_expire) {
2006		if (ln->ln_asked < nd6_mmaxtries &&
2007		    ln->ln_expire < time_second) {
2008			ln->ln_asked++;
2009			ln->ln_expire = time_second +
2010				nd_ifinfo[ifp->if_index].retrans / 1000;
2011			nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
2012		}
2013	}
2014	return(0);
2015
2016  sendpkt:
2017
2018	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
2019		return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
2020					 rt));
2021	}
2022	return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt));
2023
2024  bad:
2025	if (m)
2026		m_freem(m);
2027	return (error);
2028}
2029#undef senderr
2030
2031int
2032nd6_need_cache(ifp)
2033	struct ifnet *ifp;
2034{
2035	/*
2036	 * XXX: we currently do not make neighbor cache on any interface
2037	 * other than ARCnet, Ethernet, FDDI and GIF.
2038	 *
2039	 * RFC2893 says:
2040	 * - unidirectional tunnels needs no ND
2041	 */
2042	switch (ifp->if_type) {
2043	case IFT_ARCNET:
2044	case IFT_ETHER:
2045	case IFT_FDDI:
2046	case IFT_IEEE1394:
2047#ifdef IFT_L2VLAN
2048	case IFT_L2VLAN:
2049#endif
2050#ifdef IFT_IEEE80211
2051	case IFT_IEEE80211:
2052#endif
2053	case IFT_GIF:		/* XXX need more cases? */
2054		return(1);
2055	default:
2056		return(0);
2057	}
2058}
2059
2060int
2061nd6_storelladdr(ifp, rt, m, dst, desten)
2062	struct ifnet *ifp;
2063	struct rtentry *rt;
2064	struct mbuf *m;
2065	struct sockaddr *dst;
2066	u_char *desten;
2067{
2068	int i;
2069	struct sockaddr_dl *sdl;
2070
2071	if (m->m_flags & M_MCAST) {
2072		switch (ifp->if_type) {
2073		case IFT_ETHER:
2074		case IFT_FDDI:
2075#ifdef IFT_L2VLAN
2076	case IFT_L2VLAN:
2077#endif
2078#ifdef IFT_IEEE80211
2079		case IFT_IEEE80211:
2080#endif
2081			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2082						 desten);
2083			return(1);
2084		case IFT_IEEE1394:
2085			/*
2086			 * netbsd can use if_broadcastaddr, but we don't do so
2087			 * to reduce # of ifdef.
2088			 */
2089			for (i = 0; i < ifp->if_addrlen; i++)
2090				desten[i] = ~0;
2091			return(1);
2092		case IFT_ARCNET:
2093			*desten = 0;
2094			return(1);
2095		default:
2096			m_freem(m);
2097			return(0);
2098		}
2099	}
2100
2101	if (rt == NULL) {
2102		/* this could happen, if we could not allocate memory */
2103		m_freem(m);
2104		return(0);
2105	}
2106	if (rt->rt_gateway->sa_family != AF_LINK) {
2107		printf("nd6_storelladdr: something odd happens\n");
2108		m_freem(m);
2109		return(0);
2110	}
2111	sdl = SDL(rt->rt_gateway);
2112	if (sdl->sdl_alen == 0) {
2113		/* this should be impossible, but we bark here for debugging */
2114		printf("nd6_storelladdr: sdl_alen == 0\n");
2115		m_freem(m);
2116		return(0);
2117	}
2118
2119	bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
2120	return(1);
2121}
2122
2123static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
2124static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
2125#ifdef SYSCTL_DECL
2126SYSCTL_DECL(_net_inet6_icmp6);
2127#endif
2128SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2129	CTLFLAG_RD, nd6_sysctl_drlist, "");
2130SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
2131	CTLFLAG_RD, nd6_sysctl_prlist, "");
2132
2133static int
2134nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2135{
2136	int error;
2137	char buf[1024];
2138	struct in6_defrouter *d, *de;
2139	struct nd_defrouter *dr;
2140
2141	if (req->newptr)
2142		return EPERM;
2143	error = 0;
2144
2145	for (dr = TAILQ_FIRST(&nd_defrouter);
2146	     dr;
2147	     dr = TAILQ_NEXT(dr, dr_entry)) {
2148		d = (struct in6_defrouter *)buf;
2149		de = (struct in6_defrouter *)(buf + sizeof(buf));
2150
2151		if (d + 1 <= de) {
2152			bzero(d, sizeof(*d));
2153			d->rtaddr.sin6_family = AF_INET6;
2154			d->rtaddr.sin6_len = sizeof(d->rtaddr);
2155			if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
2156			    dr->ifp) != 0)
2157				log(LOG_ERR,
2158				    "scope error in "
2159				    "default router list (%s)\n",
2160				    ip6_sprintf(&dr->rtaddr));
2161			d->flags = dr->flags;
2162			d->rtlifetime = dr->rtlifetime;
2163			d->expire = dr->expire;
2164			d->if_index = dr->ifp->if_index;
2165		} else
2166			panic("buffer too short");
2167
2168		error = SYSCTL_OUT(req, buf, sizeof(*d));
2169		if (error)
2170			break;
2171	}
2172	return error;
2173}
2174
2175static int
2176nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2177{
2178	int error;
2179	char buf[1024];
2180	struct in6_prefix *p, *pe;
2181	struct nd_prefix *pr;
2182
2183	if (req->newptr)
2184		return EPERM;
2185	error = 0;
2186
2187	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2188		u_short advrtrs;
2189		size_t advance;
2190		struct sockaddr_in6 *sin6, *s6;
2191		struct nd_pfxrouter *pfr;
2192
2193		p = (struct in6_prefix *)buf;
2194		pe = (struct in6_prefix *)(buf + sizeof(buf));
2195
2196		if (p + 1 <= pe) {
2197			bzero(p, sizeof(*p));
2198			sin6 = (struct sockaddr_in6 *)(p + 1);
2199
2200			p->prefix = pr->ndpr_prefix;
2201			if (in6_recoverscope(&p->prefix,
2202			    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
2203				log(LOG_ERR,
2204				    "scope error in prefix list (%s)\n",
2205				    ip6_sprintf(&p->prefix.sin6_addr));
2206			p->raflags = pr->ndpr_raf;
2207			p->prefixlen = pr->ndpr_plen;
2208			p->vltime = pr->ndpr_vltime;
2209			p->pltime = pr->ndpr_pltime;
2210			p->if_index = pr->ndpr_ifp->if_index;
2211			p->expire = pr->ndpr_expire;
2212			p->refcnt = pr->ndpr_refcnt;
2213			p->flags = pr->ndpr_stateflags;
2214			p->origin = PR_ORIG_RA;
2215			advrtrs = 0;
2216			for (pfr = pr->ndpr_advrtrs.lh_first;
2217			     pfr;
2218			     pfr = pfr->pfr_next) {
2219				if ((void *)&sin6[advrtrs + 1] >
2220				    (void *)pe) {
2221					advrtrs++;
2222					continue;
2223				}
2224				s6 = &sin6[advrtrs];
2225				bzero(s6, sizeof(*s6));
2226				s6->sin6_family = AF_INET6;
2227				s6->sin6_len = sizeof(*sin6);
2228				if (in6_recoverscope(s6,
2229				    &pfr->router->rtaddr,
2230				    pfr->router->ifp) != 0)
2231					log(LOG_ERR,
2232					    "scope error in "
2233					    "prefix list (%s)\n",
2234					    ip6_sprintf(&pfr->router->rtaddr));
2235				advrtrs++;
2236			}
2237			p->advrtrs = advrtrs;
2238		} else
2239			panic("buffer too short");
2240
2241		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
2242		error = SYSCTL_OUT(req, buf, advance);
2243		if (error)
2244			break;
2245	}
2246	return error;
2247}
2248