nd6.c revision 201284
11573Srgrimes/*-
21573Srgrimes * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31573Srgrimes * All rights reserved.
41573Srgrimes *
51573Srgrimes * Redistribution and use in source and binary forms, with or without
61573Srgrimes * modification, are permitted provided that the following conditions
71573Srgrimes * are met:
81573Srgrimes * 1. Redistributions of source code must retain the above copyright
91573Srgrimes *    notice, this list of conditions and the following disclaimer.
101573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111573Srgrimes *    notice, this list of conditions and the following disclaimer in the
121573Srgrimes *    documentation and/or other materials provided with the distribution.
131573Srgrimes * 3. Neither the name of the project nor the names of its contributors
141573Srgrimes *    may be used to endorse or promote products derived from this software
151573Srgrimes *    without specific prior written permission.
16148834Sstefanf *
171573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
181573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
211573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271573Srgrimes * SUCH DAMAGE.
281573Srgrimes *
291573Srgrimes *	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
301573Srgrimes */
3184260Sobrien
32238624Spfg#include <sys/cdefs.h>
331573Srgrimes__FBSDID("$FreeBSD: head/sys/netinet6/nd6.c 201284 2009-12-30 21:51:23Z qingli $");
341573Srgrimes
351573Srgrimes#include "opt_inet.h"
361573Srgrimes#include "opt_inet6.h"
371573Srgrimes
3884260Sobrien#include <sys/param.h>
3984260Sobrien#include <sys/systm.h>
401573Srgrimes#include <sys/callout.h>
411573Srgrimes#include <sys/malloc.h>
421573Srgrimes#include <sys/mbuf.h>
431573Srgrimes#include <sys/socket.h>
441573Srgrimes#include <sys/sockio.h>
451573Srgrimes#include <sys/time.h>
461573Srgrimes#include <sys/kernel.h>
47148834Sstefanf#include <sys/protosw.h>
481573Srgrimes#include <sys/errno.h>
4984260Sobrien#include <sys/syslog.h>
5084260Sobrien#include <sys/lock.h>
5184260Sobrien#include <sys/rwlock.h>
521573Srgrimes#include <sys/queue.h>
5384260Sobrien#include <sys/sysctl.h>
541573Srgrimes
5584260Sobrien#include <net/if.h>
5684260Sobrien#include <net/if_arc.h>
571573Srgrimes#include <net/if_dl.h>
5884260Sobrien#include <net/if_types.h>
5984260Sobrien#include <net/iso88025.h>
601573Srgrimes#include <net/fddi.h>
61148834Sstefanf#include <net/route.h>
6284260Sobrien#include <net/vnet.h>
6384260Sobrien
6484260Sobrien#include <netinet/in.h>
651573Srgrimes#include <net/if_llatbl.h>
661573Srgrimes#define	L3_ADDR_SIN6(le)	((struct sockaddr_in6 *) L3_ADDR(le))
671573Srgrimes#include <netinet/if_ether.h>
6884260Sobrien#include <netinet6/in6_var.h>
6984260Sobrien#include <netinet/ip6.h>
7084260Sobrien#include <netinet6/ip6_var.h>
7184260Sobrien#include <netinet6/scope6_var.h>
7284260Sobrien#include <netinet6/nd6.h>
7384260Sobrien#include <netinet6/in6_ifattach.h>
7484260Sobrien#include <netinet/icmp6.h>
7584260Sobrien
761573Srgrimes#include <sys/limits.h>
771573Srgrimes
781573Srgrimes#include <security/mac/mac_framework.h>
7984260Sobrien
801573Srgrimes#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
811573Srgrimes#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
821573Srgrimes
831573Srgrimes#define SIN6(s) ((struct sockaddr_in6 *)s)
841573Srgrimes
851573SrgrimesVNET_DEFINE(int, nd6_prune);
8684260SobrienVNET_DEFINE(int, nd6_delay);
871573SrgrimesVNET_DEFINE(int, nd6_umaxtries);
8884260SobrienVNET_DEFINE(int, nd6_mmaxtries);
8984260SobrienVNET_DEFINE(int, nd6_useloopback);
9084260SobrienVNET_DEFINE(int, nd6_gctimer);
9184260Sobrien
9284260Sobrien/* preventing too many loops in ND option parsing */
9384260Sobrienstatic VNET_DEFINE(int, nd6_maxndopt);
9484260SobrienVNET_DEFINE(int, nd6_maxnudhint);
9584260Sobrienstatic VNET_DEFINE(int, nd6_maxqueuelen);
961573Srgrimes#define	V_nd6_maxndopt			VNET(nd6_maxndopt)
971573Srgrimes#define	V_nd6_maxqueuelen		VNET(nd6_maxqueuelen)
981573Srgrimes
991573SrgrimesVNET_DEFINE(int, nd6_debug);
1001573Srgrimes
1011573Srgrimes/* for debugging? */
1021573Srgrimes#if 0
10384260Sobrienstatic int nd6_inuse, nd6_allocated;
1041573Srgrimes#endif
10584260Sobrien
1061573SrgrimesVNET_DEFINE(struct nd_drhead, nd_defrouter);
107148834SstefanfVNET_DEFINE(struct nd_prhead, nd_prefix);
108148834Sstefanf
109148834SstefanfVNET_DEFINE(int, nd6_recalc_reachtm_interval);
110148834Sstefanf#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
111148834Sstefanf
112148834Sstefanfstatic struct sockaddr_in6 all1_sa;
113148834Sstefanf
11484260Sobrienstatic int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
11584260Sobrien	struct ifnet *));
11684260Sobrienstatic void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
117148834Sstefanfstatic void nd6_slowtimo(void *);
118148834Sstefanfstatic int regen_tmpaddr(struct in6_ifaddr *);
119148834Sstefanfstatic struct llentry *nd6_free(struct llentry *, int);
120148834Sstefanfstatic void nd6_llinfo_timer(void *);
121148834Sstefanfstatic void clear_llinfo_pqueue(struct llentry *);
12284260Sobrien
12384260Sobrienstatic VNET_DEFINE(struct callout, nd6_slowtimo_ch);
124148834Sstefanf#define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)
125148834Sstefanf
126148834SstefanfVNET_DEFINE(struct callout, nd6_timer_ch);
127148834Sstefanf
128148834SstefanfVNET_DECLARE(int, dad_ignore_ns);
129148834SstefanfVNET_DECLARE(int, dad_maxtry);
13084260Sobrien#define	V_dad_ignore_ns			VNET(dad_ignore_ns)
13184260Sobrien#define	V_dad_maxtry			VNET(dad_maxtry)
13284260Sobrien
13384260Sobrienvoid
13484260Sobriennd6_init(void)
1351573Srgrimes{
13684260Sobrien	int i;
1371573Srgrimes
1381573Srgrimes	V_nd6_prune	= 1;	/* walk list every 1 seconds */
1391573Srgrimes	V_nd6_delay	= 5;	/* delay first probe time 5 second */
1401573Srgrimes	V_nd6_umaxtries	= 3;	/* maximum unicast query */
1411573Srgrimes	V_nd6_mmaxtries	= 3;	/* maximum multicast query */
1421573Srgrimes	V_nd6_useloopback = 1;	/* use loopback interface for local traffic */
1431573Srgrimes	V_nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
14484260Sobrien
1451573Srgrimes	/* preventing too many loops in ND option parsing */
14684260Sobrien	V_nd6_maxndopt = 10;	/* max # of ND options allowed */
14784260Sobrien
14884260Sobrien	V_nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */
14984260Sobrien	V_nd6_maxqueuelen = 1;	/* max pkts cached in unresolved ND entries */
15084260Sobrien
15184260Sobrien#ifdef ND6_DEBUG
1521573Srgrimes	V_nd6_debug = 1;
1531573Srgrimes#else
1541573Srgrimes	V_nd6_debug = 0;
1551573Srgrimes#endif
1561573Srgrimes
1571573Srgrimes	V_nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
1581573Srgrimes
15984260Sobrien	V_dad_ignore_ns = 0;	/* ignore NS in DAD - specwise incorrect*/
1601573Srgrimes	V_dad_maxtry = 15;	/* max # of *tries* to transmit DAD packet */
16184260Sobrien
16284260Sobrien	/*
16384260Sobrien	 * XXX just to get this to compile KMM
16484260Sobrien	 */
16584260Sobrien#ifdef notyet
1661573Srgrimes	V_llinfo_nd6.ln_next = &V_llinfo_nd6;
1671573Srgrimes	V_llinfo_nd6.ln_prev = &V_llinfo_nd6;
1681573Srgrimes#endif
1691573Srgrimes	LIST_INIT(&V_nd_prefix);
1701573Srgrimes
171148834Sstefanf	V_ip6_use_tempaddr = 0;
172148834Sstefanf	V_ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
173148834Sstefanf	V_ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
174148834Sstefanf	V_ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
175148834Sstefanf
176148834Sstefanf	V_ip6_desync_factor = 0;
177148834Sstefanf
178148834Sstefanf	all1_sa.sin6_family = AF_INET6;
179148834Sstefanf	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
180148834Sstefanf	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
181148834Sstefanf		all1_sa.sin6_addr.s6_addr[i] = 0xff;
182148834Sstefanf
183148834Sstefanf	/* initialization of the default router list */
184148834Sstefanf	TAILQ_INIT(&V_nd_defrouter);
185148834Sstefanf	/* start timer */
1861573Srgrimes	callout_init(&V_nd6_slowtimo_ch, 0);
1871573Srgrimes	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
188148834Sstefanf	    nd6_slowtimo, curvnet);
189148834Sstefanf}
1901573Srgrimes
19184260Sobrien
192148834Sstefanf#ifdef VIMAGE
1931573Srgrimesvoid
194148834Sstefanfnd6_destroy()
195148834Sstefanf{
196148834Sstefanf
197148834Sstefanf	callout_drain(&V_nd6_slowtimo_ch);
198148834Sstefanf	callout_drain(&V_nd6_timer_ch);
199148834Sstefanf}
200148834Sstefanf#endif
201238624Spfg
202148834Sstefanfstruct nd_ifinfo *
203148834Sstefanfnd6_ifattach(struct ifnet *ifp)
20484260Sobrien{
20584260Sobrien	struct nd_ifinfo *nd;
20684260Sobrien
20784260Sobrien	nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
20884260Sobrien	bzero(nd, sizeof(*nd));
20984260Sobrien
21084260Sobrien	nd->initialized = 1;
21184260Sobrien
2121573Srgrimes	nd->chlim = IPV6_DEFHLIM;
21384260Sobrien	nd->basereachable = REACHABLE_TIME;
21484260Sobrien	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
21584260Sobrien	nd->retrans = RETRANS_TIMER;
2161573Srgrimes
21784260Sobrien	nd->flags = ND6_IFF_PERFORMNUD;
21884260Sobrien
21984260Sobrien	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. */
22084260Sobrien	if (V_ip6_auto_linklocal || (ifp->if_flags & IFF_LOOPBACK))
2211573Srgrimes		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
22284260Sobrien
22384260Sobrien	/* A loopback interface does not need to accept RTADV. */
22484260Sobrien	if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK))
2251573Srgrimes		nd->flags |= ND6_IFF_ACCEPT_RTADV;
22684260Sobrien
22784260Sobrien	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
22884260Sobrien	nd6_setmtu0(ifp, nd);
22984260Sobrien
2301573Srgrimes	return nd;
23184260Sobrien}
23284260Sobrien
23384260Sobrienvoid
23484260Sobriennd6_ifdetach(struct nd_ifinfo *nd)
2351573Srgrimes{
23684260Sobrien
23784260Sobrien	free(nd, M_IP6NDP);
23884260Sobrien}
23984260Sobrien
/*
 * Reset the ND level link MTU of ifp.  This is meant to be called when the
 * physical MTU changes, which means the ND level MTU might have to be
 * adjusted.  The work is done by nd6_setmtu0() on the interface's own
 * nd_ifinfo.
 */
void
nd6_setmtu(struct ifnet *ifp)
{
	struct nd_ifinfo *ndi;

	ndi = ND_IFINFO(ifp);
	nd6_setmtu0(ifp, ndi);
}
25084260Sobrien
25184260Sobrien/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
2521573Srgrimesvoid
25384260Sobriennd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
25484260Sobrien{
25584260Sobrien	u_int32_t omaxmtu;
2561573Srgrimes
25784260Sobrien	omaxmtu = ndi->maxmtu;
25884260Sobrien
25984260Sobrien	switch (ifp->if_type) {
26084260Sobrien	case IFT_ARCNET:
2611573Srgrimes		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
26284260Sobrien		break;
26384260Sobrien	case IFT_FDDI:
26484260Sobrien		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
26584260Sobrien		break;
2661573Srgrimes	case IFT_ISO88025:
26784260Sobrien		 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
26884260Sobrien		 break;
26984260Sobrien	default:
27084260Sobrien		ndi->maxmtu = ifp->if_mtu;
27184260Sobrien		break;
27284260Sobrien	}
27384260Sobrien
2741573Srgrimes	/*
27584260Sobrien	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
27684260Sobrien	 * undesirable situation.  We thus notify the operator of the change
27784260Sobrien	 * explicitly.  The check for omaxmtu is necessary to restrict the
2781573Srgrimes	 * log to the case of changing the MTU, not initializing it.
27984260Sobrien	 */
28084260Sobrien	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
28184260Sobrien		log(LOG_NOTICE, "nd6_setmtu0: "
28284260Sobrien		    "new link MTU on %s (%lu) is too small for IPv6\n",
2831573Srgrimes		    if_name(ifp), (unsigned long)ndi->maxmtu);
28484260Sobrien	}
28584260Sobrien
28684260Sobrien	if (ndi->maxmtu > V_in6_maxmtu)
2871573Srgrimes		in6_setmaxmtu(); /* check all interfaces just in case */
28884260Sobrien
28984260Sobrien}
29084260Sobrien
29184260Sobrienvoid
2921573Srgrimesnd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
29384260Sobrien{
29484260Sobrien
29584260Sobrien	bzero(ndopts, sizeof(*ndopts));
29684260Sobrien	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
2971573Srgrimes	ndopts->nd_opts_last
29884260Sobrien		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
29984260Sobrien
30084260Sobrien	if (icmp6len == 0) {
30184260Sobrien		ndopts->nd_opts_done = 1;
302148834Sstefanf		ndopts->nd_opts_search = NULL;
3031573Srgrimes	}
30484260Sobrien}
30584260Sobrien
30684260Sobrien/*
30784260Sobrien * Take one ND option.
3088870Srgrimes */
30984260Sobrienstruct nd_opt_hdr *
31084260Sobriennd6_option(union nd_opts *ndopts)
31184260Sobrien{
31284260Sobrien	struct nd_opt_hdr *nd_opt;
3131573Srgrimes	int olen;
31484260Sobrien
31584260Sobrien	if (ndopts == NULL)
31684260Sobrien		panic("ndopts == NULL in nd6_option");
31784260Sobrien	if (ndopts->nd_opts_last == NULL)
3181573Srgrimes		panic("uninitialized ndopts in nd6_option");
31984260Sobrien	if (ndopts->nd_opts_search == NULL)
32084260Sobrien		return NULL;
32184260Sobrien	if (ndopts->nd_opts_done)
32284260Sobrien		return NULL;
3231573Srgrimes
32484260Sobrien	nd_opt = ndopts->nd_opts_search;
32584260Sobrien
32684260Sobrien	/* make sure nd_opt_len is inside the buffer */
32784260Sobrien	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
32884260Sobrien		bzero(ndopts, sizeof(*ndopts));
32984260Sobrien		return NULL;
33084260Sobrien	}
33184260Sobrien
332148834Sstefanf	olen = nd_opt->nd_opt_len << 3;
3331573Srgrimes	if (olen == 0) {
33484260Sobrien		/*
33584260Sobrien		 * Message validation requires that all included
3361573Srgrimes		 * options have a length that is greater than zero.
33784260Sobrien		 */
33884260Sobrien		bzero(ndopts, sizeof(*ndopts));
3391573Srgrimes		return NULL;
34084260Sobrien	}
34184260Sobrien
34284260Sobrien	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
34384260Sobrien	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
3441573Srgrimes		/* option overruns the end of buffer, invalid */
34584260Sobrien		bzero(ndopts, sizeof(*ndopts));
34684260Sobrien		return NULL;
34784260Sobrien	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
34884260Sobrien		/* reached the end of options chain */
3491573Srgrimes		ndopts->nd_opts_done = 1;
35084260Sobrien		ndopts->nd_opts_search = NULL;
35184260Sobrien	}
35284260Sobrien	return nd_opt;
35384260Sobrien}
3541573Srgrimes
35584260Sobrien/*
35684260Sobrien * Parse multiple ND options.
35784260Sobrien * This function is much easier to use, for ND routines that do not need
35884260Sobrien * multiple options of the same type.
35984260Sobrien */
36084260Sobrienint
36184260Sobriennd6_options(union nd_opts *ndopts)
36284260Sobrien{
36384260Sobrien	struct nd_opt_hdr *nd_opt;
3641573Srgrimes	int i = 0;
36584260Sobrien
36684260Sobrien	if (ndopts == NULL)
36784260Sobrien		panic("ndopts == NULL in nd6_options");
36884260Sobrien	if (ndopts->nd_opts_last == NULL)
3691573Srgrimes		panic("uninitialized ndopts in nd6_options");
3701573Srgrimes	if (ndopts->nd_opts_search == NULL)
37184260Sobrien		return 0;
37284260Sobrien
37384260Sobrien	while (1) {
37484260Sobrien		nd_opt = nd6_option(ndopts);
37584260Sobrien		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
3761573Srgrimes			/*
37784260Sobrien			 * Message validation requires that all included
37884260Sobrien			 * options have a length that is greater than zero.
37984260Sobrien			 */
38084260Sobrien			ICMP6STAT_INC(icp6s_nd_badopt);
3811573Srgrimes			bzero(ndopts, sizeof(*ndopts));
38284260Sobrien			return -1;
38384260Sobrien		}
3841573Srgrimes
38584260Sobrien		if (nd_opt == NULL)
38684260Sobrien			goto skip1;
38784260Sobrien
3881573Srgrimes		switch (nd_opt->nd_opt_type) {
38984260Sobrien		case ND_OPT_SOURCE_LINKADDR:
39084260Sobrien		case ND_OPT_TARGET_LINKADDR:
39184260Sobrien		case ND_OPT_MTU:
39284260Sobrien		case ND_OPT_REDIRECTED_HEADER:
39384260Sobrien			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
3941573Srgrimes				nd6log((LOG_INFO,
395148834Sstefanf				    "duplicated ND6 option found (type=%d)\n",
39684260Sobrien				    nd_opt->nd_opt_type));
397148834Sstefanf				/* XXX bark? */
398148834Sstefanf			} else {
399148834Sstefanf				ndopts->nd_opt_array[nd_opt->nd_opt_type]
400148834Sstefanf					= nd_opt;
401148834Sstefanf			}
402148834Sstefanf			break;
40384260Sobrien		case ND_OPT_PREFIX_INFORMATION:
40484260Sobrien			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
405148834Sstefanf				ndopts->nd_opt_array[nd_opt->nd_opt_type]
40684260Sobrien					= nd_opt;
40784260Sobrien			}
40884260Sobrien			ndopts->nd_opts_pi_end =
40984260Sobrien				(struct nd_opt_prefix_info *)nd_opt;
410148814Sstefanf			break;
41184260Sobrien		default:
41284260Sobrien			/*
41384260Sobrien			 * Unknown options must be silently ignored,
41484260Sobrien			 * to accomodate future extension to the protocol.
41584260Sobrien			 */
4161573Srgrimes			nd6log((LOG_DEBUG,
417148834Sstefanf			    "nd6_options: unsupported option %d - "
418148834Sstefanf			    "option ignored\n", nd_opt->nd_opt_type));
419148834Sstefanf		}
420238624Spfg
421148834Sstefanfskip1:
422148834Sstefanf		i++;
423148834Sstefanf		if (i > V_nd6_maxndopt) {
424148834Sstefanf			ICMP6STAT_INC(icp6s_nd_toomanyopt);
425148834Sstefanf			nd6log((LOG_INFO, "too many loop in nd opt\n"));
426148834Sstefanf			break;
427148834Sstefanf		}
428148834Sstefanf
429148834Sstefanf		if (ndopts->nd_opts_done)
4301573Srgrimes			break;
431148834Sstefanf	}
432148834Sstefanf
433148834Sstefanf	return 0;
434148834Sstefanf}
435148834Sstefanf
436148834Sstefanf/*
437148834Sstefanf * ND6 timer routine to handle ND6 entries
438148834Sstefanf */
439148834Sstefanfvoid
440148834Sstefanfnd6_llinfo_settimer_locked(struct llentry *ln, long tick)
441148834Sstefanf{
442148834Sstefanf	int canceled;
443148834Sstefanf
444148834Sstefanf	if (tick < 0) {
445148834Sstefanf		ln->la_expire = 0;
446		ln->ln_ntick = 0;
447		canceled = callout_stop(&ln->ln_timer_ch);
448	} else {
449		ln->la_expire = time_second + tick / hz;
450		LLE_ADDREF(ln);
451		if (tick > INT_MAX) {
452			ln->ln_ntick = tick - INT_MAX;
453			canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
454			    nd6_llinfo_timer, ln);
455		} else {
456			ln->ln_ntick = 0;
457			canceled = callout_reset(&ln->ln_timer_ch, tick,
458			    nd6_llinfo_timer, ln);
459		}
460	}
461	if (canceled)
462		LLE_REMREF(ln);
463}
464
/*
 * Wrapper around nd6_llinfo_settimer_locked() that takes the write lock
 * of ln for the duration of the call.  See that function for the meaning
 * of tick.
 */
void
nd6_llinfo_settimer(struct llentry *ln, long tick)
{

	LLE_WLOCK(ln);
	nd6_llinfo_settimer_locked(ln, tick);
	LLE_WUNLOCK(ln);
}
473
474static void
475nd6_llinfo_timer(void *arg)
476{
477	struct llentry *ln;
478	struct in6_addr *dst;
479	struct ifnet *ifp;
480	struct nd_ifinfo *ndi = NULL;
481
482	ln = (struct llentry *)arg;
483	if (ln == NULL) {
484		panic("%s: NULL entry!\n", __func__);
485		return;
486	}
487
488	if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL)
489		panic("ln ifp == NULL");
490
491	CURVNET_SET(ifp->if_vnet);
492
493	if (ln->ln_ntick > 0) {
494		if (ln->ln_ntick > INT_MAX) {
495			ln->ln_ntick -= INT_MAX;
496			nd6_llinfo_settimer(ln, INT_MAX);
497		} else {
498			ln->ln_ntick = 0;
499			nd6_llinfo_settimer(ln, ln->ln_ntick);
500		}
501		goto done;
502	}
503
504	ndi = ND_IFINFO(ifp);
505	dst = &L3_ADDR_SIN6(ln)->sin6_addr;
506	if (ln->la_flags & LLE_STATIC) {
507		goto done;
508	}
509
510	if (ln->la_flags & LLE_DELETED) {
511		(void)nd6_free(ln, 0);
512		ln = NULL;
513		goto done;
514	}
515
516	switch (ln->ln_state) {
517	case ND6_LLINFO_INCOMPLETE:
518		if (ln->la_asked < V_nd6_mmaxtries) {
519			ln->la_asked++;
520			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
521			nd6_ns_output(ifp, NULL, dst, ln, 0);
522		} else {
523			struct mbuf *m = ln->la_hold;
524			if (m) {
525				struct mbuf *m0;
526
527				/*
528				 * assuming every packet in la_hold has the
529				 * same IP header
530				 */
531				m0 = m->m_nextpkt;
532				m->m_nextpkt = NULL;
533				icmp6_error2(m, ICMP6_DST_UNREACH,
534				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
535
536				ln->la_hold = m0;
537				clear_llinfo_pqueue(ln);
538			}
539			(void)nd6_free(ln, 0);
540			ln = NULL;
541		}
542		break;
543	case ND6_LLINFO_REACHABLE:
544		if (!ND6_LLINFO_PERMANENT(ln)) {
545			ln->ln_state = ND6_LLINFO_STALE;
546			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
547		}
548		break;
549
550	case ND6_LLINFO_STALE:
551		/* Garbage Collection(RFC 2461 5.3) */
552		if (!ND6_LLINFO_PERMANENT(ln)) {
553			(void)nd6_free(ln, 1);
554			ln = NULL;
555		}
556		break;
557
558	case ND6_LLINFO_DELAY:
559		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
560			/* We need NUD */
561			ln->la_asked = 1;
562			ln->ln_state = ND6_LLINFO_PROBE;
563			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
564			nd6_ns_output(ifp, dst, dst, ln, 0);
565		} else {
566			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
567			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
568		}
569		break;
570	case ND6_LLINFO_PROBE:
571		if (ln->la_asked < V_nd6_umaxtries) {
572			ln->la_asked++;
573			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
574			nd6_ns_output(ifp, dst, dst, ln, 0);
575		} else {
576			(void)nd6_free(ln, 0);
577			ln = NULL;
578		}
579		break;
580	}
581done:
582	if (ln != NULL)
583		LLE_FREE(ln);
584	CURVNET_RESTORE();
585}
586
587
588/*
589 * ND6 timer routine to expire default route list and prefix list
590 */
591void
592nd6_timer(void *arg)
593{
594	CURVNET_SET((struct vnet *) arg);
595	int s;
596	struct nd_defrouter *dr;
597	struct nd_prefix *pr;
598	struct in6_ifaddr *ia6, *nia6;
599	struct in6_addrlifetime *lt6;
600
601	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
602	    nd6_timer, curvnet);
603
604	/* expire default router list */
605	s = splnet();
606	dr = TAILQ_FIRST(&V_nd_defrouter);
607	while (dr) {
608		if (dr->expire && dr->expire < time_second) {
609			struct nd_defrouter *t;
610			t = TAILQ_NEXT(dr, dr_entry);
611			defrtrlist_del(dr);
612			dr = t;
613		} else {
614			dr = TAILQ_NEXT(dr, dr_entry);
615		}
616	}
617
618	/*
619	 * expire interface addresses.
620	 * in the past the loop was inside prefix expiry processing.
621	 * However, from a stricter speci-confrmance standpoint, we should
622	 * rather separate address lifetimes and prefix lifetimes.
623	 *
624	 * XXXRW: in6_ifaddrhead locking.
625	 */
626  addrloop:
627	TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
628		/* check address lifetime */
629		lt6 = &ia6->ia6_lifetime;
630		if (IFA6_IS_INVALID(ia6)) {
631			int regen = 0;
632
633			/*
634			 * If the expiring address is temporary, try
635			 * regenerating a new one.  This would be useful when
636			 * we suspended a laptop PC, then turned it on after a
637			 * period that could invalidate all temporary
638			 * addresses.  Although we may have to restart the
639			 * loop (see below), it must be after purging the
640			 * address.  Otherwise, we'd see an infinite loop of
641			 * regeneration.
642			 */
643			if (V_ip6_use_tempaddr &&
644			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
645				if (regen_tmpaddr(ia6) == 0)
646					regen = 1;
647			}
648
649			in6_purgeaddr(&ia6->ia_ifa);
650
651			if (regen)
652				goto addrloop; /* XXX: see below */
653		} else if (IFA6_IS_DEPRECATED(ia6)) {
654			int oldflags = ia6->ia6_flags;
655
656			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
657
658			/*
659			 * If a temporary address has just become deprecated,
660			 * regenerate a new one if possible.
661			 */
662			if (V_ip6_use_tempaddr &&
663			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
664			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
665
666				if (regen_tmpaddr(ia6) == 0) {
667					/*
668					 * A new temporary address is
669					 * generated.
670					 * XXX: this means the address chain
671					 * has changed while we are still in
672					 * the loop.  Although the change
673					 * would not cause disaster (because
674					 * it's not a deletion, but an
675					 * addition,) we'd rather restart the
676					 * loop just for safety.  Or does this
677					 * significantly reduce performance??
678					 */
679					goto addrloop;
680				}
681			}
682		} else {
683			/*
684			 * A new RA might have made a deprecated address
685			 * preferred.
686			 */
687			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
688		}
689	}
690
691	/* expire prefix list */
692	pr = V_nd_prefix.lh_first;
693	while (pr) {
694		/*
695		 * check prefix lifetime.
696		 * since pltime is just for autoconf, pltime processing for
697		 * prefix is not necessary.
698		 */
699		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
700		    time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
701			struct nd_prefix *t;
702			t = pr->ndpr_next;
703
704			/*
705			 * address expiration and prefix expiration are
706			 * separate.  NEVER perform in6_purgeaddr here.
707			 */
708
709			prelist_remove(pr);
710			pr = t;
711		} else
712			pr = pr->ndpr_next;
713	}
714	splx(s);
715	CURVNET_RESTORE();
716}
717
718/*
719 * ia6 - deprecated/invalidated temporary address
720 */
721static int
722regen_tmpaddr(struct in6_ifaddr *ia6)
723{
724	struct ifaddr *ifa;
725	struct ifnet *ifp;
726	struct in6_ifaddr *public_ifa6 = NULL;
727
728	ifp = ia6->ia_ifa.ifa_ifp;
729	IF_ADDR_LOCK(ifp);
730	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
731		struct in6_ifaddr *it6;
732
733		if (ifa->ifa_addr->sa_family != AF_INET6)
734			continue;
735
736		it6 = (struct in6_ifaddr *)ifa;
737
738		/* ignore no autoconf addresses. */
739		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
740			continue;
741
742		/* ignore autoconf addresses with different prefixes. */
743		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
744			continue;
745
746		/*
747		 * Now we are looking at an autoconf address with the same
748		 * prefix as ours.  If the address is temporary and is still
749		 * preferred, do not create another one.  It would be rare, but
750		 * could happen, for example, when we resume a laptop PC after
751		 * a long period.
752		 */
753		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
754		    !IFA6_IS_DEPRECATED(it6)) {
755			public_ifa6 = NULL;
756			break;
757		}
758
759		/*
760		 * This is a public autoconf address that has the same prefix
761		 * as ours.  If it is preferred, keep it.  We can't break the
762		 * loop here, because there may be a still-preferred temporary
763		 * address with the prefix.
764		 */
765		if (!IFA6_IS_DEPRECATED(it6))
766		    public_ifa6 = it6;
767	}
768
769	if (public_ifa6 != NULL) {
770		int e;
771
772		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
773			IF_ADDR_UNLOCK(ifp);
774			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
775			    " tmp addr,errno=%d\n", e);
776			return (-1);
777		}
778		IF_ADDR_UNLOCK(ifp);
779		return (0);
780	}
781
782	IF_ADDR_UNLOCK(ifp);
783	return (-1);
784}
785
786/*
787 * Nuke neighbor cache/prefix/default router management table, right before
788 * ifp goes away.
789 */
790void
791nd6_purge(struct ifnet *ifp)
792{
793	struct nd_defrouter *dr, *ndr;
794	struct nd_prefix *pr, *npr;
795
796	/*
797	 * Nuke default router list entries toward ifp.
798	 * We defer removal of default router list entries that is installed
799	 * in the routing table, in order to keep additional side effects as
800	 * small as possible.
801	 */
802	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
803		ndr = TAILQ_NEXT(dr, dr_entry);
804		if (dr->installed)
805			continue;
806
807		if (dr->ifp == ifp)
808			defrtrlist_del(dr);
809	}
810
811	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
812		ndr = TAILQ_NEXT(dr, dr_entry);
813		if (!dr->installed)
814			continue;
815
816		if (dr->ifp == ifp)
817			defrtrlist_del(dr);
818	}
819
820	/* Nuke prefix list entries toward ifp */
821	for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
822		npr = pr->ndpr_next;
823		if (pr->ndpr_ifp == ifp) {
824			/*
825			 * Because if_detach() does *not* release prefixes
826			 * while purging addresses the reference count will
827			 * still be above zero. We therefore reset it to
828			 * make sure that the prefix really gets purged.
829			 */
830			pr->ndpr_refcnt = 0;
831
832			/*
833			 * Previously, pr->ndpr_addr is removed as well,
834			 * but I strongly believe we don't have to do it.
835			 * nd6_purge() is only called from in6_ifdetach(),
836			 * which removes all the associated interface addresses
837			 * by itself.
838			 * (jinmei@kame.net 20010129)
839			 */
840			prelist_remove(pr);
841		}
842	}
843
844	/* cancel default outgoing interface setting */
845	if (V_nd6_defifindex == ifp->if_index)
846		nd6_setdefaultiface(0);
847
848	if (!V_ip6_forwarding && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
849		/* Refresh default router list. */
850		defrouter_select();
851	}
852
853	/* XXXXX
854	 * We do not nuke the neighbor cache entries here any more
855	 * because the neighbor cache is kept in if_afdata[AF_INET6].
856	 * nd6_purge() is invoked by in6_ifdetach() which is called
857	 * from if_detach() where everything gets purged. So let
858	 * in6_domifdetach() do the actual L2 table purging work.
859	 */
860}
861
862/*
863 * the caller acquires and releases the lock on the lltbls
864 * Returns the llentry locked
865 */
866struct llentry *
867nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
868{
869	struct sockaddr_in6 sin6;
870	struct llentry *ln;
871	int llflags = 0;
872
873	bzero(&sin6, sizeof(sin6));
874	sin6.sin6_len = sizeof(struct sockaddr_in6);
875	sin6.sin6_family = AF_INET6;
876	sin6.sin6_addr = *addr6;
877
878	IF_AFDATA_LOCK_ASSERT(ifp);
879
880	if (flags & ND6_CREATE)
881	    llflags |= LLE_CREATE;
882	if (flags & ND6_EXCLUSIVE)
883	    llflags |= LLE_EXCLUSIVE;
884
885	ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
886	if ((ln != NULL) && (flags & LLE_CREATE)) {
887		ln->ln_state = ND6_LLINFO_NOSTATE;
888		callout_init(&ln->ln_timer_ch, 0);
889	}
890
891	return (ln);
892}
893
894/*
895 * Test whether a given IPv6 address is a neighbor or not, ignoring
896 * the actual neighbor cache.  The neighbor cache is ignored in order
897 * to not reenter the routing code from within itself.
898 */
899static int
900nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
901{
902	struct nd_prefix *pr;
903	struct ifaddr *dstaddr;
904
905	/*
906	 * A link-local address is always a neighbor.
907	 * XXX: a link does not necessarily specify a single interface.
908	 */
909	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
910		struct sockaddr_in6 sin6_copy;
911		u_int32_t zone;
912
913		/*
914		 * We need sin6_copy since sa6_recoverscope() may modify the
915		 * content (XXX).
916		 */
917		sin6_copy = *addr;
918		if (sa6_recoverscope(&sin6_copy))
919			return (0); /* XXX: should be impossible */
920		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
921			return (0);
922		if (sin6_copy.sin6_scope_id == zone)
923			return (1);
924		else
925			return (0);
926	}
927
928	/*
929	 * If the address matches one of our addresses,
930	 * it should be a neighbor.
931	 * If the address matches one of our on-link prefixes, it should be a
932	 * neighbor.
933	 */
934	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
935		if (pr->ndpr_ifp != ifp)
936			continue;
937
938		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
939			struct rtentry *rt;
940			rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0);
941			if (rt == NULL)
942				continue;
943			/*
944			 * This is the case where multiple interfaces
945			 * have the same prefix, but only one is installed
946			 * into the routing table and that prefix entry
947			 * is not the one being examined here. In the case
948			 * where RADIX_MPATH is enabled, multiple route
949			 * entries (of the same rt_key value) will be
950			 * installed because the interface addresses all
951			 * differ.
952			 */
953			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
954			       &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
955				RTFREE_LOCKED(rt);
956				continue;
957			}
958			RTFREE_LOCKED(rt);
959		}
960
961		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
962		    &addr->sin6_addr, &pr->ndpr_mask))
963			return (1);
964	}
965
966	/*
967	 * If the address is assigned on the node of the other side of
968	 * a p2p interface, the address should be a neighbor.
969	 */
970	dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
971	if (dstaddr != NULL) {
972		if (dstaddr->ifa_ifp == ifp) {
973			ifa_free(dstaddr);
974			return (1);
975		}
976		ifa_free(dstaddr);
977	}
978
979	/*
980	 * If the default router list is empty, all addresses are regarded
981	 * as on-link, and thus, as a neighbor.
982	 * XXX: we restrict the condition to hosts, because routers usually do
983	 * not have the "default router list".
984	 */
985	if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
986	    V_nd6_defifindex == ifp->if_index) {
987		return (1);
988	}
989
990	return (0);
991}
992
993
994/*
995 * Detect if a given IPv6 address identifies a neighbor on a given link.
996 * XXX: should take care of the destination of a p2p link?
997 */
998int
999nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
1000{
1001	struct llentry *lle;
1002	int rc = 0;
1003
1004	IF_AFDATA_UNLOCK_ASSERT(ifp);
1005	if (nd6_is_new_addr_neighbor(addr, ifp))
1006		return (1);
1007
1008	/*
1009	 * Even if the address matches none of our addresses, it might be
1010	 * in the neighbor cache.
1011	 */
1012	IF_AFDATA_LOCK(ifp);
1013	if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
1014		LLE_RUNLOCK(lle);
1015		rc = 1;
1016	}
1017	IF_AFDATA_UNLOCK(ifp);
1018	return (rc);
1019}
1020
/*
 * Free an nd6 llinfo entry.
 * Since the function would cause significant changes in the kernel, DO NOT
 * make it global, unless you have a strong reason for the change, and are sure
 * that the change is safe.
 *
 * ln - neighbor cache entry to release.
 * gc - non-zero when the call is made for garbage collection.  In that
 *      case (on a non-forwarding node) a STALE entry that belongs to a
 *      default router with a non-zero expire value is kept and only its
 *      timer is re-armed.
 *
 * Returns LIST_NEXT(ln, lle_next), i.e. the entry that follows ln on its
 * list, so that a caller can continue walking.
 */
static struct llentry *
nd6_free(struct llentry *ln, int gc)
{
        struct llentry *next;
	struct nd_defrouter *dr;
	struct ifnet *ifp=NULL;

	/*
	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
	 * even though it is not harmful, it was not really necessary.
	 */

	/* cancel timer */
	nd6_llinfo_settimer(ln, -1);

	/* Default-router bookkeeping applies only when not forwarding. */
	if (!V_ip6_forwarding) {
		int s;
		s = splnet();
		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);

		if (dr != NULL && dr->expire &&
		    ln->ln_state == ND6_LLINFO_STALE && gc) {
			/*
			 * If the reason for the deletion is just garbage
			 * collection, and the neighbor is an active default
			 * router, do not delete it.  Instead, reset the GC
			 * timer using the router's lifetime.
			 * Simply deleting the entry would affect default
			 * router selection, which is not necessarily a good
			 * thing, especially when we're using router preference
			 * values.
			 * XXX: the check for ln_state would be redundant,
			 *      but we intentionally keep it just in case.
			 */
			if (dr->expire > time_second)
				nd6_llinfo_settimer(ln,
				    (dr->expire - time_second) * hz);
			else
				nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
			splx(s);
			/*
			 * The entry is kept: drop one reference under the
			 * entry's write lock and hand back the next entry.
			 * NOTE(review): presumably this is the reference
			 * held on behalf of the caller -- confirm.
			 */
			LLE_WLOCK(ln);
			LLE_REMREF(ln);
			LLE_WUNLOCK(ln);
			return (LIST_NEXT(ln, lle_next));
		}

		if (ln->ln_router || dr) {
			/*
			 * rt6_flush must be called whether or not the neighbor
			 * is in the Default Router List.
			 * See a corresponding comment in nd6_na_input().
			 */
			rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
		}

		if (dr) {
			/*
			 * Unreachability of a router might affect the default
			 * router selection and on-link detection of advertised
			 * prefixes.
			 */

			/*
			 * Temporarily fake the state to choose a new default
			 * router and to perform on-link determination of
			 * prefixes correctly.
			 * Below the state will be set correctly,
			 * or the entry itself will be deleted.
			 */
			ln->ln_state = ND6_LLINFO_INCOMPLETE;

			/*
			 * Since defrouter_select() does not affect the
			 * on-link determination and MIP6 needs the check
			 * before the default router selection, we perform
			 * the check now.
			 */
			pfxlist_onlink_check();

			/*
			 * refresh default router list
			 */
			defrouter_select();
		}
		splx(s);
	}

	/*
	 * Before deleting the entry, remember the next entry as the
	 * return value.  We need this because pfxlist_onlink_check() above
	 * might have freed other entries (particularly the old next entry) as
	 * a side effect (XXX).
	 */
	next = LIST_NEXT(ln, lle_next);

	/*
	 * Release the entry with both the interface's AF data lock and the
	 * entry's write lock held.  The entry lock is not released here
	 * after llentry_free().
	 */
	ifp = ln->lle_tbl->llt_ifp;
	IF_AFDATA_LOCK(ifp);
	LLE_WLOCK(ln);
	llentry_free(ln);
	IF_AFDATA_UNLOCK(ifp);

	return (next);
}
1130
1131/*
1132 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1133 *
1134 * XXX cost-effective methods?
1135 */
1136void
1137nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
1138{
1139	struct llentry *ln;
1140	struct ifnet *ifp;
1141
1142	if ((dst6 == NULL) || (rt == NULL))
1143		return;
1144
1145	ifp = rt->rt_ifp;
1146	IF_AFDATA_LOCK(ifp);
1147	ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
1148	IF_AFDATA_UNLOCK(ifp);
1149	if (ln == NULL)
1150		return;
1151
1152	if (ln->ln_state < ND6_LLINFO_REACHABLE)
1153		goto done;
1154
1155	/*
1156	 * if we get upper-layer reachability confirmation many times,
1157	 * it is possible we have false information.
1158	 */
1159	if (!force) {
1160		ln->ln_byhint++;
1161		if (ln->ln_byhint > V_nd6_maxnudhint) {
1162			goto done;
1163		}
1164	}
1165
1166 	ln->ln_state = ND6_LLINFO_REACHABLE;
1167	if (!ND6_LLINFO_PERMANENT(ln)) {
1168		nd6_llinfo_settimer(ln,
1169		    (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
1170	}
1171done:
1172	LLE_WUNLOCK(ln);
1173}
1174
1175
/*
 * Handle the Neighbor Discovery ioctl requests.
 *
 * cmd  - ioctl command; commands not listed in the switch below fall out
 *        of it and return 0.
 * data - request argument.  It is viewed through several overlapping
 *        pointer types (drl, oprl, ndi, nbi, ndif); which view is valid
 *        depends on cmd.
 * ifp  - interface the request applies to.
 *
 * Returns 0 on success, EINVAL for a rejected link MTU or an unknown
 * neighbor, or the error reported by in6_setscope() or
 * nd6_setdefaultiface().
 */
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
	struct in6_drlist *drl = (struct in6_drlist *)data;
	struct in6_oprlist *oprl = (struct in6_oprlist *)data;
	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	int i = 0, error = 0;
	int s;

	switch (cmd) {
	case SIOCGDRLST_IN6:
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 */
		bzero(drl, sizeof(*drl));
		s = splnet();
		dr = TAILQ_FIRST(&V_nd_defrouter);
		/* Copy out at most DRLSTSIZ default routers. */
		while (dr && i < DRLSTSIZ) {
			drl->defrouter[i].rtaddr = dr->rtaddr;
			in6_clearscope(&drl->defrouter[i].rtaddr);

			drl->defrouter[i].flags = dr->flags;
			drl->defrouter[i].rtlifetime = dr->rtlifetime;
			drl->defrouter[i].expire = dr->expire;
			drl->defrouter[i].if_index = dr->ifp->if_index;
			i++;
			dr = TAILQ_NEXT(dr, dr_entry);
		}
		splx(s);
		break;
	case SIOCGPRLST_IN6:
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 *
		 * XXX the structure in6_prlist was changed in backward-
		 * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
		 * in6_prlist is used for nd6_sysctl() - fill_prlist().
		 */
		/*
		 * XXX meaning of fields, especially "raflags", is very
		 * different between RA prefix list and RR/static prefix list.
		 * how about separating ioctls into two?
		 */
		bzero(oprl, sizeof(*oprl));
		s = splnet();
		pr = V_nd_prefix.lh_first;
		/* Copy out at most PRLSTSIZ prefixes. */
		while (pr && i < PRLSTSIZ) {
			struct nd_pfxrouter *pfr;
			int j;

			oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
			oprl->prefix[i].raflags = pr->ndpr_raf;
			oprl->prefix[i].prefixlen = pr->ndpr_plen;
			oprl->prefix[i].vltime = pr->ndpr_vltime;
			oprl->prefix[i].pltime = pr->ndpr_pltime;
			oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
				oprl->prefix[i].expire = 0;
			else {
				time_t maxexpire;

				/* XXX: we assume time_t is signed. */
				/* All bits set except the top (sign) bit. */
				maxexpire = (-1) &
				    ~((time_t)1 <<
				    ((sizeof(maxexpire) * 8) - 1));
				/* Clamp the expiry instead of overflowing. */
				if (pr->ndpr_vltime <
				    maxexpire - pr->ndpr_lastupdate) {
					oprl->prefix[i].expire =
					    pr->ndpr_lastupdate +
					    pr->ndpr_vltime;
				} else
					oprl->prefix[i].expire = maxexpire;
			}

			pfr = pr->ndpr_advrtrs.lh_first;
			j = 0;
			/*
			 * Only the first DRLSTSIZ advertising routers are
			 * stored, but j keeps counting so that advrtrs
			 * reports the total number.
			 */
			while (pfr) {
				if (j < DRLSTSIZ) {
#define RTRADDR oprl->prefix[i].advrtr[j]
					RTRADDR = pfr->router->rtaddr;
					in6_clearscope(&RTRADDR);
#undef RTRADDR
				}
				j++;
				pfr = pfr->pfr_next;
			}
			oprl->prefix[i].advrtrs = j;
			oprl->prefix[i].origin = PR_ORIG_RA;

			i++;
			pr = pr->ndpr_next;
		}
		splx(s);

		break;
	case OSIOCGIFINFO_IN6:
/* ND stays defined until the #undef after the SIOCSIFINFO_FLAGS case. */
#define ND	ndi->ndi
		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
		bzero(&ND, sizeof(ND));
		ND.linkmtu = IN6_LINKMTU(ifp);
		ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
		ND.basereachable = ND_IFINFO(ifp)->basereachable;
		ND.reachable = ND_IFINFO(ifp)->reachable;
		ND.retrans = ND_IFINFO(ifp)->retrans;
		ND.flags = ND_IFINFO(ifp)->flags;
		ND.recalctm = ND_IFINFO(ifp)->recalctm;
		ND.chlim = ND_IFINFO(ifp)->chlim;
		break;
	case SIOCGIFINFO_IN6:
		ND = *ND_IFINFO(ifp);
		break;
	case SIOCSIFINFO_IN6:
		/*
		 * used to change host variables from userland.
		 * intended for a use on router to reflect RA configurations.
		 */
		/* 0 means 'unspecified' */
		if (ND.linkmtu != 0) {
			if (ND.linkmtu < IPV6_MMTU ||
			    ND.linkmtu > IN6_LINKMTU(ifp)) {
				error = EINVAL;
				break;
			}
			ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
		}

		if (ND.basereachable != 0) {
			int obasereachable = ND_IFINFO(ifp)->basereachable;

			ND_IFINFO(ifp)->basereachable = ND.basereachable;
			/* Recompute the reachable time only on a change. */
			if (ND.basereachable != obasereachable)
				ND_IFINFO(ifp)->reachable =
				    ND_COMPUTE_RTIME(ND.basereachable);
		}
		if (ND.retrans != 0)
			ND_IFINFO(ifp)->retrans = ND.retrans;
		if (ND.chlim != 0)
			ND_IFINFO(ifp)->chlim = ND.chlim;
		/* FALLTHROUGH */
	case SIOCSIFINFO_FLAGS:
	{
		struct ifaddr *ifa;
		struct in6_ifaddr *ia;

		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
		    !(ND.flags & ND6_IFF_IFDISABLED)) {
			/* ifdisabled 1->0 transition */

			/*
			 * If the interface is marked as ND6_IFF_IFDISABLED and
			 * has a link-local address with IN6_IFF_DUPLICATED,
			 * do not clear ND6_IFF_IFDISABLED.
			 * See RFC 4862, Section 5.4.5.
			 */
			int duplicated_linklocal = 0;

			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				if (ifa->ifa_addr->sa_family != AF_INET6)
					continue;
				ia = (struct in6_ifaddr *)ifa;
				if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
				    IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
					duplicated_linklocal = 1;
					break;
				}
			}
			IF_ADDR_UNLOCK(ifp);

			if (duplicated_linklocal) {
				/*
				 * Put the flag back into the request so that
				 * the final flags assignment keeps the
				 * interface disabled.
				 */
				ND.flags |= ND6_IFF_IFDISABLED;
				log(LOG_ERR, "Cannot enable an interface"
				    " with a link-local address marked"
				    " duplicate.\n");
			} else {
				ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED;
				in6_if_up(ifp);
			}
		} else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
			    (ND.flags & ND6_IFF_IFDISABLED)) {
			/* ifdisabled 0->1 transition */
			/* Mark all IPv6 address as tentative. */

			ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				if (ifa->ifa_addr->sa_family != AF_INET6)
					continue;
				ia = (struct in6_ifaddr *)ifa;
				ia->ia6_flags |= IN6_IFF_TENTATIVE;
			}
			IF_ADDR_UNLOCK(ifp);
		}

		if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) &&
		    (ND.flags & ND6_IFF_AUTO_LINKLOCAL)) {
			/* auto_linklocal 0->1 transition */

			/* If no link-local address on ifp, configure */
			ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
			in6_ifattach(ifp, NULL);
		}
	}
		/* Finally install the (possibly adjusted) requested flags. */
		ND_IFINFO(ifp)->flags = ND.flags;
		break;
#undef ND
	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
		/* sync kernel routing table with the default router list */
		defrouter_reset();
		defrouter_select();
		break;
	case SIOCSPFXFLUSH_IN6:
	{
		/* flush all the prefix advertised by routers */
		struct nd_prefix *pr, *next;

		s = splnet();
		/* next is saved first because prelist_remove() is called on pr. */
		for (pr = V_nd_prefix.lh_first; pr; pr = next) {
			struct in6_ifaddr *ia, *ia_next;

			next = pr->ndpr_next;

			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue; /* XXX */

			/* do we really have to remove addresses as well? */
			/* XXXRW: in6_ifaddrhead locking. */
			TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
			    ia_next) {
				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
					continue;

				if (ia->ia6_ndpr == pr)
					in6_purgeaddr(&ia->ia_ifa);
			}
			prelist_remove(pr);
		}
		splx(s);
		break;
	}
	case SIOCSRTRFLUSH_IN6:
	{
		/* flush all the default routers */
		struct nd_defrouter *dr, *next;

		s = splnet();
		defrouter_reset();
		/* next is saved first because defrtrlist_del() is called on dr. */
		for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
			next = TAILQ_NEXT(dr, dr_entry);
			defrtrlist_del(dr);
		}
		defrouter_select();
		splx(s);
		break;
	}
	case SIOCGNBRINFO_IN6:
	{
		struct llentry *ln;
		struct in6_addr nb_addr = nbi->addr; /* make local for safety */

		if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
			return (error);

		IF_AFDATA_LOCK(ifp);
		ln = nd6_lookup(&nb_addr, 0, ifp);
		IF_AFDATA_UNLOCK(ifp);

		if (ln == NULL) {
			error = EINVAL;
			break;
		}
		/* Copy the fields out, then drop the entry's read lock. */
		nbi->state = ln->ln_state;
		nbi->asked = ln->la_asked;
		nbi->isrouter = ln->ln_router;
		nbi->expire = ln->la_expire;
		LLE_RUNLOCK(ln);
		break;
	}
	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
		ndif->ifindex = V_nd6_defifindex;
		break;
	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
		return (nd6_setdefaultiface(ndif->ifindex));
	}
	return (error);
}
1466
/*
 * Create neighbor cache entry and cache link-layer address,
 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
 *
 * ifp       - receiving interface
 * from      - IPv6 source address of the packet; nothing is done for the
 *             unspecified address
 * lladdr    - link-layer address carried by the packet, or NULL
 * lladdrlen - length of lladdr (not referenced in this function)
 * type - ICMP6 type
 * code - type dependent information
 *
 * Returns the neighbor cache entry, or NULL when no entry is available or
 * the entry is static.  The entry's lock is released before returning.
 *
 * XXXXX
 *  The caller of this function already acquired the ndp
 *  cache table lock because the cache entry is returned.
 *  NOTE(review): the function itself asserts that the interface's AF data
 *  lock is NOT held on entry -- confirm which lock the remark above means.
 */
struct llentry *
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
    int lladdrlen, int type, int code)
{
	struct llentry *ln = NULL;
	int is_newentry;
	int do_update;
	int olladdr;
	int llchange;
	int flags = 0;
	int newstate = 0;
	uint16_t router = 0;
	struct sockaddr_in6 sin6;
	struct mbuf *chain = NULL;
	int static_route = 0;

	/*
	 * NOTE(review): this assertion uses ifp before the NULL check that
	 * follows it.
	 */
	IF_AFDATA_UNLOCK_ASSERT(ifp);

	if (ifp == NULL)
		panic("ifp == NULL in nd6_cache_lladdr");
	if (from == NULL)
		panic("from == NULL in nd6_cache_lladdr");

	/* nothing must be updated for unspecified address */
	if (IN6_IS_ADDR_UNSPECIFIED(from))
		return NULL;

	/*
	 * Validation about ifp->if_addrlen and lladdrlen must be done in
	 * the caller.
	 *
	 * XXX If the link does not have link-layer address, what should
	 * we do? (ifp->if_addrlen == 0)
	 * Spec says nothing in sections for RA, RS and NA.  There's small
	 * description on it in NS section (RFC 2461 7.2.3).
	 */
	/* An exclusive lookup is requested only when there is an lladdr. */
	flags |= lladdr ? ND6_EXCLUSIVE : 0;
	IF_AFDATA_LOCK(ifp);
	ln = nd6_lookup(from, flags, ifp);

	if (ln == NULL) {
		/* No entry yet: create one, always with an exclusive lookup. */
		flags |= LLE_EXCLUSIVE;
		ln = nd6_lookup(from, flags |ND6_CREATE, ifp);
		IF_AFDATA_UNLOCK(ifp);
		is_newentry = 1;
	} else {
		IF_AFDATA_UNLOCK(ifp);
		/* do nothing if static ndp is set */
		if (ln->la_flags & LLE_STATIC) {
			static_route = 1;
			goto done;
		}
		is_newentry = 0;
	}
	if (ln == NULL)
		return (NULL);

	/* olladdr: the entry already held a valid link-layer address. */
	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
	if (olladdr && lladdr) {
		/* llchange is non-zero when the addresses differ. */
		llchange = bcmp(lladdr, &ln->ll_addr,
		    ifp->if_addrlen);
	} else
		llchange = 0;

	/*
	 * newentry olladdr  lladdr  llchange	(*=record)
	 *	0	n	n	--	(1)
	 *	0	y	n	--	(2)
	 *	0	n	y	--	(3) * STALE
	 *	0	y	y	n	(4) *
	 *	0	y	y	y	(5) * STALE
	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
	 *	1	--	y	--	(7) * STALE
	 */

	if (lladdr) {		/* (3-5) and (7) */
		/*
		 * Record source link-layer address
		 * XXX is it dependent to ifp->if_type?
		 */
		bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
		ln->la_flags |= LLE_VALID;
	}

	if (!is_newentry) {
		if ((!olladdr && lladdr != NULL) ||	/* (3) */
		    (olladdr && lladdr != NULL && llchange)) {	/* (5) */
			do_update = 1;
			newstate = ND6_LLINFO_STALE;
		} else					/* (1-2,4) */
			do_update = 0;
	} else {
		do_update = 1;
		if (lladdr == NULL)			/* (6) */
			newstate = ND6_LLINFO_NOSTATE;
		else					/* (7) */
			newstate = ND6_LLINFO_STALE;
	}

	if (do_update) {
		/*
		 * Update the state of the neighbor cache.
		 */
		ln->ln_state = newstate;

		if (ln->ln_state == ND6_LLINFO_STALE) {
			/*
			 * XXX: since nd6_output() below will cause
			 * state transition to DELAY and reset the timer,
			 * we must set the timer now, although it is actually
			 * meaningless.
			 */
			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);

			if (ln->la_hold) {
				struct mbuf *m_hold, *m_hold_next;

				/*
				 * reset the la_hold in advance, to explicitly
				 * prevent a la_hold lookup in nd6_output()
				 * (wouldn't happen, though...)
				 */
				for (m_hold = ln->la_hold, ln->la_hold = NULL;
				    m_hold; m_hold = m_hold_next) {
					m_hold_next = m_hold->m_nextpkt;
					m_hold->m_nextpkt = NULL;

					/*
					 * we assume ifp is not a p2p here, so
					 * just set the 2nd argument as the
					 * 1st one.
					 */
					nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
				}
				/*
				 * If we have mbufs in the chain we need to do
				 * deferred transmit. Copy the address from the
				 * llentry before dropping the lock down below.
				 */
				if (chain != NULL)
					memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
			}
		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
			/* probe right away */
			nd6_llinfo_settimer_locked((void *)ln, 0);
		}
	}

	/*
	 * ICMP6 type dependent behavior.
	 *
	 * NS: clear IsRouter if new entry
	 * RS: clear IsRouter
	 * RA: set IsRouter if there's lladdr
	 * redir: clear IsRouter if new entry
	 *
	 * RA case, (1):
	 * The spec says that we must set IsRouter in the following cases:
	 * - If lladdr exist, set IsRouter.  This means (1-5).
	 * - If it is old entry (!newentry), set IsRouter.  This means (7).
	 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
	 * A question arises for (1) case.  (1) case has no lladdr in the
	 * neighbor cache, this is similar to (6).
	 * This case is rare but we figured that we MUST NOT set IsRouter.
	 *
	 * newentry olladdr  lladdr  llchange	    NS  RS  RA	redir
	 *							D R
	 *	0	n	n	--	(1)	c   ?     s
	 *	0	y	n	--	(2)	c   s     s
	 *	0	n	y	--	(3)	c   s     s
	 *	0	y	y	n	(4)	c   s     s
	 *	0	y	y	y	(5)	c   s     s
	 *	1	--	n	--	(6) c	c	c s
	 *	1	--	y	--	(7) c	c   s	c s
	 *
	 *					(c=clear s=set)
	 */
	switch (type & 0xff) {
	case ND_NEIGHBOR_SOLICIT:
		/*
		 * New entry must have is_router flag cleared.
		 */
		if (is_newentry)	/* (6-7) */
			ln->ln_router = 0;
		break;
	case ND_REDIRECT:
		/*
		 * If the icmp is a redirect to a better router, always set the
		 * is_router flag.  Otherwise, if the entry is newly created,
		 * clear the flag.  [RFC 2461, sec 8.3]
		 */
		if (code == ND_REDIRECT_ROUTER)
			ln->ln_router = 1;
		else if (is_newentry) /* (6-7) */
			ln->ln_router = 0;
		break;
	case ND_ROUTER_SOLICIT:
		/*
		 * is_router flag must always be cleared.
		 */
		ln->ln_router = 0;
		break;
	case ND_ROUTER_ADVERT:
		/*
		 * Mark an entry with lladdr as a router.
		 */
		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
		    (is_newentry && lladdr)) {			/* (7) */
			ln->ln_router = 1;
		}
		break;
	}

	if (ln != NULL) {
		/* Snapshot what is needed below before dropping the lock. */
		static_route = (ln->la_flags & LLE_STATIC);
		router = ln->ln_router;

		if (flags & ND6_EXCLUSIVE)
			LLE_WUNLOCK(ln);
		else
			LLE_RUNLOCK(ln);
		if (static_route)
			ln = NULL;
	}
	/* Send the packets that were deferred while the entry was locked. */
	if (chain)
		nd6_output_flush(ifp, ifp, chain, &sin6, NULL);

	/*
	 * When the link-layer address of a router changes, select the
	 * best router again.  In particular, when the neighbor entry is newly
	 * created, it might affect the selection policy.
	 * Question: can we restrict the first condition to the "is_newentry"
	 * case?
	 * XXX: when we hear an RA from a new router with the link-layer
	 * address option, defrouter_select() is called twice, since
	 * defrtrlist_update called the function as well.  However, I believe
	 * we can compromise the overhead, since it only happens the first
	 * time.
	 * XXX: although defrouter_select() should not have a bad effect
	 * for those are not autoconfigured hosts, we explicitly avoid such
	 * cases for safety.
	 */
	if (do_update && router && !V_ip6_forwarding &&
	    ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		/*
		 * guaranteed recursion
		 */
		defrouter_select();
	}

	return (ln);
done:
	/* Early exit for static entries: unlock and return NULL. */
	if (ln != NULL) {
		if (flags & ND6_EXCLUSIVE)
			LLE_WUNLOCK(ln);
		else
			LLE_RUNLOCK(ln);
		if (static_route)
			ln = NULL;
	}
	return (ln);
}
1740
1741static void
1742nd6_slowtimo(void *arg)
1743{
1744	CURVNET_SET((struct vnet *) arg);
1745	struct nd_ifinfo *nd6if;
1746	struct ifnet *ifp;
1747
1748	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1749	    nd6_slowtimo, curvnet);
1750	IFNET_RLOCK_NOSLEEP();
1751	for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
1752	    ifp = TAILQ_NEXT(ifp, if_list)) {
1753		nd6if = ND_IFINFO(ifp);
1754		if (nd6if->basereachable && /* already initialized */
1755		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1756			/*
1757			 * Since reachable time rarely changes by router
1758			 * advertisements, we SHOULD insure that a new random
1759			 * value gets recomputed at least once every few hours.
1760			 * (RFC 2461, 6.3.4)
1761			 */
1762			nd6if->recalctm = V_nd6_recalc_reachtm_interval;
1763			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
1764		}
1765	}
1766	IFNET_RUNLOCK_NOSLEEP();
1767	CURVNET_RESTORE();
1768}
1769
/*
 * Send m0 to dst through ifp.  This is a thin wrapper that calls
 * nd6_output_lle() with neither a pre-looked-up neighbor cache entry nor
 * a deferred-transmit chain (both arguments are NULL), and returns its
 * result unchanged.
 */
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
    struct sockaddr_in6 *dst, struct rtentry *rt0)
{

	return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
}
1777
1778
1779/*
1780 * Note that I'm not enforcing any global serialization
1781 * lle state or asked changes here as the logic is too
1782 * complicated to avoid having to always acquire an exclusive
1783 * lock
1784 * KMM
1785 *
1786 */
1787#define senderr(e) { error = (e); goto bad;}
1788
1789int
1790nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
1791    struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
1792	struct mbuf **chain)
1793{
1794	struct mbuf *m = m0;
1795	struct llentry *ln = lle;
1796	int error = 0;
1797	int flags = 0;
1798
1799#ifdef INVARIANTS
1800	if (lle != NULL) {
1801
1802		LLE_WLOCK_ASSERT(lle);
1803
1804		KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
1805	}
1806#endif
1807	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
1808		goto sendpkt;
1809
1810	if (nd6_need_cache(ifp) == 0)
1811		goto sendpkt;
1812
1813	/*
1814	 * next hop determination.  This routine is derived from ether_output.
1815	 */
1816
1817	/*
1818	 * Address resolution or Neighbor Unreachability Detection
1819	 * for the next hop.
1820	 * At this point, the destination of the packet must be a unicast
1821	 * or an anycast address(i.e. not a multicast).
1822	 */
1823
1824	flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
1825	if (ln == NULL) {
1826	retry:
1827		IF_AFDATA_LOCK(ifp);
1828		ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
1829		IF_AFDATA_UNLOCK(ifp);
1830		if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
1831			/*
1832			 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
1833			 * the condition below is not very efficient.  But we believe
1834			 * it is tolerable, because this should be a rare case.
1835			 */
1836			flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
1837			IF_AFDATA_LOCK(ifp);
1838			ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
1839			IF_AFDATA_UNLOCK(ifp);
1840		}
1841	}
1842	if (ln == NULL) {
1843		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
1844		    !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
1845			char ip6buf[INET6_ADDRSTRLEN];
1846			log(LOG_DEBUG,
1847			    "nd6_output: can't allocate llinfo for %s "
1848			    "(ln=%p)\n",
1849			    ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
1850			senderr(EIO);	/* XXX: good error? */
1851		}
1852		goto sendpkt;	/* send anyway */
1853	}
1854
1855	/* We don't have to do link-layer address resolution on a p2p link. */
1856	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
1857	    ln->ln_state < ND6_LLINFO_REACHABLE) {
1858		if ((flags & LLE_EXCLUSIVE) == 0) {
1859			flags |= LLE_EXCLUSIVE;
1860			goto retry;
1861		}
1862		ln->ln_state = ND6_LLINFO_STALE;
1863		nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
1864	}
1865
1866	/*
1867	 * The first time we send a packet to a neighbor whose entry is
1868	 * STALE, we have to change the state to DELAY and a sets a timer to
1869	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
1870	 * neighbor unreachability detection on expiration.
1871	 * (RFC 2461 7.3.3)
1872	 */
1873	if (ln->ln_state == ND6_LLINFO_STALE) {
1874		if ((flags & LLE_EXCLUSIVE) == 0) {
1875			flags |= LLE_EXCLUSIVE;
1876			LLE_RUNLOCK(ln);
1877			goto retry;
1878		}
1879		ln->la_asked = 0;
1880		ln->ln_state = ND6_LLINFO_DELAY;
1881		nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
1882	}
1883
1884	/*
1885	 * If the neighbor cache entry has a state other than INCOMPLETE
1886	 * (i.e. its link-layer address is already resolved), just
1887	 * send the packet.
1888	 */
1889	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
1890		goto sendpkt;
1891
1892	/*
1893	 * There is a neighbor cache entry, but no ethernet address
1894	 * response yet.  Append this latest packet to the end of the
1895	 * packet queue in the mbuf, unless the number of the packet
1896	 * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
1897	 * the oldest packet in the queue will be removed.
1898	 */
1899	if (ln->ln_state == ND6_LLINFO_NOSTATE)
1900		ln->ln_state = ND6_LLINFO_INCOMPLETE;
1901
1902	if ((flags & LLE_EXCLUSIVE) == 0) {
1903		flags |= LLE_EXCLUSIVE;
1904		LLE_RUNLOCK(ln);
1905		goto retry;
1906	}
1907	if (ln->la_hold) {
1908		struct mbuf *m_hold;
1909		int i;
1910
1911		i = 0;
1912		for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
1913			i++;
1914			if (m_hold->m_nextpkt == NULL) {
1915				m_hold->m_nextpkt = m;
1916				break;
1917			}
1918		}
1919		while (i >= V_nd6_maxqueuelen) {
1920			m_hold = ln->la_hold;
1921			ln->la_hold = ln->la_hold->m_nextpkt;
1922			m_freem(m_hold);
1923			i--;
1924		}
1925	} else {
1926		ln->la_hold = m;
1927	}
1928	/*
1929	 * We did the lookup (no lle arg) so we
1930	 * need to do the unlock here
1931	 */
1932	if (lle == NULL) {
1933		if (flags & LLE_EXCLUSIVE)
1934			LLE_WUNLOCK(ln);
1935		else
1936			LLE_RUNLOCK(ln);
1937	}
1938
1939	/*
1940	 * If there has been no NS for the neighbor after entering the
1941	 * INCOMPLETE state, send the first solicitation.
1942	 */
1943	if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
1944		ln->la_asked++;
1945
1946		nd6_llinfo_settimer(ln,
1947		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
1948		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
1949	}
1950	return (0);
1951
1952  sendpkt:
1953	/* discard the packet if IPv6 operation is disabled on the interface */
1954	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
1955		error = ENETDOWN; /* better error? */
1956		goto bad;
1957	}
1958	/*
1959	 * ln is valid and the caller did not pass in
1960	 * an llentry
1961	 */
1962	if ((ln != NULL) && (lle == NULL)) {
1963		if (flags & LLE_EXCLUSIVE)
1964			LLE_WUNLOCK(ln);
1965		else
1966			LLE_RUNLOCK(ln);
1967	}
1968
1969#ifdef MAC
1970	mac_netinet6_nd6_send(ifp, m);
1971#endif
1972	/*
1973	 * We were passed in a pointer to an lle with the lock held
1974	 * this means that we can't call if_output as we will
1975	 * recurse on the lle lock - so what we do is we create
1976	 * a list of mbufs to send and transmit them in the caller
1977	 * after the lock is dropped
1978	 */
1979	if (lle != NULL) {
1980		if (*chain == NULL)
1981			*chain = m;
1982		else {
1983			struct mbuf *m = *chain;
1984
1985			/*
1986			 * append mbuf to end of deferred chain
1987			 */
1988			while (m->m_nextpkt != NULL)
1989				m = m->m_nextpkt;
1990			m->m_nextpkt = m;
1991		}
1992		return (error);
1993	}
1994	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
1995		return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
1996		    NULL));
1997	}
1998	error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
1999	return (error);
2000
2001  bad:
2002	/*
2003	 * ln is valid and the caller did not pass in
2004	 * an llentry
2005	 */
2006	if ((ln != NULL) && (lle == NULL)) {
2007		if (flags & LLE_EXCLUSIVE)
2008			LLE_WUNLOCK(ln);
2009		else
2010			LLE_RUNLOCK(ln);
2011	}
2012	if (m)
2013		m_freem(m);
2014	return (error);
2015}
2016#undef senderr
2017
2018
2019int
2020nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
2021    struct sockaddr_in6 *dst, struct route *ro)
2022{
2023	struct mbuf *m, *m_head;
2024	struct ifnet *outifp;
2025	int error = 0;
2026
2027	m_head = chain;
2028	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
2029		outifp = origifp;
2030	else
2031		outifp = ifp;
2032
2033	while (m_head) {
2034		m = m_head;
2035		m_head = m_head->m_nextpkt;
2036		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);
2037	}
2038
2039	/*
2040	 * XXX
2041	 * note that intermediate errors are blindly ignored - but this is
2042	 * the same convention as used with nd6_output when called by
2043	 * nd6_cache_lladdr
2044	 */
2045	return (error);
2046}
2047
2048
2049int
2050nd6_need_cache(struct ifnet *ifp)
2051{
2052	/*
2053	 * XXX: we currently do not make neighbor cache on any interface
2054	 * other than ARCnet, Ethernet, FDDI and GIF.
2055	 *
2056	 * RFC2893 says:
2057	 * - unidirectional tunnels needs no ND
2058	 */
2059	switch (ifp->if_type) {
2060	case IFT_ARCNET:
2061	case IFT_ETHER:
2062	case IFT_FDDI:
2063	case IFT_IEEE1394:
2064#ifdef IFT_L2VLAN
2065	case IFT_L2VLAN:
2066#endif
2067#ifdef IFT_IEEE80211
2068	case IFT_IEEE80211:
2069#endif
2070#ifdef IFT_CARP
2071	case IFT_CARP:
2072#endif
2073	case IFT_GIF:		/* XXX need more cases? */
2074	case IFT_PPP:
2075	case IFT_TUNNEL:
2076	case IFT_BRIDGE:
2077	case IFT_PROPVIRTUAL:
2078		return (1);
2079	default:
2080		return (0);
2081	}
2082}
2083
2084/*
2085 * the callers of this function need to be re-worked to drop
2086 * the lle lock, drop here for now
2087 */
2088int
2089nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
2090    struct sockaddr *dst, u_char *desten, struct llentry **lle)
2091{
2092	struct llentry *ln;
2093
2094	*lle = NULL;
2095	IF_AFDATA_UNLOCK_ASSERT(ifp);
2096	if (m->m_flags & M_MCAST) {
2097		int i;
2098
2099		switch (ifp->if_type) {
2100		case IFT_ETHER:
2101		case IFT_FDDI:
2102#ifdef IFT_L2VLAN
2103		case IFT_L2VLAN:
2104#endif
2105#ifdef IFT_IEEE80211
2106		case IFT_IEEE80211:
2107#endif
2108		case IFT_BRIDGE:
2109		case IFT_ISO88025:
2110			ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2111						 desten);
2112			return (0);
2113		case IFT_IEEE1394:
2114			/*
2115			 * netbsd can use if_broadcastaddr, but we don't do so
2116			 * to reduce # of ifdef.
2117			 */
2118			for (i = 0; i < ifp->if_addrlen; i++)
2119				desten[i] = ~0;
2120			return (0);
2121		case IFT_ARCNET:
2122			*desten = 0;
2123			return (0);
2124		default:
2125			m_freem(m);
2126			return (EAFNOSUPPORT);
2127		}
2128	}
2129
2130
2131	/*
2132	 * the entry should have been created in nd6_store_lladdr
2133	 */
2134	IF_AFDATA_LOCK(ifp);
2135	ln = lla_lookup(LLTABLE6(ifp), 0, dst);
2136	IF_AFDATA_UNLOCK(ifp);
2137	if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
2138		if (ln != NULL)
2139			LLE_RUNLOCK(ln);
2140		/* this could happen, if we could not allocate memory */
2141		m_freem(m);
2142		return (1);
2143	}
2144
2145	bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
2146	*lle = ln;
2147	LLE_RUNLOCK(ln);
2148	/*
2149	 * A *small* use after free race exists here
2150	 */
2151	return (0);
2152}
2153
2154static void
2155clear_llinfo_pqueue(struct llentry *ln)
2156{
2157	struct mbuf *m_hold, *m_hold_next;
2158
2159	for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
2160		m_hold_next = m_hold->m_nextpkt;
2161		m_hold->m_nextpkt = NULL;
2162		m_freem(m_hold);
2163	}
2164
2165	ln->la_hold = NULL;
2166	return;
2167}
2168
2169static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
2170static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
2171#ifdef SYSCTL_DECL
2172SYSCTL_DECL(_net_inet6_icmp6);
2173#endif
2174SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2175	CTLFLAG_RD, nd6_sysctl_drlist, "");
2176SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
2177	CTLFLAG_RD, nd6_sysctl_prlist, "");
2178SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
2179	CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
2180
2181static int
2182nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2183{
2184	int error;
2185	char buf[1024] __aligned(4);
2186	struct in6_defrouter *d, *de;
2187	struct nd_defrouter *dr;
2188
2189	if (req->newptr)
2190		return EPERM;
2191	error = 0;
2192
2193	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
2194	     dr = TAILQ_NEXT(dr, dr_entry)) {
2195		d = (struct in6_defrouter *)buf;
2196		de = (struct in6_defrouter *)(buf + sizeof(buf));
2197
2198		if (d + 1 <= de) {
2199			bzero(d, sizeof(*d));
2200			d->rtaddr.sin6_family = AF_INET6;
2201			d->rtaddr.sin6_len = sizeof(d->rtaddr);
2202			d->rtaddr.sin6_addr = dr->rtaddr;
2203			error = sa6_recoverscope(&d->rtaddr);
2204			if (error != 0)
2205				return (error);
2206			d->flags = dr->flags;
2207			d->rtlifetime = dr->rtlifetime;
2208			d->expire = dr->expire;
2209			d->if_index = dr->ifp->if_index;
2210		} else
2211			panic("buffer too short");
2212
2213		error = SYSCTL_OUT(req, buf, sizeof(*d));
2214		if (error)
2215			break;
2216	}
2217
2218	return (error);
2219}
2220
2221static int
2222nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2223{
2224	int error;
2225	char buf[1024] __aligned(4);
2226	struct in6_prefix *p, *pe;
2227	struct nd_prefix *pr;
2228	char ip6buf[INET6_ADDRSTRLEN];
2229
2230	if (req->newptr)
2231		return EPERM;
2232	error = 0;
2233
2234	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2235		u_short advrtrs;
2236		size_t advance;
2237		struct sockaddr_in6 *sin6, *s6;
2238		struct nd_pfxrouter *pfr;
2239
2240		p = (struct in6_prefix *)buf;
2241		pe = (struct in6_prefix *)(buf + sizeof(buf));
2242
2243		if (p + 1 <= pe) {
2244			bzero(p, sizeof(*p));
2245			sin6 = (struct sockaddr_in6 *)(p + 1);
2246
2247			p->prefix = pr->ndpr_prefix;
2248			if (sa6_recoverscope(&p->prefix)) {
2249				log(LOG_ERR,
2250				    "scope error in prefix list (%s)\n",
2251				    ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
2252				/* XXX: press on... */
2253			}
2254			p->raflags = pr->ndpr_raf;
2255			p->prefixlen = pr->ndpr_plen;
2256			p->vltime = pr->ndpr_vltime;
2257			p->pltime = pr->ndpr_pltime;
2258			p->if_index = pr->ndpr_ifp->if_index;
2259			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2260				p->expire = 0;
2261			else {
2262				time_t maxexpire;
2263
2264				/* XXX: we assume time_t is signed. */
2265				maxexpire = (-1) &
2266				    ~((time_t)1 <<
2267				    ((sizeof(maxexpire) * 8) - 1));
2268				if (pr->ndpr_vltime <
2269				    maxexpire - pr->ndpr_lastupdate) {
2270				    p->expire = pr->ndpr_lastupdate +
2271				        pr->ndpr_vltime;
2272				} else
2273					p->expire = maxexpire;
2274			}
2275			p->refcnt = pr->ndpr_refcnt;
2276			p->flags = pr->ndpr_stateflags;
2277			p->origin = PR_ORIG_RA;
2278			advrtrs = 0;
2279			for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
2280			     pfr = pfr->pfr_next) {
2281				if ((void *)&sin6[advrtrs + 1] > (void *)pe) {
2282					advrtrs++;
2283					continue;
2284				}
2285				s6 = &sin6[advrtrs];
2286				bzero(s6, sizeof(*s6));
2287				s6->sin6_family = AF_INET6;
2288				s6->sin6_len = sizeof(*sin6);
2289				s6->sin6_addr = pfr->router->rtaddr;
2290				if (sa6_recoverscope(s6)) {
2291					log(LOG_ERR,
2292					    "scope error in "
2293					    "prefix list (%s)\n",
2294					    ip6_sprintf(ip6buf,
2295						    &pfr->router->rtaddr));
2296				}
2297				advrtrs++;
2298			}
2299			p->advrtrs = advrtrs;
2300		} else
2301			panic("buffer too short");
2302
2303		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
2304		error = SYSCTL_OUT(req, buf, advance);
2305		if (error)
2306			break;
2307	}
2308
2309	return (error);
2310}
2311