nd6_rtr.c revision 317067
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/netinet6/nd6_rtr.c 317067 2017-04-17 20:13:20Z asomers $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/refcount.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/time.h>
46#include <sys/kernel.h>
47#include <sys/lock.h>
48#include <sys/errno.h>
49#include <sys/rmlock.h>
50#include <sys/rwlock.h>
51#include <sys/syslog.h>
52#include <sys/queue.h>
53
54#include <net/if.h>
55#include <net/if_var.h>
56#include <net/if_types.h>
57#include <net/if_dl.h>
58#include <net/route.h>
59#include <net/route_var.h>
60#include <net/radix.h>
61#include <net/vnet.h>
62
63#include <netinet/in.h>
64#include <net/if_llatbl.h>
65#include <netinet6/in6_var.h>
66#include <netinet6/in6_ifattach.h>
67#include <netinet/ip6.h>
68#include <netinet6/ip6_var.h>
69#include <netinet6/nd6.h>
70#include <netinet/icmp6.h>
71#include <netinet6/scope6_var.h>
72
/*
 * Forward declarations of the file-local (static) helpers used below.
 */
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
    struct mbuf *, int);
static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
    struct nd_defrouter *);
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *);
static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);

static int in6_init_prefix_ltimes(struct nd_prefix *);
static void in6_init_address_ltimes(struct nd_prefix *,
    struct in6_addrlifetime *);

static int rt6_deleteroute(const struct rtentry *, void *);

/* Declared here, defined elsewhere; used to reset ndi->recalctm on RA input. */
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)

/* Per-VNET variables defined by this file. */
static VNET_DEFINE(struct ifnet *, nd6_defifp);
VNET_DEFINE(int, nd6_defifindex);
#define	V_nd6_defifp			VNET(nd6_defifp)

VNET_DEFINE(int, ip6_use_tempaddr) = 0;

VNET_DEFINE(int, ip6_desync_factor);
VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;

VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;

/*
 * Internal router preference values, as returned by rtpref(); a larger
 * value means a more preferred router.
 * RTPREF_MEDIUM has to be 0!
 */
#define RTPREF_HIGH	1
#define RTPREF_MEDIUM	0
#define RTPREF_LOW	(-1)
#define RTPREF_RESERVED	(-2)
#define RTPREF_INVALID	(-3)	/* internal */
113
114/*
115 * Receive Router Solicitation Message - just for routers.
116 * Router solicitation/advertisement is mostly managed by userland program
117 * (rtadvd) so here we have no function like nd6_ra_output().
118 *
119 * Based on RFC 2461
120 */
121void
122nd6_rs_input(struct mbuf *m, int off, int icmp6len)
123{
124	struct ifnet *ifp = m->m_pkthdr.rcvif;
125	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
126	struct nd_router_solicit *nd_rs;
127	struct in6_addr saddr6 = ip6->ip6_src;
128	char *lladdr = NULL;
129	int lladdrlen = 0;
130	union nd_opts ndopts;
131	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
132
133	/*
134	 * Accept RS only when V_ip6_forwarding=1 and the interface has
135	 * no ND6_IFF_ACCEPT_RTADV.
136	 */
137	if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
138		goto freeit;
139
140	/* Sanity checks */
141	if (ip6->ip6_hlim != 255) {
142		nd6log((LOG_ERR,
143		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
144		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
145		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
146		goto bad;
147	}
148
149	/*
150	 * Don't update the neighbor cache, if src = ::.
151	 * This indicates that the src has no IP address assigned yet.
152	 */
153	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
154		goto freeit;
155
156#ifndef PULLDOWN_TEST
157	IP6_EXTHDR_CHECK(m, off, icmp6len,);
158	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
159#else
160	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
161	if (nd_rs == NULL) {
162		ICMP6STAT_INC(icp6s_tooshort);
163		return;
164	}
165#endif
166
167	icmp6len -= sizeof(*nd_rs);
168	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
169	if (nd6_options(&ndopts) < 0) {
170		nd6log((LOG_INFO,
171		    "nd6_rs_input: invalid ND option, ignored\n"));
172		/* nd6_options have incremented stats */
173		goto freeit;
174	}
175
176	if (ndopts.nd_opts_src_lladdr) {
177		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
178		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
179	}
180
181	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
182		nd6log((LOG_INFO,
183		    "nd6_rs_input: lladdrlen mismatch for %s "
184		    "(if %d, RS packet %d)\n",
185		    ip6_sprintf(ip6bufs, &saddr6),
186		    ifp->if_addrlen, lladdrlen - 2));
187		goto bad;
188	}
189
190	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
191
192 freeit:
193	m_freem(m);
194	return;
195
196 bad:
197	ICMP6STAT_INC(icp6s_badrs);
198	m_freem(m);
199}
200
/*
 * Receive Router Advertisement Message.
 *
 * Processing order: validate the packet, update the per-interface ND
 * parameters and the default router list, process every prefix
 * information option, apply the MTU option, and finally record the
 * source link-layer address in the neighbor cache.  The mbuf is always
 * consumed, except on the early-return paths taken inside the
 * IP6_EXTHDR_* macros.
 *
 * Based on RFC 2461
 * TODO: on-link bit on prefix information
 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
 */
void
nd6_ra_input(struct mbuf *m, int off, int icmp6len)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct nd_router_advert *nd_ra;
	struct in6_addr saddr6 = ip6->ip6_src;
	int mcast = 0;
	union nd_opts ndopts;
	struct nd_defrouter *dr;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	dr = NULL;

	/*
	 * RAs are accepted only when the per-interface flag
	 * ND6_IFF_ACCEPT_RTADV is set on the receiving interface.
	 */
	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
		goto freeit;

	/* Sanity check: the hop limit must still be 255. */
	if (ip6->ip6_hlim != 255) {
		nd6log((LOG_ERR,
		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
		goto bad;
	}

	/* The source of an RA has to be a link-local address. */
	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
		nd6log((LOG_ERR,
		    "nd6_ra_input: src %s is not link-local\n",
		    ip6_sprintf(ip6bufs, &saddr6)));
		goto bad;
	}

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
	if (nd_ra == NULL) {
		ICMP6STAT_INC(icp6s_tooshort);
		return;
	}
#endif

	/* Parse the options that follow the fixed RA header. */
	icmp6len -= sizeof(*nd_ra);
	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log((LOG_INFO,
		    "nd6_ra_input: invalid ND option, ignored\n"));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	/*
	 * Router information: build a template entry (dr0) from the RA and
	 * hand it to defrtrlist_update(); also update the per-interface
	 * reachable time, retransmit timer and hop limit.
	 */
    {
	struct nd_defrouter dr0;
	u_int32_t advreachable = nd_ra->nd_ra_reachable;

	/* remember if this is a multicasted advertisement */
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
		mcast = 1;

	bzero(&dr0, sizeof(dr0));
	dr0.rtaddr = saddr6;
	dr0.raflags = nd_ra->nd_ra_flags_reserved;
	/*
	 * Effectively-disable routes from RA messages when
	 * ND6_IFF_NO_RADR enabled on the receiving interface or
	 * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
	 * A zero lifetime makes defrtrlist_update() remove the router.
	 */
	if (ndi->flags & ND6_IFF_NO_RADR)
		dr0.rtlifetime = 0;
	else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
		dr0.rtlifetime = 0;
	else
		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
	dr0.expire = time_uptime + dr0.rtlifetime;
	dr0.ifp = ifp;
	/* unspecified or not? (RFC 2461 6.3.4) */
	if (advreachable) {
		advreachable = ntohl(advreachable);
		if (advreachable <= MAX_REACHABLE_TIME &&
		    ndi->basereachable != advreachable) {
			ndi->basereachable = advreachable;
			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
		}
	}
	if (nd_ra->nd_ra_retransmit)
		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
	/*
	 * The current hop limit is only ever raised; an RA advertising a
	 * lower value is logged and otherwise ignored.
	 */
	if (nd_ra->nd_ra_curhoplimit) {
		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
			ndi->chlim = nd_ra->nd_ra_curhoplimit;
		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
			    "%s on %s (current = %d, received = %d). "
			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
		}
	}
	/*
	 * May return NULL.  A non-NULL result carries a reference that is
	 * dropped once the prefix options have been processed.
	 */
	dr = defrtrlist_update(&dr0);
    }

	/*
	 * prefix
	 *
	 * Walk all options between the first and the last prefix
	 * information option and feed every valid one to prelist_update().
	 */
	if (ndopts.nd_opts_pi) {
		struct nd_opt_hdr *pt;
		struct nd_opt_prefix_info *pi = NULL;
		struct nd_prefixctl pr;

		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
						(pt->nd_opt_len << 3))) {
			/* other option types may be interleaved; skip them */
			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
				continue;
			pi = (struct nd_opt_prefix_info *)pt;

			/* the option length is counted in units of 8 bytes */
			if (pi->nd_opt_pi_len != 4) {
				nd6log((LOG_INFO,
				    "nd6_ra_input: invalid option "
				    "len %d for prefix information option, "
				    "ignored\n", pi->nd_opt_pi_len));
				continue;
			}

			if (128 < pi->nd_opt_pi_prefix_len) {
				nd6log((LOG_INFO,
				    "nd6_ra_input: invalid prefix "
				    "len %d for prefix information option, "
				    "ignored\n", pi->nd_opt_pi_prefix_len));
				continue;
			}

			/* multicast and link-local prefixes are not accepted */
			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
				nd6log((LOG_INFO,
				    "nd6_ra_input: invalid prefix "
				    "%s, ignored\n",
				    ip6_sprintf(ip6bufs,
					&pi->nd_opt_pi_prefix)));
				continue;
			}

			/* translate the option into a prefix control block */
			bzero(&pr, sizeof(pr));
			pr.ndpr_prefix.sin6_family = AF_INET6;
			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;

			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
			(void)prelist_update(&pr, dr, m, mcast);
		}
	}
	/* done with the router entry; drop the reference we were handed */
	if (dr != NULL) {
		defrouter_rele(dr);
		dr = NULL;
	}

	/*
	 * MTU
	 *
	 * Accepted only when it lies between IPV6_MMTU and the smaller of
	 * the interface MTU and ndi->maxmtu (if the latter is non-zero).
	 */
	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
		u_long mtu;
		u_long maxmtu;

		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);

		/* lower bound */
		if (mtu < IPV6_MMTU) {
			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
			    "mtu=%lu sent from %s, ignoring\n",
			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
			goto skip;
		}

		/* upper bound */
		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
		    ? ndi->maxmtu : ifp->if_mtu;
		if (mtu <= maxmtu) {
			int change = (ndi->linkmtu != mtu);

			ndi->linkmtu = mtu;
			if (change) /* in6_maxmtu may change */
				in6_setmaxmtu();
		} else {
			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
			    "mtu=%lu sent from %s; "
			    "exceeds maxmtu %lu, ignoring\n",
			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
		}
	}

 skip:

	/*
	 * Source link layer address
	 */
    {
	char *lladdr = NULL;
	int lladdrlen = 0;

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
	}

	/*
	 * The option length has to match the interface's link-layer
	 * address length (plus the 2-byte option header, rounded up to a
	 * multiple of 8).
	 */
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log((LOG_INFO,
		    "nd6_ra_input: lladdrlen mismatch for %s "
		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
		    ifp->if_addrlen, lladdrlen - 2));
		goto bad;
	}

	nd6_cache_lladdr(ifp, &saddr6, lladdr,
	    lladdrlen, ND_ROUTER_ADVERT, 0);

	/*
	 * Installing a link-layer address might change the state of the
	 * router's neighbor cache, which might also affect our on-link
	 * detection of advertised prefixes.
	 */
	pfxlist_onlink_check();
    }

 freeit:
	m_freem(m);
	return;

 bad:
	ICMP6STAT_INC(icp6s_badra);
	m_freem(m);
}
452
453/* tell the change to user processes watching the routing socket. */
454static void
455nd6_rtmsg(int cmd, struct rtentry *rt)
456{
457	struct rt_addrinfo info;
458	struct ifnet *ifp;
459	struct ifaddr *ifa;
460
461	bzero((caddr_t)&info, sizeof(info));
462	info.rti_info[RTAX_DST] = rt_key(rt);
463	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
464	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
465	ifp = rt->rt_ifp;
466	if (ifp != NULL) {
467		IF_ADDR_RLOCK(ifp);
468		ifa = TAILQ_FIRST(&ifp->if_addrhead);
469		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
470		ifa_ref(ifa);
471		IF_ADDR_RUNLOCK(ifp);
472		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
473	} else
474		ifa = NULL;
475
476	rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum);
477	if (ifa != NULL)
478		ifa_free(ifa);
479}
480
481/*
482 * default router list processing sub routines
483 */
484
485static void
486defrouter_addreq(struct nd_defrouter *new)
487{
488	struct sockaddr_in6 def, mask, gate;
489	struct rtentry *newrt = NULL;
490	int error;
491
492	bzero(&def, sizeof(def));
493	bzero(&mask, sizeof(mask));
494	bzero(&gate, sizeof(gate));
495
496	def.sin6_len = mask.sin6_len = gate.sin6_len =
497	    sizeof(struct sockaddr_in6);
498	def.sin6_family = gate.sin6_family = AF_INET6;
499	gate.sin6_addr = new->rtaddr;
500
501	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
502	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
503	    RTF_GATEWAY, &newrt, new->ifp->if_fib);
504	if (newrt) {
505		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
506		RTFREE(newrt);
507	}
508	if (error == 0)
509		new->installed = 1;
510}
511
512struct nd_defrouter *
513defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
514{
515	struct nd_defrouter *dr;
516
517	ND6_LOCK_ASSERT();
518	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
519		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
520			defrouter_ref(dr);
521			return (dr);
522		}
523	return (NULL);
524}
525
/*
 * Same as defrouter_lookup_locked(), but takes and drops the ND read lock
 * around the search itself.
 */
struct nd_defrouter *
defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
{
	struct nd_defrouter *found;

	ND6_RLOCK();
	found = defrouter_lookup_locked(addr, ifp);
	ND6_RUNLOCK();

	return (found);
}
536
537void
538defrouter_ref(struct nd_defrouter *dr)
539{
540
541	refcount_acquire(&dr->refcnt);
542}
543
544void
545defrouter_rele(struct nd_defrouter *dr)
546{
547
548	if (refcount_release(&dr->refcnt))
549		free(dr, M_IP6NDP);
550}
551
552/*
553 * Remove the default route for a given router.
554 * This is just a subroutine function for defrouter_select_fib(), and
555 * should not be called from anywhere else.
556 */
557static void
558defrouter_delreq(struct nd_defrouter *dr)
559{
560	struct sockaddr_in6 def, mask, gate;
561	struct rtentry *oldrt = NULL;
562
563	bzero(&def, sizeof(def));
564	bzero(&mask, sizeof(mask));
565	bzero(&gate, sizeof(gate));
566
567	def.sin6_len = mask.sin6_len = gate.sin6_len =
568	    sizeof(struct sockaddr_in6);
569	def.sin6_family = gate.sin6_family = AF_INET6;
570	gate.sin6_addr = dr->rtaddr;
571
572	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
573	    (struct sockaddr *)&gate,
574	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, dr->ifp->if_fib);
575	if (oldrt) {
576		nd6_rtmsg(RTM_DELETE, oldrt);
577		RTFREE(oldrt);
578	}
579
580	dr->installed = 0;
581}
582
583/*
584 * Remove all default routes from default router list.
585 */
586void
587defrouter_reset(void)
588{
589	struct nd_defrouter *dr, **dra;
590	int count, i;
591
592	count = i = 0;
593
594	/*
595	 * We can't delete routes with the ND lock held, so make a copy of the
596	 * current default router list and use that when deleting routes.
597	 */
598	ND6_RLOCK();
599	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
600		count++;
601	ND6_RUNLOCK();
602
603	dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
604
605	ND6_RLOCK();
606	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
607		if (i == count)
608			break;
609		defrouter_ref(dr);
610		dra[i++] = dr;
611	}
612	ND6_RUNLOCK();
613
614	for (i = 0; i < count && dra[i] != NULL; i++) {
615		defrouter_delreq(dra[i]);
616		defrouter_rele(dra[i]);
617	}
618	free(dra, M_TEMP);
619
620	/*
621	 * XXX should we also nuke any default routers in the kernel, by
622	 * going through them by rtalloc1()?
623	 */
624}
625
626/*
627 * Look up a matching default router list entry and remove it. Returns true if a
628 * matching entry was found, false otherwise.
629 */
630bool
631defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
632{
633	struct nd_defrouter *dr;
634
635	ND6_WLOCK();
636	dr = defrouter_lookup_locked(addr, ifp);
637	if (dr == NULL) {
638		ND6_WUNLOCK();
639		return (false);
640	}
641
642	defrouter_unlink(dr, NULL);
643	ND6_WUNLOCK();
644	defrouter_del(dr);
645	defrouter_rele(dr);
646	return (true);
647}
648
649/*
650 * Remove a router from the global list and optionally stash it in a
651 * caller-supplied queue.
652 *
653 * The ND lock must be held.
654 */
655void
656defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
657{
658
659	ND6_WLOCK_ASSERT();
660	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
661	V_nd6_list_genid++;
662	if (drq != NULL)
663		TAILQ_INSERT_TAIL(drq, dr, dr_entry);
664}
665
666void
667defrouter_del(struct nd_defrouter *dr)
668{
669	struct nd_defrouter *deldr = NULL;
670	struct nd_prefix *pr;
671	struct nd_pfxrouter *pfxrtr;
672
673	ND6_UNLOCK_ASSERT();
674
675	/*
676	 * Flush all the routing table entries that use the router
677	 * as a next hop.
678	 */
679	if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
680		rt6_flush(&dr->rtaddr, dr->ifp);
681
682	if (dr->installed) {
683		deldr = dr;
684		defrouter_delreq(dr);
685	}
686
687	/*
688	 * Also delete all the pointers to the router in each prefix lists.
689	 */
690	ND6_WLOCK();
691	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
692		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
693			pfxrtr_del(pfxrtr);
694	}
695	ND6_WUNLOCK();
696
697	pfxlist_onlink_check();
698
699	/*
700	 * If the router is the primary one, choose a new one.
701	 * Note that defrouter_select_fib() will remove the current
702         * gateway from the routing table.
703	 */
704	if (deldr)
705		defrouter_select_fib(deldr->ifp->if_fib);
706
707	/*
708	 * Release the list reference.
709	 */
710	defrouter_rele(dr);
711}
712
713/*
714 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
715 * draft-ietf-ipngwg-router-selection:
716 * 1) Routers that are reachable or probably reachable should be preferred.
717 *    If we have more than one (probably) reachable router, prefer ones
718 *    with the highest router preference.
719 * 2) When no routers on the list are known to be reachable or
720 *    probably reachable, routers SHOULD be selected in a round-robin
721 *    fashion, regardless of router preference values.
722 * 3) If the Default Router List is empty, assume that all
723 *    destinations are on-link.
724 *
725 * We assume nd_defrouter is sorted by router preference value.
726 * Since the code below covers both with and without router preference cases,
727 * we do not need to classify the cases by ifdef.
728 *
729 * At this moment, we do not try to install more than one default router,
730 * even when the multipath routing is available, because we're not sure about
731 * the benefits for stub hosts comparing to the risk of making the code
732 * complicated and the possibility of introducing bugs.
733 *
734 * We maintain a single list of routers for multiple FIBs, only considering one
735 * at a time based on the receiving interface's FIB. If @fibnum is RT_ALL_FIBS,
736 * we do the whole thing multiple times.
737 */
738void
739defrouter_select_fib(int fibnum)
740{
741	struct nd_defrouter *dr, *selected_dr, *installed_dr;
742	struct llentry *ln = NULL;
743
744	if (fibnum == RT_ALL_FIBS) {
745		for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
746			defrouter_select_fib(fibnum);
747		}
748	}
749
750	ND6_RLOCK();
751	/*
752	 * Let's handle easy case (3) first:
753	 * If default router list is empty, there's nothing to be done.
754	 */
755	if (TAILQ_EMPTY(&V_nd_defrouter)) {
756		ND6_RUNLOCK();
757		return;
758	}
759
760	/*
761	 * Search for a (probably) reachable router from the list.
762	 * We just pick up the first reachable one (if any), assuming that
763	 * the ordering rule of the list described in defrtrlist_update().
764	 */
765	selected_dr = installed_dr = NULL;
766	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
767		IF_AFDATA_RLOCK(dr->ifp);
768		if (selected_dr == NULL && dr->ifp->if_fib == fibnum &&
769		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
770		    ND6_IS_LLINFO_PROBREACH(ln)) {
771			selected_dr = dr;
772			defrouter_ref(selected_dr);
773		}
774		IF_AFDATA_RUNLOCK(dr->ifp);
775		if (ln != NULL) {
776			LLE_RUNLOCK(ln);
777			ln = NULL;
778		}
779
780		if (dr->installed && dr->ifp->if_fib == fibnum) {
781			if (installed_dr == NULL) {
782				installed_dr = dr;
783				defrouter_ref(installed_dr);
784			} else {
785				/*
786				 * this should not happen.
787				 * warn for diagnosis.
788				 */
789				log(LOG_ERR, "defrouter_select_fib: more than "
790				             "one router is installed\n");
791			}
792		}
793	}
794	/*
795	 * If none of the default routers was found to be reachable,
796	 * round-robin the list regardless of preference.
797	 * Otherwise, if we have an installed router, check if the selected
798	 * (reachable) router should really be preferred to the installed one.
799	 * We only prefer the new router when the old one is not reachable
800	 * or when the new one has a really higher preference value.
801	 */
802	if (selected_dr == NULL) {
803		if (installed_dr == NULL ||
804		    TAILQ_NEXT(installed_dr, dr_entry) == NULL)
805			dr = TAILQ_FIRST(&V_nd_defrouter);
806		else
807			dr = TAILQ_NEXT(installed_dr, dr_entry);
808
809		/* Ensure we select a router for this FIB. */
810		TAILQ_FOREACH_FROM(dr, &V_nd_defrouter, dr_entry) {
811			if (dr->ifp->if_fib == fibnum) {
812				selected_dr = dr;
813				defrouter_ref(selected_dr);
814				break;
815			}
816		}
817	} else if (installed_dr != NULL) {
818		IF_AFDATA_RLOCK(installed_dr->ifp);
819		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0,
820		                     installed_dr->ifp)) &&
821		    ND6_IS_LLINFO_PROBREACH(ln) &&
822		    installed_dr->ifp->if_fib == fibnum &&
823		    rtpref(selected_dr) <= rtpref(installed_dr)) {
824			defrouter_rele(selected_dr);
825			selected_dr = installed_dr;
826		}
827		IF_AFDATA_RUNLOCK(installed_dr->ifp);
828		if (ln != NULL)
829			LLE_RUNLOCK(ln);
830	}
831	ND6_RUNLOCK();
832
833	/*
834	 * If we selected a router for this FIB and it's different
835	 * than the installed one, remove the installed router and
836	 * install the selected one in its place.
837	 */
838	if (installed_dr != selected_dr) {
839		if (installed_dr != NULL) {
840			defrouter_delreq(installed_dr);
841			defrouter_rele(installed_dr);
842		}
843		if (selected_dr != NULL)
844			defrouter_addreq(selected_dr);
845	}
846	if (selected_dr != NULL)
847		defrouter_rele(selected_dr);
848}
849
850/*
851 * Maintain old KPI for default router selection.
852 * If unspecified, we can re-select routers for all FIBs.
853 */
854void
855defrouter_select(void)
856{
857	defrouter_select_fib(RT_ALL_FIBS);
858}
859
860/*
861 * for default router selection
862 * regards router-preference field as a 2-bit signed integer
863 */
864static int
865rtpref(struct nd_defrouter *dr)
866{
867	switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
868	case ND_RA_FLAG_RTPREF_HIGH:
869		return (RTPREF_HIGH);
870	case ND_RA_FLAG_RTPREF_MEDIUM:
871	case ND_RA_FLAG_RTPREF_RSV:
872		return (RTPREF_MEDIUM);
873	case ND_RA_FLAG_RTPREF_LOW:
874		return (RTPREF_LOW);
875	default:
876		/*
877		 * This case should never happen.  If it did, it would mean a
878		 * serious bug of kernel internal.  We thus always bark here.
879		 * Or, can we even panic?
880		 */
881		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
882		return (RTPREF_INVALID);
883	}
884	/* NOTREACHED */
885}
886
887static struct nd_defrouter *
888defrtrlist_update(struct nd_defrouter *new)
889{
890	struct nd_defrouter *dr, *n;
891	uint64_t genid;
892	int oldpref;
893	bool writelocked;
894
895	if (new->rtlifetime == 0) {
896		defrouter_remove(&new->rtaddr, new->ifp);
897		return (NULL);
898	}
899
900	ND6_RLOCK();
901	writelocked = false;
902restart:
903	dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
904	if (dr != NULL) {
905		oldpref = rtpref(dr);
906
907		/* override */
908		dr->raflags = new->raflags; /* XXX flag check */
909		dr->rtlifetime = new->rtlifetime;
910		dr->expire = new->expire;
911
912		/*
913		 * If the preference does not change, there's no need
914		 * to sort the entries. Also make sure the selected
915		 * router is still installed in the kernel.
916		 */
917		if (dr->installed && rtpref(new) == oldpref) {
918			if (writelocked)
919				ND6_WUNLOCK();
920			else
921				ND6_RUNLOCK();
922			return (dr);
923		}
924	}
925
926	/*
927	 * The router needs to be reinserted into the default router
928	 * list, so upgrade to a write lock. If that fails and the list
929	 * has potentially changed while the lock was dropped, we'll
930	 * redo the lookup with the write lock held.
931	 */
932	if (!writelocked) {
933		writelocked = true;
934		if (!ND6_TRY_UPGRADE()) {
935			genid = V_nd6_list_genid;
936			ND6_RUNLOCK();
937			ND6_WLOCK();
938			if (genid != V_nd6_list_genid)
939				goto restart;
940		}
941	}
942
943	if (dr != NULL) {
944		/*
945		 * The preferred router may have changed, so relocate this
946		 * router.
947		 */
948		TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
949		n = dr;
950	} else {
951		n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
952		if (n == NULL) {
953			ND6_WUNLOCK();
954			return (NULL);
955		}
956		memcpy(n, new, sizeof(*n));
957		/* Initialize with an extra reference for the caller. */
958		refcount_init(&n->refcnt, 2);
959	}
960
961	/*
962	 * Insert the new router in the Default Router List;
963	 * The Default Router List should be in the descending order
964	 * of router-preferece.  Routers with the same preference are
965	 * sorted in the arriving time order.
966	 */
967
968	/* insert at the end of the group */
969	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
970		if (rtpref(n) > rtpref(dr))
971			break;
972	}
973	if (dr != NULL)
974		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
975	else
976		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
977	V_nd6_list_genid++;
978	ND6_WUNLOCK();
979
980	defrouter_select_fib(new->ifp->if_fib);
981
982	return (n);
983}
984
985static struct nd_pfxrouter *
986pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
987{
988	struct nd_pfxrouter *search;
989
990	ND6_LOCK_ASSERT();
991
992	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
993		if (search->router == dr)
994			break;
995	}
996	return (search);
997}
998
999static void
1000pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
1001{
1002	struct nd_pfxrouter *new;
1003	bool update;
1004
1005	ND6_UNLOCK_ASSERT();
1006
1007	ND6_RLOCK();
1008	if (pfxrtr_lookup(pr, dr) != NULL) {
1009		ND6_RUNLOCK();
1010		return;
1011	}
1012	ND6_RUNLOCK();
1013
1014	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
1015	if (new == NULL)
1016		return;
1017	defrouter_ref(dr);
1018	new->router = dr;
1019
1020	ND6_WLOCK();
1021	if (pfxrtr_lookup(pr, dr) == NULL) {
1022		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
1023		update = true;
1024	} else {
1025		/* We lost a race to add the reference. */
1026		defrouter_rele(dr);
1027		free(new, M_IP6NDP);
1028		update = false;
1029	}
1030	ND6_WUNLOCK();
1031
1032	if (update)
1033		pfxlist_onlink_check();
1034}
1035
1036static void
1037pfxrtr_del(struct nd_pfxrouter *pfr)
1038{
1039
1040	ND6_WLOCK_ASSERT();
1041
1042	LIST_REMOVE(pfr, pfr_entry);
1043	defrouter_rele(pfr->router);
1044	free(pfr, M_IP6NDP);
1045}
1046
1047static struct nd_prefix *
1048nd6_prefix_lookup_locked(struct nd_prefixctl *key)
1049{
1050	struct nd_prefix *search;
1051
1052	ND6_LOCK_ASSERT();
1053
1054	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
1055		if (key->ndpr_ifp == search->ndpr_ifp &&
1056		    key->ndpr_plen == search->ndpr_plen &&
1057		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
1058		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
1059			nd6_prefix_ref(search);
1060			break;
1061		}
1062	}
1063	return (search);
1064}
1065
/*
 * Same as nd6_prefix_lookup_locked(), but takes and drops the ND read
 * lock around the search itself.
 */
struct nd_prefix *
nd6_prefix_lookup(struct nd_prefixctl *key)
{
	struct nd_prefix *found;

	ND6_RLOCK();
	found = nd6_prefix_lookup_locked(key);
	ND6_RUNLOCK();

	return (found);
}
1076
1077void
1078nd6_prefix_ref(struct nd_prefix *pr)
1079{
1080
1081	refcount_acquire(&pr->ndpr_refcnt);
1082}
1083
1084void
1085nd6_prefix_rele(struct nd_prefix *pr)
1086{
1087
1088	if (refcount_release(&pr->ndpr_refcnt)) {
1089		KASSERT(LIST_EMPTY(&pr->ndpr_advrtrs),
1090		    ("prefix %p has advertising routers", pr));
1091		free(pr, M_IP6NDP);
1092	}
1093}
1094
1095int
1096nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
1097    struct nd_prefix **newp)
1098{
1099	struct nd_prefix *new;
1100	char ip6buf[INET6_ADDRSTRLEN];
1101	int error;
1102
1103	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
1104	if (new == NULL)
1105		return (ENOMEM);
1106	refcount_init(&new->ndpr_refcnt, newp != NULL ? 2 : 1);
1107	new->ndpr_ifp = pr->ndpr_ifp;
1108	new->ndpr_prefix = pr->ndpr_prefix;
1109	new->ndpr_plen = pr->ndpr_plen;
1110	new->ndpr_vltime = pr->ndpr_vltime;
1111	new->ndpr_pltime = pr->ndpr_pltime;
1112	new->ndpr_flags = pr->ndpr_flags;
1113	if ((error = in6_init_prefix_ltimes(new)) != 0) {
1114		free(new, M_IP6NDP);
1115		return (error);
1116	}
1117	new->ndpr_lastupdate = time_uptime;
1118
1119	/* initialization */
1120	LIST_INIT(&new->ndpr_advrtrs);
1121	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
1122	/* make prefix in the canonical form */
1123	IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
1124
1125	ND6_WLOCK();
1126	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
1127	V_nd6_list_genid++;
1128	ND6_WUNLOCK();
1129
1130	/* ND_OPT_PI_FLAG_ONLINK processing */
1131	if (new->ndpr_raf_onlink) {
1132		ND6_ONLINK_LOCK();
1133		if ((error = nd6_prefix_onlink(new)) != 0) {
1134			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
1135			    "the prefix %s/%d on-link on %s (errno=%d)\n",
1136			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1137			    pr->ndpr_plen, if_name(pr->ndpr_ifp), error));
1138			/* proceed anyway. XXX: is it correct? */
1139		}
1140		ND6_ONLINK_UNLOCK();
1141	}
1142
1143	if (dr != NULL)
1144		pfxrtr_add(new, dr);
1145	if (newp != NULL)
1146		*newp = new;
1147	return (0);
1148}
1149
1150/*
1151 * Remove a prefix from the prefix list and optionally stash it in a
1152 * caller-provided list.
1153 *
1154 * The ND6 lock must be held.
1155 */
1156void
1157nd6_prefix_unlink(struct nd_prefix *pr, struct nd_prhead *list)
1158{
1159
1160	ND6_WLOCK_ASSERT();
1161
1162	LIST_REMOVE(pr, ndpr_entry);
1163	V_nd6_list_genid++;
1164	if (list != NULL)
1165		LIST_INSERT_HEAD(list, pr, ndpr_entry);
1166}
1167
1168/*
1169 * Free an unlinked prefix, first marking it off-link if necessary.
1170 */
1171void
1172nd6_prefix_del(struct nd_prefix *pr)
1173{
1174	struct nd_pfxrouter *pfr, *next;
1175	int e;
1176	char ip6buf[INET6_ADDRSTRLEN];
1177
1178	KASSERT(pr->ndpr_addrcnt == 0,
1179	    ("prefix %p has referencing addresses", pr));
1180	ND6_UNLOCK_ASSERT();
1181
1182	/*
1183	 * Though these flags are now meaningless, we'd rather keep the value
1184	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
1185	 * when executing "ndp -p".
1186	 */
1187	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1188		ND6_ONLINK_LOCK();
1189		if ((e = nd6_prefix_offlink(pr)) != 0) {
1190			nd6log((LOG_ERR,
1191			    "nd6_prefix_del: failed to make %s/%d offlink "
1192			    "on %s, errno=%d\n",
1193			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1194			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
1195			/* what should we do? */
1196		}
1197		ND6_ONLINK_UNLOCK();
1198	}
1199
1200	/* Release references to routers that have advertised this prefix. */
1201	ND6_WLOCK();
1202	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next)
1203		pfxrtr_del(pfr);
1204	ND6_WUNLOCK();
1205
1206	nd6_prefix_rele(pr);
1207
1208	pfxlist_onlink_check();
1209}
1210
1211static int
1212prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
1213    struct mbuf *m, int mcast)
1214{
1215	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
1216	struct ifaddr *ifa;
1217	struct ifnet *ifp = new->ndpr_ifp;
1218	struct nd_prefix *pr;
1219	int error = 0;
1220	int auth;
1221	struct in6_addrlifetime lt6_tmp;
1222	char ip6buf[INET6_ADDRSTRLEN];
1223
1224	auth = 0;
1225	if (m) {
1226		/*
1227		 * Authenticity for NA consists authentication for
1228		 * both IP header and IP datagrams, doesn't it ?
1229		 */
1230#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
1231		auth = ((m->m_flags & M_AUTHIPHDR) &&
1232		    (m->m_flags & M_AUTHIPDGM));
1233#endif
1234	}
1235
1236	if ((pr = nd6_prefix_lookup(new)) != NULL) {
1237		/*
1238		 * nd6_prefix_lookup() ensures that pr and new have the same
1239		 * prefix on a same interface.
1240		 */
1241
1242		/*
1243		 * Update prefix information.  Note that the on-link (L) bit
1244		 * and the autonomous (A) bit should NOT be changed from 1
1245		 * to 0.
1246		 */
1247		if (new->ndpr_raf_onlink == 1)
1248			pr->ndpr_raf_onlink = 1;
1249		if (new->ndpr_raf_auto == 1)
1250			pr->ndpr_raf_auto = 1;
1251		if (new->ndpr_raf_onlink) {
1252			pr->ndpr_vltime = new->ndpr_vltime;
1253			pr->ndpr_pltime = new->ndpr_pltime;
1254			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
1255			pr->ndpr_lastupdate = time_uptime;
1256		}
1257
1258		if (new->ndpr_raf_onlink &&
1259		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1260			ND6_ONLINK_LOCK();
1261			if ((error = nd6_prefix_onlink(pr)) != 0) {
1262				nd6log((LOG_ERR,
1263				    "prelist_update: failed to make "
1264				    "the prefix %s/%d on-link on %s "
1265				    "(errno=%d)\n",
1266				    ip6_sprintf(ip6buf,
1267				        &pr->ndpr_prefix.sin6_addr),
1268				    pr->ndpr_plen, if_name(pr->ndpr_ifp),
1269				    error));
1270				/* proceed anyway. XXX: is it correct? */
1271			}
1272			ND6_ONLINK_UNLOCK();
1273		}
1274
1275		if (dr != NULL)
1276			pfxrtr_add(pr, dr);
1277	} else {
1278		if (new->ndpr_vltime == 0)
1279			goto end;
1280		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
1281			goto end;
1282
1283		error = nd6_prelist_add(new, dr, &pr);
1284		if (error != 0) {
1285			nd6log((LOG_NOTICE, "prelist_update: "
1286			    "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
1287			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
1288			    new->ndpr_plen, if_name(new->ndpr_ifp), error));
1289			goto end; /* we should just give up in this case. */
1290		}
1291
1292		/*
1293		 * XXX: from the ND point of view, we can ignore a prefix
1294		 * with the on-link bit being zero.  However, we need a
1295		 * prefix structure for references from autoconfigured
1296		 * addresses.  Thus, we explicitly make sure that the prefix
1297		 * itself expires now.
1298		 */
1299		if (pr->ndpr_raf_onlink == 0) {
1300			pr->ndpr_vltime = 0;
1301			pr->ndpr_pltime = 0;
1302			in6_init_prefix_ltimes(pr);
1303		}
1304	}
1305
1306	/*
1307	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
1308	 * Note that pr must be non NULL at this point.
1309	 */
1310
1311	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
1312	if (!new->ndpr_raf_auto)
1313		goto end;
1314
1315	/*
1316	 * 5.5.3 (b). the link-local prefix should have been ignored in
1317	 * nd6_ra_input.
1318	 */
1319
1320	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
1321	if (new->ndpr_pltime > new->ndpr_vltime) {
1322		error = EINVAL;	/* XXX: won't be used */
1323		goto end;
1324	}
1325
1326	/*
1327	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
1328	 * an address configured by stateless autoconfiguration already in the
1329	 * list of addresses associated with the interface, and the Valid
1330	 * Lifetime is not 0, form an address.  We first check if we have
1331	 * a matching prefix.
1332	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
1333	 * consider autoconfigured addresses while RFC2462 simply said
1334	 * "address".
1335	 */
1336	IF_ADDR_RLOCK(ifp);
1337	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1338		struct in6_ifaddr *ifa6;
1339		u_int32_t remaininglifetime;
1340
1341		if (ifa->ifa_addr->sa_family != AF_INET6)
1342			continue;
1343
1344		ifa6 = (struct in6_ifaddr *)ifa;
1345
1346		/*
1347		 * We only consider autoconfigured addresses as per rfc2462bis.
1348		 */
1349		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
1350			continue;
1351
1352		/*
1353		 * Spec is not clear here, but I believe we should concentrate
1354		 * on unicast (i.e. not anycast) addresses.
1355		 * XXX: other ia6_flags? detached or duplicated?
1356		 */
1357		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
1358			continue;
1359
1360		/*
1361		 * Ignore the address if it is not associated with a prefix
1362		 * or is associated with a prefix that is different from this
1363		 * one.  (pr is never NULL here)
1364		 */
1365		if (ifa6->ia6_ndpr != pr)
1366			continue;
1367
1368		if (ia6_match == NULL) /* remember the first one */
1369			ia6_match = ifa6;
1370
1371		/*
1372		 * An already autoconfigured address matched.  Now that we
1373		 * are sure there is at least one matched address, we can
1374		 * proceed to 5.5.3. (e): update the lifetimes according to the
1375		 * "two hours" rule and the privacy extension.
1376		 * We apply some clarifications in rfc2462bis:
1377		 * - use remaininglifetime instead of storedlifetime as a
1378		 *   variable name
1379		 * - remove the dead code in the "two-hour" rule
1380		 */
1381#define TWOHOUR		(120*60)
1382		lt6_tmp = ifa6->ia6_lifetime;
1383
1384		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
1385			remaininglifetime = ND6_INFINITE_LIFETIME;
1386		else if (time_uptime - ifa6->ia6_updatetime >
1387			 lt6_tmp.ia6t_vltime) {
1388			/*
1389			 * The case of "invalid" address.  We should usually
1390			 * not see this case.
1391			 */
1392			remaininglifetime = 0;
1393		} else
1394			remaininglifetime = lt6_tmp.ia6t_vltime -
1395			    (time_uptime - ifa6->ia6_updatetime);
1396
1397		/* when not updating, keep the current stored lifetime. */
1398		lt6_tmp.ia6t_vltime = remaininglifetime;
1399
1400		if (TWOHOUR < new->ndpr_vltime ||
1401		    remaininglifetime < new->ndpr_vltime) {
1402			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1403		} else if (remaininglifetime <= TWOHOUR) {
1404			if (auth) {
1405				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1406			}
1407		} else {
1408			/*
1409			 * new->ndpr_vltime <= TWOHOUR &&
1410			 * TWOHOUR < remaininglifetime
1411			 */
1412			lt6_tmp.ia6t_vltime = TWOHOUR;
1413		}
1414
1415		/* The 2 hour rule is not imposed for preferred lifetime. */
1416		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
1417
1418		in6_init_address_ltimes(pr, &lt6_tmp);
1419
1420		/*
1421		 * We need to treat lifetimes for temporary addresses
1422		 * differently, according to
1423		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
1424		 * we only update the lifetimes when they are in the maximum
1425		 * intervals.
1426		 */
1427		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1428			u_int32_t maxvltime, maxpltime;
1429
1430			if (V_ip6_temp_valid_lifetime >
1431			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1432			    V_ip6_desync_factor)) {
1433				maxvltime = V_ip6_temp_valid_lifetime -
1434				    (time_uptime - ifa6->ia6_createtime) -
1435				    V_ip6_desync_factor;
1436			} else
1437				maxvltime = 0;
1438			if (V_ip6_temp_preferred_lifetime >
1439			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1440			    V_ip6_desync_factor)) {
1441				maxpltime = V_ip6_temp_preferred_lifetime -
1442				    (time_uptime - ifa6->ia6_createtime) -
1443				    V_ip6_desync_factor;
1444			} else
1445				maxpltime = 0;
1446
1447			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
1448			    lt6_tmp.ia6t_vltime > maxvltime) {
1449				lt6_tmp.ia6t_vltime = maxvltime;
1450			}
1451			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
1452			    lt6_tmp.ia6t_pltime > maxpltime) {
1453				lt6_tmp.ia6t_pltime = maxpltime;
1454			}
1455		}
1456		ifa6->ia6_lifetime = lt6_tmp;
1457		ifa6->ia6_updatetime = time_uptime;
1458	}
1459	IF_ADDR_RUNLOCK(ifp);
1460	if (ia6_match == NULL && new->ndpr_vltime) {
1461		int ifidlen;
1462
1463		/*
1464		 * 5.5.3 (d) (continued)
1465		 * No address matched and the valid lifetime is non-zero.
1466		 * Create a new address.
1467		 */
1468
1469		/*
1470		 * Prefix Length check:
1471		 * If the sum of the prefix length and interface identifier
1472		 * length does not equal 128 bits, the Prefix Information
1473		 * option MUST be ignored.  The length of the interface
1474		 * identifier is defined in a separate link-type specific
1475		 * document.
1476		 */
1477		ifidlen = in6_if2idlen(ifp);
1478		if (ifidlen < 0) {
1479			/* this should not happen, so we always log it. */
1480			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
1481			    if_name(ifp));
1482			goto end;
1483		}
1484		if (ifidlen + pr->ndpr_plen != 128) {
1485			nd6log((LOG_INFO,
1486			    "prelist_update: invalid prefixlen "
1487			    "%d for %s, ignored\n",
1488			    pr->ndpr_plen, if_name(ifp)));
1489			goto end;
1490		}
1491
1492		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
1493			/*
1494			 * note that we should use pr (not new) for reference.
1495			 */
1496			pr->ndpr_addrcnt++;
1497			ia6->ia6_ndpr = pr;
1498
1499			/*
1500			 * RFC 3041 3.3 (2).
1501			 * When a new public address is created as described
1502			 * in RFC2462, also create a new temporary address.
1503			 *
1504			 * RFC 3041 3.5.
1505			 * When an interface connects to a new link, a new
1506			 * randomized interface identifier should be generated
1507			 * immediately together with a new set of temporary
1508			 * addresses.  Thus, we specifiy 1 as the 2nd arg of
1509			 * in6_tmpifadd().
1510			 */
1511			if (V_ip6_use_tempaddr) {
1512				int e;
1513				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
1514					nd6log((LOG_NOTICE, "prelist_update: "
1515					    "failed to create a temporary "
1516					    "address, errno=%d\n",
1517					    e));
1518				}
1519			}
1520			ifa_free(&ia6->ia_ifa);
1521
1522			/*
1523			 * A newly added address might affect the status
1524			 * of other addresses, so we check and update it.
1525			 * XXX: what if address duplication happens?
1526			 */
1527			pfxlist_onlink_check();
1528		} else {
1529			/* just set an error. do not bark here. */
1530			error = EADDRNOTAVAIL; /* XXX: might be unused. */
1531		}
1532	}
1533
1534end:
1535	if (pr != NULL)
1536		nd6_prefix_rele(pr);
1537	return (error);
1538}
1539
1540/*
1541 * A supplement function used in the on-link detection below;
1542 * detect if a given prefix has a (probably) reachable advertising router.
1543 * XXX: lengthy function name...
1544 */
1545static struct nd_pfxrouter *
1546find_pfxlist_reachable_router(struct nd_prefix *pr)
1547{
1548	struct nd_pfxrouter *pfxrtr;
1549	struct llentry *ln;
1550	int canreach;
1551
1552	ND6_LOCK_ASSERT();
1553
1554	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
1555		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
1556		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
1557		IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
1558		if (ln == NULL)
1559			continue;
1560		canreach = ND6_IS_LLINFO_PROBREACH(ln);
1561		LLE_RUNLOCK(ln);
1562		if (canreach)
1563			break;
1564	}
1565	return (pfxrtr);
1566}
1567
1568/*
1569 * Check if each prefix in the prefix list has at least one available router
1570 * that advertised the prefix (a router is "available" if its neighbor cache
1571 * entry is reachable or probably reachable).
1572 * If the check fails, the prefix may be off-link, because, for example,
1573 * we have moved from the network but the lifetime of the prefix has not
1574 * expired yet.  So we should not use the prefix if there is another prefix
1575 * that has an available router.
1576 * But, if there is no prefix that has an available router, we still regard
1577 * all the prefixes as on-link.  This is because we can't tell if all the
1578 * routers are simply dead or if we really moved from the network and there
1579 * is no router around us.
1580 */
1581void
1582pfxlist_onlink_check(void)
1583{
1584	struct nd_prefix *pr;
1585	struct in6_ifaddr *ifa;
1586	struct nd_defrouter *dr;
1587	struct nd_pfxrouter *pfxrtr = NULL;
1588	struct rm_priotracker in6_ifa_tracker;
1589	uint64_t genid;
1590	uint32_t flags;
1591
1592	ND6_ONLINK_LOCK();
1593	ND6_RLOCK();
1594
1595	/*
1596	 * Check if there is a prefix that has a reachable advertising
1597	 * router.
1598	 */
1599	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1600		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
1601			break;
1602	}
1603
1604	/*
1605	 * If we have no such prefix, check whether we still have a router
1606	 * that does not advertise any prefixes.
1607	 */
1608	if (pr == NULL) {
1609		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
1610			struct nd_prefix *pr0;
1611
1612			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
1613				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
1614					break;
1615			}
1616			if (pfxrtr != NULL)
1617				break;
1618		}
1619	}
1620	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
1621		/*
1622		 * There is at least one prefix that has a reachable router,
1623		 * or at least a router which probably does not advertise
1624		 * any prefixes.  The latter would be the case when we move
1625		 * to a new link where we have a router that does not provide
1626		 * prefixes and we configure an address by hand.
1627		 * Detach prefixes which have no reachable advertising
1628		 * router, and attach other prefixes.
1629		 */
1630		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1631			/* XXX: a link-local prefix should never be detached */
1632			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1633			    pr->ndpr_raf_onlink == 0 ||
1634			    pr->ndpr_raf_auto == 0)
1635				continue;
1636
1637			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1638			    find_pfxlist_reachable_router(pr) == NULL)
1639				pr->ndpr_stateflags |= NDPRF_DETACHED;
1640			else if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1641			    find_pfxlist_reachable_router(pr) != NULL)
1642				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1643		}
1644	} else {
1645		/* there is no prefix that has a reachable router */
1646		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1647			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1648			    pr->ndpr_raf_onlink == 0 ||
1649			    pr->ndpr_raf_auto == 0)
1650				continue;
1651			pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1652		}
1653	}
1654
1655	/*
1656	 * Remove each interface route associated with a (just) detached
1657	 * prefix, and reinstall the interface route for a (just) attached
1658	 * prefix.  Note that all attempt of reinstallation does not
1659	 * necessarily success, when a same prefix is shared among multiple
1660	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
1661	 * so we don't have to care about them.
1662	 */
1663restart:
1664	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1665		char ip6buf[INET6_ADDRSTRLEN];
1666		int e;
1667
1668		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1669		    pr->ndpr_raf_onlink == 0 ||
1670		    pr->ndpr_raf_auto == 0)
1671			continue;
1672
1673		flags = pr->ndpr_stateflags & (NDPRF_DETACHED | NDPRF_ONLINK);
1674		if (flags == 0 || flags == (NDPRF_DETACHED | NDPRF_ONLINK)) {
1675			genid = V_nd6_list_genid;
1676			ND6_RUNLOCK();
1677			if ((flags & NDPRF_ONLINK) != 0 &&
1678			    (e = nd6_prefix_offlink(pr)) != 0) {
1679				nd6log((LOG_ERR,
1680				    "pfxlist_onlink_check: failed to "
1681				    "make %s/%d offlink, errno=%d\n",
1682				    ip6_sprintf(ip6buf,
1683					    &pr->ndpr_prefix.sin6_addr),
1684					    pr->ndpr_plen, e));
1685			} else if ((flags & NDPRF_ONLINK) == 0 &&
1686			    (e = nd6_prefix_onlink(pr)) != 0) {
1687				nd6log((LOG_ERR,
1688				    "pfxlist_onlink_check: failed to "
1689				    "make %s/%d onlink, errno=%d\n",
1690				    ip6_sprintf(ip6buf,
1691					    &pr->ndpr_prefix.sin6_addr),
1692					    pr->ndpr_plen, e));
1693			}
1694			ND6_RLOCK();
1695			if (genid != V_nd6_list_genid)
1696				goto restart;
1697		}
1698	}
1699
1700	/*
1701	 * Changes on the prefix status might affect address status as well.
1702	 * Make sure that all addresses derived from an attached prefix are
1703	 * attached, and that all addresses derived from a detached prefix are
1704	 * detached.  Note, however, that a manually configured address should
1705	 * always be attached.
1706	 * The precise detection logic is same as the one for prefixes.
1707	 */
1708	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
1709	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1710		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
1711			continue;
1712
1713		if (ifa->ia6_ndpr == NULL) {
1714			/*
1715			 * This can happen when we first configure the address
1716			 * (i.e. the address exists, but the prefix does not).
1717			 * XXX: complicated relationships...
1718			 */
1719			continue;
1720		}
1721
1722		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
1723			break;
1724	}
1725	if (ifa) {
1726		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1727			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1728				continue;
1729
1730			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
1731				continue;
1732
1733			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
1734				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1735					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1736					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1737					nd6_dad_start((struct ifaddr *)ifa, 0);
1738				}
1739			} else {
1740				ifa->ia6_flags |= IN6_IFF_DETACHED;
1741			}
1742		}
1743	} else {
1744		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1745			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1746				continue;
1747
1748			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1749				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1750				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1751				/* Do we need a delay in this case? */
1752				nd6_dad_start((struct ifaddr *)ifa, 0);
1753			}
1754		}
1755	}
1756	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
1757	ND6_RUNLOCK();
1758	ND6_ONLINK_UNLOCK();
1759}
1760
1761static int
1762nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
1763{
1764	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1765	struct rib_head *rnh;
1766	struct rtentry *rt;
1767	struct sockaddr_in6 mask6;
1768	u_long rtflags;
1769	int error, a_failure, fibnum, maxfib;
1770
1771	/*
1772	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
1773	 * ifa->ifa_rtrequest = nd6_rtrequest;
1774	 */
1775	bzero(&mask6, sizeof(mask6));
1776	mask6.sin6_len = sizeof(mask6);
1777	mask6.sin6_addr = pr->ndpr_mask;
1778	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
1779
1780	if(V_rt_add_addr_allfibs) {
1781		fibnum = 0;
1782		maxfib = rt_numfibs;
1783	} else {
1784		fibnum = ifa->ifa_ifp->if_fib;
1785		maxfib = fibnum + 1;
1786	}
1787	a_failure = 0;
1788	for (; fibnum < maxfib; fibnum++) {
1789
1790		rt = NULL;
1791		error = in6_rtrequest(RTM_ADD,
1792		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
1793		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
1794		if (error == 0) {
1795			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
1796			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
1797			    error, pr, ifa));
1798
1799			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
1800			/* XXX what if rhn == NULL? */
1801			RIB_WLOCK(rnh);
1802			RT_LOCK(rt);
1803			if (rt_setgate(rt, rt_key(rt),
1804			    (struct sockaddr *)&null_sdl) == 0) {
1805				struct sockaddr_dl *dl;
1806
1807				dl = (struct sockaddr_dl *)rt->rt_gateway;
1808				dl->sdl_type = rt->rt_ifp->if_type;
1809				dl->sdl_index = rt->rt_ifp->if_index;
1810			}
1811			RIB_WUNLOCK(rnh);
1812			nd6_rtmsg(RTM_ADD, rt);
1813			RT_UNLOCK(rt);
1814			pr->ndpr_stateflags |= NDPRF_ONLINK;
1815		} else {
1816			char ip6buf[INET6_ADDRSTRLEN];
1817			char ip6bufg[INET6_ADDRSTRLEN];
1818			char ip6bufm[INET6_ADDRSTRLEN];
1819			struct sockaddr_in6 *sin6;
1820
1821			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
1822			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
1823			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
1824			    "flags=%lx errno = %d\n",
1825			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1826			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
1827			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
1828			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
1829			    rtflags, error));
1830
1831			/* Save last error to return, see rtinit(). */
1832			a_failure = error;
1833		}
1834
1835		if (rt != NULL) {
1836			RT_LOCK(rt);
1837			RT_REMREF(rt);
1838			RT_UNLOCK(rt);
1839		}
1840	}
1841
1842	/* Return the last error we got. */
1843	return (a_failure);
1844}
1845
1846int
1847nd6_prefix_onlink(struct nd_prefix *pr)
1848{
1849	struct ifaddr *ifa;
1850	struct ifnet *ifp = pr->ndpr_ifp;
1851	struct nd_prefix *opr;
1852	char ip6buf[INET6_ADDRSTRLEN];
1853	int error;
1854
1855	ND6_ONLINK_LOCK_ASSERT();
1856	ND6_UNLOCK_ASSERT();
1857
1858	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0)
1859		return (EEXIST);
1860
1861	/*
1862	 * Add the interface route associated with the prefix.  Before
1863	 * installing the route, check if there's the same prefix on another
1864	 * interface, and the prefix has already installed the interface route.
1865	 * Although such a configuration is expected to be rare, we explicitly
1866	 * allow it.
1867	 */
1868	ND6_RLOCK();
1869	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1870		if (opr == pr)
1871			continue;
1872
1873		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1874			continue;
1875
1876		if (!V_rt_add_addr_allfibs &&
1877		    opr->ndpr_ifp->if_fib != pr->ndpr_ifp->if_fib)
1878			continue;
1879
1880		if (opr->ndpr_plen == pr->ndpr_plen &&
1881		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1882		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
1883			ND6_RUNLOCK();
1884			return (0);
1885		}
1886	}
1887	ND6_RUNLOCK();
1888
1889	/*
1890	 * We prefer link-local addresses as the associated interface address.
1891	 */
1892	/* search for a link-local addr */
1893	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
1894	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
1895	if (ifa == NULL) {
1896		/* XXX: freebsd does not have ifa_ifwithaf */
1897		IF_ADDR_RLOCK(ifp);
1898		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1899			if (ifa->ifa_addr->sa_family == AF_INET6) {
1900				ifa_ref(ifa);
1901				break;
1902			}
1903		}
1904		IF_ADDR_RUNLOCK(ifp);
1905		/* should we care about ia6_flags? */
1906	}
1907	if (ifa == NULL) {
1908		/*
1909		 * This can still happen, when, for example, we receive an RA
1910		 * containing a prefix with the L bit set and the A bit clear,
1911		 * after removing all IPv6 addresses on the receiving
1912		 * interface.  This should, of course, be rare though.
1913		 */
1914		nd6log((LOG_NOTICE,
1915		    "nd6_prefix_onlink: failed to find any ifaddr"
1916		    " to add route for a prefix(%s/%d) on %s\n",
1917		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1918		    pr->ndpr_plen, if_name(ifp)));
1919		return (0);
1920	}
1921
1922	error = nd6_prefix_onlink_rtrequest(pr, ifa);
1923
1924	if (ifa != NULL)
1925		ifa_free(ifa);
1926
1927	return (error);
1928}
1929
1930int
1931nd6_prefix_offlink(struct nd_prefix *pr)
1932{
1933	int error = 0;
1934	struct ifnet *ifp = pr->ndpr_ifp;
1935	struct nd_prefix *opr;
1936	struct sockaddr_in6 sa6, mask6;
1937	struct rtentry *rt;
1938	char ip6buf[INET6_ADDRSTRLEN];
1939	uint64_t genid;
1940	int fibnum, maxfib, a_failure;
1941
1942	ND6_ONLINK_LOCK_ASSERT();
1943	ND6_UNLOCK_ASSERT();
1944
1945	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1946		return (EEXIST);
1947
1948	bzero(&sa6, sizeof(sa6));
1949	sa6.sin6_family = AF_INET6;
1950	sa6.sin6_len = sizeof(sa6);
1951	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
1952	    sizeof(struct in6_addr));
1953	bzero(&mask6, sizeof(mask6));
1954	mask6.sin6_family = AF_INET6;
1955	mask6.sin6_len = sizeof(sa6);
1956	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
1957
1958	if (V_rt_add_addr_allfibs) {
1959		fibnum = 0;
1960		maxfib = rt_numfibs;
1961	} else {
1962		fibnum = ifp->if_fib;
1963		maxfib = fibnum + 1;
1964	}
1965
1966	a_failure = 0;
1967	for (; fibnum < maxfib; fibnum++) {
1968		rt = NULL;
1969		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
1970		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
1971		if (error == 0) {
1972			/* report the route deletion to the routing socket. */
1973			if (rt != NULL)
1974				nd6_rtmsg(RTM_DELETE, rt);
1975		} else {
1976			/* Save last error to return, see rtinit(). */
1977			a_failure = error;
1978		}
1979		if (rt != NULL) {
1980			RTFREE(rt);
1981		}
1982	}
1983	error = a_failure;
1984	a_failure = 1;
1985	if (error == 0) {
1986		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
1987
1988		/*
1989		 * There might be the same prefix on another interface,
1990		 * the prefix which could not be on-link just because we have
1991		 * the interface route (see comments in nd6_prefix_onlink).
1992		 * If there's one, try to make the prefix on-link on the
1993		 * interface.
1994		 */
1995		ND6_RLOCK();
1996restart:
1997		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1998			/*
1999			 * KAME specific: detached prefixes should not be
2000			 * on-link.
2001			 */
2002			if (opr == pr || (opr->ndpr_stateflags &
2003			    (NDPRF_ONLINK | NDPRF_DETACHED)) != 0)
2004				continue;
2005
2006			if (opr->ndpr_plen == pr->ndpr_plen &&
2007			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
2008			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
2009				int e;
2010
2011				genid = V_nd6_list_genid;
2012				ND6_RUNLOCK();
2013				if ((e = nd6_prefix_onlink(opr)) != 0) {
2014					nd6log((LOG_ERR,
2015					    "nd6_prefix_offlink: failed to "
2016					    "recover a prefix %s/%d from %s "
2017					    "to %s (errno = %d)\n",
2018					    ip6_sprintf(ip6buf,
2019						&opr->ndpr_prefix.sin6_addr),
2020					    opr->ndpr_plen, if_name(ifp),
2021					    if_name(opr->ndpr_ifp), e));
2022				} else
2023					a_failure = 0;
2024				ND6_RLOCK();
2025				if (genid != V_nd6_list_genid)
2026					goto restart;
2027			}
2028		}
2029		ND6_RUNLOCK();
2030	} else {
2031		/* XXX: can we still set the NDPRF_ONLINK flag? */
2032		nd6log((LOG_ERR,
2033		    "nd6_prefix_offlink: failed to delete route: "
2034		    "%s/%d on %s (errno = %d)\n",
2035		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
2036		    if_name(ifp), error));
2037	}
2038
2039	if (a_failure)
2040		lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
2041		    (struct sockaddr *)&mask6, LLE_STATIC);
2042
2043	return (error);
2044}
2045
2046static struct in6_ifaddr *
2047in6_ifadd(struct nd_prefixctl *pr, int mcast)
2048{
2049	struct ifnet *ifp = pr->ndpr_ifp;
2050	struct ifaddr *ifa;
2051	struct in6_aliasreq ifra;
2052	struct in6_ifaddr *ia, *ib;
2053	int error, plen0;
2054	struct in6_addr mask;
2055	int prefixlen = pr->ndpr_plen;
2056	int updateflags;
2057	char ip6buf[INET6_ADDRSTRLEN];
2058
2059	in6_prefixlen2mask(&mask, prefixlen);
2060
2061	/*
2062	 * find a link-local address (will be interface ID).
2063	 * Is it really mandatory? Theoretically, a global or a site-local
2064	 * address can be configured without a link-local address, if we
2065	 * have a unique interface identifier...
2066	 *
2067	 * it is not mandatory to have a link-local address, we can generate
2068	 * interface identifier on the fly.  we do this because:
2069	 * (1) it should be the easiest way to find interface identifier.
2070	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
2071	 * for multiple addresses on a single interface, and possible shortcut
2072	 * of DAD.  we omitted DAD for this reason in the past.
2073	 * (3) a user can prevent autoconfiguration of global address
2074	 * by removing link-local address by hand (this is partly because we
2075	 * don't have other way to control the use of IPv6 on an interface.
2076	 * this has been our design choice - cf. NRL's "ifconfig auto").
2077	 * (4) it is easier to manage when an interface has addresses
2078	 * with the same interface identifier, than to have multiple addresses
2079	 * with different interface identifiers.
2080	 */
2081	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
2082	if (ifa)
2083		ib = (struct in6_ifaddr *)ifa;
2084	else
2085		return NULL;
2086
2087	/* prefixlen + ifidlen must be equal to 128 */
2088	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
2089	if (prefixlen != plen0) {
2090		ifa_free(ifa);
2091		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
2092		    "(prefix=%d ifid=%d)\n",
2093		    if_name(ifp), prefixlen, 128 - plen0));
2094		return NULL;
2095	}
2096
2097	/* make ifaddr */
2098	in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
2099
2100	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
2101	/* interface ID */
2102	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
2103	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
2104	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
2105	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
2106	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
2107	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
2108	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
2109	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
2110	ifa_free(ifa);
2111
2112	/* lifetimes. */
2113	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
2114	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
2115
2116	/* XXX: scope zone ID? */
2117
2118	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
2119
2120	/*
2121	 * Make sure that we do not have this address already.  This should
2122	 * usually not happen, but we can still see this case, e.g., if we
2123	 * have manually configured the exact address to be configured.
2124	 */
2125	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
2126	    &ifra.ifra_addr.sin6_addr);
2127	if (ifa != NULL) {
2128		ifa_free(ifa);
2129		/* this should be rare enough to make an explicit log */
2130		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
2131		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
2132		return (NULL);
2133	}
2134
2135	/*
2136	 * Allocate ifaddr structure, link into chain, etc.
2137	 * If we are going to create a new address upon receiving a multicasted
2138	 * RA, we need to impose a random delay before starting DAD.
2139	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
2140	 */
2141	updateflags = 0;
2142	if (mcast)
2143		updateflags |= IN6_IFAUPDATE_DADDELAY;
2144	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
2145		nd6log((LOG_ERR,
2146		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
2147		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
2148		    if_name(ifp), error));
2149		return (NULL);	/* ifaddr must not have been allocated. */
2150	}
2151
2152	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2153	/*
2154	 * XXXRW: Assumption of non-NULLness here might not be true with
2155	 * fine-grained locking -- should we validate it?  Or just return
2156	 * earlier ifa rather than looking it up again?
2157	 */
2158	return (ia);		/* this is always non-NULL  and referenced. */
2159}
2160
2161/*
2162 * ia0 - corresponding public address
2163 */
2164int
2165in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
2166{
2167	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
2168	struct in6_ifaddr *newia;
2169	struct in6_aliasreq ifra;
2170	int error;
2171	int trylimit = 3;	/* XXX: adhoc value */
2172	int updateflags;
2173	u_int32_t randid[2];
2174	time_t vltime0, pltime0;
2175
2176	in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr,
2177	    &ia0->ia_prefixmask.sin6_addr);
2178
2179	ifra.ifra_addr = ia0->ia_addr;	/* XXX: do we need this ? */
2180	/* clear the old IFID */
2181	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr,
2182	    &ifra.ifra_prefixmask.sin6_addr);
2183
2184  again:
2185	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
2186	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
2187		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
2188		    "random IFID\n"));
2189		return (EINVAL);
2190	}
2191	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
2192	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
2193	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
2194	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
2195
2196	/*
2197	 * in6_get_tmpifid() quite likely provided a unique interface ID.
2198	 * However, we may still have a chance to see collision, because
2199	 * there may be a time lag between generation of the ID and generation
2200	 * of the address.  So, we'll do one more sanity check.
2201	 */
2202
2203	if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) {
2204		if (trylimit-- > 0) {
2205			forcegen = 1;
2206			goto again;
2207		}
2208
2209		/* Give up.  Something strange should have happened.  */
2210		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
2211		    "find a unique random IFID\n"));
2212		return (EEXIST);
2213	}
2214
2215	/*
2216	 * The Valid Lifetime is the lower of the Valid Lifetime of the
2217         * public address or TEMP_VALID_LIFETIME.
2218	 * The Preferred Lifetime is the lower of the Preferred Lifetime
2219         * of the public address or TEMP_PREFERRED_LIFETIME -
2220         * DESYNC_FACTOR.
2221	 */
2222	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
2223		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
2224		    (ia0->ia6_lifetime.ia6t_vltime -
2225		    (time_uptime - ia0->ia6_updatetime));
2226		if (vltime0 > V_ip6_temp_valid_lifetime)
2227			vltime0 = V_ip6_temp_valid_lifetime;
2228	} else
2229		vltime0 = V_ip6_temp_valid_lifetime;
2230	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
2231		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
2232		    (ia0->ia6_lifetime.ia6t_pltime -
2233		    (time_uptime - ia0->ia6_updatetime));
2234		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
2235			pltime0 = V_ip6_temp_preferred_lifetime -
2236			    V_ip6_desync_factor;
2237		}
2238	} else
2239		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
2240	ifra.ifra_lifetime.ia6t_vltime = vltime0;
2241	ifra.ifra_lifetime.ia6t_pltime = pltime0;
2242
2243	/*
2244	 * A temporary address is created only if this calculated Preferred
2245	 * Lifetime is greater than REGEN_ADVANCE time units.
2246	 */
2247	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
2248		return (0);
2249
2250	/* XXX: scope zone ID? */
2251
2252	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
2253
2254	/* allocate ifaddr structure, link into chain, etc. */
2255	updateflags = 0;
2256	if (delay)
2257		updateflags |= IN6_IFAUPDATE_DADDELAY;
2258	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
2259		return (error);
2260
2261	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2262	if (newia == NULL) {	/* XXX: can it happen? */
2263		nd6log((LOG_ERR,
2264		    "in6_tmpifadd: ifa update succeeded, but we got "
2265		    "no ifaddr\n"));
2266		return (EINVAL); /* XXX */
2267	}
2268	newia->ia6_ndpr = ia0->ia6_ndpr;
2269	newia->ia6_ndpr->ndpr_addrcnt++;
2270	ifa_free(&newia->ia_ifa);
2271
2272	/*
2273	 * A newly added address might affect the status of other addresses.
2274	 * XXX: when the temporary address is generated with a new public
2275	 * address, the onlink check is redundant.  However, it would be safe
2276	 * to do the check explicitly everywhere a new address is generated,
2277	 * and, in fact, we surely need the check when we create a new
2278	 * temporary address due to deprecation of an old temporary address.
2279	 */
2280	pfxlist_onlink_check();
2281
2282	return (0);
2283}
2284
2285static int
2286in6_init_prefix_ltimes(struct nd_prefix *ndpr)
2287{
2288	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
2289		ndpr->ndpr_preferred = 0;
2290	else
2291		ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
2292	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2293		ndpr->ndpr_expire = 0;
2294	else
2295		ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
2296
2297	return 0;
2298}
2299
2300static void
2301in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
2302{
2303	/* init ia6t_expire */
2304	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
2305		lt6->ia6t_expire = 0;
2306	else {
2307		lt6->ia6t_expire = time_uptime;
2308		lt6->ia6t_expire += lt6->ia6t_vltime;
2309	}
2310
2311	/* init ia6t_preferred */
2312	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
2313		lt6->ia6t_preferred = 0;
2314	else {
2315		lt6->ia6t_preferred = time_uptime;
2316		lt6->ia6t_preferred += lt6->ia6t_pltime;
2317	}
2318}
2319
2320/*
2321 * Delete all the routing table entries that use the specified gateway.
2322 * XXX: this function causes search through all entries of routing table, so
2323 * it shouldn't be called when acting as a router.
2324 */
2325void
2326rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
2327{
2328
2329	/* We'll care only link-local addresses */
2330	if (!IN6_IS_ADDR_LINKLOCAL(gateway))
2331		return;
2332
2333	/* XXX Do we really need to walk any but the default FIB? */
2334	rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
2335}
2336
2337static int
2338rt6_deleteroute(const struct rtentry *rt, void *arg)
2339{
2340#define SIN6(s)	((struct sockaddr_in6 *)s)
2341	struct in6_addr *gate = (struct in6_addr *)arg;
2342
2343	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
2344		return (0);
2345
2346	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
2347		return (0);
2348	}
2349
2350	/*
2351	 * Do not delete a static route.
2352	 * XXX: this seems to be a bit ad-hoc. Should we consider the
2353	 * 'cloned' bit instead?
2354	 */
2355	if ((rt->rt_flags & RTF_STATIC) != 0)
2356		return (0);
2357
2358	/*
2359	 * We delete only host route. This means, in particular, we don't
2360	 * delete default route.
2361	 */
2362	if ((rt->rt_flags & RTF_HOST) == 0)
2363		return (0);
2364
2365	return (1);
2366#undef SIN6
2367}
2368
2369int
2370nd6_setdefaultiface(int ifindex)
2371{
2372	int error = 0;
2373
2374	if (ifindex < 0 || V_if_index < ifindex)
2375		return (EINVAL);
2376	if (ifindex != 0 && !ifnet_byindex(ifindex))
2377		return (EINVAL);
2378
2379	if (V_nd6_defifindex != ifindex) {
2380		V_nd6_defifindex = ifindex;
2381		if (V_nd6_defifindex > 0)
2382			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
2383		else
2384			V_nd6_defifp = NULL;
2385
2386		/*
2387		 * Our current implementation assumes one-to-one maping between
2388		 * interfaces and links, so it would be natural to use the
2389		 * default interface as the default link.
2390		 */
2391		scope6_setdefault(V_nd6_defifp);
2392	}
2393
2394	return (error);
2395}
2396