nd6_rtr.c revision 207369
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/netinet6/nd6_rtr.c 207369 2010-04-29 11:52:42Z bz $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/socket.h>
43#include <sys/sockio.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/errno.h>
48#include <sys/rwlock.h>
49#include <sys/syslog.h>
50#include <sys/queue.h>
51
52#include <net/if.h>
53#include <net/if_types.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56#include <net/radix.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <net/if_llatbl.h>
61#include <netinet6/in6_var.h>
62#include <netinet6/in6_ifattach.h>
63#include <netinet/ip6.h>
64#include <netinet6/ip6_var.h>
65#include <netinet6/nd6.h>
66#include <netinet/icmp6.h>
67#include <netinet6/scope6_var.h>
68
/*
 * Forward declarations of the file-local helpers.
 */

/* default router list */
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);

/* prefix list and per-prefix advertising routers */
static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
    struct mbuf *, int);
static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
    struct nd_defrouter *);
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *);

/* addresses and lifetimes */
static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
static int in6_init_prefix_ltimes(struct nd_prefix *);
static void in6_init_address_ltimes(struct nd_prefix *,
    struct in6_addrlifetime *);

/* routing table walker callback */
static int rt6_deleteroute(struct radix_node *, void *);
88
89VNET_DECLARE(int, nd6_recalc_reachtm_interval);
90#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
91
92static VNET_DEFINE(struct ifnet *, nd6_defifp);
93VNET_DEFINE(int, nd6_defifindex);
94#define	V_nd6_defifp			VNET(nd6_defifp)
95
96VNET_DEFINE(int, ip6_use_tempaddr) = 0;
97
98VNET_DEFINE(int, ip6_desync_factor);
99VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
100VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
101
102VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
103
104/* RTPREF_MEDIUM has to be 0! */
105#define RTPREF_HIGH	1
106#define RTPREF_MEDIUM	0
107#define RTPREF_LOW	(-1)
108#define RTPREF_RESERVED	(-2)
109#define RTPREF_INVALID	(-3)	/* internal */
110
111/*
112 * Receive Router Solicitation Message - just for routers.
113 * Router solicitation/advertisement is mostly managed by userland program
114 * (rtadvd) so here we have no function like nd6_ra_output().
115 *
116 * Based on RFC 2461
117 */
118void
119nd6_rs_input(struct mbuf *m, int off, int icmp6len)
120{
121	struct ifnet *ifp = m->m_pkthdr.rcvif;
122	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
123	struct nd_router_solicit *nd_rs;
124	struct in6_addr saddr6 = ip6->ip6_src;
125	char *lladdr = NULL;
126	int lladdrlen = 0;
127	union nd_opts ndopts;
128	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
129
130	/* If I'm not a router, ignore it. */
131	if (!V_ip6_forwarding)
132		goto freeit;
133
134	/* Sanity checks */
135	if (ip6->ip6_hlim != 255) {
136		nd6log((LOG_ERR,
137		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
138		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
139		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
140		goto bad;
141	}
142
143	/*
144	 * Don't update the neighbor cache, if src = ::.
145	 * This indicates that the src has no IP address assigned yet.
146	 */
147	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
148		goto freeit;
149
150#ifndef PULLDOWN_TEST
151	IP6_EXTHDR_CHECK(m, off, icmp6len,);
152	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
153#else
154	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
155	if (nd_rs == NULL) {
156		ICMP6STAT_INC(icp6s_tooshort);
157		return;
158	}
159#endif
160
161	icmp6len -= sizeof(*nd_rs);
162	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
163	if (nd6_options(&ndopts) < 0) {
164		nd6log((LOG_INFO,
165		    "nd6_rs_input: invalid ND option, ignored\n"));
166		/* nd6_options have incremented stats */
167		goto freeit;
168	}
169
170	if (ndopts.nd_opts_src_lladdr) {
171		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
172		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
173	}
174
175	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
176		nd6log((LOG_INFO,
177		    "nd6_rs_input: lladdrlen mismatch for %s "
178		    "(if %d, RS packet %d)\n",
179		    ip6_sprintf(ip6bufs, &saddr6),
180		    ifp->if_addrlen, lladdrlen - 2));
181		goto bad;
182	}
183
184	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
185
186 freeit:
187	m_freem(m);
188	return;
189
190 bad:
191	ICMP6STAT_INC(icp6s_badrs);
192	m_freem(m);
193}
194
195/*
196 * Receive Router Advertisement Message.
197 *
198 * Based on RFC 2461
199 * TODO: on-link bit on prefix information
200 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
201 */
202void
203nd6_ra_input(struct mbuf *m, int off, int icmp6len)
204{
205	struct ifnet *ifp = m->m_pkthdr.rcvif;
206	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
207	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
208	struct nd_router_advert *nd_ra;
209	struct in6_addr saddr6 = ip6->ip6_src;
210	int mcast = 0;
211	union nd_opts ndopts;
212	struct nd_defrouter *dr;
213	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
214
215	/*
216	 * We only accept RAs only when
217	 * the node is not a router and
218	 * per-interface variable allows RAs on the receiving interface.
219	 */
220	if (V_ip6_forwarding || !(ndi->flags & ND6_IFF_ACCEPT_RTADV))
221		goto freeit;
222
223	if (ip6->ip6_hlim != 255) {
224		nd6log((LOG_ERR,
225		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
226		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
227		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
228		goto bad;
229	}
230
231	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
232		nd6log((LOG_ERR,
233		    "nd6_ra_input: src %s is not link-local\n",
234		    ip6_sprintf(ip6bufs, &saddr6)));
235		goto bad;
236	}
237
238#ifndef PULLDOWN_TEST
239	IP6_EXTHDR_CHECK(m, off, icmp6len,);
240	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
241#else
242	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
243	if (nd_ra == NULL) {
244		ICMP6STAT_INC(icp6s_tooshort);
245		return;
246	}
247#endif
248
249	icmp6len -= sizeof(*nd_ra);
250	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
251	if (nd6_options(&ndopts) < 0) {
252		nd6log((LOG_INFO,
253		    "nd6_ra_input: invalid ND option, ignored\n"));
254		/* nd6_options have incremented stats */
255		goto freeit;
256	}
257
258    {
259	struct nd_defrouter dr0;
260	u_int32_t advreachable = nd_ra->nd_ra_reachable;
261
262	/* remember if this is a multicasted advertisement */
263	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
264		mcast = 1;
265
266	bzero(&dr0, sizeof(dr0));
267	dr0.rtaddr = saddr6;
268	dr0.flags  = nd_ra->nd_ra_flags_reserved;
269	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
270	dr0.expire = time_second + dr0.rtlifetime;
271	dr0.ifp = ifp;
272	/* unspecified or not? (RFC 2461 6.3.4) */
273	if (advreachable) {
274		advreachable = ntohl(advreachable);
275		if (advreachable <= MAX_REACHABLE_TIME &&
276		    ndi->basereachable != advreachable) {
277			ndi->basereachable = advreachable;
278			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
279			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
280		}
281	}
282	if (nd_ra->nd_ra_retransmit)
283		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
284	if (nd_ra->nd_ra_curhoplimit)
285		ndi->chlim = nd_ra->nd_ra_curhoplimit;
286	dr = defrtrlist_update(&dr0);
287    }
288
289	/*
290	 * prefix
291	 */
292	if (ndopts.nd_opts_pi) {
293		struct nd_opt_hdr *pt;
294		struct nd_opt_prefix_info *pi = NULL;
295		struct nd_prefixctl pr;
296
297		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
298		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
299		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
300						(pt->nd_opt_len << 3))) {
301			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
302				continue;
303			pi = (struct nd_opt_prefix_info *)pt;
304
305			if (pi->nd_opt_pi_len != 4) {
306				nd6log((LOG_INFO,
307				    "nd6_ra_input: invalid option "
308				    "len %d for prefix information option, "
309				    "ignored\n", pi->nd_opt_pi_len));
310				continue;
311			}
312
313			if (128 < pi->nd_opt_pi_prefix_len) {
314				nd6log((LOG_INFO,
315				    "nd6_ra_input: invalid prefix "
316				    "len %d for prefix information option, "
317				    "ignored\n", pi->nd_opt_pi_prefix_len));
318				continue;
319			}
320
321			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
322			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
323				nd6log((LOG_INFO,
324				    "nd6_ra_input: invalid prefix "
325				    "%s, ignored\n",
326				    ip6_sprintf(ip6bufs,
327					&pi->nd_opt_pi_prefix)));
328				continue;
329			}
330
331			bzero(&pr, sizeof(pr));
332			pr.ndpr_prefix.sin6_family = AF_INET6;
333			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
334			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
335			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
336
337			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
338			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
339			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
340			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
341			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
342			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
343			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
344			(void)prelist_update(&pr, dr, m, mcast);
345		}
346	}
347
348	/*
349	 * MTU
350	 */
351	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
352		u_long mtu;
353		u_long maxmtu;
354
355		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
356
357		/* lower bound */
358		if (mtu < IPV6_MMTU) {
359			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
360			    "mtu=%lu sent from %s, ignoring\n",
361			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
362			goto skip;
363		}
364
365		/* upper bound */
366		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
367		    ? ndi->maxmtu : ifp->if_mtu;
368		if (mtu <= maxmtu) {
369			int change = (ndi->linkmtu != mtu);
370
371			ndi->linkmtu = mtu;
372			if (change) /* in6_maxmtu may change */
373				in6_setmaxmtu();
374		} else {
375			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
376			    "mtu=%lu sent from %s; "
377			    "exceeds maxmtu %lu, ignoring\n",
378			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
379		}
380	}
381
382 skip:
383
384	/*
385	 * Source link layer address
386	 */
387    {
388	char *lladdr = NULL;
389	int lladdrlen = 0;
390
391	if (ndopts.nd_opts_src_lladdr) {
392		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
393		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
394	}
395
396	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
397		nd6log((LOG_INFO,
398		    "nd6_ra_input: lladdrlen mismatch for %s "
399		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
400		    ifp->if_addrlen, lladdrlen - 2));
401		goto bad;
402	}
403
404	nd6_cache_lladdr(ifp, &saddr6, lladdr,
405	    lladdrlen, ND_ROUTER_ADVERT, 0);
406
407	/*
408	 * Installing a link-layer address might change the state of the
409	 * router's neighbor cache, which might also affect our on-link
410	 * detection of adveritsed prefixes.
411	 */
412	pfxlist_onlink_check();
413    }
414
415 freeit:
416	m_freem(m);
417	return;
418
419 bad:
420	ICMP6STAT_INC(icp6s_badra);
421	m_freem(m);
422}
423
/*
 * Default router list processing subroutines.
 */
427
428/* tell the change to user processes watching the routing socket. */
429static void
430nd6_rtmsg(int cmd, struct rtentry *rt)
431{
432	struct rt_addrinfo info;
433	struct ifnet *ifp;
434	struct ifaddr *ifa;
435
436	bzero((caddr_t)&info, sizeof(info));
437	info.rti_info[RTAX_DST] = rt_key(rt);
438	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
439	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
440	ifp = rt->rt_ifp;
441	if (ifp != NULL) {
442		IF_ADDR_LOCK(ifp);
443		ifa = TAILQ_FIRST(&ifp->if_addrhead);
444		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
445		ifa_ref(ifa);
446		IF_ADDR_UNLOCK(ifp);
447		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
448	} else
449		ifa = NULL;
450
451	rt_missmsg(cmd, &info, rt->rt_flags, 0);
452	if (ifa != NULL)
453		ifa_free(ifa);
454}
455
456void
457defrouter_addreq(struct nd_defrouter *new)
458{
459	struct sockaddr_in6 def, mask, gate;
460	struct rtentry *newrt = NULL;
461	int s;
462	int error;
463
464	bzero(&def, sizeof(def));
465	bzero(&mask, sizeof(mask));
466	bzero(&gate, sizeof(gate));
467
468	def.sin6_len = mask.sin6_len = gate.sin6_len =
469	    sizeof(struct sockaddr_in6);
470	def.sin6_family = gate.sin6_family = AF_INET6;
471	gate.sin6_addr = new->rtaddr;
472
473	s = splnet();
474	error = rtrequest(RTM_ADD, (struct sockaddr *)&def,
475	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
476	    RTF_GATEWAY, &newrt);
477	if (newrt) {
478		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
479		RTFREE(newrt);
480	}
481	if (error == 0)
482		new->installed = 1;
483	splx(s);
484	return;
485}
486
487struct nd_defrouter *
488defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
489{
490	struct nd_defrouter *dr;
491
492	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
493	     dr = TAILQ_NEXT(dr, dr_entry)) {
494		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
495			return (dr);
496	}
497
498	return (NULL);		/* search failed */
499}
500
501/*
502 * Remove the default route for a given router.
503 * This is just a subroutine function for defrouter_select(), and should
504 * not be called from anywhere else.
505 */
506static void
507defrouter_delreq(struct nd_defrouter *dr)
508{
509	struct sockaddr_in6 def, mask, gate;
510	struct rtentry *oldrt = NULL;
511
512	bzero(&def, sizeof(def));
513	bzero(&mask, sizeof(mask));
514	bzero(&gate, sizeof(gate));
515
516	def.sin6_len = mask.sin6_len = gate.sin6_len =
517	    sizeof(struct sockaddr_in6);
518	def.sin6_family = gate.sin6_family = AF_INET6;
519	gate.sin6_addr = dr->rtaddr;
520
521	rtrequest(RTM_DELETE, (struct sockaddr *)&def,
522	    (struct sockaddr *)&gate,
523	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt);
524	if (oldrt) {
525		nd6_rtmsg(RTM_DELETE, oldrt);
526		RTFREE(oldrt);
527	}
528
529	dr->installed = 0;
530}
531
532/*
533 * remove all default routes from default router list
534 */
535void
536defrouter_reset(void)
537{
538	struct nd_defrouter *dr;
539
540	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
541	     dr = TAILQ_NEXT(dr, dr_entry))
542		defrouter_delreq(dr);
543
544	/*
545	 * XXX should we also nuke any default routers in the kernel, by
546	 * going through them by rtalloc1()?
547	 */
548}
549
550void
551defrtrlist_del(struct nd_defrouter *dr)
552{
553	struct nd_defrouter *deldr = NULL;
554	struct nd_prefix *pr;
555
556	/*
557	 * Flush all the routing table entries that use the router
558	 * as a next hop.
559	 */
560	if (!V_ip6_forwarding)
561		rt6_flush(&dr->rtaddr, dr->ifp);
562
563	if (dr->installed) {
564		deldr = dr;
565		defrouter_delreq(dr);
566	}
567	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
568
569	/*
570	 * Also delete all the pointers to the router in each prefix lists.
571	 */
572	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
573		struct nd_pfxrouter *pfxrtr;
574		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
575			pfxrtr_del(pfxrtr);
576	}
577	pfxlist_onlink_check();
578
579	/*
580	 * If the router is the primary one, choose a new one.
581	 * Note that defrouter_select() will remove the current gateway
582	 * from the routing table.
583	 */
584	if (deldr)
585		defrouter_select();
586
587	free(dr, M_IP6NDP);
588}
589
590/*
591 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
592 * draft-ietf-ipngwg-router-selection:
593 * 1) Routers that are reachable or probably reachable should be preferred.
594 *    If we have more than one (probably) reachable router, prefer ones
595 *    with the highest router preference.
596 * 2) When no routers on the list are known to be reachable or
597 *    probably reachable, routers SHOULD be selected in a round-robin
598 *    fashion, regardless of router preference values.
599 * 3) If the Default Router List is empty, assume that all
600 *    destinations are on-link.
601 *
602 * We assume nd_defrouter is sorted by router preference value.
603 * Since the code below covers both with and without router preference cases,
604 * we do not need to classify the cases by ifdef.
605 *
606 * At this moment, we do not try to install more than one default router,
607 * even when the multipath routing is available, because we're not sure about
608 * the benefits for stub hosts comparing to the risk of making the code
609 * complicated and the possibility of introducing bugs.
610 */
611void
612defrouter_select(void)
613{
614	int s = splnet();
615	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
616	struct llentry *ln = NULL;
617
618	/*
619	 * This function should be called only when acting as an autoconfigured
620	 * host.  Although the remaining part of this function is not effective
621	 * if the node is not an autoconfigured host, we explicitly exclude
622	 * such cases here for safety.
623	 */
624	if (V_ip6_forwarding) {
625		nd6log((LOG_WARNING,
626		    "defrouter_select: called unexpectedly (forwarding=%d)\n",
627		    V_ip6_forwarding));
628		splx(s);
629		return;
630	}
631
632	/*
633	 * Let's handle easy case (3) first:
634	 * If default router list is empty, there's nothing to be done.
635	 */
636	if (!TAILQ_FIRST(&V_nd_defrouter)) {
637		splx(s);
638		return;
639	}
640
641	/*
642	 * Search for a (probably) reachable router from the list.
643	 * We just pick up the first reachable one (if any), assuming that
644	 * the ordering rule of the list described in defrtrlist_update().
645	 */
646	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
647	     dr = TAILQ_NEXT(dr, dr_entry)) {
648		IF_AFDATA_LOCK(dr->ifp);
649		if (selected_dr == NULL &&
650		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
651		    ND6_IS_LLINFO_PROBREACH(ln)) {
652			selected_dr = dr;
653		}
654		IF_AFDATA_UNLOCK(dr->ifp);
655		if (ln != NULL) {
656			LLE_RUNLOCK(ln);
657			ln = NULL;
658		}
659
660		if (dr->installed && installed_dr == NULL)
661			installed_dr = dr;
662		else if (dr->installed && installed_dr) {
663			/* this should not happen.  warn for diagnosis. */
664			log(LOG_ERR, "defrouter_select: more than one router"
665			    " is installed\n");
666		}
667	}
668	/*
669	 * If none of the default routers was found to be reachable,
670	 * round-robin the list regardless of preference.
671	 * Otherwise, if we have an installed router, check if the selected
672	 * (reachable) router should really be preferred to the installed one.
673	 * We only prefer the new router when the old one is not reachable
674	 * or when the new one has a really higher preference value.
675	 */
676	if (selected_dr == NULL) {
677		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
678			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
679		else
680			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
681	} else if (installed_dr) {
682		IF_AFDATA_LOCK(installed_dr->ifp);
683		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
684		    ND6_IS_LLINFO_PROBREACH(ln) &&
685		    rtpref(selected_dr) <= rtpref(installed_dr)) {
686			selected_dr = installed_dr;
687		}
688		IF_AFDATA_UNLOCK(installed_dr->ifp);
689		if (ln != NULL)
690			LLE_RUNLOCK(ln);
691	}
692
693	/*
694	 * If the selected router is different than the installed one,
695	 * remove the installed router and install the selected one.
696	 * Note that the selected router is never NULL here.
697	 */
698	if (installed_dr != selected_dr) {
699		if (installed_dr)
700			defrouter_delreq(installed_dr);
701		defrouter_addreq(selected_dr);
702	}
703
704	splx(s);
705	return;
706}
707
708/*
709 * for default router selection
710 * regards router-preference field as a 2-bit signed integer
711 */
712static int
713rtpref(struct nd_defrouter *dr)
714{
715	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
716	case ND_RA_FLAG_RTPREF_HIGH:
717		return (RTPREF_HIGH);
718	case ND_RA_FLAG_RTPREF_MEDIUM:
719	case ND_RA_FLAG_RTPREF_RSV:
720		return (RTPREF_MEDIUM);
721	case ND_RA_FLAG_RTPREF_LOW:
722		return (RTPREF_LOW);
723	default:
724		/*
725		 * This case should never happen.  If it did, it would mean a
726		 * serious bug of kernel internal.  We thus always bark here.
727		 * Or, can we even panic?
728		 */
729		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
730		return (RTPREF_INVALID);
731	}
732	/* NOTREACHED */
733}
734
735static struct nd_defrouter *
736defrtrlist_update(struct nd_defrouter *new)
737{
738	struct nd_defrouter *dr, *n;
739	int s = splnet();
740
741	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
742		/* entry exists */
743		if (new->rtlifetime == 0) {
744			defrtrlist_del(dr);
745			dr = NULL;
746		} else {
747			int oldpref = rtpref(dr);
748
749			/* override */
750			dr->flags = new->flags; /* xxx flag check */
751			dr->rtlifetime = new->rtlifetime;
752			dr->expire = new->expire;
753
754			/*
755			 * If the preference does not change, there's no need
756			 * to sort the entries.
757			 */
758			if (rtpref(new) == oldpref) {
759				splx(s);
760				return (dr);
761			}
762
763			/*
764			 * preferred router may be changed, so relocate
765			 * this router.
766			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
767			 * However, since defrtrlist_del() has many side
768			 * effects, we intentionally do so here.
769			 * defrouter_select() below will handle routing
770			 * changes later.
771			 */
772			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
773			n = dr;
774			goto insert;
775		}
776		splx(s);
777		return (dr);
778	}
779
780	/* entry does not exist */
781	if (new->rtlifetime == 0) {
782		splx(s);
783		return (NULL);
784	}
785
786	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
787	if (n == NULL) {
788		splx(s);
789		return (NULL);
790	}
791	bzero(n, sizeof(*n));
792	*n = *new;
793
794insert:
795	/*
796	 * Insert the new router in the Default Router List;
797	 * The Default Router List should be in the descending order
798	 * of router-preferece.  Routers with the same preference are
799	 * sorted in the arriving time order.
800	 */
801
802	/* insert at the end of the group */
803	for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
804	     dr = TAILQ_NEXT(dr, dr_entry)) {
805		if (rtpref(n) > rtpref(dr))
806			break;
807	}
808	if (dr)
809		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
810	else
811		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
812
813	defrouter_select();
814
815	splx(s);
816
817	return (n);
818}
819
820static struct nd_pfxrouter *
821pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
822{
823	struct nd_pfxrouter *search;
824
825	for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
826		if (search->router == dr)
827			break;
828	}
829
830	return (search);
831}
832
833static void
834pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
835{
836	struct nd_pfxrouter *new;
837
838	new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
839	if (new == NULL)
840		return;
841	bzero(new, sizeof(*new));
842	new->router = dr;
843
844	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
845
846	pfxlist_onlink_check();
847}
848
849static void
850pfxrtr_del(struct nd_pfxrouter *pfr)
851{
852	LIST_REMOVE(pfr, pfr_entry);
853	free(pfr, M_IP6NDP);
854}
855
856struct nd_prefix *
857nd6_prefix_lookup(struct nd_prefixctl *key)
858{
859	struct nd_prefix *search;
860
861	for (search = V_nd_prefix.lh_first;
862	    search; search = search->ndpr_next) {
863		if (key->ndpr_ifp == search->ndpr_ifp &&
864		    key->ndpr_plen == search->ndpr_plen &&
865		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
866		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
867			break;
868		}
869	}
870
871	return (search);
872}
873
874int
875nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
876    struct nd_prefix **newp)
877{
878	struct nd_prefix *new = NULL;
879	int error = 0;
880	int i, s;
881	char ip6buf[INET6_ADDRSTRLEN];
882
883	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
884	if (new == NULL)
885		return(ENOMEM);
886	bzero(new, sizeof(*new));
887	new->ndpr_ifp = pr->ndpr_ifp;
888	new->ndpr_prefix = pr->ndpr_prefix;
889	new->ndpr_plen = pr->ndpr_plen;
890	new->ndpr_vltime = pr->ndpr_vltime;
891	new->ndpr_pltime = pr->ndpr_pltime;
892	new->ndpr_flags = pr->ndpr_flags;
893	if ((error = in6_init_prefix_ltimes(new)) != 0) {
894		free(new, M_IP6NDP);
895		return(error);
896	}
897	new->ndpr_lastupdate = time_second;
898	if (newp != NULL)
899		*newp = new;
900
901	/* initialization */
902	LIST_INIT(&new->ndpr_advrtrs);
903	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
904	/* make prefix in the canonical form */
905	for (i = 0; i < 4; i++)
906		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
907		    new->ndpr_mask.s6_addr32[i];
908
909	s = splnet();
910	/* link ndpr_entry to nd_prefix list */
911	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
912	splx(s);
913
914	/* ND_OPT_PI_FLAG_ONLINK processing */
915	if (new->ndpr_raf_onlink) {
916		int e;
917
918		if ((e = nd6_prefix_onlink(new)) != 0) {
919			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
920			    "the prefix %s/%d on-link on %s (errno=%d)\n",
921			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
922			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
923			/* proceed anyway. XXX: is it correct? */
924		}
925	}
926
927	if (dr)
928		pfxrtr_add(new, dr);
929
930	return 0;
931}
932
933void
934prelist_remove(struct nd_prefix *pr)
935{
936	struct nd_pfxrouter *pfr, *next;
937	int e, s;
938	char ip6buf[INET6_ADDRSTRLEN];
939
940	/* make sure to invalidate the prefix until it is really freed. */
941	pr->ndpr_vltime = 0;
942	pr->ndpr_pltime = 0;
943
944	/*
945	 * Though these flags are now meaningless, we'd rather keep the value
946	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
947	 * when executing "ndp -p".
948	 */
949
950	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
951	    (e = nd6_prefix_offlink(pr)) != 0) {
952		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
953		    "on %s, errno=%d\n",
954		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
955		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
956		/* what should we do? */
957	}
958
959	if (pr->ndpr_refcnt > 0)
960		return;		/* notice here? */
961
962	s = splnet();
963
964	/* unlink ndpr_entry from nd_prefix list */
965	LIST_REMOVE(pr, ndpr_entry);
966
967	/* free list of routers that adversed the prefix */
968	for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
969		next = pfr->pfr_next;
970
971		free(pfr, M_IP6NDP);
972	}
973	splx(s);
974
975	free(pr, M_IP6NDP);
976
977	pfxlist_onlink_check();
978}
979
980/*
981 * dr - may be NULL
982 */
983
984static int
985prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
986    struct mbuf *m, int mcast)
987{
988	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
989	struct ifaddr *ifa;
990	struct ifnet *ifp = new->ndpr_ifp;
991	struct nd_prefix *pr;
992	int s = splnet();
993	int error = 0;
994	int newprefix = 0;
995	int auth;
996	struct in6_addrlifetime lt6_tmp;
997	char ip6buf[INET6_ADDRSTRLEN];
998
999	auth = 0;
1000	if (m) {
1001		/*
1002		 * Authenticity for NA consists authentication for
1003		 * both IP header and IP datagrams, doesn't it ?
1004		 */
1005#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
1006		auth = ((m->m_flags & M_AUTHIPHDR) &&
1007		    (m->m_flags & M_AUTHIPDGM));
1008#endif
1009	}
1010
1011	if ((pr = nd6_prefix_lookup(new)) != NULL) {
1012		/*
1013		 * nd6_prefix_lookup() ensures that pr and new have the same
1014		 * prefix on a same interface.
1015		 */
1016
1017		/*
1018		 * Update prefix information.  Note that the on-link (L) bit
1019		 * and the autonomous (A) bit should NOT be changed from 1
1020		 * to 0.
1021		 */
1022		if (new->ndpr_raf_onlink == 1)
1023			pr->ndpr_raf_onlink = 1;
1024		if (new->ndpr_raf_auto == 1)
1025			pr->ndpr_raf_auto = 1;
1026		if (new->ndpr_raf_onlink) {
1027			pr->ndpr_vltime = new->ndpr_vltime;
1028			pr->ndpr_pltime = new->ndpr_pltime;
1029			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
1030			pr->ndpr_lastupdate = time_second;
1031		}
1032
1033		if (new->ndpr_raf_onlink &&
1034		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1035			int e;
1036
1037			if ((e = nd6_prefix_onlink(pr)) != 0) {
1038				nd6log((LOG_ERR,
1039				    "prelist_update: failed to make "
1040				    "the prefix %s/%d on-link on %s "
1041				    "(errno=%d)\n",
1042				    ip6_sprintf(ip6buf,
1043					    &pr->ndpr_prefix.sin6_addr),
1044				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
1045				/* proceed anyway. XXX: is it correct? */
1046			}
1047		}
1048
1049		if (dr && pfxrtr_lookup(pr, dr) == NULL)
1050			pfxrtr_add(pr, dr);
1051	} else {
1052		struct nd_prefix *newpr = NULL;
1053
1054		newprefix = 1;
1055
1056		if (new->ndpr_vltime == 0)
1057			goto end;
1058		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
1059			goto end;
1060
1061		error = nd6_prelist_add(new, dr, &newpr);
1062		if (error != 0 || newpr == NULL) {
1063			nd6log((LOG_NOTICE, "prelist_update: "
1064			    "nd6_prelist_add failed for %s/%d on %s "
1065			    "errno=%d, returnpr=%p\n",
1066			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
1067			    new->ndpr_plen, if_name(new->ndpr_ifp),
1068			    error, newpr));
1069			goto end; /* we should just give up in this case. */
1070		}
1071
1072		/*
1073		 * XXX: from the ND point of view, we can ignore a prefix
1074		 * with the on-link bit being zero.  However, we need a
1075		 * prefix structure for references from autoconfigured
1076		 * addresses.  Thus, we explicitly make sure that the prefix
1077		 * itself expires now.
1078		 */
1079		if (newpr->ndpr_raf_onlink == 0) {
1080			newpr->ndpr_vltime = 0;
1081			newpr->ndpr_pltime = 0;
1082			in6_init_prefix_ltimes(newpr);
1083		}
1084
1085		pr = newpr;
1086	}
1087
1088	/*
1089	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
1090	 * Note that pr must be non NULL at this point.
1091	 */
1092
1093	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
1094	if (!new->ndpr_raf_auto)
1095		goto end;
1096
1097	/*
1098	 * 5.5.3 (b). the link-local prefix should have been ignored in
1099	 * nd6_ra_input.
1100	 */
1101
1102	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
1103	if (new->ndpr_pltime > new->ndpr_vltime) {
1104		error = EINVAL;	/* XXX: won't be used */
1105		goto end;
1106	}
1107
1108	/*
1109	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
1110	 * an address configured by stateless autoconfiguration already in the
1111	 * list of addresses associated with the interface, and the Valid
1112	 * Lifetime is not 0, form an address.  We first check if we have
1113	 * a matching prefix.
1114	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
1115	 * consider autoconfigured addresses while RFC2462 simply said
1116	 * "address".
1117	 */
1118	IF_ADDR_LOCK(ifp);
1119	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1120		struct in6_ifaddr *ifa6;
1121		u_int32_t remaininglifetime;
1122
1123		if (ifa->ifa_addr->sa_family != AF_INET6)
1124			continue;
1125
1126		ifa6 = (struct in6_ifaddr *)ifa;
1127
1128		/*
1129		 * We only consider autoconfigured addresses as per rfc2462bis.
1130		 */
1131		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
1132			continue;
1133
1134		/*
1135		 * Spec is not clear here, but I believe we should concentrate
1136		 * on unicast (i.e. not anycast) addresses.
1137		 * XXX: other ia6_flags? detached or duplicated?
1138		 */
1139		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
1140			continue;
1141
1142		/*
1143		 * Ignore the address if it is not associated with a prefix
1144		 * or is associated with a prefix that is different from this
1145		 * one.  (pr is never NULL here)
1146		 */
1147		if (ifa6->ia6_ndpr != pr)
1148			continue;
1149
1150		if (ia6_match == NULL) /* remember the first one */
1151			ia6_match = ifa6;
1152
1153		/*
1154		 * An already autoconfigured address matched.  Now that we
1155		 * are sure there is at least one matched address, we can
1156		 * proceed to 5.5.3. (e): update the lifetimes according to the
1157		 * "two hours" rule and the privacy extension.
1158		 * We apply some clarifications in rfc2462bis:
1159		 * - use remaininglifetime instead of storedlifetime as a
1160		 *   variable name
1161		 * - remove the dead code in the "two-hour" rule
1162		 */
1163#define TWOHOUR		(120*60)
1164		lt6_tmp = ifa6->ia6_lifetime;
1165
1166		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
1167			remaininglifetime = ND6_INFINITE_LIFETIME;
1168		else if (time_second - ifa6->ia6_updatetime >
1169			 lt6_tmp.ia6t_vltime) {
1170			/*
1171			 * The case of "invalid" address.  We should usually
1172			 * not see this case.
1173			 */
1174			remaininglifetime = 0;
1175		} else
1176			remaininglifetime = lt6_tmp.ia6t_vltime -
1177			    (time_second - ifa6->ia6_updatetime);
1178
1179		/* when not updating, keep the current stored lifetime. */
1180		lt6_tmp.ia6t_vltime = remaininglifetime;
1181
1182		if (TWOHOUR < new->ndpr_vltime ||
1183		    remaininglifetime < new->ndpr_vltime) {
1184			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1185		} else if (remaininglifetime <= TWOHOUR) {
1186			if (auth) {
1187				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1188			}
1189		} else {
1190			/*
1191			 * new->ndpr_vltime <= TWOHOUR &&
1192			 * TWOHOUR < remaininglifetime
1193			 */
1194			lt6_tmp.ia6t_vltime = TWOHOUR;
1195		}
1196
1197		/* The 2 hour rule is not imposed for preferred lifetime. */
1198		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
1199
1200		in6_init_address_ltimes(pr, &lt6_tmp);
1201
1202		/*
1203		 * We need to treat lifetimes for temporary addresses
1204		 * differently, according to
1205		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
1206		 * we only update the lifetimes when they are in the maximum
1207		 * intervals.
1208		 */
1209		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1210			u_int32_t maxvltime, maxpltime;
1211
1212			if (V_ip6_temp_valid_lifetime >
1213			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
1214			    V_ip6_desync_factor)) {
1215				maxvltime = V_ip6_temp_valid_lifetime -
1216				    (time_second - ifa6->ia6_createtime) -
1217				    V_ip6_desync_factor;
1218			} else
1219				maxvltime = 0;
1220			if (V_ip6_temp_preferred_lifetime >
1221			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
1222			    V_ip6_desync_factor)) {
1223				maxpltime = V_ip6_temp_preferred_lifetime -
1224				    (time_second - ifa6->ia6_createtime) -
1225				    V_ip6_desync_factor;
1226			} else
1227				maxpltime = 0;
1228
1229			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
1230			    lt6_tmp.ia6t_vltime > maxvltime) {
1231				lt6_tmp.ia6t_vltime = maxvltime;
1232			}
1233			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
1234			    lt6_tmp.ia6t_pltime > maxpltime) {
1235				lt6_tmp.ia6t_pltime = maxpltime;
1236			}
1237		}
1238		ifa6->ia6_lifetime = lt6_tmp;
1239		ifa6->ia6_updatetime = time_second;
1240	}
1241	IF_ADDR_UNLOCK(ifp);
1242	if (ia6_match == NULL && new->ndpr_vltime) {
1243		int ifidlen;
1244
1245		/*
1246		 * 5.5.3 (d) (continued)
1247		 * No address matched and the valid lifetime is non-zero.
1248		 * Create a new address.
1249		 */
1250
1251		/*
1252		 * Prefix Length check:
1253		 * If the sum of the prefix length and interface identifier
1254		 * length does not equal 128 bits, the Prefix Information
1255		 * option MUST be ignored.  The length of the interface
1256		 * identifier is defined in a separate link-type specific
1257		 * document.
1258		 */
1259		ifidlen = in6_if2idlen(ifp);
1260		if (ifidlen < 0) {
1261			/* this should not happen, so we always log it. */
1262			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
1263			    if_name(ifp));
1264			goto end;
1265		}
1266		if (ifidlen + pr->ndpr_plen != 128) {
1267			nd6log((LOG_INFO,
1268			    "prelist_update: invalid prefixlen "
1269			    "%d for %s, ignored\n",
1270			    pr->ndpr_plen, if_name(ifp)));
1271			goto end;
1272		}
1273
1274		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
1275			/*
1276			 * note that we should use pr (not new) for reference.
1277			 */
1278			pr->ndpr_refcnt++;
1279			ia6->ia6_ndpr = pr;
1280
1281			/*
1282			 * RFC 3041 3.3 (2).
1283			 * When a new public address is created as described
1284			 * in RFC2462, also create a new temporary address.
1285			 *
1286			 * RFC 3041 3.5.
1287			 * When an interface connects to a new link, a new
1288			 * randomized interface identifier should be generated
1289			 * immediately together with a new set of temporary
			 * addresses.  Thus, we specify 1 as the 2nd arg of
1291			 * in6_tmpifadd().
1292			 */
1293			if (V_ip6_use_tempaddr) {
1294				int e;
1295				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
1296					nd6log((LOG_NOTICE, "prelist_update: "
1297					    "failed to create a temporary "
1298					    "address, errno=%d\n",
1299					    e));
1300				}
1301			}
1302			ifa_free(&ia6->ia_ifa);
1303
1304			/*
1305			 * A newly added address might affect the status
1306			 * of other addresses, so we check and update it.
1307			 * XXX: what if address duplication happens?
1308			 */
1309			pfxlist_onlink_check();
1310		} else {
1311			/* just set an error. do not bark here. */
1312			error = EADDRNOTAVAIL; /* XXX: might be unused. */
1313		}
1314	}
1315
1316 end:
1317	splx(s);
1318	return error;
1319}
1320
1321/*
1322 * A supplement function used in the on-link detection below;
1323 * detect if a given prefix has a (probably) reachable advertising router.
1324 * XXX: lengthy function name...
1325 */
1326static struct nd_pfxrouter *
1327find_pfxlist_reachable_router(struct nd_prefix *pr)
1328{
1329	struct nd_pfxrouter *pfxrtr;
1330	struct llentry *ln;
1331	int canreach;
1332
1333	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr != NULL;
1334	     pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
1335		IF_AFDATA_LOCK(pfxrtr->router->ifp);
1336		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
1337		IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
1338		if (ln == NULL)
1339			continue;
1340		canreach = ND6_IS_LLINFO_PROBREACH(ln);
1341		LLE_RUNLOCK(ln);
1342		if (canreach)
1343			break;
1344	}
1345	return (pfxrtr);
1346}
1347
1348/*
1349 * Check if each prefix in the prefix list has at least one available router
1350 * that advertised the prefix (a router is "available" if its neighbor cache
1351 * entry is reachable or probably reachable).
1352 * If the check fails, the prefix may be off-link, because, for example,
1353 * we have moved from the network but the lifetime of the prefix has not
1354 * expired yet.  So we should not use the prefix if there is another prefix
1355 * that has an available router.
1356 * But, if there is no prefix that has an available router, we still regards
1357 * all the prefixes as on-link.  This is because we can't tell if all the
1358 * routers are simply dead or if we really moved from the network and there
1359 * is no router around us.
1360 */
1361void
1362pfxlist_onlink_check()
1363{
1364	struct nd_prefix *pr;
1365	struct in6_ifaddr *ifa;
1366	struct nd_defrouter *dr;
1367	struct nd_pfxrouter *pfxrtr = NULL;
1368
1369	/*
1370	 * Check if there is a prefix that has a reachable advertising
1371	 * router.
1372	 */
1373	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1374		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
1375			break;
1376	}
1377
1378	/*
1379	 * If we have no such prefix, check whether we still have a router
1380	 * that does not advertise any prefixes.
1381	 */
1382	if (pr == NULL) {
1383		for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
1384		    dr = TAILQ_NEXT(dr, dr_entry)) {
1385			struct nd_prefix *pr0;
1386
1387			for (pr0 = V_nd_prefix.lh_first; pr0;
1388			    pr0 = pr0->ndpr_next) {
1389				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
1390					break;
1391			}
1392			if (pfxrtr != NULL)
1393				break;
1394		}
1395	}
1396	if (pr != NULL || (TAILQ_FIRST(&V_nd_defrouter) && pfxrtr == NULL)) {
1397		/*
1398		 * There is at least one prefix that has a reachable router,
1399		 * or at least a router which probably does not advertise
1400		 * any prefixes.  The latter would be the case when we move
1401		 * to a new link where we have a router that does not provide
1402		 * prefixes and we configure an address by hand.
1403		 * Detach prefixes which have no reachable advertising
1404		 * router, and attach other prefixes.
1405		 */
1406		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1407			/* XXX: a link-local prefix should never be detached */
1408			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1409				continue;
1410
1411			/*
1412			 * we aren't interested in prefixes without the L bit
1413			 * set.
1414			 */
1415			if (pr->ndpr_raf_onlink == 0)
1416				continue;
1417
1418			if (pr->ndpr_raf_auto == 0)
1419				continue;
1420
1421			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1422			    find_pfxlist_reachable_router(pr) == NULL)
1423				pr->ndpr_stateflags |= NDPRF_DETACHED;
1424			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1425			    find_pfxlist_reachable_router(pr) != 0)
1426				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1427		}
1428	} else {
1429		/* there is no prefix that has a reachable router */
1430		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1431			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1432				continue;
1433
1434			if (pr->ndpr_raf_onlink == 0)
1435				continue;
1436
1437			if (pr->ndpr_raf_auto == 0)
1438				continue;
1439
1440			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1441				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1442		}
1443	}
1444
1445	/*
1446	 * Remove each interface route associated with a (just) detached
1447	 * prefix, and reinstall the interface route for a (just) attached
1448	 * prefix.  Note that all attempt of reinstallation does not
1449	 * necessarily success, when a same prefix is shared among multiple
1450	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
1451	 * so we don't have to care about them.
1452	 */
1453	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1454		int e;
1455		char ip6buf[INET6_ADDRSTRLEN];
1456
1457		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1458			continue;
1459
1460		if (pr->ndpr_raf_onlink == 0)
1461			continue;
1462
1463		if (pr->ndpr_raf_auto == 0)
1464			continue;
1465
1466		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1467		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1468			if ((e = nd6_prefix_offlink(pr)) != 0) {
1469				nd6log((LOG_ERR,
1470				    "pfxlist_onlink_check: failed to "
1471				    "make %s/%d offlink, errno=%d\n",
1472				    ip6_sprintf(ip6buf,
1473					    &pr->ndpr_prefix.sin6_addr),
1474					    pr->ndpr_plen, e));
1475			}
1476		}
1477		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1478		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
1479		    pr->ndpr_raf_onlink) {
1480			if ((e = nd6_prefix_onlink(pr)) != 0) {
1481				nd6log((LOG_ERR,
1482				    "pfxlist_onlink_check: failed to "
1483				    "make %s/%d onlink, errno=%d\n",
1484				    ip6_sprintf(ip6buf,
1485					    &pr->ndpr_prefix.sin6_addr),
1486					    pr->ndpr_plen, e));
1487			}
1488		}
1489	}
1490
1491	/*
1492	 * Changes on the prefix status might affect address status as well.
1493	 * Make sure that all addresses derived from an attached prefix are
1494	 * attached, and that all addresses derived from a detached prefix are
1495	 * detached.  Note, however, that a manually configured address should
1496	 * always be attached.
1497	 * The precise detection logic is same as the one for prefixes.
1498	 *
1499	 * XXXRW: in6_ifaddrhead locking.
1500	 */
1501	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1502		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
1503			continue;
1504
1505		if (ifa->ia6_ndpr == NULL) {
1506			/*
1507			 * This can happen when we first configure the address
1508			 * (i.e. the address exists, but the prefix does not).
1509			 * XXX: complicated relationships...
1510			 */
1511			continue;
1512		}
1513
1514		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
1515			break;
1516	}
1517	if (ifa) {
1518		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1519			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1520				continue;
1521
1522			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
1523				continue;
1524
1525			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
1526				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1527					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1528					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1529					nd6_dad_start((struct ifaddr *)ifa, 0);
1530				}
1531			} else {
1532				ifa->ia6_flags |= IN6_IFF_DETACHED;
1533			}
1534		}
1535	}
1536	else {
1537		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1538			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1539				continue;
1540
1541			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1542				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1543				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1544				/* Do we need a delay in this case? */
1545				nd6_dad_start((struct ifaddr *)ifa, 0);
1546			}
1547		}
1548	}
1549}
1550
1551int
1552nd6_prefix_onlink(struct nd_prefix *pr)
1553{
1554	struct ifaddr *ifa;
1555	struct ifnet *ifp = pr->ndpr_ifp;
1556	struct sockaddr_in6 mask6;
1557	struct nd_prefix *opr;
1558	u_long rtflags;
1559	int error = 0;
1560	struct radix_node_head *rnh;
1561	struct rtentry *rt = NULL;
1562	char ip6buf[INET6_ADDRSTRLEN];
1563	struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1564
1565	/* sanity check */
1566	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1567		nd6log((LOG_ERR,
1568		    "nd6_prefix_onlink: %s/%d is already on-link\n",
1569		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1570		    pr->ndpr_plen));
1571		return (EEXIST);
1572	}
1573
1574	/*
1575	 * Add the interface route associated with the prefix.  Before
1576	 * installing the route, check if there's the same prefix on another
1577	 * interface, and the prefix has already installed the interface route.
1578	 * Although such a configuration is expected to be rare, we explicitly
1579	 * allow it.
1580	 */
1581	for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
1582		if (opr == pr)
1583			continue;
1584
1585		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1586			continue;
1587
1588		if (opr->ndpr_plen == pr->ndpr_plen &&
1589		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1590		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
1591			return (0);
1592	}
1593
1594	/*
1595	 * We prefer link-local addresses as the associated interface address.
1596	 */
1597	/* search for a link-local addr */
1598	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
1599	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
1600	if (ifa == NULL) {
1601		/* XXX: freebsd does not have ifa_ifwithaf */
1602		IF_ADDR_LOCK(ifp);
1603		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1604			if (ifa->ifa_addr->sa_family == AF_INET6)
1605				break;
1606		}
1607		if (ifa != NULL)
1608			ifa_ref(ifa);
1609		IF_ADDR_UNLOCK(ifp);
1610		/* should we care about ia6_flags? */
1611	}
1612	if (ifa == NULL) {
1613		/*
1614		 * This can still happen, when, for example, we receive an RA
1615		 * containing a prefix with the L bit set and the A bit clear,
1616		 * after removing all IPv6 addresses on the receiving
1617		 * interface.  This should, of course, be rare though.
1618		 */
1619		nd6log((LOG_NOTICE,
1620		    "nd6_prefix_onlink: failed to find any ifaddr"
1621		    " to add route for a prefix(%s/%d) on %s\n",
1622		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1623		    pr->ndpr_plen, if_name(ifp)));
1624		return (0);
1625	}
1626
1627	/*
1628	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
1629	 * ifa->ifa_rtrequest = nd6_rtrequest;
1630	 */
1631	bzero(&mask6, sizeof(mask6));
1632	mask6.sin6_len = sizeof(mask6);
1633	mask6.sin6_addr = pr->ndpr_mask;
1634	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
1635	error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
1636	    ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt);
1637	if (error == 0) {
1638		if (rt != NULL) /* this should be non NULL, though */ {
1639			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
1640			/* XXX what if rhn == NULL? */
1641			RADIX_NODE_HEAD_LOCK(rnh);
1642			RT_LOCK(rt);
1643			if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) {
1644				((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
1645					rt->rt_ifp->if_type;
1646				((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
1647					rt->rt_ifp->if_index;
1648			}
1649			RADIX_NODE_HEAD_UNLOCK(rnh);
1650			nd6_rtmsg(RTM_ADD, rt);
1651			RT_UNLOCK(rt);
1652		}
1653		pr->ndpr_stateflags |= NDPRF_ONLINK;
1654	} else {
1655		char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
1656		nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
1657		    " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
1658		    "errno = %d\n",
1659		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1660		    pr->ndpr_plen, if_name(ifp),
1661		    ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
1662		    ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
1663	}
1664
1665	if (rt != NULL) {
1666		RT_LOCK(rt);
1667		RT_REMREF(rt);
1668		RT_UNLOCK(rt);
1669	}
1670	if (ifa != NULL)
1671		ifa_free(ifa);
1672
1673	return (error);
1674}
1675
1676int
1677nd6_prefix_offlink(struct nd_prefix *pr)
1678{
1679	int error = 0;
1680	struct ifnet *ifp = pr->ndpr_ifp;
1681	struct nd_prefix *opr;
1682	struct sockaddr_in6 sa6, mask6;
1683	struct rtentry *rt = NULL;
1684	char ip6buf[INET6_ADDRSTRLEN];
1685
1686	/* sanity check */
1687	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1688		nd6log((LOG_ERR,
1689		    "nd6_prefix_offlink: %s/%d is already off-link\n",
1690		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1691		    pr->ndpr_plen));
1692		return (EEXIST);
1693	}
1694
1695	bzero(&sa6, sizeof(sa6));
1696	sa6.sin6_family = AF_INET6;
1697	sa6.sin6_len = sizeof(sa6);
1698	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
1699	    sizeof(struct in6_addr));
1700	bzero(&mask6, sizeof(mask6));
1701	mask6.sin6_family = AF_INET6;
1702	mask6.sin6_len = sizeof(sa6);
1703	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
1704	error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
1705	    (struct sockaddr *)&mask6, 0, &rt);
1706	if (error == 0) {
1707		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
1708
1709		/* report the route deletion to the routing socket. */
1710		if (rt != NULL)
1711			nd6_rtmsg(RTM_DELETE, rt);
1712
1713		/*
1714		 * There might be the same prefix on another interface,
1715		 * the prefix which could not be on-link just because we have
1716		 * the interface route (see comments in nd6_prefix_onlink).
1717		 * If there's one, try to make the prefix on-link on the
1718		 * interface.
1719		 */
1720		for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
1721			if (opr == pr)
1722				continue;
1723
1724			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
1725				continue;
1726
1727			/*
1728			 * KAME specific: detached prefixes should not be
1729			 * on-link.
1730			 */
1731			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1732				continue;
1733
1734			if (opr->ndpr_plen == pr->ndpr_plen &&
1735			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1736			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
1737				int e;
1738
1739				if ((e = nd6_prefix_onlink(opr)) != 0) {
1740					nd6log((LOG_ERR,
1741					    "nd6_prefix_offlink: failed to "
1742					    "recover a prefix %s/%d from %s "
1743					    "to %s (errno = %d)\n",
1744					    ip6_sprintf(ip6buf,
1745						&opr->ndpr_prefix.sin6_addr),
1746					    opr->ndpr_plen, if_name(ifp),
1747					    if_name(opr->ndpr_ifp), e));
1748				}
1749			}
1750		}
1751	} else {
1752		/* XXX: can we still set the NDPRF_ONLINK flag? */
1753		nd6log((LOG_ERR,
1754		    "nd6_prefix_offlink: failed to delete route: "
1755		    "%s/%d on %s (errno = %d)\n",
1756		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
1757		    if_name(ifp), error));
1758	}
1759
1760	if (rt != NULL) {
1761		RTFREE(rt);
1762	}
1763
1764	return (error);
1765}
1766
1767static struct in6_ifaddr *
1768in6_ifadd(struct nd_prefixctl *pr, int mcast)
1769{
1770	struct ifnet *ifp = pr->ndpr_ifp;
1771	struct ifaddr *ifa;
1772	struct in6_aliasreq ifra;
1773	struct in6_ifaddr *ia, *ib;
1774	int error, plen0;
1775	struct in6_addr mask;
1776	int prefixlen = pr->ndpr_plen;
1777	int updateflags;
1778	char ip6buf[INET6_ADDRSTRLEN];
1779
1780	in6_prefixlen2mask(&mask, prefixlen);
1781
1782	/*
1783	 * find a link-local address (will be interface ID).
1784	 * Is it really mandatory? Theoretically, a global or a site-local
1785	 * address can be configured without a link-local address, if we
1786	 * have a unique interface identifier...
1787	 *
1788	 * it is not mandatory to have a link-local address, we can generate
1789	 * interface identifier on the fly.  we do this because:
1790	 * (1) it should be the easiest way to find interface identifier.
1791	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
1792	 * for multiple addresses on a single interface, and possible shortcut
1793	 * of DAD.  we omitted DAD for this reason in the past.
1794	 * (3) a user can prevent autoconfiguration of global address
1795	 * by removing link-local address by hand (this is partly because we
1796	 * don't have other way to control the use of IPv6 on an interface.
1797	 * this has been our design choice - cf. NRL's "ifconfig auto").
1798	 * (4) it is easier to manage when an interface has addresses
1799	 * with the same interface identifier, than to have multiple addresses
1800	 * with different interface identifiers.
1801	 */
1802	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
1803	if (ifa)
1804		ib = (struct in6_ifaddr *)ifa;
1805	else
1806		return NULL;
1807
1808	/* prefixlen + ifidlen must be equal to 128 */
1809	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
1810	if (prefixlen != plen0) {
1811		ifa_free(ifa);
1812		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
1813		    "(prefix=%d ifid=%d)\n",
1814		    if_name(ifp), prefixlen, 128 - plen0));
1815		return NULL;
1816	}
1817
1818	/* make ifaddr */
1819
1820	bzero(&ifra, sizeof(ifra));
1821	/*
1822	 * in6_update_ifa() does not use ifra_name, but we accurately set it
1823	 * for safety.
1824	 */
1825	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1826	ifra.ifra_addr.sin6_family = AF_INET6;
1827	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
1828	/* prefix */
1829	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
1830	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1831	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1832	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1833	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1834
1835	/* interface ID */
1836	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
1837	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
1838	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
1839	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
1840	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1841	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
1842	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1843	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
1844	ifa_free(ifa);
1845
1846	/* new prefix mask. */
1847	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
1848	ifra.ifra_prefixmask.sin6_family = AF_INET6;
1849	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
1850	    sizeof(ifra.ifra_prefixmask.sin6_addr));
1851
1852	/* lifetimes. */
1853	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
1854	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
1855
1856	/* XXX: scope zone ID? */
1857
1858	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
1859
1860	/*
1861	 * Make sure that we do not have this address already.  This should
1862	 * usually not happen, but we can still see this case, e.g., if we
1863	 * have manually configured the exact address to be configured.
1864	 */
1865	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
1866	    &ifra.ifra_addr.sin6_addr);
1867	if (ifa != NULL) {
1868		ifa_free(ifa);
1869		/* this should be rare enough to make an explicit log */
1870		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
1871		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
1872		return (NULL);
1873	}
1874
1875	/*
1876	 * Allocate ifaddr structure, link into chain, etc.
1877	 * If we are going to create a new address upon receiving a multicasted
1878	 * RA, we need to impose a random delay before starting DAD.
1879	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
1880	 */
1881	updateflags = 0;
1882	if (mcast)
1883		updateflags |= IN6_IFAUPDATE_DADDELAY;
1884	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
1885		nd6log((LOG_ERR,
1886		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
1887		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
1888		    if_name(ifp), error));
1889		return (NULL);	/* ifaddr must not have been allocated. */
1890	}
1891
1892	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
1893	/*
1894	 * XXXRW: Assumption of non-NULLness here might not be true with
1895	 * fine-grained locking -- should we validate it?  Or just return
1896	 * earlier ifa rather than looking it up again?
1897	 */
1898	return (ia);		/* this is always non-NULL  and referenced. */
1899}
1900
1901/*
1902 * ia0 - corresponding public address
1903 */
1904int
1905in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
1906{
1907	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
1908	struct in6_ifaddr *newia, *ia;
1909	struct in6_aliasreq ifra;
1910	int i, error;
1911	int trylimit = 3;	/* XXX: adhoc value */
1912	int updateflags;
1913	u_int32_t randid[2];
1914	time_t vltime0, pltime0;
1915
1916	bzero(&ifra, sizeof(ifra));
1917	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1918	ifra.ifra_addr = ia0->ia_addr;
1919	/* copy prefix mask */
1920	ifra.ifra_prefixmask = ia0->ia_prefixmask;
1921	/* clear the old IFID */
1922	for (i = 0; i < 4; i++) {
1923		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
1924		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
1925	}
1926
1927  again:
1928	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
1929	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
1930		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
1931		    "random IFID\n"));
1932		return (EINVAL);
1933	}
1934	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1935	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
1936	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1937	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
1938
1939	/*
1940	 * in6_get_tmpifid() quite likely provided a unique interface ID.
1941	 * However, we may still have a chance to see collision, because
1942	 * there may be a time lag between generation of the ID and generation
1943	 * of the address.  So, we'll do one more sanity check.
1944	 */
1945	IN6_IFADDR_RLOCK();
1946	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
1947		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
1948		    &ifra.ifra_addr.sin6_addr)) {
1949			if (trylimit-- == 0) {
1950				IN6_IFADDR_RUNLOCK();
1951				/*
1952				 * Give up.  Something strange should have
1953				 * happened.
1954				 */
1955				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
1956				    "find a unique random IFID\n"));
1957				return (EEXIST);
1958			}
1959			IN6_IFADDR_RUNLOCK();
1960			forcegen = 1;
1961			goto again;
1962		}
1963	}
1964	IN6_IFADDR_RUNLOCK();
1965
1966	/*
1967	 * The Valid Lifetime is the lower of the Valid Lifetime of the
1968         * public address or TEMP_VALID_LIFETIME.
1969	 * The Preferred Lifetime is the lower of the Preferred Lifetime
1970         * of the public address or TEMP_PREFERRED_LIFETIME -
1971         * DESYNC_FACTOR.
1972	 */
1973	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
1974		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
1975		    (ia0->ia6_lifetime.ia6t_vltime -
1976		    (time_second - ia0->ia6_updatetime));
1977		if (vltime0 > V_ip6_temp_valid_lifetime)
1978			vltime0 = V_ip6_temp_valid_lifetime;
1979	} else
1980		vltime0 = V_ip6_temp_valid_lifetime;
1981	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
1982		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
1983		    (ia0->ia6_lifetime.ia6t_pltime -
1984		    (time_second - ia0->ia6_updatetime));
1985		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
1986			pltime0 = V_ip6_temp_preferred_lifetime -
1987			    V_ip6_desync_factor;
1988		}
1989	} else
1990		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
1991	ifra.ifra_lifetime.ia6t_vltime = vltime0;
1992	ifra.ifra_lifetime.ia6t_pltime = pltime0;
1993
1994	/*
1995	 * A temporary address is created only if this calculated Preferred
1996	 * Lifetime is greater than REGEN_ADVANCE time units.
1997	 */
1998	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
1999		return (0);
2000
2001	/* XXX: scope zone ID? */
2002
2003	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
2004
2005	/* allocate ifaddr structure, link into chain, etc. */
2006	updateflags = 0;
2007	if (delay)
2008		updateflags |= IN6_IFAUPDATE_DADDELAY;
2009	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
2010		return (error);
2011
2012	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2013	if (newia == NULL) {	/* XXX: can it happen? */
2014		nd6log((LOG_ERR,
2015		    "in6_tmpifadd: ifa update succeeded, but we got "
2016		    "no ifaddr\n"));
2017		return (EINVAL); /* XXX */
2018	}
2019	newia->ia6_ndpr = ia0->ia6_ndpr;
2020	newia->ia6_ndpr->ndpr_refcnt++;
2021	ifa_free(&newia->ia_ifa);
2022
2023	/*
2024	 * A newly added address might affect the status of other addresses.
2025	 * XXX: when the temporary address is generated with a new public
2026	 * address, the onlink check is redundant.  However, it would be safe
2027	 * to do the check explicitly everywhere a new address is generated,
2028	 * and, in fact, we surely need the check when we create a new
2029	 * temporary address due to deprecation of an old temporary address.
2030	 */
2031	pfxlist_onlink_check();
2032
2033	return (0);
2034}
2035
2036static int
2037in6_init_prefix_ltimes(struct nd_prefix *ndpr)
2038{
2039	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
2040		ndpr->ndpr_preferred = 0;
2041	else
2042		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
2043	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2044		ndpr->ndpr_expire = 0;
2045	else
2046		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
2047
2048	return 0;
2049}
2050
2051static void
2052in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
2053{
2054	/* init ia6t_expire */
2055	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
2056		lt6->ia6t_expire = 0;
2057	else {
2058		lt6->ia6t_expire = time_second;
2059		lt6->ia6t_expire += lt6->ia6t_vltime;
2060	}
2061
2062	/* init ia6t_preferred */
2063	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
2064		lt6->ia6t_preferred = 0;
2065	else {
2066		lt6->ia6t_preferred = time_second;
2067		lt6->ia6t_preferred += lt6->ia6t_pltime;
2068	}
2069}
2070
2071/*
2072 * Delete all the routing table entries that use the specified gateway.
2073 * XXX: this function causes search through all entries of routing table, so
2074 * it shouldn't be called when acting as a router.
2075 */
2076void
2077rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
2078{
2079	struct radix_node_head *rnh;
2080	int s = splnet();
2081
2082	/* We'll care only link-local addresses */
2083	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
2084		splx(s);
2085		return;
2086	}
2087
2088	rnh = rt_tables_get_rnh(0, AF_INET6);
2089	if (rnh == NULL)
2090		return;
2091
2092	RADIX_NODE_HEAD_LOCK(rnh);
2093	rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
2094	RADIX_NODE_HEAD_UNLOCK(rnh);
2095	splx(s);
2096}
2097
2098static int
2099rt6_deleteroute(struct radix_node *rn, void *arg)
2100{
2101#define SIN6(s)	((struct sockaddr_in6 *)s)
2102	struct rtentry *rt = (struct rtentry *)rn;
2103	struct in6_addr *gate = (struct in6_addr *)arg;
2104
2105	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
2106		return (0);
2107
2108	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
2109		return (0);
2110	}
2111
2112	/*
2113	 * Do not delete a static route.
2114	 * XXX: this seems to be a bit ad-hoc. Should we consider the
2115	 * 'cloned' bit instead?
2116	 */
2117	if ((rt->rt_flags & RTF_STATIC) != 0)
2118		return (0);
2119
2120	/*
2121	 * We delete only host route. This means, in particular, we don't
2122	 * delete default route.
2123	 */
2124	if ((rt->rt_flags & RTF_HOST) == 0)
2125		return (0);
2126
2127	return (rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
2128	    rt_mask(rt), rt->rt_flags, 0));
2129#undef SIN6
2130}
2131
2132int
2133nd6_setdefaultiface(int ifindex)
2134{
2135	int error = 0;
2136
2137	if (ifindex < 0 || V_if_index < ifindex)
2138		return (EINVAL);
2139	if (ifindex != 0 && !ifnet_byindex(ifindex))
2140		return (EINVAL);
2141
2142	if (V_nd6_defifindex != ifindex) {
2143		V_nd6_defifindex = ifindex;
2144		if (V_nd6_defifindex > 0)
2145			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
2146		else
2147			V_nd6_defifp = NULL;
2148
2149		/*
2150		 * Our current implementation assumes one-to-one maping between
2151		 * interfaces and links, so it would be natural to use the
2152		 * default interface as the default link.
2153		 */
2154		scope6_setdefault(V_nd6_defifp);
2155	}
2156
2157	return (error);
2158}
2159