1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/socket.h>
43#include <sys/sockio.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/errno.h>
48#include <sys/rwlock.h>
49#include <sys/syslog.h>
50#include <sys/queue.h>
51
52#include <net/if.h>
53#include <net/if_types.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56#include <net/radix.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <net/if_llatbl.h>
61#include <netinet6/in6_var.h>
62#include <netinet6/in6_ifattach.h>
63#include <netinet/ip6.h>
64#include <netinet6/ip6_var.h>
65#include <netinet6/nd6.h>
66#include <netinet/icmp6.h>
67#include <netinet6/scope6_var.h>
68
/*
 * Forward declarations of the file-local helpers.
 */

/* Default router list. */
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);

/* Prefix list and the routers attached to each prefix. */
static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
    struct mbuf *, int);
static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
    struct nd_defrouter *);
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *);
static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);

/* Address and lifetime helpers. */
static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
static int in6_init_prefix_ltimes(struct nd_prefix *);
static void in6_init_address_ltimes(struct nd_prefix *,
    struct in6_addrlifetime *);

/* Route deletion. */
static int rt6_deleteroute(struct radix_node *, void *);
91
92VNET_DECLARE(int, nd6_recalc_reachtm_interval);
93#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
94
95static VNET_DEFINE(struct ifnet *, nd6_defifp);
96VNET_DEFINE(int, nd6_defifindex);
97#define	V_nd6_defifp			VNET(nd6_defifp)
98
99VNET_DEFINE(int, ip6_use_tempaddr) = 0;
100
101VNET_DEFINE(int, ip6_desync_factor);
102VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
103VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
104
105VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
106
107/* RTPREF_MEDIUM has to be 0! */
108#define RTPREF_HIGH	1
109#define RTPREF_MEDIUM	0
110#define RTPREF_LOW	(-1)
111#define RTPREF_RESERVED	(-2)
112#define RTPREF_INVALID	(-3)	/* internal */
113
114/*
115 * Receive Router Solicitation Message - just for routers.
116 * Router solicitation/advertisement is mostly managed by userland program
117 * (rtadvd) so here we have no function like nd6_ra_output().
118 *
119 * Based on RFC 2461
120 */
121void
122nd6_rs_input(struct mbuf *m, int off, int icmp6len)
123{
124	struct ifnet *ifp = m->m_pkthdr.rcvif;
125	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
126	struct nd_router_solicit *nd_rs;
127	struct in6_addr saddr6 = ip6->ip6_src;
128	char *lladdr = NULL;
129	int lladdrlen = 0;
130	union nd_opts ndopts;
131	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
132
133	/*
134	 * Accept RS only when V_ip6_forwarding=1 and the interface has
135	 * no ND6_IFF_ACCEPT_RTADV.
136	 */
137	if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
138		goto freeit;
139
140	/* Sanity checks */
141	if (ip6->ip6_hlim != 255) {
142		nd6log((LOG_ERR,
143		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
144		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
145		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
146		goto bad;
147	}
148
149	/*
150	 * Don't update the neighbor cache, if src = ::.
151	 * This indicates that the src has no IP address assigned yet.
152	 */
153	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
154		goto freeit;
155
156#ifndef PULLDOWN_TEST
157	IP6_EXTHDR_CHECK(m, off, icmp6len,);
158	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
159#else
160	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
161	if (nd_rs == NULL) {
162		ICMP6STAT_INC(icp6s_tooshort);
163		return;
164	}
165#endif
166
167	icmp6len -= sizeof(*nd_rs);
168	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
169	if (nd6_options(&ndopts) < 0) {
170		nd6log((LOG_INFO,
171		    "nd6_rs_input: invalid ND option, ignored\n"));
172		/* nd6_options have incremented stats */
173		goto freeit;
174	}
175
176	if (ndopts.nd_opts_src_lladdr) {
177		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
178		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
179	}
180
181	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
182		nd6log((LOG_INFO,
183		    "nd6_rs_input: lladdrlen mismatch for %s "
184		    "(if %d, RS packet %d)\n",
185		    ip6_sprintf(ip6bufs, &saddr6),
186		    ifp->if_addrlen, lladdrlen - 2));
187		goto bad;
188	}
189
190	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
191
192 freeit:
193	m_freem(m);
194	return;
195
196 bad:
197	ICMP6STAT_INC(icp6s_badrs);
198	m_freem(m);
199}
200
201/*
202 * Receive Router Advertisement Message.
203 *
204 * Based on RFC 2461
205 * TODO: on-link bit on prefix information
206 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
207 */
208void
209nd6_ra_input(struct mbuf *m, int off, int icmp6len)
210{
211	struct ifnet *ifp = m->m_pkthdr.rcvif;
212	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
213	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
214	struct nd_router_advert *nd_ra;
215	struct in6_addr saddr6 = ip6->ip6_src;
216	int mcast = 0;
217	union nd_opts ndopts;
218	struct nd_defrouter *dr;
219	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
220
221	/*
222	 * We only accept RAs only when the per-interface flag
223	 * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
224	 */
225	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
226		goto freeit;
227
228	if (ip6->ip6_hlim != 255) {
229		nd6log((LOG_ERR,
230		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
231		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
232		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
233		goto bad;
234	}
235
236	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
237		nd6log((LOG_ERR,
238		    "nd6_ra_input: src %s is not link-local\n",
239		    ip6_sprintf(ip6bufs, &saddr6)));
240		goto bad;
241	}
242
243#ifndef PULLDOWN_TEST
244	IP6_EXTHDR_CHECK(m, off, icmp6len,);
245	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
246#else
247	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
248	if (nd_ra == NULL) {
249		ICMP6STAT_INC(icp6s_tooshort);
250		return;
251	}
252#endif
253
254	icmp6len -= sizeof(*nd_ra);
255	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
256	if (nd6_options(&ndopts) < 0) {
257		nd6log((LOG_INFO,
258		    "nd6_ra_input: invalid ND option, ignored\n"));
259		/* nd6_options have incremented stats */
260		goto freeit;
261	}
262
263    {
264	struct nd_defrouter dr0;
265	u_int32_t advreachable = nd_ra->nd_ra_reachable;
266
267	/* remember if this is a multicasted advertisement */
268	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
269		mcast = 1;
270
271	bzero(&dr0, sizeof(dr0));
272	dr0.rtaddr = saddr6;
273	dr0.flags  = nd_ra->nd_ra_flags_reserved;
274	/*
275	 * Effectively-disable routes from RA messages when
276	 * ND6_IFF_NO_RADR enabled on the receiving interface or
277	 * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
278	 */
279	if (ndi->flags & ND6_IFF_NO_RADR)
280		dr0.rtlifetime = 0;
281	else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
282		dr0.rtlifetime = 0;
283	else
284		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
285	dr0.expire = time_second + dr0.rtlifetime;
286	dr0.ifp = ifp;
287	/* unspecified or not? (RFC 2461 6.3.4) */
288	if (advreachable) {
289		advreachable = ntohl(advreachable);
290		if (advreachable <= MAX_REACHABLE_TIME &&
291		    ndi->basereachable != advreachable) {
292			ndi->basereachable = advreachable;
293			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
294			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
295		}
296	}
297	if (nd_ra->nd_ra_retransmit)
298		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
299	if (nd_ra->nd_ra_curhoplimit) {
300		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
301			ndi->chlim = nd_ra->nd_ra_curhoplimit;
302		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
303			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
304			    "%s on %s (current = %d, received = %d). "
305			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
306			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
307		}
308	}
309	dr = defrtrlist_update(&dr0);
310    }
311
312	/*
313	 * prefix
314	 */
315	if (ndopts.nd_opts_pi) {
316		struct nd_opt_hdr *pt;
317		struct nd_opt_prefix_info *pi = NULL;
318		struct nd_prefixctl pr;
319
320		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
321		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
322		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
323						(pt->nd_opt_len << 3))) {
324			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
325				continue;
326			pi = (struct nd_opt_prefix_info *)pt;
327
328			if (pi->nd_opt_pi_len != 4) {
329				nd6log((LOG_INFO,
330				    "nd6_ra_input: invalid option "
331				    "len %d for prefix information option, "
332				    "ignored\n", pi->nd_opt_pi_len));
333				continue;
334			}
335
336			if (128 < pi->nd_opt_pi_prefix_len) {
337				nd6log((LOG_INFO,
338				    "nd6_ra_input: invalid prefix "
339				    "len %d for prefix information option, "
340				    "ignored\n", pi->nd_opt_pi_prefix_len));
341				continue;
342			}
343
344			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
345			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
346				nd6log((LOG_INFO,
347				    "nd6_ra_input: invalid prefix "
348				    "%s, ignored\n",
349				    ip6_sprintf(ip6bufs,
350					&pi->nd_opt_pi_prefix)));
351				continue;
352			}
353
354			bzero(&pr, sizeof(pr));
355			pr.ndpr_prefix.sin6_family = AF_INET6;
356			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
357			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
358			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
359
360			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
361			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
362			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
363			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
364			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
365			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
366			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
367			(void)prelist_update(&pr, dr, m, mcast);
368		}
369	}
370
371	/*
372	 * MTU
373	 */
374	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
375		u_long mtu;
376		u_long maxmtu;
377
378		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
379
380		/* lower bound */
381		if (mtu < IPV6_MMTU) {
382			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
383			    "mtu=%lu sent from %s, ignoring\n",
384			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
385			goto skip;
386		}
387
388		/* upper bound */
389		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
390		    ? ndi->maxmtu : ifp->if_mtu;
391		if (mtu <= maxmtu) {
392			int change = (ndi->linkmtu != mtu);
393
394			ndi->linkmtu = mtu;
395			if (change) /* in6_maxmtu may change */
396				in6_setmaxmtu();
397		} else {
398			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
399			    "mtu=%lu sent from %s; "
400			    "exceeds maxmtu %lu, ignoring\n",
401			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
402		}
403	}
404
405 skip:
406
407	/*
408	 * Source link layer address
409	 */
410    {
411	char *lladdr = NULL;
412	int lladdrlen = 0;
413
414	if (ndopts.nd_opts_src_lladdr) {
415		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
416		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
417	}
418
419	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
420		nd6log((LOG_INFO,
421		    "nd6_ra_input: lladdrlen mismatch for %s "
422		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
423		    ifp->if_addrlen, lladdrlen - 2));
424		goto bad;
425	}
426
427	nd6_cache_lladdr(ifp, &saddr6, lladdr,
428	    lladdrlen, ND_ROUTER_ADVERT, 0);
429
430	/*
431	 * Installing a link-layer address might change the state of the
432	 * router's neighbor cache, which might also affect our on-link
433	 * detection of adveritsed prefixes.
434	 */
435	pfxlist_onlink_check();
436    }
437
438 freeit:
439	m_freem(m);
440	return;
441
442 bad:
443	ICMP6STAT_INC(icp6s_badra);
444	m_freem(m);
445}
446
/*
 * default router list processing sub routines
 */
450
451/* tell the change to user processes watching the routing socket. */
452static void
453nd6_rtmsg(int cmd, struct rtentry *rt)
454{
455	struct rt_addrinfo info;
456	struct ifnet *ifp;
457	struct ifaddr *ifa;
458
459	bzero((caddr_t)&info, sizeof(info));
460	info.rti_info[RTAX_DST] = rt_key(rt);
461	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
462	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
463	ifp = rt->rt_ifp;
464	if (ifp != NULL) {
465		IF_ADDR_RLOCK(ifp);
466		ifa = TAILQ_FIRST(&ifp->if_addrhead);
467		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
468		ifa_ref(ifa);
469		IF_ADDR_RUNLOCK(ifp);
470		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
471	} else
472		ifa = NULL;
473
474	rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum);
475	if (ifa != NULL)
476		ifa_free(ifa);
477}
478
479static void
480defrouter_addreq(struct nd_defrouter *new)
481{
482	struct sockaddr_in6 def, mask, gate;
483	struct rtentry *newrt = NULL;
484	int s;
485	int error;
486
487	bzero(&def, sizeof(def));
488	bzero(&mask, sizeof(mask));
489	bzero(&gate, sizeof(gate));
490
491	def.sin6_len = mask.sin6_len = gate.sin6_len =
492	    sizeof(struct sockaddr_in6);
493	def.sin6_family = gate.sin6_family = AF_INET6;
494	gate.sin6_addr = new->rtaddr;
495
496	s = splnet();
497	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
498	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
499	    RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
500	if (newrt) {
501		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
502		RTFREE(newrt);
503	}
504	if (error == 0)
505		new->installed = 1;
506	splx(s);
507	return;
508}
509
510struct nd_defrouter *
511defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
512{
513	struct nd_defrouter *dr;
514
515	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
516		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
517			return (dr);
518	}
519
520	return (NULL);		/* search failed */
521}
522
523/*
524 * Remove the default route for a given router.
525 * This is just a subroutine function for defrouter_select(), and should
526 * not be called from anywhere else.
527 */
528static void
529defrouter_delreq(struct nd_defrouter *dr)
530{
531	struct sockaddr_in6 def, mask, gate;
532	struct rtentry *oldrt = NULL;
533
534	bzero(&def, sizeof(def));
535	bzero(&mask, sizeof(mask));
536	bzero(&gate, sizeof(gate));
537
538	def.sin6_len = mask.sin6_len = gate.sin6_len =
539	    sizeof(struct sockaddr_in6);
540	def.sin6_family = gate.sin6_family = AF_INET6;
541	gate.sin6_addr = dr->rtaddr;
542
543	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
544	    (struct sockaddr *)&gate,
545	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB);
546	if (oldrt) {
547		nd6_rtmsg(RTM_DELETE, oldrt);
548		RTFREE(oldrt);
549	}
550
551	dr->installed = 0;
552}
553
554/*
555 * remove all default routes from default router list
556 */
557void
558defrouter_reset(void)
559{
560	struct nd_defrouter *dr;
561
562	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
563		defrouter_delreq(dr);
564
565	/*
566	 * XXX should we also nuke any default routers in the kernel, by
567	 * going through them by rtalloc1()?
568	 */
569}
570
571void
572defrtrlist_del(struct nd_defrouter *dr)
573{
574	struct nd_defrouter *deldr = NULL;
575	struct nd_prefix *pr;
576
577	/*
578	 * Flush all the routing table entries that use the router
579	 * as a next hop.
580	 */
581	if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
582		rt6_flush(&dr->rtaddr, dr->ifp);
583
584	if (dr->installed) {
585		deldr = dr;
586		defrouter_delreq(dr);
587	}
588	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
589
590	/*
591	 * Also delete all the pointers to the router in each prefix lists.
592	 */
593	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
594		struct nd_pfxrouter *pfxrtr;
595		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
596			pfxrtr_del(pfxrtr);
597	}
598	pfxlist_onlink_check();
599
600	/*
601	 * If the router is the primary one, choose a new one.
602	 * Note that defrouter_select() will remove the current gateway
603	 * from the routing table.
604	 */
605	if (deldr)
606		defrouter_select();
607
608	free(dr, M_IP6NDP);
609}
610
611/*
612 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
613 * draft-ietf-ipngwg-router-selection:
614 * 1) Routers that are reachable or probably reachable should be preferred.
615 *    If we have more than one (probably) reachable router, prefer ones
616 *    with the highest router preference.
617 * 2) When no routers on the list are known to be reachable or
618 *    probably reachable, routers SHOULD be selected in a round-robin
619 *    fashion, regardless of router preference values.
620 * 3) If the Default Router List is empty, assume that all
621 *    destinations are on-link.
622 *
623 * We assume nd_defrouter is sorted by router preference value.
624 * Since the code below covers both with and without router preference cases,
625 * we do not need to classify the cases by ifdef.
626 *
627 * At this moment, we do not try to install more than one default router,
628 * even when the multipath routing is available, because we're not sure about
629 * the benefits for stub hosts comparing to the risk of making the code
630 * complicated and the possibility of introducing bugs.
631 */
632void
633defrouter_select(void)
634{
635	int s = splnet();
636	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
637	struct llentry *ln = NULL;
638
639	/*
640	 * Let's handle easy case (3) first:
641	 * If default router list is empty, there's nothing to be done.
642	 */
643	if (TAILQ_EMPTY(&V_nd_defrouter)) {
644		splx(s);
645		return;
646	}
647
648	/*
649	 * Search for a (probably) reachable router from the list.
650	 * We just pick up the first reachable one (if any), assuming that
651	 * the ordering rule of the list described in defrtrlist_update().
652	 */
653	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
654		IF_AFDATA_RLOCK(dr->ifp);
655		if (selected_dr == NULL &&
656		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
657		    ND6_IS_LLINFO_PROBREACH(ln)) {
658			selected_dr = dr;
659		}
660		IF_AFDATA_RUNLOCK(dr->ifp);
661		if (ln != NULL) {
662			LLE_RUNLOCK(ln);
663			ln = NULL;
664		}
665
666		if (dr->installed && installed_dr == NULL)
667			installed_dr = dr;
668		else if (dr->installed && installed_dr) {
669			/* this should not happen.  warn for diagnosis. */
670			log(LOG_ERR, "defrouter_select: more than one router"
671			    " is installed\n");
672		}
673	}
674	/*
675	 * If none of the default routers was found to be reachable,
676	 * round-robin the list regardless of preference.
677	 * Otherwise, if we have an installed router, check if the selected
678	 * (reachable) router should really be preferred to the installed one.
679	 * We only prefer the new router when the old one is not reachable
680	 * or when the new one has a really higher preference value.
681	 */
682	if (selected_dr == NULL) {
683		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
684			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
685		else
686			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
687	} else if (installed_dr) {
688		IF_AFDATA_RLOCK(installed_dr->ifp);
689		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
690		    ND6_IS_LLINFO_PROBREACH(ln) &&
691		    rtpref(selected_dr) <= rtpref(installed_dr)) {
692			selected_dr = installed_dr;
693		}
694		IF_AFDATA_RUNLOCK(installed_dr->ifp);
695		if (ln != NULL)
696			LLE_RUNLOCK(ln);
697	}
698
699	/*
700	 * If the selected router is different than the installed one,
701	 * remove the installed router and install the selected one.
702	 * Note that the selected router is never NULL here.
703	 */
704	if (installed_dr != selected_dr) {
705		if (installed_dr)
706			defrouter_delreq(installed_dr);
707		defrouter_addreq(selected_dr);
708	}
709
710	splx(s);
711	return;
712}
713
714/*
715 * for default router selection
716 * regards router-preference field as a 2-bit signed integer
717 */
718static int
719rtpref(struct nd_defrouter *dr)
720{
721	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
722	case ND_RA_FLAG_RTPREF_HIGH:
723		return (RTPREF_HIGH);
724	case ND_RA_FLAG_RTPREF_MEDIUM:
725	case ND_RA_FLAG_RTPREF_RSV:
726		return (RTPREF_MEDIUM);
727	case ND_RA_FLAG_RTPREF_LOW:
728		return (RTPREF_LOW);
729	default:
730		/*
731		 * This case should never happen.  If it did, it would mean a
732		 * serious bug of kernel internal.  We thus always bark here.
733		 * Or, can we even panic?
734		 */
735		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
736		return (RTPREF_INVALID);
737	}
738	/* NOTREACHED */
739}
740
741static struct nd_defrouter *
742defrtrlist_update(struct nd_defrouter *new)
743{
744	struct nd_defrouter *dr, *n;
745	int s = splnet();
746
747	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
748		/* entry exists */
749		if (new->rtlifetime == 0) {
750			defrtrlist_del(dr);
751			dr = NULL;
752		} else {
753			int oldpref = rtpref(dr);
754
755			/* override */
756			dr->flags = new->flags; /* xxx flag check */
757			dr->rtlifetime = new->rtlifetime;
758			dr->expire = new->expire;
759
760			/*
761			 * If the preference does not change, there's no need
762			 * to sort the entries. Also make sure the selected
763			 * router is still installed in the kernel.
764			 */
765			if (dr->installed && rtpref(new) == oldpref) {
766				splx(s);
767				return (dr);
768			}
769
770			/*
771			 * preferred router may be changed, so relocate
772			 * this router.
773			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
774			 * However, since defrtrlist_del() has many side
775			 * effects, we intentionally do so here.
776			 * defrouter_select() below will handle routing
777			 * changes later.
778			 */
779			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
780			n = dr;
781			goto insert;
782		}
783		splx(s);
784		return (dr);
785	}
786
787	/* entry does not exist */
788	if (new->rtlifetime == 0) {
789		splx(s);
790		return (NULL);
791	}
792
793	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
794	if (n == NULL) {
795		splx(s);
796		return (NULL);
797	}
798	bzero(n, sizeof(*n));
799	*n = *new;
800
801insert:
802	/*
803	 * Insert the new router in the Default Router List;
804	 * The Default Router List should be in the descending order
805	 * of router-preferece.  Routers with the same preference are
806	 * sorted in the arriving time order.
807	 */
808
809	/* insert at the end of the group */
810	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
811		if (rtpref(n) > rtpref(dr))
812			break;
813	}
814	if (dr)
815		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
816	else
817		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
818
819	defrouter_select();
820
821	splx(s);
822
823	return (n);
824}
825
826static struct nd_pfxrouter *
827pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
828{
829	struct nd_pfxrouter *search;
830
831	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
832		if (search->router == dr)
833			break;
834	}
835
836	return (search);
837}
838
839static void
840pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
841{
842	struct nd_pfxrouter *new;
843
844	new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
845	if (new == NULL)
846		return;
847	bzero(new, sizeof(*new));
848	new->router = dr;
849
850	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
851
852	pfxlist_onlink_check();
853}
854
855static void
856pfxrtr_del(struct nd_pfxrouter *pfr)
857{
858	LIST_REMOVE(pfr, pfr_entry);
859	free(pfr, M_IP6NDP);
860}
861
862struct nd_prefix *
863nd6_prefix_lookup(struct nd_prefixctl *key)
864{
865	struct nd_prefix *search;
866
867	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
868		if (key->ndpr_ifp == search->ndpr_ifp &&
869		    key->ndpr_plen == search->ndpr_plen &&
870		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
871		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
872			break;
873		}
874	}
875
876	return (search);
877}
878
879int
880nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
881    struct nd_prefix **newp)
882{
883	struct nd_prefix *new = NULL;
884	int error = 0;
885	int i, s;
886	char ip6buf[INET6_ADDRSTRLEN];
887
888	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
889	if (new == NULL)
890		return(ENOMEM);
891	bzero(new, sizeof(*new));
892	new->ndpr_ifp = pr->ndpr_ifp;
893	new->ndpr_prefix = pr->ndpr_prefix;
894	new->ndpr_plen = pr->ndpr_plen;
895	new->ndpr_vltime = pr->ndpr_vltime;
896	new->ndpr_pltime = pr->ndpr_pltime;
897	new->ndpr_flags = pr->ndpr_flags;
898	if ((error = in6_init_prefix_ltimes(new)) != 0) {
899		free(new, M_IP6NDP);
900		return(error);
901	}
902	new->ndpr_lastupdate = time_second;
903	if (newp != NULL)
904		*newp = new;
905
906	/* initialization */
907	LIST_INIT(&new->ndpr_advrtrs);
908	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
909	/* make prefix in the canonical form */
910	for (i = 0; i < 4; i++)
911		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
912		    new->ndpr_mask.s6_addr32[i];
913
914	s = splnet();
915	/* link ndpr_entry to nd_prefix list */
916	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
917	splx(s);
918
919	/* ND_OPT_PI_FLAG_ONLINK processing */
920	if (new->ndpr_raf_onlink) {
921		int e;
922
923		if ((e = nd6_prefix_onlink(new)) != 0) {
924			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
925			    "the prefix %s/%d on-link on %s (errno=%d)\n",
926			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
927			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
928			/* proceed anyway. XXX: is it correct? */
929		}
930	}
931
932	if (dr)
933		pfxrtr_add(new, dr);
934
935	return 0;
936}
937
938void
939prelist_remove(struct nd_prefix *pr)
940{
941	struct nd_pfxrouter *pfr, *next;
942	int e, s;
943	char ip6buf[INET6_ADDRSTRLEN];
944
945	/* make sure to invalidate the prefix until it is really freed. */
946	pr->ndpr_vltime = 0;
947	pr->ndpr_pltime = 0;
948
949	/*
950	 * Though these flags are now meaningless, we'd rather keep the value
951	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
952	 * when executing "ndp -p".
953	 */
954
955	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
956	    (e = nd6_prefix_offlink(pr)) != 0) {
957		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
958		    "on %s, errno=%d\n",
959		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
960		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
961		/* what should we do? */
962	}
963
964	if (pr->ndpr_refcnt > 0)
965		return;		/* notice here? */
966
967	s = splnet();
968
969	/* unlink ndpr_entry from nd_prefix list */
970	LIST_REMOVE(pr, ndpr_entry);
971
972	/* free list of routers that adversed the prefix */
973	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
974		free(pfr, M_IP6NDP);
975	}
976	splx(s);
977
978	free(pr, M_IP6NDP);
979
980	pfxlist_onlink_check();
981}
982
983/*
984 * dr - may be NULL
985 */
986
987static int
988prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
989    struct mbuf *m, int mcast)
990{
991	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
992	struct ifaddr *ifa;
993	struct ifnet *ifp = new->ndpr_ifp;
994	struct nd_prefix *pr;
995	int s = splnet();
996	int error = 0;
997	int newprefix = 0;
998	int auth;
999	struct in6_addrlifetime lt6_tmp;
1000	char ip6buf[INET6_ADDRSTRLEN];
1001
1002	auth = 0;
1003	if (m) {
1004		/*
1005		 * Authenticity for NA consists authentication for
1006		 * both IP header and IP datagrams, doesn't it ?
1007		 */
1008#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
1009		auth = ((m->m_flags & M_AUTHIPHDR) &&
1010		    (m->m_flags & M_AUTHIPDGM));
1011#endif
1012	}
1013
1014	if ((pr = nd6_prefix_lookup(new)) != NULL) {
1015		/*
1016		 * nd6_prefix_lookup() ensures that pr and new have the same
1017		 * prefix on a same interface.
1018		 */
1019
1020		/*
1021		 * Update prefix information.  Note that the on-link (L) bit
1022		 * and the autonomous (A) bit should NOT be changed from 1
1023		 * to 0.
1024		 */
1025		if (new->ndpr_raf_onlink == 1)
1026			pr->ndpr_raf_onlink = 1;
1027		if (new->ndpr_raf_auto == 1)
1028			pr->ndpr_raf_auto = 1;
1029		if (new->ndpr_raf_onlink) {
1030			pr->ndpr_vltime = new->ndpr_vltime;
1031			pr->ndpr_pltime = new->ndpr_pltime;
1032			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
1033			pr->ndpr_lastupdate = time_second;
1034		}
1035
1036		if (new->ndpr_raf_onlink &&
1037		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1038			int e;
1039
1040			if ((e = nd6_prefix_onlink(pr)) != 0) {
1041				nd6log((LOG_ERR,
1042				    "prelist_update: failed to make "
1043				    "the prefix %s/%d on-link on %s "
1044				    "(errno=%d)\n",
1045				    ip6_sprintf(ip6buf,
1046					    &pr->ndpr_prefix.sin6_addr),
1047				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
1048				/* proceed anyway. XXX: is it correct? */
1049			}
1050		}
1051
1052		if (dr && pfxrtr_lookup(pr, dr) == NULL)
1053			pfxrtr_add(pr, dr);
1054	} else {
1055		struct nd_prefix *newpr = NULL;
1056
1057		newprefix = 1;
1058
1059		if (new->ndpr_vltime == 0)
1060			goto end;
1061		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
1062			goto end;
1063
1064		error = nd6_prelist_add(new, dr, &newpr);
1065		if (error != 0 || newpr == NULL) {
1066			nd6log((LOG_NOTICE, "prelist_update: "
1067			    "nd6_prelist_add failed for %s/%d on %s "
1068			    "errno=%d, returnpr=%p\n",
1069			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
1070			    new->ndpr_plen, if_name(new->ndpr_ifp),
1071			    error, newpr));
1072			goto end; /* we should just give up in this case. */
1073		}
1074
1075		/*
1076		 * XXX: from the ND point of view, we can ignore a prefix
1077		 * with the on-link bit being zero.  However, we need a
1078		 * prefix structure for references from autoconfigured
1079		 * addresses.  Thus, we explicitly make sure that the prefix
1080		 * itself expires now.
1081		 */
1082		if (newpr->ndpr_raf_onlink == 0) {
1083			newpr->ndpr_vltime = 0;
1084			newpr->ndpr_pltime = 0;
1085			in6_init_prefix_ltimes(newpr);
1086		}
1087
1088		pr = newpr;
1089	}
1090
1091	/*
1092	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
1093	 * Note that pr must be non NULL at this point.
1094	 */
1095
1096	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
1097	if (!new->ndpr_raf_auto)
1098		goto end;
1099
1100	/*
1101	 * 5.5.3 (b). the link-local prefix should have been ignored in
1102	 * nd6_ra_input.
1103	 */
1104
1105	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
1106	if (new->ndpr_pltime > new->ndpr_vltime) {
1107		error = EINVAL;	/* XXX: won't be used */
1108		goto end;
1109	}
1110
1111	/*
1112	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
1113	 * an address configured by stateless autoconfiguration already in the
1114	 * list of addresses associated with the interface, and the Valid
1115	 * Lifetime is not 0, form an address.  We first check if we have
1116	 * a matching prefix.
1117	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
1118	 * consider autoconfigured addresses while RFC2462 simply said
1119	 * "address".
1120	 */
1121	IF_ADDR_RLOCK(ifp);
1122	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1123		struct in6_ifaddr *ifa6;
1124		u_int32_t remaininglifetime;
1125
1126		if (ifa->ifa_addr->sa_family != AF_INET6)
1127			continue;
1128
1129		ifa6 = (struct in6_ifaddr *)ifa;
1130
1131		/*
1132		 * We only consider autoconfigured addresses as per rfc2462bis.
1133		 */
1134		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
1135			continue;
1136
1137		/*
1138		 * Spec is not clear here, but I believe we should concentrate
1139		 * on unicast (i.e. not anycast) addresses.
1140		 * XXX: other ia6_flags? detached or duplicated?
1141		 */
1142		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
1143			continue;
1144
1145		/*
1146		 * Ignore the address if it is not associated with a prefix
1147		 * or is associated with a prefix that is different from this
1148		 * one.  (pr is never NULL here)
1149		 */
1150		if (ifa6->ia6_ndpr != pr)
1151			continue;
1152
1153		if (ia6_match == NULL) /* remember the first one */
1154			ia6_match = ifa6;
1155
1156		/*
1157		 * An already autoconfigured address matched.  Now that we
1158		 * are sure there is at least one matched address, we can
1159		 * proceed to 5.5.3. (e): update the lifetimes according to the
1160		 * "two hours" rule and the privacy extension.
1161		 * We apply some clarifications in rfc2462bis:
1162		 * - use remaininglifetime instead of storedlifetime as a
1163		 *   variable name
1164		 * - remove the dead code in the "two-hour" rule
1165		 */
1166#define TWOHOUR		(120*60)
1167		lt6_tmp = ifa6->ia6_lifetime;
1168
1169		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
1170			remaininglifetime = ND6_INFINITE_LIFETIME;
1171		else if (time_second - ifa6->ia6_updatetime >
1172			 lt6_tmp.ia6t_vltime) {
1173			/*
1174			 * The case of "invalid" address.  We should usually
1175			 * not see this case.
1176			 */
1177			remaininglifetime = 0;
1178		} else
1179			remaininglifetime = lt6_tmp.ia6t_vltime -
1180			    (time_second - ifa6->ia6_updatetime);
1181
1182		/* when not updating, keep the current stored lifetime. */
1183		lt6_tmp.ia6t_vltime = remaininglifetime;
1184
1185		if (TWOHOUR < new->ndpr_vltime ||
1186		    remaininglifetime < new->ndpr_vltime) {
1187			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1188		} else if (remaininglifetime <= TWOHOUR) {
1189			if (auth) {
1190				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1191			}
1192		} else {
1193			/*
1194			 * new->ndpr_vltime <= TWOHOUR &&
1195			 * TWOHOUR < remaininglifetime
1196			 */
1197			lt6_tmp.ia6t_vltime = TWOHOUR;
1198		}
1199
1200		/* The 2 hour rule is not imposed for preferred lifetime. */
1201		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
1202
1203		in6_init_address_ltimes(pr, &lt6_tmp);
1204
1205		/*
1206		 * We need to treat lifetimes for temporary addresses
1207		 * differently, according to
1208		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
1209		 * we only update the lifetimes when they are in the maximum
1210		 * intervals.
1211		 */
1212		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1213			u_int32_t maxvltime, maxpltime;
1214
1215			if (V_ip6_temp_valid_lifetime >
1216			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
1217			    V_ip6_desync_factor)) {
1218				maxvltime = V_ip6_temp_valid_lifetime -
1219				    (time_second - ifa6->ia6_createtime) -
1220				    V_ip6_desync_factor;
1221			} else
1222				maxvltime = 0;
1223			if (V_ip6_temp_preferred_lifetime >
1224			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
1225			    V_ip6_desync_factor)) {
1226				maxpltime = V_ip6_temp_preferred_lifetime -
1227				    (time_second - ifa6->ia6_createtime) -
1228				    V_ip6_desync_factor;
1229			} else
1230				maxpltime = 0;
1231
1232			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
1233			    lt6_tmp.ia6t_vltime > maxvltime) {
1234				lt6_tmp.ia6t_vltime = maxvltime;
1235			}
1236			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
1237			    lt6_tmp.ia6t_pltime > maxpltime) {
1238				lt6_tmp.ia6t_pltime = maxpltime;
1239			}
1240		}
1241		ifa6->ia6_lifetime = lt6_tmp;
1242		ifa6->ia6_updatetime = time_second;
1243	}
1244	IF_ADDR_RUNLOCK(ifp);
1245	if (ia6_match == NULL && new->ndpr_vltime) {
1246		int ifidlen;
1247
1248		/*
1249		 * 5.5.3 (d) (continued)
1250		 * No address matched and the valid lifetime is non-zero.
1251		 * Create a new address.
1252		 */
1253
1254		/*
1255		 * Prefix Length check:
1256		 * If the sum of the prefix length and interface identifier
1257		 * length does not equal 128 bits, the Prefix Information
1258		 * option MUST be ignored.  The length of the interface
1259		 * identifier is defined in a separate link-type specific
1260		 * document.
1261		 */
1262		ifidlen = in6_if2idlen(ifp);
1263		if (ifidlen < 0) {
1264			/* this should not happen, so we always log it. */
1265			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
1266			    if_name(ifp));
1267			goto end;
1268		}
1269		if (ifidlen + pr->ndpr_plen != 128) {
1270			nd6log((LOG_INFO,
1271			    "prelist_update: invalid prefixlen "
1272			    "%d for %s, ignored\n",
1273			    pr->ndpr_plen, if_name(ifp)));
1274			goto end;
1275		}
1276
1277		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
1278			/*
1279			 * note that we should use pr (not new) for reference.
1280			 */
1281			pr->ndpr_refcnt++;
1282			ia6->ia6_ndpr = pr;
1283
1284			/*
1285			 * RFC 3041 3.3 (2).
1286			 * When a new public address is created as described
1287			 * in RFC2462, also create a new temporary address.
1288			 *
1289			 * RFC 3041 3.5.
1290			 * When an interface connects to a new link, a new
1291			 * randomized interface identifier should be generated
1292			 * immediately together with a new set of temporary
1293			 * addresses.  Thus, we specifiy 1 as the 2nd arg of
1294			 * in6_tmpifadd().
1295			 */
1296			if (V_ip6_use_tempaddr) {
1297				int e;
1298				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
1299					nd6log((LOG_NOTICE, "prelist_update: "
1300					    "failed to create a temporary "
1301					    "address, errno=%d\n",
1302					    e));
1303				}
1304			}
1305			ifa_free(&ia6->ia_ifa);
1306
1307			/*
1308			 * A newly added address might affect the status
1309			 * of other addresses, so we check and update it.
1310			 * XXX: what if address duplication happens?
1311			 */
1312			pfxlist_onlink_check();
1313		} else {
1314			/* just set an error. do not bark here. */
1315			error = EADDRNOTAVAIL; /* XXX: might be unused. */
1316		}
1317	}
1318
1319 end:
1320	splx(s);
1321	return error;
1322}
1323
1324/*
1325 * A supplement function used in the on-link detection below;
1326 * detect if a given prefix has a (probably) reachable advertising router.
1327 * XXX: lengthy function name...
1328 */
1329static struct nd_pfxrouter *
1330find_pfxlist_reachable_router(struct nd_prefix *pr)
1331{
1332	struct nd_pfxrouter *pfxrtr;
1333	struct llentry *ln;
1334	int canreach;
1335
1336	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
1337		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
1338		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
1339		IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
1340		if (ln == NULL)
1341			continue;
1342		canreach = ND6_IS_LLINFO_PROBREACH(ln);
1343		LLE_RUNLOCK(ln);
1344		if (canreach)
1345			break;
1346	}
1347	return (pfxrtr);
1348}
1349
1350/*
1351 * Check if each prefix in the prefix list has at least one available router
1352 * that advertised the prefix (a router is "available" if its neighbor cache
1353 * entry is reachable or probably reachable).
1354 * If the check fails, the prefix may be off-link, because, for example,
1355 * we have moved from the network but the lifetime of the prefix has not
1356 * expired yet.  So we should not use the prefix if there is another prefix
1357 * that has an available router.
1358 * But, if there is no prefix that has an available router, we still regards
1359 * all the prefixes as on-link.  This is because we can't tell if all the
1360 * routers are simply dead or if we really moved from the network and there
1361 * is no router around us.
1362 */
1363void
1364pfxlist_onlink_check()
1365{
1366	struct nd_prefix *pr;
1367	struct in6_ifaddr *ifa;
1368	struct nd_defrouter *dr;
1369	struct nd_pfxrouter *pfxrtr = NULL;
1370
1371	/*
1372	 * Check if there is a prefix that has a reachable advertising
1373	 * router.
1374	 */
1375	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1376		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
1377			break;
1378	}
1379
1380	/*
1381	 * If we have no such prefix, check whether we still have a router
1382	 * that does not advertise any prefixes.
1383	 */
1384	if (pr == NULL) {
1385		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
1386			struct nd_prefix *pr0;
1387
1388			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
1389				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
1390					break;
1391			}
1392			if (pfxrtr != NULL)
1393				break;
1394		}
1395	}
1396	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
1397		/*
1398		 * There is at least one prefix that has a reachable router,
1399		 * or at least a router which probably does not advertise
1400		 * any prefixes.  The latter would be the case when we move
1401		 * to a new link where we have a router that does not provide
1402		 * prefixes and we configure an address by hand.
1403		 * Detach prefixes which have no reachable advertising
1404		 * router, and attach other prefixes.
1405		 */
1406		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1407			/* XXX: a link-local prefix should never be detached */
1408			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1409				continue;
1410
1411			/*
1412			 * we aren't interested in prefixes without the L bit
1413			 * set.
1414			 */
1415			if (pr->ndpr_raf_onlink == 0)
1416				continue;
1417
1418			if (pr->ndpr_raf_auto == 0)
1419				continue;
1420
1421			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1422			    find_pfxlist_reachable_router(pr) == NULL)
1423				pr->ndpr_stateflags |= NDPRF_DETACHED;
1424			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1425			    find_pfxlist_reachable_router(pr) != 0)
1426				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1427		}
1428	} else {
1429		/* there is no prefix that has a reachable router */
1430		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1431			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1432				continue;
1433
1434			if (pr->ndpr_raf_onlink == 0)
1435				continue;
1436
1437			if (pr->ndpr_raf_auto == 0)
1438				continue;
1439
1440			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1441				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1442		}
1443	}
1444
1445	/*
1446	 * Remove each interface route associated with a (just) detached
1447	 * prefix, and reinstall the interface route for a (just) attached
1448	 * prefix.  Note that all attempt of reinstallation does not
1449	 * necessarily success, when a same prefix is shared among multiple
1450	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
1451	 * so we don't have to care about them.
1452	 */
1453	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1454		int e;
1455		char ip6buf[INET6_ADDRSTRLEN];
1456
1457		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1458			continue;
1459
1460		if (pr->ndpr_raf_onlink == 0)
1461			continue;
1462
1463		if (pr->ndpr_raf_auto == 0)
1464			continue;
1465
1466		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1467		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1468			if ((e = nd6_prefix_offlink(pr)) != 0) {
1469				nd6log((LOG_ERR,
1470				    "pfxlist_onlink_check: failed to "
1471				    "make %s/%d offlink, errno=%d\n",
1472				    ip6_sprintf(ip6buf,
1473					    &pr->ndpr_prefix.sin6_addr),
1474					    pr->ndpr_plen, e));
1475			}
1476		}
1477		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1478		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
1479		    pr->ndpr_raf_onlink) {
1480			if ((e = nd6_prefix_onlink(pr)) != 0) {
1481				nd6log((LOG_ERR,
1482				    "pfxlist_onlink_check: failed to "
1483				    "make %s/%d onlink, errno=%d\n",
1484				    ip6_sprintf(ip6buf,
1485					    &pr->ndpr_prefix.sin6_addr),
1486					    pr->ndpr_plen, e));
1487			}
1488		}
1489	}
1490
1491	/*
1492	 * Changes on the prefix status might affect address status as well.
1493	 * Make sure that all addresses derived from an attached prefix are
1494	 * attached, and that all addresses derived from a detached prefix are
1495	 * detached.  Note, however, that a manually configured address should
1496	 * always be attached.
1497	 * The precise detection logic is same as the one for prefixes.
1498	 *
1499	 * XXXRW: in6_ifaddrhead locking.
1500	 */
1501	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1502		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
1503			continue;
1504
1505		if (ifa->ia6_ndpr == NULL) {
1506			/*
1507			 * This can happen when we first configure the address
1508			 * (i.e. the address exists, but the prefix does not).
1509			 * XXX: complicated relationships...
1510			 */
1511			continue;
1512		}
1513
1514		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
1515			break;
1516	}
1517	if (ifa) {
1518		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1519			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1520				continue;
1521
1522			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
1523				continue;
1524
1525			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
1526				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1527					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1528					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1529					nd6_dad_start((struct ifaddr *)ifa, 0);
1530				}
1531			} else {
1532				ifa->ia6_flags |= IN6_IFF_DETACHED;
1533			}
1534		}
1535	}
1536	else {
1537		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1538			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1539				continue;
1540
1541			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1542				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1543				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1544				/* Do we need a delay in this case? */
1545				nd6_dad_start((struct ifaddr *)ifa, 0);
1546			}
1547		}
1548	}
1549}
1550
1551static int
1552nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
1553{
1554	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1555	struct radix_node_head *rnh;
1556	struct rtentry *rt;
1557	struct sockaddr_in6 mask6;
1558	u_long rtflags;
1559	int error, a_failure, fibnum;
1560
1561	/*
1562	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
1563	 * ifa->ifa_rtrequest = nd6_rtrequest;
1564	 */
1565	bzero(&mask6, sizeof(mask6));
1566	mask6.sin6_len = sizeof(mask6);
1567	mask6.sin6_addr = pr->ndpr_mask;
1568	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
1569
1570	a_failure = 0;
1571	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1572
1573		rt = NULL;
1574		error = in6_rtrequest(RTM_ADD,
1575		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
1576		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
1577		if (error == 0) {
1578			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
1579			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
1580			    error, pr, ifa));
1581
1582			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
1583			/* XXX what if rhn == NULL? */
1584			RADIX_NODE_HEAD_LOCK(rnh);
1585			RT_LOCK(rt);
1586			if (rt_setgate(rt, rt_key(rt),
1587			    (struct sockaddr *)&null_sdl) == 0) {
1588				struct sockaddr_dl *dl;
1589
1590				dl = (struct sockaddr_dl *)rt->rt_gateway;
1591				dl->sdl_type = rt->rt_ifp->if_type;
1592				dl->sdl_index = rt->rt_ifp->if_index;
1593			}
1594			RADIX_NODE_HEAD_UNLOCK(rnh);
1595			nd6_rtmsg(RTM_ADD, rt);
1596			RT_UNLOCK(rt);
1597			pr->ndpr_stateflags |= NDPRF_ONLINK;
1598		} else {
1599			char ip6buf[INET6_ADDRSTRLEN];
1600			char ip6bufg[INET6_ADDRSTRLEN];
1601			char ip6bufm[INET6_ADDRSTRLEN];
1602			struct sockaddr_in6 *sin6;
1603
1604			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
1605			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
1606			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
1607			    "flags=%lx errno = %d\n",
1608			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1609			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
1610			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
1611			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
1612			    rtflags, error));
1613
1614			/* Save last error to return, see rtinit(). */
1615			a_failure = error;
1616		}
1617
1618		if (rt != NULL) {
1619			RT_LOCK(rt);
1620			RT_REMREF(rt);
1621			RT_UNLOCK(rt);
1622		}
1623	}
1624
1625	/* Return the last error we got. */
1626	return (a_failure);
1627}
1628
1629static int
1630nd6_prefix_onlink(struct nd_prefix *pr)
1631{
1632	struct ifaddr *ifa;
1633	struct ifnet *ifp = pr->ndpr_ifp;
1634	struct nd_prefix *opr;
1635	int error = 0;
1636	char ip6buf[INET6_ADDRSTRLEN];
1637
1638	/* sanity check */
1639	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1640		nd6log((LOG_ERR,
1641		    "nd6_prefix_onlink: %s/%d is already on-link\n",
1642		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1643		    pr->ndpr_plen));
1644		return (EEXIST);
1645	}
1646
1647	/*
1648	 * Add the interface route associated with the prefix.  Before
1649	 * installing the route, check if there's the same prefix on another
1650	 * interface, and the prefix has already installed the interface route.
1651	 * Although such a configuration is expected to be rare, we explicitly
1652	 * allow it.
1653	 */
1654	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1655		if (opr == pr)
1656			continue;
1657
1658		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1659			continue;
1660
1661		if (opr->ndpr_plen == pr->ndpr_plen &&
1662		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1663		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
1664			return (0);
1665	}
1666
1667	/*
1668	 * We prefer link-local addresses as the associated interface address.
1669	 */
1670	/* search for a link-local addr */
1671	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
1672	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
1673	if (ifa == NULL) {
1674		/* XXX: freebsd does not have ifa_ifwithaf */
1675		IF_ADDR_RLOCK(ifp);
1676		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1677			if (ifa->ifa_addr->sa_family == AF_INET6)
1678				break;
1679		}
1680		if (ifa != NULL)
1681			ifa_ref(ifa);
1682		IF_ADDR_RUNLOCK(ifp);
1683		/* should we care about ia6_flags? */
1684	}
1685	if (ifa == NULL) {
1686		/*
1687		 * This can still happen, when, for example, we receive an RA
1688		 * containing a prefix with the L bit set and the A bit clear,
1689		 * after removing all IPv6 addresses on the receiving
1690		 * interface.  This should, of course, be rare though.
1691		 */
1692		nd6log((LOG_NOTICE,
1693		    "nd6_prefix_onlink: failed to find any ifaddr"
1694		    " to add route for a prefix(%s/%d) on %s\n",
1695		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1696		    pr->ndpr_plen, if_name(ifp)));
1697		return (0);
1698	}
1699
1700	error = nd6_prefix_onlink_rtrequest(pr, ifa);
1701
1702	if (ifa != NULL)
1703		ifa_free(ifa);
1704
1705	return (error);
1706}
1707
1708static int
1709nd6_prefix_offlink(struct nd_prefix *pr)
1710{
1711	int error = 0;
1712	struct ifnet *ifp = pr->ndpr_ifp;
1713	struct nd_prefix *opr;
1714	struct sockaddr_in6 sa6, mask6;
1715	struct rtentry *rt;
1716	char ip6buf[INET6_ADDRSTRLEN];
1717	int fibnum, a_failure;
1718
1719	/* sanity check */
1720	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1721		nd6log((LOG_ERR,
1722		    "nd6_prefix_offlink: %s/%d is already off-link\n",
1723		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1724		    pr->ndpr_plen));
1725		return (EEXIST);
1726	}
1727
1728	bzero(&sa6, sizeof(sa6));
1729	sa6.sin6_family = AF_INET6;
1730	sa6.sin6_len = sizeof(sa6);
1731	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
1732	    sizeof(struct in6_addr));
1733	bzero(&mask6, sizeof(mask6));
1734	mask6.sin6_family = AF_INET6;
1735	mask6.sin6_len = sizeof(sa6);
1736	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
1737
1738	a_failure = 0;
1739	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1740		rt = NULL;
1741		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
1742		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
1743		if (error == 0) {
1744			/* report the route deletion to the routing socket. */
1745			if (rt != NULL)
1746				nd6_rtmsg(RTM_DELETE, rt);
1747		} else {
1748			/* Save last error to return, see rtinit(). */
1749			a_failure = error;
1750		}
1751		if (rt != NULL) {
1752			RTFREE(rt);
1753		}
1754	}
1755	error = a_failure;
1756	if (error == 0) {
1757		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
1758
1759		/*
1760		 * There might be the same prefix on another interface,
1761		 * the prefix which could not be on-link just because we have
1762		 * the interface route (see comments in nd6_prefix_onlink).
1763		 * If there's one, try to make the prefix on-link on the
1764		 * interface.
1765		 */
1766		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1767			if (opr == pr)
1768				continue;
1769
1770			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
1771				continue;
1772
1773			/*
1774			 * KAME specific: detached prefixes should not be
1775			 * on-link.
1776			 */
1777			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1778				continue;
1779
1780			if (opr->ndpr_plen == pr->ndpr_plen &&
1781			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1782			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
1783				int e;
1784
1785				if ((e = nd6_prefix_onlink(opr)) != 0) {
1786					nd6log((LOG_ERR,
1787					    "nd6_prefix_offlink: failed to "
1788					    "recover a prefix %s/%d from %s "
1789					    "to %s (errno = %d)\n",
1790					    ip6_sprintf(ip6buf,
1791						&opr->ndpr_prefix.sin6_addr),
1792					    opr->ndpr_plen, if_name(ifp),
1793					    if_name(opr->ndpr_ifp), e));
1794				}
1795			}
1796		}
1797	} else {
1798		/* XXX: can we still set the NDPRF_ONLINK flag? */
1799		nd6log((LOG_ERR,
1800		    "nd6_prefix_offlink: failed to delete route: "
1801		    "%s/%d on %s (errno = %d)\n",
1802		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
1803		    if_name(ifp), error));
1804	}
1805
1806	return (error);
1807}
1808
1809static struct in6_ifaddr *
1810in6_ifadd(struct nd_prefixctl *pr, int mcast)
1811{
1812	struct ifnet *ifp = pr->ndpr_ifp;
1813	struct ifaddr *ifa;
1814	struct in6_aliasreq ifra;
1815	struct in6_ifaddr *ia, *ib;
1816	int error, plen0;
1817	struct in6_addr mask;
1818	int prefixlen = pr->ndpr_plen;
1819	int updateflags;
1820	char ip6buf[INET6_ADDRSTRLEN];
1821
1822	in6_prefixlen2mask(&mask, prefixlen);
1823
1824	/*
1825	 * find a link-local address (will be interface ID).
1826	 * Is it really mandatory? Theoretically, a global or a site-local
1827	 * address can be configured without a link-local address, if we
1828	 * have a unique interface identifier...
1829	 *
1830	 * it is not mandatory to have a link-local address, we can generate
1831	 * interface identifier on the fly.  we do this because:
1832	 * (1) it should be the easiest way to find interface identifier.
1833	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
1834	 * for multiple addresses on a single interface, and possible shortcut
1835	 * of DAD.  we omitted DAD for this reason in the past.
1836	 * (3) a user can prevent autoconfiguration of global address
1837	 * by removing link-local address by hand (this is partly because we
1838	 * don't have other way to control the use of IPv6 on an interface.
1839	 * this has been our design choice - cf. NRL's "ifconfig auto").
1840	 * (4) it is easier to manage when an interface has addresses
1841	 * with the same interface identifier, than to have multiple addresses
1842	 * with different interface identifiers.
1843	 */
1844	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
1845	if (ifa)
1846		ib = (struct in6_ifaddr *)ifa;
1847	else
1848		return NULL;
1849
1850	/* prefixlen + ifidlen must be equal to 128 */
1851	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
1852	if (prefixlen != plen0) {
1853		ifa_free(ifa);
1854		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
1855		    "(prefix=%d ifid=%d)\n",
1856		    if_name(ifp), prefixlen, 128 - plen0));
1857		return NULL;
1858	}
1859
1860	/* make ifaddr */
1861
1862	bzero(&ifra, sizeof(ifra));
1863	/*
1864	 * in6_update_ifa() does not use ifra_name, but we accurately set it
1865	 * for safety.
1866	 */
1867	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1868	ifra.ifra_addr.sin6_family = AF_INET6;
1869	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
1870	/* prefix */
1871	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
1872	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1873	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1874	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1875	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1876
1877	/* interface ID */
1878	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
1879	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
1880	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
1881	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
1882	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1883	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
1884	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1885	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
1886	ifa_free(ifa);
1887
1888	/* new prefix mask. */
1889	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
1890	ifra.ifra_prefixmask.sin6_family = AF_INET6;
1891	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
1892	    sizeof(ifra.ifra_prefixmask.sin6_addr));
1893
1894	/* lifetimes. */
1895	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
1896	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
1897
1898	/* XXX: scope zone ID? */
1899
1900	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
1901
1902	/*
1903	 * Make sure that we do not have this address already.  This should
1904	 * usually not happen, but we can still see this case, e.g., if we
1905	 * have manually configured the exact address to be configured.
1906	 */
1907	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
1908	    &ifra.ifra_addr.sin6_addr);
1909	if (ifa != NULL) {
1910		ifa_free(ifa);
1911		/* this should be rare enough to make an explicit log */
1912		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
1913		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
1914		return (NULL);
1915	}
1916
1917	/*
1918	 * Allocate ifaddr structure, link into chain, etc.
1919	 * If we are going to create a new address upon receiving a multicasted
1920	 * RA, we need to impose a random delay before starting DAD.
1921	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
1922	 */
1923	updateflags = 0;
1924	if (mcast)
1925		updateflags |= IN6_IFAUPDATE_DADDELAY;
1926	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
1927		nd6log((LOG_ERR,
1928		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
1929		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
1930		    if_name(ifp), error));
1931		return (NULL);	/* ifaddr must not have been allocated. */
1932	}
1933
1934	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
1935	/*
1936	 * XXXRW: Assumption of non-NULLness here might not be true with
1937	 * fine-grained locking -- should we validate it?  Or just return
1938	 * earlier ifa rather than looking it up again?
1939	 */
1940	return (ia);		/* this is always non-NULL  and referenced. */
1941}
1942
1943/*
1944 * ia0 - corresponding public address
1945 */
1946int
1947in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
1948{
1949	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
1950	struct in6_ifaddr *newia, *ia;
1951	struct in6_aliasreq ifra;
1952	int i, error;
1953	int trylimit = 3;	/* XXX: adhoc value */
1954	int updateflags;
1955	u_int32_t randid[2];
1956	time_t vltime0, pltime0;
1957
1958	bzero(&ifra, sizeof(ifra));
1959	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1960	ifra.ifra_addr = ia0->ia_addr;
1961	/* copy prefix mask */
1962	ifra.ifra_prefixmask = ia0->ia_prefixmask;
1963	/* clear the old IFID */
1964	for (i = 0; i < 4; i++) {
1965		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
1966		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
1967	}
1968
1969  again:
1970	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
1971	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
1972		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
1973		    "random IFID\n"));
1974		return (EINVAL);
1975	}
1976	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1977	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
1978	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1979	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
1980
1981	/*
1982	 * in6_get_tmpifid() quite likely provided a unique interface ID.
1983	 * However, we may still have a chance to see collision, because
1984	 * there may be a time lag between generation of the ID and generation
1985	 * of the address.  So, we'll do one more sanity check.
1986	 */
1987	IN6_IFADDR_RLOCK();
1988	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
1989		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
1990		    &ifra.ifra_addr.sin6_addr)) {
1991			if (trylimit-- == 0) {
1992				IN6_IFADDR_RUNLOCK();
1993				/*
1994				 * Give up.  Something strange should have
1995				 * happened.
1996				 */
1997				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
1998				    "find a unique random IFID\n"));
1999				return (EEXIST);
2000			}
2001			IN6_IFADDR_RUNLOCK();
2002			forcegen = 1;
2003			goto again;
2004		}
2005	}
2006	IN6_IFADDR_RUNLOCK();
2007
2008	/*
2009	 * The Valid Lifetime is the lower of the Valid Lifetime of the
2010         * public address or TEMP_VALID_LIFETIME.
2011	 * The Preferred Lifetime is the lower of the Preferred Lifetime
2012         * of the public address or TEMP_PREFERRED_LIFETIME -
2013         * DESYNC_FACTOR.
2014	 */
2015	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
2016		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
2017		    (ia0->ia6_lifetime.ia6t_vltime -
2018		    (time_second - ia0->ia6_updatetime));
2019		if (vltime0 > V_ip6_temp_valid_lifetime)
2020			vltime0 = V_ip6_temp_valid_lifetime;
2021	} else
2022		vltime0 = V_ip6_temp_valid_lifetime;
2023	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
2024		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
2025		    (ia0->ia6_lifetime.ia6t_pltime -
2026		    (time_second - ia0->ia6_updatetime));
2027		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
2028			pltime0 = V_ip6_temp_preferred_lifetime -
2029			    V_ip6_desync_factor;
2030		}
2031	} else
2032		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
2033	ifra.ifra_lifetime.ia6t_vltime = vltime0;
2034	ifra.ifra_lifetime.ia6t_pltime = pltime0;
2035
2036	/*
2037	 * A temporary address is created only if this calculated Preferred
2038	 * Lifetime is greater than REGEN_ADVANCE time units.
2039	 */
2040	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
2041		return (0);
2042
2043	/* XXX: scope zone ID? */
2044
2045	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
2046
2047	/* allocate ifaddr structure, link into chain, etc. */
2048	updateflags = 0;
2049	if (delay)
2050		updateflags |= IN6_IFAUPDATE_DADDELAY;
2051	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
2052		return (error);
2053
2054	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2055	if (newia == NULL) {	/* XXX: can it happen? */
2056		nd6log((LOG_ERR,
2057		    "in6_tmpifadd: ifa update succeeded, but we got "
2058		    "no ifaddr\n"));
2059		return (EINVAL); /* XXX */
2060	}
2061	newia->ia6_ndpr = ia0->ia6_ndpr;
2062	newia->ia6_ndpr->ndpr_refcnt++;
2063	ifa_free(&newia->ia_ifa);
2064
2065	/*
2066	 * A newly added address might affect the status of other addresses.
2067	 * XXX: when the temporary address is generated with a new public
2068	 * address, the onlink check is redundant.  However, it would be safe
2069	 * to do the check explicitly everywhere a new address is generated,
2070	 * and, in fact, we surely need the check when we create a new
2071	 * temporary address due to deprecation of an old temporary address.
2072	 */
2073	pfxlist_onlink_check();
2074
2075	return (0);
2076}
2077
2078static int
2079in6_init_prefix_ltimes(struct nd_prefix *ndpr)
2080{
2081	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
2082		ndpr->ndpr_preferred = 0;
2083	else
2084		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
2085	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2086		ndpr->ndpr_expire = 0;
2087	else
2088		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
2089
2090	return 0;
2091}
2092
2093static void
2094in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
2095{
2096	/* init ia6t_expire */
2097	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
2098		lt6->ia6t_expire = 0;
2099	else {
2100		lt6->ia6t_expire = time_second;
2101		lt6->ia6t_expire += lt6->ia6t_vltime;
2102	}
2103
2104	/* init ia6t_preferred */
2105	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
2106		lt6->ia6t_preferred = 0;
2107	else {
2108		lt6->ia6t_preferred = time_second;
2109		lt6->ia6t_preferred += lt6->ia6t_pltime;
2110	}
2111}
2112
2113/*
2114 * Delete all the routing table entries that use the specified gateway.
2115 * XXX: this function causes search through all entries of routing table, so
2116 * it shouldn't be called when acting as a router.
2117 */
2118void
2119rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
2120{
2121	struct radix_node_head *rnh;
2122	u_int fibnum;
2123	int s = splnet();
2124
2125	/* We'll care only link-local addresses */
2126	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
2127		splx(s);
2128		return;
2129	}
2130
2131	/* XXX Do we really need to walk any but the default FIB? */
2132	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
2133		rnh = rt_tables_get_rnh(fibnum, AF_INET6);
2134		if (rnh == NULL)
2135			continue;
2136
2137		RADIX_NODE_HEAD_LOCK(rnh);
2138		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
2139		RADIX_NODE_HEAD_UNLOCK(rnh);
2140	}
2141	splx(s);
2142}
2143
2144static int
2145rt6_deleteroute(struct radix_node *rn, void *arg)
2146{
2147#define SIN6(s)	((struct sockaddr_in6 *)s)
2148	struct rtentry *rt = (struct rtentry *)rn;
2149	struct in6_addr *gate = (struct in6_addr *)arg;
2150
2151	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
2152		return (0);
2153
2154	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
2155		return (0);
2156	}
2157
2158	/*
2159	 * Do not delete a static route.
2160	 * XXX: this seems to be a bit ad-hoc. Should we consider the
2161	 * 'cloned' bit instead?
2162	 */
2163	if ((rt->rt_flags & RTF_STATIC) != 0)
2164		return (0);
2165
2166	/*
2167	 * We delete only host route. This means, in particular, we don't
2168	 * delete default route.
2169	 */
2170	if ((rt->rt_flags & RTF_HOST) == 0)
2171		return (0);
2172
2173	return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
2174	    rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
2175#undef SIN6
2176}
2177
2178int
2179nd6_setdefaultiface(int ifindex)
2180{
2181	int error = 0;
2182
2183	if (ifindex < 0 || V_if_index < ifindex)
2184		return (EINVAL);
2185	if (ifindex != 0 && !ifnet_byindex(ifindex))
2186		return (EINVAL);
2187
2188	if (V_nd6_defifindex != ifindex) {
2189		V_nd6_defifindex = ifindex;
2190		if (V_nd6_defifindex > 0)
2191			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
2192		else
2193			V_nd6_defifp = NULL;
2194
2195		/*
2196		 * Our current implementation assumes one-to-one maping between
2197		 * interfaces and links, so it would be natural to use the
2198		 * default interface as the default link.
2199		 */
2200		scope6_setdefault(V_nd6_defifp);
2201	}
2202
2203	return (error);
2204}
2205