in6_src.c revision 184096
1139776Simp/*-
222521Sdyson * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31541Srgrimes * All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 3. Neither the name of the project nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
291541Srgrimes *	$KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
301541Srgrimes */
311541Srgrimes
321541Srgrimes/*-
331541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993
341541Srgrimes *	The Regents of the University of California.  All rights reserved.
3550477Speter *
361541Srgrimes * Redistribution and use in source and binary forms, with or without
371541Srgrimes * modification, are permitted provided that the following conditions
381541Srgrimes * are met:
391541Srgrimes * 1. Redistributions of source code must retain the above copyright
401541Srgrimes *    notice, this list of conditions and the following disclaimer.
411541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
421541Srgrimes *    notice, this list of conditions and the following disclaimer in the
431541Srgrimes *    documentation and/or other materials provided with the distribution.
441541Srgrimes * 4. Neither the name of the University nor the names of its contributors
45177785Skib *    may be used to endorse or promote products derived from this software
4622605Smpp *    without specific prior written permission.
4776166Smarkm *
4830354Sphk * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
491541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
501541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
5176166Smarkm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
5276166Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53232059Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
5476166Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
5577031Sru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
561541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57151897Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5830354Sphk * SUCH DAMAGE.
59116271Sphk *
60132902Sphk *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
61116271Sphk */
62116271Sphk
63116271Sphk#include <sys/cdefs.h>
64116271Sphk__FBSDID("$FreeBSD: head/sys/netinet6/in6_src.c 184096 2008-10-20 18:43:59Z bz $");
65116271Sphk
66116271Sphk#include "opt_inet.h"
67116271Sphk#include "opt_inet6.h"
6812595Sbde#include "opt_mpath.h"
691541Srgrimes
701541Srgrimes#include <sys/param.h>
711541Srgrimes#include <sys/systm.h>
7212769Sphk#include <sys/lock.h>
73191990Sattilio#include <sys/malloc.h>
741541Srgrimes#include <sys/mbuf.h>
751541Srgrimes#include <sys/priv.h>
761541Srgrimes#include <sys/protosw.h>
771541Srgrimes#include <sys/socket.h>
781541Srgrimes#include <sys/socketvar.h>
79232059Smm#include <sys/sockio.h>
8097186Smux#include <sys/sysctl.h>
8197186Smux#include <sys/errno.h>
82132902Sphk#include <sys/time.h>
831541Srgrimes#include <sys/kernel.h>
8465467Sbp#include <sys/sx.h>
851541Srgrimes#include <sys/vimage.h>
86232059Smm
87232059Smm#include <net/if.h>
88137479Sphk#include <net/route.h>
89137479Sphk#ifdef RADIX_MPATH
90245004Skib#include <net/radix_mpath.h>
911541Srgrimes#endif
921541Srgrimes
931541Srgrimes#include <netinet/in.h>
941541Srgrimes#include <netinet/in_var.h>
95159019Srodrigc#include <netinet/in_systm.h>
96159019Srodrigc#include <netinet/ip.h>
97159019Srodrigc#include <netinet/in_pcb.h>
98159019Srodrigc#include <netinet/ip_var.h>
99159019Srodrigc#include <netinet/udp.h>
100159019Srodrigc#include <netinet/udp_var.h>
101159019Srodrigc#include <netinet6/in6_var.h>
1021541Srgrimes#include <netinet/ip6.h>
1031541Srgrimes#include <netinet6/in6_pcb.h>
1041541Srgrimes#include <netinet6/ip6_var.h>
1051541Srgrimes#include <netinet6/scope6_var.h>
1061541Srgrimes#include <netinet6/nd6.h>
10797186Smux
10897186Smuxstatic struct mtx addrsel_lock;
10997186Smux#define	ADDRSEL_LOCK_INIT()	mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
1101541Srgrimes#define	ADDRSEL_LOCK()		mtx_lock(&addrsel_lock)
1111541Srgrimes#define	ADDRSEL_UNLOCK()	mtx_unlock(&addrsel_lock)
112226681Spho#define	ADDRSEL_LOCK_ASSERT()	mtx_assert(&addrsel_lock, MA_OWNED)
11324988Skato
114138290Sphkstatic struct sx addrsel_sxlock;
115226681Spho#define	ADDRSEL_SXLOCK_INIT()	sx_init(&addrsel_sxlock, "addrsel_sxlock")
116175294Sattilio#define	ADDRSEL_SLOCK()		sx_slock(&addrsel_sxlock)
11724988Skato#define	ADDRSEL_SUNLOCK()	sx_sunlock(&addrsel_sxlock)
11824988Skato#define	ADDRSEL_XLOCK()		sx_xlock(&addrsel_sxlock)
11924988Skato#define	ADDRSEL_XUNLOCK()	sx_xunlock(&addrsel_sxlock)
1201541Srgrimes
1211541Srgrimes#define ADDR_LABEL_NOTAPP (-1)
122191990Sattiliostruct in6_addrpolicy defaultaddrpolicy;
1233496Sphk
124240285Skibint ip6_prefer_tempaddr = 0;
12524988Skato
/*
 * Forward declarations of the file-local helpers.  All of them use plain
 * ANSI prototypes; the old __P(()) wrapper is not needed.
 */
static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
	struct ip6_moptions *, struct route_in6 *, struct ifnet **,
	struct rtentry **, int, int);
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
	struct ip6_moptions *, struct route_in6 *ro, struct ifnet **);

static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);

static void init_policy_queue(void);
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
				    void *);
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
14124988Skato
/*
 * Return an IPv6 address, which is the most appropriate for a given
 * destination and user specified options.
 * If necessary, this function looks up the routing table and returns
 * an entry to the caller for later use.
 */
/*
 * Bump the statistics counter for source address selection rule (r).
 * The index is range-checked against the size of ip6s_sources_rule so
 * that an unexpected rule number cannot write outside the array.
 */
#define IN6_SRCRULE_COUNT(r) do {\
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
		sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		V_ip6stat.ip6s_sources_rule[(r)]++; \
} while(0)

/*
 * Control-flow helpers for the candidate loop in in6_selectsrc().  Each
 * one records which rule made the decision and then jumps to a label
 * inside that loop, so they are only usable there:
 *   REPLACE(r) - the current candidate becomes the best one ('replace').
 *   NEXT(r)    - keep the best one and go on to the next candidate ('next').
 *   BREAK(r)   - stop scanning altogether ('out').
 */
#define REPLACE(r) do {\
	IN6_SRCRULE_COUNT(r); \
	goto replace; \
} while(0)
#define NEXT(r) do {\
	IN6_SRCRULE_COUNT(r); \
	goto next;		/* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
	IN6_SRCRULE_COUNT(r); \
	goto out;		/* XXX: we can't use 'break' here */ \
} while(0)
1741541Srgrimes
1751541Srgrimesstruct in6_addr *
176101308Sjeffin6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
1771541Srgrimes    struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
178103934Sjeff    struct ifnet **ifpp, int *errorp)
179103934Sjeff{
180103934Sjeff	INIT_VNET_INET6(curvnet);
181103934Sjeff	struct in6_addr dst;
182175294Sattilio	struct ifnet *ifp = NULL;
183103934Sjeff	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
184162647Stegge	struct in6_pktinfo *pi = NULL;
185162647Stegge	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
1861541Srgrimes	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
187162647Stegge	u_int32_t odstzone;
188162647Stegge	int prefer_tempaddr;
189245004Skib	struct ip6_moptions *mopts;
190245004Skib
191245004Skib	dst = dstsock->sin6_addr; /* make a copy for local operation */
192245004Skib	*errorp = 0;
193245004Skib	if (ifpp)
194162647Stegge		*ifpp = NULL;
195245004Skib
196245004Skib	if (inp != NULL) {
197245004Skib		INP_LOCK_ASSERT(inp);
198245004Skib		mopts = inp->in6p_moptions;
199245004Skib	} else {
200240285Skib		mopts = NULL;
201162647Stegge	}
202232918Skevlo
20322521Sdyson	/*
204245004Skib	 * If the source address is explicitly specified by the caller,
205245004Skib	 * check if the requested source address is indeed a unicast address
206245004Skib	 * assigned to the node, and can be used as the packet's source
207245004Skib	 * address.  If everything is okay, use the address as source.
208245004Skib	 */
209245004Skib	if (opts && (pi = opts->ip6po_pktinfo) &&
2101541Srgrimes	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
211138483Sphk		struct sockaddr_in6 srcsock;
212138483Sphk		struct in6_ifaddr *ia6;
21365467Sbp
2141541Srgrimes		/* get the outgoing interface */
2151541Srgrimes		if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
2161541Srgrimes		    != 0) {
2171541Srgrimes			return (NULL);
2181541Srgrimes		}
2191541Srgrimes
2201541Srgrimes		/*
22112769Sphk		 * determine the appropriate zone id of the source based on
222191990Sattilio		 * the zone of the destination and the outgoing interface.
2231541Srgrimes		 * If the specified address is ambiguous wrt the scope zone,
2241541Srgrimes		 * the interface must be specified; otherwise, ifa_ifwithaddr()
2251541Srgrimes		 * will fail matching the address.
226240285Skib		 */
227240285Skib		bzero(&srcsock, sizeof(srcsock));
228240285Skib		srcsock.sin6_family = AF_INET6;
2291541Srgrimes		srcsock.sin6_len = sizeof(srcsock);
23065467Sbp		srcsock.sin6_addr = pi->ipi6_addr;
2311541Srgrimes		if (ifp) {
23222521Sdyson			*errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
233240285Skib			if (*errorp != 0)
234240285Skib				return (NULL);
235240285Skib		}
2361541Srgrimes
23776688Siedowse		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
238191990Sattilio		if (ia6 == NULL ||
23966356Sbp		    (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
24066356Sbp			*errorp = EADDRNOTAVAIL;
2411541Srgrimes			return (NULL);
2421541Srgrimes		}
2431541Srgrimes		pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
2441541Srgrimes		if (ifpp)
24565467Sbp			*ifpp = ifp;
246240285Skib		return (&ia6->ia_addr.sin6_addr);
247245004Skib	}
248245004Skib
249245004Skib	/*
250245004Skib	 * Otherwise, if the socket has already bound the source, just use it.
251245004Skib	 */
252245004Skib	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
253245004Skib		return (&inp->in6p_laddr);
254245004Skib	}
255240285Skib
256232918Skevlo	/*
25766356Sbp	 * If the address is not specified, choose the best one based on
258240285Skib	 * the outgoing interface and the destination address.
2591541Srgrimes	 */
2601541Srgrimes	/* get the outgoing interface */
26112769Sphk	if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
262191990Sattilio		return (NULL);
2631541Srgrimes
264144058Sjeff#ifdef DIAGNOSTIC
2651541Srgrimes	if (ifp == NULL)	/* this should not happen */
2661541Srgrimes		panic("in6_selectsrc: NULL ifp");
2671541Srgrimes#endif
2681541Srgrimes	*errorp = in6_setscope(&dst, ifp, &odstzone);
26965467Sbp	if (*errorp != 0)
27037977Sbde		return (NULL);
27137977Sbde
2721541Srgrimes	for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
2731541Srgrimes		int new_scope = -1, new_matchlen = -1;
2741541Srgrimes		struct in6_addrpolicy *new_policy = NULL;
2751541Srgrimes		u_int32_t srczone, osrczone, dstzone;
2761541Srgrimes		struct in6_addr src;
2771541Srgrimes		struct ifnet *ifp1 = ia->ia_ifp;
27866356Sbp
279226688Skib		/*
280175202Sattilio		 * We'll never take an address that breaks the scope zone
2811541Srgrimes		 * of the destination.  We also skip an address if its zone
2821541Srgrimes		 * does not contain the outgoing interface.
2831541Srgrimes		 * XXX: we should probably use sin6_scope_id here.
2841541Srgrimes		 */
28512769Sphk		if (in6_setscope(&dst, ifp1, &dstzone) ||
286191990Sattilio		    odstzone != dstzone) {
2871541Srgrimes			continue;
2881541Srgrimes		}
2891541Srgrimes		src = ia->ia_addr.sin6_addr;
290153400Sdes		if (in6_setscope(&src, ifp, &osrczone) ||
2911541Srgrimes		    in6_setscope(&src, ifp1, &srczone) ||
292191990Sattilio		    osrczone != srczone) {
2931541Srgrimes			continue;
2941541Srgrimes		}
29512769Sphk
296191990Sattilio		/* avoid unusable addresses */
2971541Srgrimes		if ((ia->ia6_flags &
2981541Srgrimes		     (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
2991541Srgrimes				continue;
3001541Srgrimes		}
3011541Srgrimes		if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
3021541Srgrimes			continue;
30365467Sbp
30437977Sbde		/* Rule 1: Prefer same address */
30537977Sbde		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
3061541Srgrimes			ia_best = ia;
3071541Srgrimes			BREAK(1); /* there should be no better candidate */
3081541Srgrimes		}
309191990Sattilio
3101541Srgrimes		if (ia_best == NULL)
3111541Srgrimes			REPLACE(0);
3121541Srgrimes
3131541Srgrimes		/* Rule 2: Prefer appropriate scope */
3141541Srgrimes		if (dst_scope < 0)
315247619Sjilles			dst_scope = in6_addrscope(&dst);
316247619Sjilles		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
3171541Srgrimes		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
3181541Srgrimes			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
3191541Srgrimes				REPLACE(2);
3201541Srgrimes			NEXT(2);
3211541Srgrimes		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
3221541Srgrimes			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
3231541Srgrimes				NEXT(2);
3241541Srgrimes			REPLACE(2);
3251541Srgrimes		}
3261541Srgrimes
32712769Sphk		/*
328191990Sattilio		 * Rule 3: Avoid deprecated addresses.  Note that the case of
3291541Srgrimes		 * !ip6_use_deprecated is already rejected above.
3301541Srgrimes		 */
3311541Srgrimes		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
3321541Srgrimes			NEXT(3);
3331541Srgrimes		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
3341541Srgrimes			REPLACE(3);
3351541Srgrimes
3361541Srgrimes		/* Rule 4: Prefer home addresses */
3371541Srgrimes		/*
33812769Sphk		 * XXX: This is a TODO.  We should probably merge the MIP6
33992462Smckusick		 * case above.
3401541Srgrimes		 */
3411541Srgrimes
34292462Smckusick		/* Rule 5: Prefer outgoing interface */
3431541Srgrimes		if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
3441541Srgrimes			NEXT(5);
34566356Sbp		if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
346232301Skib			REPLACE(5);
347232301Skib
348232301Skib		/*
349232301Skib		 * Rule 6: Prefer matching label
35092462Smckusick		 * Note that best_policy should be non-NULL here.
351240285Skib		 */
35266356Sbp		if (dst_policy == NULL)
35398183Ssemenu			dst_policy = lookup_addrsel_policy(dstsock);
3541541Srgrimes		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
3551541Srgrimes			new_policy = lookup_addrsel_policy(&ia->ia_addr);
35612769Sphk			if (dst_policy->label == best_policy->label &&
357222167Srmacklem			    dst_policy->label != new_policy->label)
3581541Srgrimes				NEXT(6);
3591541Srgrimes			if (dst_policy->label != best_policy->label &&
360222167Srmacklem			    dst_policy->label == new_policy->label)
3611541Srgrimes				REPLACE(6);
3621541Srgrimes		}
36366356Sbp
364240285Skib		/*
365240285Skib		 * Rule 7: Prefer public addresses.
366222167Srmacklem		 * We allow users to reverse the logic by configuring
367240285Skib		 * a sysctl variable, so that privacy conscious users can
36866356Sbp		 * always prefer temporary addresses.
36998183Ssemenu		 */
3701541Srgrimes		if (opts == NULL ||
3711541Srgrimes		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
37254803Srwatson			prefer_tempaddr = V_ip6_prefer_tempaddr;
373191990Sattilio		} else if (opts->ip6po_prefer_tempaddr ==
37454803Srwatson		    IP6PO_TEMPADDR_NOTPREFER) {
37554803Srwatson			prefer_tempaddr = 0;
37674273Srwatson		} else
37774273Srwatson			prefer_tempaddr = 1;
37856272Srwatson		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
37954803Srwatson		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
380240285Skib			if (prefer_tempaddr)
381240285Skib				REPLACE(7);
382240285Skib			else
38354803Srwatson				NEXT(7);
38454803Srwatson		}
385240285Skib		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
386240285Skib		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
387240285Skib			if (prefer_tempaddr)
388240285Skib				NEXT(7);
38954803Srwatson			else
390240285Skib				REPLACE(7);
391240285Skib		}
392240285Skib
393250505Skib		/*
394240285Skib		 * Rule 8: prefer addresses on alive interfaces.
395250505Skib		 * This is a KAME specific rule.
396240285Skib		 */
397240285Skib		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
398250505Skib		    !(ia->ia_ifp->if_flags & IFF_UP))
399250505Skib			NEXT(8);
400250505Skib		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
401250505Skib		    (ia->ia_ifp->if_flags & IFF_UP))
402250505Skib			REPLACE(8);
403250505Skib
404250505Skib		/*
405250505Skib		 * Rule 14: Use longest matching prefix.
406250505Skib		 * Note: in the address selection draft, this rule is
407250505Skib		 * documented as "Rule 8".  However, since it is also
408250505Skib		 * documented that this rule can be overridden, we assign
409250505Skib		 * a large number so that it is easy to assign smaller numbers
410250505Skib		 * to more preferred rules.
411250505Skib		 */
412250505Skib		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
413250852Skib		if (best_matchlen < new_matchlen)
414250852Skib			REPLACE(14);
415250852Skib		if (new_matchlen < best_matchlen)
416250852Skib			NEXT(14);
417250852Skib
418250852Skib		/* Rule 15 is reserved. */
419250852Skib
420250505Skib		/*
421250852Skib		 * Last resort: just keep the current candidate.
422250505Skib		 * Or, do we need more rules?
423250852Skib		 */
424250852Skib		continue;
425250852Skib
426250852Skib	  replace:
427250852Skib		ia_best = ia;
428250852Skib		best_scope = (new_scope >= 0 ? new_scope :
429250852Skib			      in6_addrscope(&ia_best->ia_addr.sin6_addr));
430250852Skib		best_policy = (new_policy ? new_policy :
431250852Skib			       lookup_addrsel_policy(&ia_best->ia_addr));
432250852Skib		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
433250852Skib				 in6_matchlen(&ia_best->ia_addr.sin6_addr,
434250505Skib					      &dst));
435250505Skib
436250505Skib	  next:
437250505Skib		continue;
43812769Sphk
439116271Sphk	  out:
440116271Sphk		break;
441116271Sphk	}
442132902Sphk
443116271Sphk	if ((ia = ia_best) == NULL) {
444116271Sphk		*errorp = EADDRNOTAVAIL;
445116271Sphk		return (NULL);
446116271Sphk	}
447116271Sphk
448116271Sphk	if (ifpp)
449116271Sphk		*ifpp = ifp;
450240285Skib
451250505Skib	return (&ia->ia_addr.sin6_addr);
4521541Srgrimes}
4532946Swollman
454231269Smm/*
455 * clone - meaningful only for bsdi and freebsd
456 */
457static int
458selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
459    struct ip6_moptions *mopts, struct route_in6 *ro,
460    struct ifnet **retifp, struct rtentry **retrt, int clone,
461    int norouteok)
462{
463	INIT_VNET_NET(curvnet);
464	INIT_VNET_INET6(curvnet);
465	int error = 0;
466	struct ifnet *ifp = NULL;
467	struct rtentry *rt = NULL;
468	struct sockaddr_in6 *sin6_next;
469	struct in6_pktinfo *pi = NULL;
470	struct in6_addr *dst = &dstsock->sin6_addr;
471#if 0
472	char ip6buf[INET6_ADDRSTRLEN];
473
474	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
475	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
476	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
477		printf("in6_selectroute: strange destination %s\n",
478		       ip6_sprintf(ip6buf, &dstsock->sin6_addr));
479	} else {
480		printf("in6_selectroute: destination = %s%%%d\n",
481		       ip6_sprintf(ip6buf, &dstsock->sin6_addr),
482		       dstsock->sin6_scope_id); /* for debug */
483	}
484#endif
485
486	/* If the caller specify the outgoing interface explicitly, use it. */
487	if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
488		/* XXX boundary check is assumed to be already done. */
489		ifp = ifnet_byindex(pi->ipi6_ifindex);
490		if (ifp != NULL &&
491		    (norouteok || retrt == NULL ||
492		    IN6_IS_ADDR_MULTICAST(dst))) {
493			/*
494			 * we do not have to check or get the route for
495			 * multicast.
496			 */
497			goto done;
498		} else
499			goto getroute;
500	}
501
502	/*
503	 * If the destination address is a multicast address and the outgoing
504	 * interface for the address is specified by the caller, use it.
505	 */
506	if (IN6_IS_ADDR_MULTICAST(dst) &&
507	    mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
508		goto done; /* we do not need a route for multicast. */
509	}
510
511  getroute:
512	/*
513	 * If the next hop address for the packet is specified by the caller,
514	 * use it as the gateway.
515	 */
516	if (opts && opts->ip6po_nexthop) {
517		struct route_in6 *ron;
518
519		sin6_next = satosin6(opts->ip6po_nexthop);
520
521		/* at this moment, we only support AF_INET6 next hops */
522		if (sin6_next->sin6_family != AF_INET6) {
523			error = EAFNOSUPPORT; /* or should we proceed? */
524			goto done;
525		}
526
527		/*
528		 * If the next hop is an IPv6 address, then the node identified
529		 * by that address must be a neighbor of the sending host.
530		 */
531		ron = &opts->ip6po_nextroute;
532		if ((ron->ro_rt &&
533		     (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
534		     (RTF_UP | RTF_LLINFO)) ||
535		    !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
536		    &sin6_next->sin6_addr)) {
537			if (ron->ro_rt) {
538				RTFREE(ron->ro_rt);
539				ron->ro_rt = NULL;
540			}
541			*satosin6(&ron->ro_dst) = *sin6_next;
542		}
543		if (ron->ro_rt == NULL) {
544			rtalloc((struct route *)ron); /* multi path case? */
545			if (ron->ro_rt == NULL ||
546			    !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
547				if (ron->ro_rt) {
548					RTFREE(ron->ro_rt);
549					ron->ro_rt = NULL;
550				}
551				error = EHOSTUNREACH;
552				goto done;
553			}
554		}
555		rt = ron->ro_rt;
556		ifp = rt->rt_ifp;
557
558		/*
559		 * When cloning is required, try to allocate a route to the
560		 * destination so that the caller can store path MTU
561		 * information.
562		 */
563		if (!clone)
564			goto done;
565	}
566
567	/*
568	 * Use a cached route if it exists and is valid, else try to allocate
569	 * a new one.  Note that we should check the address family of the
570	 * cached destination, in case of sharing the cache with IPv4.
571	 */
572	if (ro) {
573		if (ro->ro_rt &&
574		    (!(ro->ro_rt->rt_flags & RTF_UP) ||
575		     ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
576		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
577		     dst))) {
578			RTFREE(ro->ro_rt);
579			ro->ro_rt = (struct rtentry *)NULL;
580		}
581		if (ro->ro_rt == (struct rtentry *)NULL) {
582			struct sockaddr_in6 *sa6;
583
584			/* No route yet, so try to acquire one */
585			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
586			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
587			*sa6 = *dstsock;
588			sa6->sin6_scope_id = 0;
589
590			if (clone) {
591#ifdef RADIX_MPATH
592				rtalloc_mpath((struct route *)ro,
593				    ntohl(sa6->sin6_addr.s6_addr32[3]));
594#else
595				rtalloc((struct route *)ro);
596#endif
597			} else {
598				ro->ro_rt = rtalloc1(&((struct route *)ro)
599						     ->ro_dst, 0, 0UL);
600				if (ro->ro_rt)
601					RT_UNLOCK(ro->ro_rt);
602			}
603		}
604
605		/*
606		 * do not care about the result if we have the nexthop
607		 * explicitly specified.
608		 */
609		if (opts && opts->ip6po_nexthop)
610			goto done;
611
612		if (ro->ro_rt) {
613			ifp = ro->ro_rt->rt_ifp;
614
615			if (ifp == NULL) { /* can this really happen? */
616				RTFREE(ro->ro_rt);
617				ro->ro_rt = NULL;
618			}
619		}
620		if (ro->ro_rt == NULL)
621			error = EHOSTUNREACH;
622		rt = ro->ro_rt;
623
624		/*
625		 * Check if the outgoing interface conflicts with
626		 * the interface specified by ipi6_ifindex (if specified).
627		 * Note that loopback interface is always okay.
628		 * (this may happen when we are sending a packet to one of
629		 *  our own addresses.)
630		 */
631		if (ifp && opts && opts->ip6po_pktinfo &&
632		    opts->ip6po_pktinfo->ipi6_ifindex) {
633			if (!(ifp->if_flags & IFF_LOOPBACK) &&
634			    ifp->if_index !=
635			    opts->ip6po_pktinfo->ipi6_ifindex) {
636				error = EHOSTUNREACH;
637				goto done;
638			}
639		}
640	}
641
642  done:
643	if (ifp == NULL && rt == NULL) {
644		/*
645		 * This can happen if the caller did not pass a cached route
646		 * nor any other hints.  We treat this case an error.
647		 */
648		error = EHOSTUNREACH;
649	}
650	if (error == EHOSTUNREACH)
651		V_ip6stat.ip6s_noroute++;
652
653	if (retifp != NULL)
654		*retifp = ifp;
655	if (retrt != NULL)
656		*retrt = rt;	/* rt may be NULL */
657
658	return (error);
659}
660
661static int
662in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
663    struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
664{
665	int error;
666	struct route_in6 sro;
667	struct rtentry *rt = NULL;
668
669	if (ro == NULL) {
670		bzero(&sro, sizeof(sro));
671		ro = &sro;
672	}
673
674	if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
675				     &rt, 0, 1)) != 0) {
676		if (ro == &sro && rt && rt == sro.ro_rt)
677			RTFREE(rt);
678		return (error);
679	}
680
681	/*
682	 * do not use a rejected or black hole route.
683	 * XXX: this check should be done in the L2 output routine.
684	 * However, if we skipped this check here, we'd see the following
685	 * scenario:
686	 * - install a rejected route for a scoped address prefix
687	 *   (like fe80::/10)
688	 * - send a packet to a destination that matches the scoped prefix,
689	 *   with ambiguity about the scope zone.
690	 * - pick the outgoing interface from the route, and disambiguate the
691	 *   scope zone with the interface.
692	 * - ip6_output() would try to get another route with the "new"
693	 *   destination, which may be valid.
694	 * - we'd see no error on output.
695	 * Although this may not be very harmful, it should still be confusing.
696	 * We thus reject the case here.
697	 */
698	if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
699		int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
700
701		if (ro == &sro && rt && rt == sro.ro_rt)
702			RTFREE(rt);
703		return (flags);
704	}
705
706	/*
707	 * Adjust the "outgoing" interface.  If we're going to loop the packet
708	 * back to ourselves, the ifp would be the loopback interface.
709	 * However, we'd rather know the interface associated to the
710	 * destination address (which should probably be one of our own
711	 * addresses.)
712	 */
713	if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
714		*retifp = rt->rt_ifa->ifa_ifp;
715
716	if (ro == &sro && rt && rt == sro.ro_rt)
717		RTFREE(rt);
718	return (0);
719}
720
/*
 * Public entry point for route selection: calls selectroute() with
 * norouteok == 0 and hands every other argument through unchanged.
 * Returns whatever selectroute() returns.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone)
{

	return (selectroute(dstsock, opts, mopts, ro, retifp,
	    retrt, clone, 0));
}
733
734/*
735 * Default hop limit selection. The precedence is as follows:
736 * 1. Hoplimit value specified via ioctl.
737 * 2. (If the outgoing interface is detected) the current
738 *     hop limit of the interface specified by router advertisement.
739 * 3. The system default hoplimit.
740 */
741int
742in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
743{
744	INIT_VNET_INET6(curvnet);
745
746	if (in6p && in6p->in6p_hops >= 0)
747		return (in6p->in6p_hops);
748	else if (ifp)
749		return (ND_IFINFO(ifp)->chlim);
750	else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
751		struct route_in6 ro6;
752		struct ifnet *lifp;
753
754		bzero(&ro6, sizeof(ro6));
755		ro6.ro_dst.sin6_family = AF_INET6;
756		ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
757		ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
758		rtalloc((struct route *)&ro6);
759		if (ro6.ro_rt) {
760			lifp = ro6.ro_rt->rt_ifp;
761			RTFREE(ro6.ro_rt);
762			if (lifp)
763				return (ND_IFINFO(lifp)->chlim);
764		} else
765			return (V_ip6_defhlim);
766	}
767	return (V_ip6_defhlim);
768}
769
770/*
771 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
772 * share this function by all *bsd*...
773 */
774int
775in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
776{
777	INIT_VNET_INET(curvnet);
778	struct socket *so = inp->inp_socket;
779	u_int16_t lport = 0, first, last, *lastport;
780	int count, error = 0, wild = 0, dorandom;
781	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
782
783	INP_INFO_WLOCK_ASSERT(pcbinfo);
784	INP_WLOCK_ASSERT(inp);
785
786	/* XXX: this is redundant when called from in6_pcbbind */
787	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
788		wild = INPLOOKUP_WILDCARD;
789
790	inp->inp_flags |= INP_ANONPORT;
791
792	if (inp->inp_flags & INP_HIGHPORT) {
793		first = V_ipport_hifirstauto;	/* sysctl */
794		last  = V_ipport_hilastauto;
795		lastport = &pcbinfo->ipi_lasthi;
796	} else if (inp->inp_flags & INP_LOWPORT) {
797		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
798		if (error)
799			return error;
800		first = V_ipport_lowfirstauto;	/* 1023 */
801		last  = V_ipport_lowlastauto;	/* 600 */
802		lastport = &pcbinfo->ipi_lastlow;
803	} else {
804		first = V_ipport_firstauto;	/* sysctl */
805		last  = V_ipport_lastauto;
806		lastport = &pcbinfo->ipi_lastport;
807	}
808
809	/*
810	 * For UDP, use random port allocation as long as the user
811	 * allows it.  For TCP (and as of yet unknown) connections,
812	 * use random port allocation only if the user allows it AND
813	 * ipport_tick() allows it.
814	 */
815	if (V_ipport_randomized &&
816	    (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
817		dorandom = 1;
818	else
819		dorandom = 0;
820	/*
821	 * It makes no sense to do random port allocation if
822	 * we have the only port available.
823	 */
824	if (first == last)
825		dorandom = 0;
826	/* Make sure to not include UDP packets in the count. */
827	if (pcbinfo != &V_udbinfo)
828		V_ipport_tcpallocs++;
829
830	/*
831	 * Instead of having two loops further down counting up or down
832	 * make sure that first is always <= last and go with only one
833	 * code path implementing all logic.
834	 */
835	if (first > last) {
836		u_int16_t aux;
837
838		aux = first;
839		first = last;
840		last = aux;
841	}
842
843	if (dorandom)
844		*lastport = first + (arc4random() % (last - first));
845
846	count = last - first;
847
848	do {
849		if (count-- < 0) {	/* completely used? */
850			/* Undo an address bind that may have occurred. */
851			inp->in6p_laddr = in6addr_any;
852			return (EADDRNOTAVAIL);
853		}
854		++*lastport;
855		if (*lastport < first || *lastport > last)
856			*lastport = first;
857		lport = htons(*lastport);
858	} while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr,
859	    lport, wild, cred));
860
861	inp->inp_lport = lport;
862	if (in_pcbinshash(inp) != 0) {
863		inp->in6p_laddr = in6addr_any;
864		inp->inp_lport = 0;
865		return (EAGAIN);
866	}
867
868	return (0);
869}
870
871void
872addrsel_policy_init(void)
873{
874	ADDRSEL_LOCK_INIT();
875	ADDRSEL_SXLOCK_INIT();
876	INIT_VNET_INET6(curvnet);
877
878	init_policy_queue();
879
880	/* initialize the "last resort" policy */
881	bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
882	V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
883}
884
885static struct in6_addrpolicy *
886lookup_addrsel_policy(struct sockaddr_in6 *key)
887{
888	INIT_VNET_INET6(curvnet);
889	struct in6_addrpolicy *match = NULL;
890
891	ADDRSEL_LOCK();
892	match = match_addrsel_policy(key);
893
894	if (match == NULL)
895		match = &V_defaultaddrpolicy;
896	else
897		match->use++;
898	ADDRSEL_UNLOCK();
899
900	return (match);
901}
902
903/*
904 * Subroutines to manage the address selection policy table via sysctl.
905 */
906struct walkarg {
907	struct sysctl_req *w_req;
908};
909
910static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
911SYSCTL_DECL(_net_inet6_ip6);
912SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
913	CTLFLAG_RD, in6_src_sysctl, "");
914
915static int
916in6_src_sysctl(SYSCTL_HANDLER_ARGS)
917{
918	struct walkarg w;
919
920	if (req->newptr)
921		return EPERM;
922
923	bzero(&w, sizeof(w));
924	w.w_req = req;
925
926	return (walk_addrsel_policy(dump_addrsel_policyent, &w));
927}
928
929int
930in6_src_ioctl(u_long cmd, caddr_t data)
931{
932	int i;
933	struct in6_addrpolicy ent0;
934
935	if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
936		return (EOPNOTSUPP); /* check for safety */
937
938	ent0 = *(struct in6_addrpolicy *)data;
939
940	if (ent0.label == ADDR_LABEL_NOTAPP)
941		return (EINVAL);
942	/* check if the prefix mask is consecutive. */
943	if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
944		return (EINVAL);
945	/* clear trailing garbages (if any) of the prefix address. */
946	for (i = 0; i < 4; i++) {
947		ent0.addr.sin6_addr.s6_addr32[i] &=
948			ent0.addrmask.sin6_addr.s6_addr32[i];
949	}
950	ent0.use = 0;
951
952	switch (cmd) {
953	case SIOCAADDRCTL_POLICY:
954		return (add_addrsel_policyent(&ent0));
955	case SIOCDADDRCTL_POLICY:
956		return (delete_addrsel_policyent(&ent0));
957	}
958
959	return (0);		/* XXX: compromise compilers */
960}
961
962/*
963 * The followings are implementation of the policy table using a
964 * simple tail queue.
965 * XXX such details should be hidden.
966 * XXX implementation using binary tree should be more efficient.
967 */
968struct addrsel_policyent {
969	TAILQ_ENTRY(addrsel_policyent) ape_entry;
970	struct in6_addrpolicy ape_policy;
971};
972
973TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
974
975struct addrsel_policyhead addrsel_policytab;
976
977static void
978init_policy_queue(void)
979{
980	INIT_VNET_INET6(curvnet);
981
982	TAILQ_INIT(&V_addrsel_policytab);
983}
984
985static int
986add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
987{
988	INIT_VNET_INET6(curvnet);
989	struct addrsel_policyent *new, *pol;
990
991	MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
992	       M_WAITOK);
993	ADDRSEL_XLOCK();
994	ADDRSEL_LOCK();
995
996	/* duplication check */
997	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
998		if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
999				       &pol->ape_policy.addr.sin6_addr) &&
1000		    IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1001				       &pol->ape_policy.addrmask.sin6_addr)) {
1002			ADDRSEL_UNLOCK();
1003			ADDRSEL_XUNLOCK();
1004			FREE(new, M_IFADDR);
1005			return (EEXIST);	/* or override it? */
1006		}
1007	}
1008
1009	bzero(new, sizeof(*new));
1010
1011	/* XXX: should validate entry */
1012	new->ape_policy = *newpolicy;
1013
1014	TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry);
1015	ADDRSEL_UNLOCK();
1016	ADDRSEL_XUNLOCK();
1017
1018	return (0);
1019}
1020
1021static int
1022delete_addrsel_policyent(struct in6_addrpolicy *key)
1023{
1024	INIT_VNET_INET6(curvnet);
1025	struct addrsel_policyent *pol;
1026
1027	ADDRSEL_XLOCK();
1028	ADDRSEL_LOCK();
1029
1030	/* search for the entry in the table */
1031	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1032		if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1033		    &pol->ape_policy.addr.sin6_addr) &&
1034		    IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1035		    &pol->ape_policy.addrmask.sin6_addr)) {
1036			break;
1037		}
1038	}
1039	if (pol == NULL) {
1040		ADDRSEL_UNLOCK();
1041		ADDRSEL_XUNLOCK();
1042		return (ESRCH);
1043	}
1044
1045	TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
1046	ADDRSEL_UNLOCK();
1047	ADDRSEL_XUNLOCK();
1048
1049	return (0);
1050}
1051
1052static int
1053walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
1054    void *w)
1055{
1056	INIT_VNET_INET6(curvnet);
1057	struct addrsel_policyent *pol;
1058	int error = 0;
1059
1060	ADDRSEL_SLOCK();
1061	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1062		if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1063			ADDRSEL_SUNLOCK();
1064			return (error);
1065		}
1066	}
1067	ADDRSEL_SUNLOCK();
1068	return (error);
1069}
1070
1071static int
1072dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
1073{
1074	int error = 0;
1075	struct walkarg *w = arg;
1076
1077	error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
1078
1079	return (error);
1080}
1081
1082static struct in6_addrpolicy *
1083match_addrsel_policy(struct sockaddr_in6 *key)
1084{
1085	INIT_VNET_INET6(curvnet);
1086	struct addrsel_policyent *pent;
1087	struct in6_addrpolicy *bestpol = NULL, *pol;
1088	int matchlen, bestmatchlen = -1;
1089	u_char *mp, *ep, *k, *p, m;
1090
1091	TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) {
1092		matchlen = 0;
1093
1094		pol = &pent->ape_policy;
1095		mp = (u_char *)&pol->addrmask.sin6_addr;
1096		ep = mp + 16;	/* XXX: scope field? */
1097		k = (u_char *)&key->sin6_addr;
1098		p = (u_char *)&pol->addr.sin6_addr;
1099		for (; mp < ep && *mp; mp++, k++, p++) {
1100			m = *mp;
1101			if ((*k & m) != *p)
1102				goto next; /* not match */
1103			if (m == 0xff) /* short cut for a typical case */
1104				matchlen += 8;
1105			else {
1106				while (m >= 0x80) {
1107					matchlen++;
1108					m <<= 1;
1109				}
1110			}
1111		}
1112
1113		/* matched.  check if this is better than the current best. */
1114		if (bestpol == NULL ||
1115		    matchlen > bestmatchlen) {
1116			bestpol = pol;
1117			bestmatchlen = matchlen;
1118		}
1119
1120	  next:
1121		continue;
1122	}
1123
1124	return (bestpol);
1125}
1126