Deleted Added
sdiff udiff text old ( 185348 ) new ( 185435 )
full compact
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/in6_src.c 185435 2008-11-29 14:32:14Z bz $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_mpath.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/lock.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/priv.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/sockio.h>
80#include <sys/sysctl.h>
81#include <sys/errno.h>
82#include <sys/time.h>
83#include <sys/jail.h>
84#include <sys/kernel.h>
85#include <sys/sx.h>
86#include <sys/vimage.h>
87
88#include <net/if.h>
89#include <net/route.h>
90#ifdef RADIX_MPATH
91#include <net/radix_mpath.h>
92#endif
93
94#include <netinet/in.h>
95#include <netinet/in_var.h>
96#include <netinet/in_systm.h>
97#include <netinet/ip.h>
98#include <netinet/in_pcb.h>
99#include <netinet/ip_var.h>
100#include <netinet/udp.h>
101#include <netinet/udp_var.h>
102#include <netinet6/in6_var.h>
103#include <netinet/ip6.h>
104#include <netinet6/in6_pcb.h>
105#include <netinet6/ip6_var.h>
106#include <netinet6/scope6_var.h>
107#include <netinet6/nd6.h>
108
109static struct mtx addrsel_lock;
110#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
111#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock)
112#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock)
113#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED)
114
115static struct sx addrsel_sxlock;
116#define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock")
117#define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock)
118#define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock)
119#define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock)
120#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock)
121
122#define ADDR_LABEL_NOTAPP (-1)
123
124#ifdef VIMAGE_GLOBALS
125struct in6_addrpolicy defaultaddrpolicy;
126int ip6_prefer_tempaddr;
127#endif
128
129static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
130 struct ip6_moptions *, struct route_in6 *, struct ifnet **,
131 struct rtentry **, int, int));
132static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
133 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **));
134
135static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
136
137static void init_policy_queue(void);
138static int add_addrsel_policyent(struct in6_addrpolicy *);
139static int delete_addrsel_policyent(struct in6_addrpolicy *);
140static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
141 void *));
142static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
143static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
144
145/*
146 * Return an IPv6 address, which is the most appropriate for a given
147 * destination and user specified options.
148 * If necessary, this function looks up the routing table and returns
149 * an entry to the caller for later use.
150 */
/*
 * Count one hit for source address selection rule (r).  The index is
 * checked against the size of the counter array first, so an out-of-range
 * rule number is silently not counted.
 */
#define	IP6S_SOURCES_RULE_HIT(r) do {					\
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) /			\
	    sizeof(V_ip6stat.ip6s_sources_rule[0]))			\
		V_ip6stat.ip6s_sources_rule[(r)]++;			\
} while (0)

/*
 * Flow-control helpers for the candidate loop in in6_selectsrc().  Each one
 * records which rule made the decision and then jumps to the corresponding
 * label inside that loop ("replace", "next" or "out"); plain 'continue' and
 * 'break' cannot be used because the jump happens from within a macro body
 * wrapped in do { } while (0).
 */

/* The new candidate wins by rule (r): make it the current best. */
#define	REPLACE(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto replace;							\
} while (0)

/* The current best wins by rule (r): move on to the next candidate. */
#define	NEXT(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto next;							\
} while (0)

/* Rule (r) ends the search: leave the candidate loop. */
#define	BREAK(r) do {							\
	IP6S_SOURCES_RULE_HIT(r);					\
	goto out;							\
} while (0)
177
/*
 * Choose the IPv6 source address to use for a packet sent to dstsock.
 *
 * dstsock - destination; only read here (a local copy of the address is
 *           scoped and compared).
 * opts    - optional packet options; a non-unspecified ip6po_pktinfo
 *           address is treated as an explicit source request.
 * inp     - optional PCB; supplies the multicast options, the V6ONLY flag
 *           for the jail check and, if already bound, the source address.
 * ro      - optional route cache handed to in6_selectif().
 * cred    - optional credential; when non-NULL every candidate must pass
 *           prison_local_ip6().
 * ifpp    - optional output; cleared on entry and set to the outgoing
 *           interface on the paths that call in6_selectif().
 * errorp  - required output; set to 0 on entry and to an errno value on
 *           failure.
 *
 * Returns a pointer to the chosen address, or NULL with *errorp set.  The
 * pointer refers to storage inside the selected in6_ifaddr or inside the
 * inpcb; nothing is copied here.
 *
 * Order of precedence:
 *   1. an address explicitly requested through pktinfo (validated);
 *   2. the address the socket is already bound to;
 *   3. the best candidate from V_in6_ifaddr according to the numbered
 *      rules below, driven by the REPLACE/NEXT/BREAK macros.
 */
178struct in6_addr *
179in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
180 struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
181 struct ifnet **ifpp, int *errorp)
182{
183 INIT_VNET_INET6(curvnet);
184 struct in6_addr dst;
185 struct ifnet *ifp = NULL;
186 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
187 struct in6_pktinfo *pi = NULL;
188 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
189 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
190 u_int32_t odstzone;
191 int prefer_tempaddr;
192 struct ip6_moptions *mopts;
193
194 dst = dstsock->sin6_addr; /* make a copy for local operation */
195 *errorp = 0;
196 if (ifpp)
197 *ifpp = NULL;
198
199 if (inp != NULL) {
200 INP_LOCK_ASSERT(inp);
201 mopts = inp->in6p_moptions;
202 } else {
203 mopts = NULL;
204 }
205
206 /*
207 * If the source address is explicitly specified by the caller,
208 * check if the requested source address is indeed a unicast address
209 * assigned to the node, and can be used as the packet's source
210 * address. If everything is okay, use the address as source.
211 */
212 if (opts && (pi = opts->ip6po_pktinfo) &&
213 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
214 struct sockaddr_in6 srcsock;
215 struct in6_ifaddr *ia6;
216
217 /* get the outgoing interface */
218 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
219 != 0) {
220 return (NULL);
221 }
222
223 /*
224 * determine the appropriate zone id of the source based on
225 * the zone of the destination and the outgoing interface.
226 * If the specified address is ambiguous wrt the scope zone,
227 * the interface must be specified; otherwise, ifa_ifwithaddr()
228 * will fail matching the address.
229 */
230 bzero(&srcsock, sizeof(srcsock));
231 srcsock.sin6_family = AF_INET6;
232 srcsock.sin6_len = sizeof(srcsock);
233 srcsock.sin6_addr = pi->ipi6_addr;
234 if (ifp) {
235 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
236 if (*errorp != 0)
237 return (NULL);
238 }
239 if (cred != NULL && prison_local_ip6(cred, &srcsock.sin6_addr,
240 (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
241 *errorp = EADDRNOTAVAIL;
242 return (NULL);
243 }
244
245 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
246 if (ia6 == NULL ||
247 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
248 *errorp = EADDRNOTAVAIL;
249 return (NULL);
250 }
251 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
252 if (ifpp)
253 *ifpp = ifp;
254 return (&ia6->ia_addr.sin6_addr);
255 }
256
257 /*
258 * Otherwise, if the socket has already bound the source, just use it.
259 */
/* Note: no interface is selected on this path, so *ifpp stays NULL. */
260 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
261 if (cred != NULL && prison_local_ip6(cred, &inp->in6p_laddr,
262 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
263 *errorp = EADDRNOTAVAIL;
264 return (NULL);
265 }
266 return (&inp->in6p_laddr);
267 }
268
269 /*
270 * If the address is not specified, choose the best one based on
271 * the outgoing interface and the destination address.
272 */
273 /* get the outgoing interface */
274 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
275 return (NULL);
276
277#ifdef DIAGNOSTIC
278 if (ifp == NULL) /* this should not happen */
279 panic("in6_selectsrc: NULL ifp");
280#endif
281 *errorp = in6_setscope(&dst, ifp, &odstzone);
282 if (*errorp != 0)
283 return (NULL);
284
/*
 * Walk every configured address.  ia is the candidate under test and
 * ia_best the winner so far; best_scope, best_policy and best_matchlen
 * cache the properties of ia_best and are refreshed at the "replace" label.
 */
285 for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
286 int new_scope = -1, new_matchlen = -1;
287 struct in6_addrpolicy *new_policy = NULL;
288 u_int32_t srczone, osrczone, dstzone;
289 struct in6_addr src;
290 struct ifnet *ifp1 = ia->ia_ifp;
291
292 /*
293 * We'll never take an address that breaks the scope zone
294 * of the destination. We also skip an address if its zone
295 * does not contain the outgoing interface.
296 * XXX: we should probably use sin6_scope_id here.
297 */
298 if (in6_setscope(&dst, ifp1, &dstzone) ||
299 odstzone != dstzone) {
300 continue;
301 }
302 src = ia->ia_addr.sin6_addr;
303 if (in6_setscope(&src, ifp, &osrczone) ||
304 in6_setscope(&src, ifp1, &srczone) ||
305 osrczone != srczone) {
306 continue;
307 }
308
309 /* avoid unusable addresses */
310 if ((ia->ia6_flags &
311 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
312 continue;
313 }
314 if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
315 continue;
316
317 if (cred != NULL &&
318 prison_local_ip6(cred, &ia->ia_addr.sin6_addr,
319 (inp != NULL &&
320 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
321 continue;
322
323 /* Rule 1: Prefer same address */
324 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
325 ia_best = ia;
326 BREAK(1); /* there should be no better candidate */
327 }
328
/* The first usable candidate becomes the initial best (counted as rule 0). */
329 if (ia_best == NULL)
330 REPLACE(0);
331
332 /* Rule 2: Prefer appropriate scope */
333 if (dst_scope < 0)
334 dst_scope = in6_addrscope(&dst);
335 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
336 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
337 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
338 REPLACE(2);
339 NEXT(2);
340 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
341 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
342 NEXT(2);
343 REPLACE(2);
344 }
345
346 /*
347 * Rule 3: Avoid deprecated addresses. Note that the case of
348 * !ip6_use_deprecated is already rejected above.
349 */
350 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
351 NEXT(3);
352 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
353 REPLACE(3);
354
355 /* Rule 4: Prefer home addresses */
356 /*
357 * XXX: This is a TODO. We should probably merge the MIP6
358 * case above.
359 */
360
361 /* Rule 5: Prefer outgoing interface */
362 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
363 NEXT(5);
364 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
365 REPLACE(5);
366
367 /*
368 * Rule 6: Prefer matching label
369 * Note that best_policy should be non-NULL here.
370 */
371 if (dst_policy == NULL)
372 dst_policy = lookup_addrsel_policy(dstsock);
373 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
374 new_policy = lookup_addrsel_policy(&ia->ia_addr);
375 if (dst_policy->label == best_policy->label &&
376 dst_policy->label != new_policy->label)
377 NEXT(6);
378 if (dst_policy->label != best_policy->label &&
379 dst_policy->label == new_policy->label)
380 REPLACE(6);
381 }
382
383 /*
384 * Rule 7: Prefer public addresses.
385 * We allow users to reverse the logic by configuring
386 * a sysctl variable, so that privacy conscious users can
387 * always prefer temporary addresses.
388 */
389 if (opts == NULL ||
390 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
391 prefer_tempaddr = V_ip6_prefer_tempaddr;
392 } else if (opts->ip6po_prefer_tempaddr ==
393 IP6PO_TEMPADDR_NOTPREFER) {
394 prefer_tempaddr = 0;
395 } else
396 prefer_tempaddr = 1;
397 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
398 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
399 if (prefer_tempaddr)
400 REPLACE(7);
401 else
402 NEXT(7);
403 }
404 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
405 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
406 if (prefer_tempaddr)
407 NEXT(7);
408 else
409 REPLACE(7);
410 }
411
412 /*
413 * Rule 8: prefer addresses on alive interfaces.
414 * This is a KAME specific rule.
415 */
416 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
417 !(ia->ia_ifp->if_flags & IFF_UP))
418 NEXT(8);
419 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
420 (ia->ia_ifp->if_flags & IFF_UP))
421 REPLACE(8);
422
423 /*
424 * Rule 14: Use longest matching prefix.
425 * Note: in the address selection draft, this rule is
426 * documented as "Rule 8". However, since it is also
427 * documented that this rule can be overridden, we assign
428 * a large number so that it is easy to assign smaller numbers
429 * to more preferred rules.
430 */
431 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
432 if (best_matchlen < new_matchlen)
433 REPLACE(14);
434 if (new_matchlen < best_matchlen)
435 NEXT(14);
436
437 /* Rule 15 is reserved. */
438
439 /*
440 * Last resort: just keep the current candidate.
441 * Or, do we need more rules?
442 */
443 continue;
444
/*
 * Target of REPLACE(): adopt ia as the new best.  Scope, policy and match
 * length are reused if a rule already computed them during this iteration
 * and are computed now otherwise.
 */
445 replace:
446 ia_best = ia;
447 best_scope = (new_scope >= 0 ? new_scope :
448 in6_addrscope(&ia_best->ia_addr.sin6_addr));
449 best_policy = (new_policy ? new_policy :
450 lookup_addrsel_policy(&ia_best->ia_addr));
451 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
452 in6_matchlen(&ia_best->ia_addr.sin6_addr,
453 &dst));
454
/* Target of NEXT(): keep ia_best and try the following candidate. */
455 next:
456 continue;
457
/* Target of BREAK(): stop searching. */
458 out:
459 break;
460 }
461
462 if ((ia = ia_best) == NULL) {
463 *errorp = EADDRNOTAVAIL;
464 return (NULL);
465 }
466
467 if (ifpp)
468 *ifpp = ifp;
469
470 return (&ia->ia_addr.sin6_addr);
471}
472
473/*
474 * clone - meaningful only for bsdi and freebsd
475 */
/*
 * Pick the outgoing interface and, where needed, the route for dstsock.
 *
 * dstsock   - destination address.
 * opts      - optional packet options; ip6po_pktinfo->ipi6_ifindex names an
 *             explicit outgoing interface and ip6po_nexthop an explicit
 *             next hop (its route is cached in opts->ip6po_nextroute).
 * mopts     - optional multicast options; im6o_multicast_ifp is used for
 *             multicast destinations.
 * ro        - optional route cache for the destination; validated,
 *             released and refilled here as necessary.
 * retifp    - optional output: the selected interface (may be NULL).
 * retrt     - optional output: the selected route (may be NULL).
 * clone     - non-zero: allocate the destination route with rtalloc()
 *             (or rtalloc_mpath()); zero: use rtalloc1() and unlock the
 *             result.
 * norouteok - non-zero: an explicitly specified interface is accepted
 *             without looking up a route.
 *
 * Returns 0 or an errno value.  EHOSTUNREACH is also returned, and the
 * ip6s_noroute counter bumped, when neither an interface nor a route was
 * found.  The outputs are stored even when an error is returned.
 */
476static int
477selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
478 struct ip6_moptions *mopts, struct route_in6 *ro,
479 struct ifnet **retifp, struct rtentry **retrt, int clone,
480 int norouteok)
481{
482 INIT_VNET_INET6(curvnet);
483 int error = 0;
484 struct ifnet *ifp = NULL;
485 struct rtentry *rt = NULL;
486 struct sockaddr_in6 *sin6_next;
487 struct in6_pktinfo *pi = NULL;
488 struct in6_addr *dst = &dstsock->sin6_addr;
489#if 0
490 char ip6buf[INET6_ADDRSTRLEN];
491
492 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
493 dstsock->sin6_addr.s6_addr32[1] == 0 &&
494 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
495 printf("in6_selectroute: strange destination %s\n",
496 ip6_sprintf(ip6buf, &dstsock->sin6_addr));
497 } else {
498 printf("in6_selectroute: destination = %s%%%d\n",
499 ip6_sprintf(ip6buf, &dstsock->sin6_addr),
500 dstsock->sin6_scope_id); /* for debug */
501 }
502#endif
503
504 /* If the caller specifies the outgoing interface explicitly, use it. */
505 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
506 /* XXX boundary check is assumed to be already done. */
507 ifp = ifnet_byindex(pi->ipi6_ifindex);
508 if (ifp != NULL &&
509 (norouteok || retrt == NULL ||
510 IN6_IS_ADDR_MULTICAST(dst))) {
511 /*
512 * we do not have to check or get the route for
513 * multicast.
514 */
515 goto done;
516 } else
517 goto getroute;
518 }
519
520 /*
521 * If the destination address is a multicast address and the outgoing
522 * interface for the address is specified by the caller, use it.
523 */
524 if (IN6_IS_ADDR_MULTICAST(dst) &&
525 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
526 goto done; /* we do not need a route for multicast. */
527 }
528
529 getroute:
530 /*
531 * If the next hop address for the packet is specified by the caller,
532 * use it as the gateway.
533 */
534 if (opts && opts->ip6po_nexthop) {
535 struct route_in6 *ron;
536
537 sin6_next = satosin6(opts->ip6po_nexthop);
538
539 /* at this moment, we only support AF_INET6 next hops */
540 if (sin6_next->sin6_family != AF_INET6) {
541 error = EAFNOSUPPORT; /* or should we proceed? */
542 goto done;
543 }
544
545 /*
546 * If the next hop is an IPv6 address, then the node identified
547 * by that address must be a neighbor of the sending host.
548 */
/*
 * Drop the cached next-hop route if it is no longer an up link-layer
 * entry or if it was cached for a different next-hop address.
 */
549 ron = &opts->ip6po_nextroute;
550 if ((ron->ro_rt &&
551 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
552 (RTF_UP | RTF_LLINFO)) ||
553 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
554 &sin6_next->sin6_addr)) {
555 if (ron->ro_rt) {
556 RTFREE(ron->ro_rt);
557 ron->ro_rt = NULL;
558 }
559 *satosin6(&ron->ro_dst) = *sin6_next;
560 }
561 if (ron->ro_rt == NULL) {
562 rtalloc((struct route *)ron); /* multi path case? */
563 if (ron->ro_rt == NULL ||
564 !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
565 if (ron->ro_rt) {
566 RTFREE(ron->ro_rt);
567 ron->ro_rt = NULL;
568 }
569 error = EHOSTUNREACH;
570 goto done;
571 }
572 }
573 rt = ron->ro_rt;
574 ifp = rt->rt_ifp;
575
576 /*
577 * When cloning is required, try to allocate a route to the
578 * destination so that the caller can store path MTU
579 * information.
580 */
581 if (!clone)
582 goto done;
583 }
584
585 /*
586 * Use a cached route if it exists and is valid, else try to allocate
587 * a new one. Note that we should check the address family of the
588 * cached destination, in case of sharing the cache with IPv4.
589 */
590 if (ro) {
591 if (ro->ro_rt &&
592 (!(ro->ro_rt->rt_flags & RTF_UP) ||
593 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
594 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
595 dst))) {
596 RTFREE(ro->ro_rt);
597 ro->ro_rt = (struct rtentry *)NULL;
598 }
599 if (ro->ro_rt == (struct rtentry *)NULL) {
600 struct sockaddr_in6 *sa6;
601
602 /* No route yet, so try to acquire one */
603 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
604 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
605 *sa6 = *dstsock;
606 sa6->sin6_scope_id = 0;
607
608 if (clone) {
609#ifdef RADIX_MPATH
610 rtalloc_mpath((struct route *)ro,
611 ntohl(sa6->sin6_addr.s6_addr32[3]));
612#else
613 rtalloc((struct route *)ro);
614#endif
615 } else {
616 ro->ro_rt = rtalloc1(&((struct route *)ro)
617 ->ro_dst, 0, 0UL);
618 if (ro->ro_rt)
619 RT_UNLOCK(ro->ro_rt);
620 }
621 }
622
623 /*
624 * do not care about the result if we have the nexthop
625 * explicitly specified.
626 */
627 if (opts && opts->ip6po_nexthop)
628 goto done;
629
630 if (ro->ro_rt) {
631 ifp = ro->ro_rt->rt_ifp;
632
633 if (ifp == NULL) { /* can this really happen? */
634 RTFREE(ro->ro_rt);
635 ro->ro_rt = NULL;
636 }
637 }
638 if (ro->ro_rt == NULL)
639 error = EHOSTUNREACH;
640 rt = ro->ro_rt;
641
642 /*
643 * Check if the outgoing interface conflicts with
644 * the interface specified by ipi6_ifindex (if specified).
645 * Note that loopback interface is always okay.
646 * (this may happen when we are sending a packet to one of
647 * our own addresses.)
648 */
649 if (ifp && opts && opts->ip6po_pktinfo &&
650 opts->ip6po_pktinfo->ipi6_ifindex) {
651 if (!(ifp->if_flags & IFF_LOOPBACK) &&
652 ifp->if_index !=
653 opts->ip6po_pktinfo->ipi6_ifindex) {
654 error = EHOSTUNREACH;
655 goto done;
656 }
657 }
658 }
659
/* Common exit: finalize the error code, update statistics, store outputs. */
660 done:
661 if (ifp == NULL && rt == NULL) {
662 /*
663 * This can happen if the caller did not pass a cached route
664 * nor any other hints. We treat this case an error.
665 */
666 error = EHOSTUNREACH;
667 }
668 if (error == EHOSTUNREACH)
669 V_ip6stat.ip6s_noroute++;
670
671 if (retifp != NULL)
672 *retifp = ifp;
673 if (retrt != NULL)
674 *retrt = rt; /* rt may be NULL */
675
676 return (error);
677}
678
679static int
680in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
681 struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
682{
683 int error;
684 struct route_in6 sro;
685 struct rtentry *rt = NULL;
686
687 if (ro == NULL) {
688 bzero(&sro, sizeof(sro));
689 ro = &sro;
690 }
691
692 if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
693 &rt, 0, 1)) != 0) {
694 if (ro == &sro && rt && rt == sro.ro_rt)
695 RTFREE(rt);
696 return (error);
697 }
698
699 /*
700 * do not use a rejected or black hole route.
701 * XXX: this check should be done in the L2 output routine.
702 * However, if we skipped this check here, we'd see the following
703 * scenario:
704 * - install a rejected route for a scoped address prefix
705 * (like fe80::/10)
706 * - send a packet to a destination that matches the scoped prefix,
707 * with ambiguity about the scope zone.
708 * - pick the outgoing interface from the route, and disambiguate the
709 * scope zone with the interface.
710 * - ip6_output() would try to get another route with the "new"
711 * destination, which may be valid.
712 * - we'd see no error on output.
713 * Although this may not be very harmful, it should still be confusing.
714 * We thus reject the case here.
715 */
716 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
717 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
718
719 if (ro == &sro && rt && rt == sro.ro_rt)
720 RTFREE(rt);
721 return (flags);
722 }
723
724 /*
725 * Adjust the "outgoing" interface. If we're going to loop the packet
726 * back to ourselves, the ifp would be the loopback interface.
727 * However, we'd rather know the interface associated to the
728 * destination address (which should probably be one of our own
729 * addresses.)
730 */
731 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
732 *retifp = rt->rt_ifa->ifa_ifp;
733
734 if (ro == &sro && rt && rt == sro.ro_rt)
735 RTFREE(rt);
736 return (0);
737}
738
/*
 * Public entry point for route selection: selectroute() with norouteok
 * fixed to 0.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone)
{
	int error;

	error = selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, 0);
	return (error);
}
751
752/*
753 * Default hop limit selection. The precedence is as follows:
754 * 1. Hoplimit value specified via ioctl.
755 * 2. (If the outgoing interface is detected) the current
756 * hop limit of the interface specified by router advertisement.
757 * 3. The system default hoplimit.
758 */
759int
760in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
761{
762 INIT_VNET_INET6(curvnet);
763
764 if (in6p && in6p->in6p_hops >= 0)
765 return (in6p->in6p_hops);
766 else if (ifp)
767 return (ND_IFINFO(ifp)->chlim);
768 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
769 struct route_in6 ro6;
770 struct ifnet *lifp;
771
772 bzero(&ro6, sizeof(ro6));
773 ro6.ro_dst.sin6_family = AF_INET6;
774 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
775 ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
776 rtalloc((struct route *)&ro6);
777 if (ro6.ro_rt) {
778 lifp = ro6.ro_rt->rt_ifp;
779 RTFREE(ro6.ro_rt);
780 if (lifp)
781 return (ND_IFINFO(lifp)->chlim);
782 } else
783 return (V_ip6_defhlim);
784 }
785 return (V_ip6_defhlim);
786}
787
788/*
789 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
790 * share this function by all *bsd*...
791 */
792int
793in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
794{
795 INIT_VNET_INET(curvnet);
796 struct socket *so = inp->inp_socket;
797 u_int16_t lport = 0, first, last, *lastport;
798 int count, error = 0, wild = 0, dorandom;
799 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
800
801 INP_INFO_WLOCK_ASSERT(pcbinfo);
802 INP_WLOCK_ASSERT(inp);
803
804 if (prison_local_ip6(cred, laddr,
805 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
806 return(EINVAL);
807
808 /* XXX: this is redundant when called from in6_pcbbind */
809 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
810 wild = INPLOOKUP_WILDCARD;
811
812 inp->inp_flags |= INP_ANONPORT;
813
814 if (inp->inp_flags & INP_HIGHPORT) {
815 first = V_ipport_hifirstauto; /* sysctl */
816 last = V_ipport_hilastauto;
817 lastport = &pcbinfo->ipi_lasthi;
818 } else if (inp->inp_flags & INP_LOWPORT) {
819 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
820 if (error)
821 return error;
822 first = V_ipport_lowfirstauto; /* 1023 */
823 last = V_ipport_lowlastauto; /* 600 */
824 lastport = &pcbinfo->ipi_lastlow;
825 } else {
826 first = V_ipport_firstauto; /* sysctl */
827 last = V_ipport_lastauto;
828 lastport = &pcbinfo->ipi_lastport;
829 }
830
831 /*
832 * For UDP, use random port allocation as long as the user
833 * allows it. For TCP (and as of yet unknown) connections,
834 * use random port allocation only if the user allows it AND
835 * ipport_tick() allows it.
836 */
837 if (V_ipport_randomized &&
838 (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
839 dorandom = 1;
840 else
841 dorandom = 0;
842 /*
843 * It makes no sense to do random port allocation if
844 * we have the only port available.
845 */
846 if (first == last)
847 dorandom = 0;
848 /* Make sure to not include UDP packets in the count. */
849 if (pcbinfo != &V_udbinfo)
850 V_ipport_tcpallocs++;
851
852 /*
853 * Instead of having two loops further down counting up or down
854 * make sure that first is always <= last and go with only one
855 * code path implementing all logic.
856 */
857 if (first > last) {
858 u_int16_t aux;
859
860 aux = first;
861 first = last;
862 last = aux;
863 }
864
865 if (dorandom)
866 *lastport = first + (arc4random() % (last - first));
867
868 count = last - first;
869
870 do {
871 if (count-- < 0) { /* completely used? */
872 /* Undo an address bind that may have occurred. */
873 inp->in6p_laddr = in6addr_any;
874 return (EADDRNOTAVAIL);
875 }
876 ++*lastport;
877 if (*lastport < first || *lastport > last)
878 *lastport = first;
879 lport = htons(*lastport);
880 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr,
881 lport, wild, cred));
882
883 inp->inp_lport = lport;
884 if (in_pcbinshash(inp) != 0) {
885 inp->in6p_laddr = in6addr_any;
886 inp->inp_lport = 0;
887 return (EAGAIN);
888 }
889
890 return (0);
891}
892
893void
894addrsel_policy_init(void)
895{
896 ADDRSEL_LOCK_INIT();
897 ADDRSEL_SXLOCK_INIT();
898 INIT_VNET_INET6(curvnet);
899
900 V_ip6_prefer_tempaddr = 0;
901
902 init_policy_queue();
903
904 /* initialize the "last resort" policy */
905 bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
906 V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
907}
908
909static struct in6_addrpolicy *
910lookup_addrsel_policy(struct sockaddr_in6 *key)
911{
912 INIT_VNET_INET6(curvnet);
913 struct in6_addrpolicy *match = NULL;
914
915 ADDRSEL_LOCK();
916 match = match_addrsel_policy(key);
917
918 if (match == NULL)
919 match = &V_defaultaddrpolicy;
920 else
921 match->use++;
922 ADDRSEL_UNLOCK();
923
924 return (match);
925}
926
927/*
928 * Subroutines to manage the address selection policy table via sysctl.
929 */
/*
 * Argument passed through walk_addrsel_policy() to its callback; it carries
 * the sysctl request that dump_addrsel_policyent() writes its output to.
 */
930struct walkarg {
931 struct sysctl_req *w_req;
932};
933
/* Read-only sysctl node whose handler is in6_src_sysctl(). */
934static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
935SYSCTL_DECL(_net_inet6_ip6);
936SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
937 CTLFLAG_RD, in6_src_sysctl, "");
938
939static int
940in6_src_sysctl(SYSCTL_HANDLER_ARGS)
941{
942 struct walkarg w;
943
944 if (req->newptr)
945 return EPERM;
946
947 bzero(&w, sizeof(w));
948 w.w_req = req;
949
950 return (walk_addrsel_policy(dump_addrsel_policyent, &w));
951}
952
953int
954in6_src_ioctl(u_long cmd, caddr_t data)
955{
956 int i;
957 struct in6_addrpolicy ent0;
958
959 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
960 return (EOPNOTSUPP); /* check for safety */
961
962 ent0 = *(struct in6_addrpolicy *)data;
963
964 if (ent0.label == ADDR_LABEL_NOTAPP)
965 return (EINVAL);
966 /* check if the prefix mask is consecutive. */
967 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
968 return (EINVAL);
969 /* clear trailing garbages (if any) of the prefix address. */
970 for (i = 0; i < 4; i++) {
971 ent0.addr.sin6_addr.s6_addr32[i] &=
972 ent0.addrmask.sin6_addr.s6_addr32[i];
973 }
974 ent0.use = 0;
975
976 switch (cmd) {
977 case SIOCAADDRCTL_POLICY:
978 return (add_addrsel_policyent(&ent0));
979 case SIOCDADDRCTL_POLICY:
980 return (delete_addrsel_policyent(&ent0));
981 }
982
983 return (0); /* XXX: compromise compilers */
984}
985
986/*
987 * The following is an implementation of the policy table using a
988 * simple tail queue.
989 * XXX such details should be hidden.
990 * XXX implementation using binary tree should be more efficient.
991 */
/* One node of the policy table: tail-queue linkage plus the policy itself. */
992struct addrsel_policyent {
993 TAILQ_ENTRY(addrsel_policyent) ape_entry;
994 struct in6_addrpolicy ape_policy;
995};
996
/* Head type of the policy table queue. */
997TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
998
999#ifdef VIMAGE_GLOBALS
1000struct addrsel_policyhead addrsel_policytab;
1001#endif
1002
1003static void
1004init_policy_queue(void)
1005{
1006 INIT_VNET_INET6(curvnet);
1007
1008 TAILQ_INIT(&V_addrsel_policytab);
1009}
1010
1011static int
1012add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
1013{
1014 INIT_VNET_INET6(curvnet);
1015 struct addrsel_policyent *new, *pol;
1016
1017 new = malloc(sizeof(*new), M_IFADDR,
1018 M_WAITOK);
1019 ADDRSEL_XLOCK();
1020 ADDRSEL_LOCK();
1021
1022 /* duplication check */
1023 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1024 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1025 &pol->ape_policy.addr.sin6_addr) &&
1026 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1027 &pol->ape_policy.addrmask.sin6_addr)) {
1028 ADDRSEL_UNLOCK();
1029 ADDRSEL_XUNLOCK();
1030 free(new, M_IFADDR);
1031 return (EEXIST); /* or override it? */
1032 }
1033 }
1034
1035 bzero(new, sizeof(*new));
1036
1037 /* XXX: should validate entry */
1038 new->ape_policy = *newpolicy;
1039
1040 TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry);
1041 ADDRSEL_UNLOCK();
1042 ADDRSEL_XUNLOCK();
1043
1044 return (0);
1045}
1046
1047static int
1048delete_addrsel_policyent(struct in6_addrpolicy *key)
1049{
1050 INIT_VNET_INET6(curvnet);
1051 struct addrsel_policyent *pol;
1052
1053 ADDRSEL_XLOCK();
1054 ADDRSEL_LOCK();
1055
1056 /* search for the entry in the table */
1057 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1058 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1059 &pol->ape_policy.addr.sin6_addr) &&
1060 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1061 &pol->ape_policy.addrmask.sin6_addr)) {
1062 break;
1063 }
1064 }
1065 if (pol == NULL) {
1066 ADDRSEL_UNLOCK();
1067 ADDRSEL_XUNLOCK();
1068 return (ESRCH);
1069 }
1070
1071 TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
1072 ADDRSEL_UNLOCK();
1073 ADDRSEL_XUNLOCK();
1074
1075 return (0);
1076}
1077
1078static int
1079walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
1080 void *w)
1081{
1082 INIT_VNET_INET6(curvnet);
1083 struct addrsel_policyent *pol;
1084 int error = 0;
1085
1086 ADDRSEL_SLOCK();
1087 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
1088 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1089 ADDRSEL_SUNLOCK();
1090 return (error);
1091 }
1092 }
1093 ADDRSEL_SUNLOCK();
1094 return (error);
1095}
1096
1097static int
1098dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
1099{
1100 int error = 0;
1101 struct walkarg *w = arg;
1102
1103 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
1104
1105 return (error);
1106}
1107
1108static struct in6_addrpolicy *
1109match_addrsel_policy(struct sockaddr_in6 *key)
1110{
1111 INIT_VNET_INET6(curvnet);
1112 struct addrsel_policyent *pent;
1113 struct in6_addrpolicy *bestpol = NULL, *pol;
1114 int matchlen, bestmatchlen = -1;
1115 u_char *mp, *ep, *k, *p, m;
1116
1117 TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) {
1118 matchlen = 0;
1119
1120 pol = &pent->ape_policy;
1121 mp = (u_char *)&pol->addrmask.sin6_addr;
1122 ep = mp + 16; /* XXX: scope field? */
1123 k = (u_char *)&key->sin6_addr;
1124 p = (u_char *)&pol->addr.sin6_addr;
1125 for (; mp < ep && *mp; mp++, k++, p++) {
1126 m = *mp;
1127 if ((*k & m) != *p)
1128 goto next; /* not match */
1129 if (m == 0xff) /* short cut for a typical case */
1130 matchlen += 8;
1131 else {
1132 while (m >= 0x80) {
1133 matchlen++;
1134 m <<= 1;
1135 }
1136 }
1137 }
1138
1139 /* matched. check if this is better than the current best. */
1140 if (bestpol == NULL ||
1141 matchlen > bestmatchlen) {
1142 bestpol = pol;
1143 bestmatchlen = matchlen;
1144 }
1145
1146 next:
1147 continue;
1148 }
1149
1150 return (bestpol);
1151}