/* in6_pcb.c revision 222748 */
/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * Portions of this software were developed by Robert N. M. Watson under
 * contract to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
63 * 64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 65 */ 66 67#include <sys/cdefs.h> 68__FBSDID("$FreeBSD: head/sys/netinet6/in6_pcb.c 222748 2011-06-06 12:55:02Z rwatson $"); 69 70#include "opt_inet.h" 71#include "opt_inet6.h" 72#include "opt_ipsec.h" 73#include "opt_pcbgroup.h" 74 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/malloc.h> 78#include <sys/mbuf.h> 79#include <sys/domain.h> 80#include <sys/protosw.h> 81#include <sys/socket.h> 82#include <sys/socketvar.h> 83#include <sys/sockio.h> 84#include <sys/errno.h> 85#include <sys/time.h> 86#include <sys/priv.h> 87#include <sys/proc.h> 88#include <sys/jail.h> 89 90#include <vm/uma.h> 91 92#include <net/if.h> 93#include <net/if_types.h> 94#include <net/route.h> 95 96#include <netinet/in.h> 97#include <netinet/in_var.h> 98#include <netinet/in_systm.h> 99#include <netinet/tcp_var.h> 100#include <netinet/ip6.h> 101#include <netinet/ip_var.h> 102 103#include <netinet6/ip6_var.h> 104#include <netinet6/nd6.h> 105#include <netinet/in_pcb.h> 106#include <netinet6/in6_pcb.h> 107#include <netinet6/scope6_var.h> 108 109struct in6_addr zeroin6_addr; 110 111int 112in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, 113 struct ucred *cred) 114{ 115 struct socket *so = inp->inp_socket; 116 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; 117 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 118 u_short lport = 0; 119 int error, lookupflags = 0; 120 int reuseport = (so->so_options & SO_REUSEPORT); 121 122 INP_WLOCK_ASSERT(inp); 123 INP_HASH_WLOCK_ASSERT(pcbinfo); 124 125 if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! 
*/ 126 return (EADDRNOTAVAIL); 127 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 128 return (EINVAL); 129 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 130 lookupflags = INPLOOKUP_WILDCARD; 131 if (nam == NULL) { 132 if ((error = prison_local_ip6(cred, &inp->in6p_laddr, 133 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 134 return (error); 135 } else { 136 sin6 = (struct sockaddr_in6 *)nam; 137 if (nam->sa_len != sizeof(*sin6)) 138 return (EINVAL); 139 /* 140 * family check. 141 */ 142 if (nam->sa_family != AF_INET6) 143 return (EAFNOSUPPORT); 144 145 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) 146 return(error); 147 148 if ((error = prison_local_ip6(cred, &sin6->sin6_addr, 149 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 150 return (error); 151 152 lport = sin6->sin6_port; 153 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 154 /* 155 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 156 * allow compepte duplication of binding if 157 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 158 * and a multicast address is bound on both 159 * new and duplicated sockets. 160 */ 161 if (so->so_options & SO_REUSEADDR) 162 reuseport = SO_REUSEADDR|SO_REUSEPORT; 163 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 164 struct ifaddr *ifa; 165 166 sin6->sin6_port = 0; /* yech... */ 167 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == 168 NULL && 169 (inp->inp_flags & INP_BINDANY) == 0) { 170 return (EADDRNOTAVAIL); 171 } 172 173 /* 174 * XXX: bind to an anycast address might accidentally 175 * cause sending a packet with anycast source address. 176 * We should allow to bind to a deprecated address, since 177 * the application dares to use it. 
178 */ 179 if (ifa != NULL && 180 ((struct in6_ifaddr *)ifa)->ia6_flags & 181 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { 182 ifa_free(ifa); 183 return (EADDRNOTAVAIL); 184 } 185 if (ifa != NULL) 186 ifa_free(ifa); 187 } 188 if (lport) { 189 struct inpcb *t; 190 191 /* GROSS */ 192 if (ntohs(lport) <= V_ipport_reservedhigh && 193 ntohs(lport) >= V_ipport_reservedlow && 194 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 195 0)) 196 return (EACCES); 197 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && 198 priv_check_cred(inp->inp_cred, 199 PRIV_NETINET_REUSEPORT, 0) != 0) { 200 t = in6_pcblookup_local(pcbinfo, 201 &sin6->sin6_addr, lport, 202 INPLOOKUP_WILDCARD, cred); 203 if (t && 204 ((t->inp_flags & INP_TIMEWAIT) == 0) && 205 (so->so_type != SOCK_STREAM || 206 IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && 207 (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 208 !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || 209 (t->inp_socket->so_options & SO_REUSEPORT) 210 == 0) && (inp->inp_cred->cr_uid != 211 t->inp_cred->cr_uid)) 212 return (EADDRINUSE); 213#ifdef INET 214 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && 215 IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 216 struct sockaddr_in sin; 217 218 in6_sin6_2_sin(&sin, sin6); 219 t = in_pcblookup_local(pcbinfo, 220 sin.sin_addr, lport, 221 INPLOOKUP_WILDCARD, cred); 222 if (t && 223 ((t->inp_flags & 224 INP_TIMEWAIT) == 0) && 225 (so->so_type != SOCK_STREAM || 226 ntohl(t->inp_faddr.s_addr) == 227 INADDR_ANY) && 228 (inp->inp_cred->cr_uid != 229 t->inp_cred->cr_uid)) 230 return (EADDRINUSE); 231 } 232#endif 233 } 234 t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, 235 lport, lookupflags, cred); 236 if (t && (reuseport & ((t->inp_flags & INP_TIMEWAIT) ? 
237 intotw(t)->tw_so_options : 238 t->inp_socket->so_options)) == 0) 239 return (EADDRINUSE); 240#ifdef INET 241 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && 242 IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 243 struct sockaddr_in sin; 244 245 in6_sin6_2_sin(&sin, sin6); 246 t = in_pcblookup_local(pcbinfo, sin.sin_addr, 247 lport, lookupflags, cred); 248 if (t && t->inp_flags & INP_TIMEWAIT) { 249 if ((reuseport & 250 intotw(t)->tw_so_options) == 0 && 251 (ntohl(t->inp_laddr.s_addr) != 252 INADDR_ANY || ((inp->inp_vflag & 253 INP_IPV6PROTO) == 254 (t->inp_vflag & INP_IPV6PROTO)))) 255 return (EADDRINUSE); 256 } 257 else if (t && 258 (reuseport & t->inp_socket->so_options) 259 == 0 && (ntohl(t->inp_laddr.s_addr) != 260 INADDR_ANY || INP_SOCKAF(so) == 261 INP_SOCKAF(t->inp_socket))) 262 return (EADDRINUSE); 263 } 264#endif 265 } 266 inp->in6p_laddr = sin6->sin6_addr; 267 } 268 if (lport == 0) { 269 if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { 270 /* Undo an address bind that may have occurred. */ 271 inp->in6p_laddr = in6addr_any; 272 return (error); 273 } 274 } else { 275 inp->inp_lport = lport; 276 if (in_pcbinshash(inp) != 0) { 277 inp->in6p_laddr = in6addr_any; 278 inp->inp_lport = 0; 279 return (EAGAIN); 280 } 281 } 282 return (0); 283} 284 285/* 286 * Transform old in6_pcbconnect() into an inner subroutine for new 287 * in6_pcbconnect(): Do some validity-checking on the remote 288 * address (in mbuf 'nam') and then determine local host address 289 * (i.e., which interface) to use to access that remote host. 290 * 291 * This preserves definition of in6_pcbconnect(), while supporting a 292 * slightly different version for T/TCP. (This is more than 293 * a bit of a kludge, but cleaning up the internal interfaces would 294 * have forced minor changes in every protocol). 
295 */ 296int 297in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, 298 struct in6_addr *plocal_addr6) 299{ 300 register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 301 int error = 0; 302 struct ifnet *ifp = NULL; 303 int scope_ambiguous = 0; 304 struct in6_addr in6a; 305 306 INP_WLOCK_ASSERT(inp); 307 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ 308 309 if (nam->sa_len != sizeof (*sin6)) 310 return (EINVAL); 311 if (sin6->sin6_family != AF_INET6) 312 return (EAFNOSUPPORT); 313 if (sin6->sin6_port == 0) 314 return (EADDRNOTAVAIL); 315 316 if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) 317 scope_ambiguous = 1; 318 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) 319 return(error); 320 321 if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { 322 /* 323 * If the destination address is UNSPECIFIED addr, 324 * use the loopback addr, e.g ::1. 325 */ 326 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 327 sin6->sin6_addr = in6addr_loopback; 328 } 329 if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) 330 return (error); 331 332 error = in6_selectsrc(sin6, inp->in6p_outputopts, 333 inp, NULL, inp->inp_cred, &ifp, &in6a); 334 if (error) 335 return (error); 336 337 if (ifp && scope_ambiguous && 338 (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { 339 return(error); 340 } 341 342 /* 343 * Do not update this earlier, in case we return with an error. 344 * 345 * XXX: this in6_selectsrc result might replace the bound local 346 * address with the address specified by setsockopt(IPV6_PKTINFO). 347 * Is it the intended behavior? 348 */ 349 *plocal_addr6 = in6a; 350 351 /* 352 * Don't do pcblookup call here; return interface in 353 * plocal_addr6 354 * and exit to caller, that will do the lookup. 355 */ 356 357 return (0); 358} 359 360/* 361 * Outer subroutine: 362 * Connect from a socket to a specified address. 363 * Both address and port must be specified in argument sin. 
364 * If don't have a local address for this socket yet, 365 * then pick one. 366 */ 367int 368in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, 369 struct ucred *cred, struct mbuf *m) 370{ 371 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 372 register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 373 struct in6_addr addr6; 374 int error; 375 376 INP_WLOCK_ASSERT(inp); 377 INP_HASH_WLOCK_ASSERT(pcbinfo); 378 379 /* 380 * Call inner routine, to assign local interface address. 381 * in6_pcbladdr() may automatically fill in sin6_scope_id. 382 */ 383 if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) 384 return (error); 385 386 if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, 387 sin6->sin6_port, 388 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 389 ? &addr6 : &inp->in6p_laddr, 390 inp->inp_lport, 0, NULL) != NULL) { 391 return (EADDRINUSE); 392 } 393 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 394 if (inp->inp_lport == 0) { 395 error = in6_pcbbind(inp, (struct sockaddr *)0, cred); 396 if (error) 397 return (error); 398 } 399 inp->in6p_laddr = addr6; 400 } 401 inp->in6p_faddr = sin6->sin6_addr; 402 inp->inp_fport = sin6->sin6_port; 403 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 404 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 405 if (inp->inp_flags & IN6P_AUTOFLOWLABEL) 406 inp->inp_flow |= 407 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 408 409 in_pcbrehash_mbuf(inp, m); 410 411 return (0); 412} 413 414int 415in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 416{ 417 418 return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); 419} 420 421void 422in6_pcbdisconnect(struct inpcb *inp) 423{ 424 425 INP_WLOCK_ASSERT(inp); 426 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); 427 428 bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); 429 inp->inp_fport = 0; 430 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 431 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 432 in_pcbrehash(inp); 433} 434 
435struct sockaddr * 436in6_sockaddr(in_port_t port, struct in6_addr *addr_p) 437{ 438 struct sockaddr_in6 *sin6; 439 440 sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); 441 bzero(sin6, sizeof *sin6); 442 sin6->sin6_family = AF_INET6; 443 sin6->sin6_len = sizeof(*sin6); 444 sin6->sin6_port = port; 445 sin6->sin6_addr = *addr_p; 446 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ 447 448 return (struct sockaddr *)sin6; 449} 450 451struct sockaddr * 452in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) 453{ 454 struct sockaddr_in sin; 455 struct sockaddr_in6 *sin6_p; 456 457 bzero(&sin, sizeof sin); 458 sin.sin_family = AF_INET; 459 sin.sin_len = sizeof(sin); 460 sin.sin_port = port; 461 sin.sin_addr = *addr_p; 462 463 sin6_p = malloc(sizeof *sin6_p, M_SONAME, 464 M_WAITOK); 465 in6_sin_2_v4mapsin6(&sin, sin6_p); 466 467 return (struct sockaddr *)sin6_p; 468} 469 470int 471in6_getsockaddr(struct socket *so, struct sockaddr **nam) 472{ 473 register struct inpcb *inp; 474 struct in6_addr addr; 475 in_port_t port; 476 477 inp = sotoinpcb(so); 478 KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); 479 480 INP_RLOCK(inp); 481 port = inp->inp_lport; 482 addr = inp->in6p_laddr; 483 INP_RUNLOCK(inp); 484 485 *nam = in6_sockaddr(port, &addr); 486 return 0; 487} 488 489int 490in6_getpeeraddr(struct socket *so, struct sockaddr **nam) 491{ 492 struct inpcb *inp; 493 struct in6_addr addr; 494 in_port_t port; 495 496 inp = sotoinpcb(so); 497 KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); 498 499 INP_RLOCK(inp); 500 port = inp->inp_fport; 501 addr = inp->in6p_faddr; 502 INP_RUNLOCK(inp); 503 504 *nam = in6_sockaddr(port, &addr); 505 return 0; 506} 507 508int 509in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) 510{ 511 struct inpcb *inp; 512 int error; 513 514 inp = sotoinpcb(so); 515 KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); 516 517#ifdef INET 518 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { 519 
error = in_getsockaddr(so, nam); 520 if (error == 0) 521 in6_sin_2_v4mapsin6_in_sock(nam); 522 } else 523#endif 524 { 525 /* scope issues will be handled in in6_getsockaddr(). */ 526 error = in6_getsockaddr(so, nam); 527 } 528 529 return error; 530} 531 532int 533in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) 534{ 535 struct inpcb *inp; 536 int error; 537 538 inp = sotoinpcb(so); 539 KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); 540 541#ifdef INET 542 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { 543 error = in_getpeeraddr(so, nam); 544 if (error == 0) 545 in6_sin_2_v4mapsin6_in_sock(nam); 546 } else 547#endif 548 /* scope issues will be handled in in6_getpeeraddr(). */ 549 error = in6_getpeeraddr(so, nam); 550 551 return error; 552} 553 554/* 555 * Pass some notification to all connections of a protocol 556 * associated with address dst. The local address and/or port numbers 557 * may be specified to limit the search. The "usual action" will be 558 * taken, depending on the ctlinput cmd. The caller must filter any 559 * cmds that are uninteresting (e.g., no error in the map). 560 * Call the protocol specific routine (if any) to report 561 * any errors for each matching socket. 562 */ 563void 564in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, 565 u_int fport_arg, const struct sockaddr *src, u_int lport_arg, 566 int cmd, void *cmdarg, 567 struct inpcb *(*notify)(struct inpcb *, int)) 568{ 569 struct inpcb *inp, *inp_temp; 570 struct sockaddr_in6 sa6_src, *sa6_dst; 571 u_short fport = fport_arg, lport = lport_arg; 572 u_int32_t flowinfo; 573 int errno; 574 575 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 576 return; 577 578 sa6_dst = (struct sockaddr_in6 *)dst; 579 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 580 return; 581 582 /* 583 * note that src can be NULL when we get notify by local fragmentation. 584 */ 585 sa6_src = (src == NULL) ? 
sa6_any : *(const struct sockaddr_in6 *)src; 586 flowinfo = sa6_src.sin6_flowinfo; 587 588 /* 589 * Redirects go to all references to the destination, 590 * and use in6_rtchange to invalidate the route cache. 591 * Dead host indications: also use in6_rtchange to invalidate 592 * the cache, and deliver the error to all the sockets. 593 * Otherwise, if we have knowledge of the local port and address, 594 * deliver only to that socket. 595 */ 596 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 597 fport = 0; 598 lport = 0; 599 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); 600 601 if (cmd != PRC_HOSTDEAD) 602 notify = in6_rtchange; 603 } 604 errno = inet6ctlerrmap[cmd]; 605 INP_INFO_WLOCK(pcbinfo); 606 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { 607 INP_WLOCK(inp); 608 if ((inp->inp_vflag & INP_IPV6) == 0) { 609 INP_WUNLOCK(inp); 610 continue; 611 } 612 613 /* 614 * If the error designates a new path MTU for a destination 615 * and the application (associated with this socket) wanted to 616 * know the value, notify. Note that we notify for all 617 * disconnected sockets if the corresponding application 618 * wanted. This is because some UDP applications keep sending 619 * sockets disconnected. 620 * XXX: should we avoid to notify the value to TCP sockets? 621 */ 622 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 623 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 624 IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { 625 ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, 626 (u_int32_t *)cmdarg); 627 } 628 629 /* 630 * Detect if we should notify the error. If no source and 631 * destination ports are specifed, but non-zero flowinfo and 632 * local address match, notify the error. This is the case 633 * when the error is delivered with an encrypted buffer 634 * by ESP. Otherwise, just compare addresses and ports 635 * as usual. 
636 */ 637 if (lport == 0 && fport == 0 && flowinfo && 638 inp->inp_socket != NULL && 639 flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && 640 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) 641 goto do_notify; 642 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, 643 &sa6_dst->sin6_addr) || 644 inp->inp_socket == 0 || 645 (lport && inp->inp_lport != lport) || 646 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 647 !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 648 &sa6_src.sin6_addr)) || 649 (fport && inp->inp_fport != fport)) { 650 INP_WUNLOCK(inp); 651 continue; 652 } 653 654 do_notify: 655 if (notify) { 656 if ((*notify)(inp, errno)) 657 INP_WUNLOCK(inp); 658 } else 659 INP_WUNLOCK(inp); 660 } 661 INP_INFO_WUNLOCK(pcbinfo); 662} 663 664/* 665 * Lookup a PCB based on the local address and port. Caller must hold the 666 * hash lock. No inpcb locks or references are acquired. 667 */ 668struct inpcb * 669in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, 670 u_short lport, int lookupflags, struct ucred *cred) 671{ 672 register struct inpcb *inp; 673 int matchwild = 3, wildcard; 674 675 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, 676 ("%s: invalid lookup flags %d", __func__, lookupflags)); 677 678 INP_HASH_WLOCK_ASSERT(pcbinfo); 679 680 if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { 681 struct inpcbhead *head; 682 /* 683 * Look for an unconnected (wildcard foreign addr) PCB that 684 * matches the local address and port we're looking for. 685 */ 686 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 687 0, pcbinfo->ipi_hashmask)]; 688 LIST_FOREACH(inp, head, inp_hash) { 689 /* XXX inp locking */ 690 if ((inp->inp_vflag & INP_IPV6) == 0) 691 continue; 692 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && 693 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 694 inp->inp_lport == lport) { 695 /* Found. 
*/ 696 if (cred == NULL || 697 prison_equal_ip6(cred->cr_prison, 698 inp->inp_cred->cr_prison)) 699 return (inp); 700 } 701 } 702 /* 703 * Not found. 704 */ 705 return (NULL); 706 } else { 707 struct inpcbporthead *porthash; 708 struct inpcbport *phd; 709 struct inpcb *match = NULL; 710 /* 711 * Best fit PCB lookup. 712 * 713 * First see if this local port is in use by looking on the 714 * port hash list. 715 */ 716 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, 717 pcbinfo->ipi_porthashmask)]; 718 LIST_FOREACH(phd, porthash, phd_hash) { 719 if (phd->phd_port == lport) 720 break; 721 } 722 if (phd != NULL) { 723 /* 724 * Port is in use by one or more PCBs. Look for best 725 * fit. 726 */ 727 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 728 wildcard = 0; 729 if (cred != NULL && 730 !prison_equal_ip6(cred->cr_prison, 731 inp->inp_cred->cr_prison)) 732 continue; 733 /* XXX inp locking */ 734 if ((inp->inp_vflag & INP_IPV6) == 0) 735 continue; 736 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 737 wildcard++; 738 if (!IN6_IS_ADDR_UNSPECIFIED( 739 &inp->in6p_laddr)) { 740 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 741 wildcard++; 742 else if (!IN6_ARE_ADDR_EQUAL( 743 &inp->in6p_laddr, laddr)) 744 continue; 745 } else { 746 if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) 747 wildcard++; 748 } 749 if (wildcard < matchwild) { 750 match = inp; 751 matchwild = wildcard; 752 if (matchwild == 0) 753 break; 754 } 755 } 756 } 757 return (match); 758 } 759} 760 761void 762in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 763{ 764 struct inpcb *in6p; 765 struct ip6_moptions *im6o; 766 int i, gap; 767 768 INP_INFO_RLOCK(pcbinfo); 769 LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { 770 INP_WLOCK(in6p); 771 im6o = in6p->in6p_moptions; 772 if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { 773 /* 774 * Unselect the outgoing ifp for multicast if it 775 * is being detached. 
776 */ 777 if (im6o->im6o_multicast_ifp == ifp) 778 im6o->im6o_multicast_ifp = NULL; 779 /* 780 * Drop multicast group membership if we joined 781 * through the interface being detached. 782 */ 783 gap = 0; 784 for (i = 0; i < im6o->im6o_num_memberships; i++) { 785 if (im6o->im6o_membership[i]->in6m_ifp == 786 ifp) { 787 in6_mc_leave(im6o->im6o_membership[i], 788 NULL); 789 gap++; 790 } else if (gap != 0) { 791 im6o->im6o_membership[i - gap] = 792 im6o->im6o_membership[i]; 793 } 794 } 795 im6o->im6o_num_memberships -= gap; 796 } 797 INP_WUNLOCK(in6p); 798 } 799 INP_INFO_RUNLOCK(pcbinfo); 800} 801 802/* 803 * Check for alternatives when higher level complains 804 * about service problems. For now, invalidate cached 805 * routing information. If the route was created dynamically 806 * (by a redirect), time to try a default gateway again. 807 */ 808void 809in6_losing(struct inpcb *in6p) 810{ 811 812 /* 813 * We don't store route pointers in the routing table anymore 814 */ 815 return; 816} 817 818/* 819 * After a routing change, flush old routing 820 * and allocate a (hopefully) better one. 821 */ 822struct inpcb * 823in6_rtchange(struct inpcb *inp, int errno) 824{ 825 /* 826 * We don't store route pointers in the routing table anymore 827 */ 828 return inp; 829} 830 831#ifdef PCBGROUP 832/* 833 * Lookup PCB in hash list, using pcbgroup tables. 834 */ 835static struct inpcb * 836in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, 837 struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, 838 u_int lport_arg, int lookupflags, struct ifnet *ifp) 839{ 840 struct inpcbhead *head; 841 struct inpcb *inp, *tmpinp; 842 u_short fport = fport_arg, lport = lport_arg; 843 int faith; 844 845 if (faithprefix_p != NULL) 846 faith = (*faithprefix_p)(laddr); 847 else 848 faith = 0; 849 850 /* 851 * First look for an exact match. 
852 */ 853 tmpinp = NULL; 854 INP_GROUP_LOCK(pcbgroup); 855 head = &pcbgroup->ipg_hashbase[ 856 INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, 857 pcbgroup->ipg_hashmask)]; 858 LIST_FOREACH(inp, head, inp_pcbgrouphash) { 859 /* XXX inp locking */ 860 if ((inp->inp_vflag & INP_IPV6) == 0) 861 continue; 862 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 863 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 864 inp->inp_fport == fport && 865 inp->inp_lport == lport) { 866 /* 867 * XXX We should be able to directly return 868 * the inp here, without any checks. 869 * Well unless both bound with SO_REUSEPORT? 870 */ 871 if (prison_flag(inp->inp_cred, PR_IP6)) 872 goto found; 873 if (tmpinp == NULL) 874 tmpinp = inp; 875 } 876 } 877 if (tmpinp != NULL) { 878 inp = tmpinp; 879 goto found; 880 } 881 882 /* 883 * Then look for a wildcard match, if requested. 884 */ 885 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { 886 struct inpcb *local_wild = NULL, *local_exact = NULL; 887 struct inpcb *jail_wild = NULL; 888 int injail; 889 890 /* 891 * Order of socket selection - we always prefer jails. 892 * 1. jailed, non-wild. 893 * 2. jailed, wild. 894 * 3. non-jailed, non-wild. 895 * 4. non-jailed, wild. 
896 */ 897 head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, 898 0, pcbinfo->ipi_wildmask)]; 899 LIST_FOREACH(inp, head, inp_pcbgroup_wild) { 900 /* XXX inp locking */ 901 if ((inp->inp_vflag & INP_IPV6) == 0) 902 continue; 903 904 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 905 inp->inp_lport != lport) { 906 continue; 907 } 908 909 /* XXX inp locking */ 910 if (faith && (inp->inp_flags & INP_FAITH) == 0) 911 continue; 912 913 injail = prison_flag(inp->inp_cred, PR_IP6); 914 if (injail) { 915 if (prison_check_ip6(inp->inp_cred, 916 laddr) != 0) 917 continue; 918 } else { 919 if (local_exact != NULL) 920 continue; 921 } 922 923 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { 924 if (injail) 925 goto found; 926 else 927 local_exact = inp; 928 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 929 if (injail) 930 jail_wild = inp; 931 else 932 local_wild = inp; 933 } 934 } /* LIST_FOREACH */ 935 936 inp = jail_wild; 937 if (inp == NULL) 938 inp = jail_wild; 939 if (inp == NULL) 940 inp = local_exact; 941 if (inp == NULL) 942 inp = local_wild; 943 if (inp != NULL) 944 goto found; 945 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ 946 INP_GROUP_UNLOCK(pcbgroup); 947 return (NULL); 948 949found: 950 in_pcbref(inp); 951 INP_GROUP_UNLOCK(pcbgroup); 952 if (lookupflags & INPLOOKUP_WLOCKPCB) { 953 INP_WLOCK(inp); 954 if (in_pcbrele_wlocked(inp)) 955 return (NULL); 956 } else if (lookupflags & INPLOOKUP_RLOCKPCB) { 957 INP_RLOCK(inp); 958 if (in_pcbrele_rlocked(inp)) 959 return (NULL); 960 } else 961 panic("%s: locking buf", __func__); 962 return (inp); 963} 964#endif /* PCBGROUP */ 965 966/* 967 * Lookup PCB in hash list. 
968 */ 969struct inpcb * 970in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 971 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, 972 int lookupflags, struct ifnet *ifp) 973{ 974 struct inpcbhead *head; 975 struct inpcb *inp, *tmpinp; 976 u_short fport = fport_arg, lport = lport_arg; 977 int faith; 978 979 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, 980 ("%s: invalid lookup flags %d", __func__, lookupflags)); 981 982 INP_HASH_LOCK_ASSERT(pcbinfo); 983 984 if (faithprefix_p != NULL) 985 faith = (*faithprefix_p)(laddr); 986 else 987 faith = 0; 988 989 /* 990 * First look for an exact match. 991 */ 992 tmpinp = NULL; 993 head = &pcbinfo->ipi_hashbase[ 994 INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, 995 pcbinfo->ipi_hashmask)]; 996 LIST_FOREACH(inp, head, inp_hash) { 997 /* XXX inp locking */ 998 if ((inp->inp_vflag & INP_IPV6) == 0) 999 continue; 1000 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 1001 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 1002 inp->inp_fport == fport && 1003 inp->inp_lport == lport) { 1004 /* 1005 * XXX We should be able to directly return 1006 * the inp here, without any checks. 1007 * Well unless both bound with SO_REUSEPORT? 1008 */ 1009 if (prison_flag(inp->inp_cred, PR_IP6)) 1010 return (inp); 1011 if (tmpinp == NULL) 1012 tmpinp = inp; 1013 } 1014 } 1015 if (tmpinp != NULL) 1016 return (tmpinp); 1017 1018 /* 1019 * Then look for a wildcard match, if requested. 1020 */ 1021 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { 1022 struct inpcb *local_wild = NULL, *local_exact = NULL; 1023 struct inpcb *jail_wild = NULL; 1024 int injail; 1025 1026 /* 1027 * Order of socket selection - we always prefer jails. 1028 * 1. jailed, non-wild. 1029 * 2. jailed, wild. 1030 * 3. non-jailed, non-wild. 1031 * 4. non-jailed, wild. 
1032 */ 1033 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1034 0, pcbinfo->ipi_hashmask)]; 1035 LIST_FOREACH(inp, head, inp_hash) { 1036 /* XXX inp locking */ 1037 if ((inp->inp_vflag & INP_IPV6) == 0) 1038 continue; 1039 1040 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 1041 inp->inp_lport != lport) { 1042 continue; 1043 } 1044 1045 /* XXX inp locking */ 1046 if (faith && (inp->inp_flags & INP_FAITH) == 0) 1047 continue; 1048 1049 injail = prison_flag(inp->inp_cred, PR_IP6); 1050 if (injail) { 1051 if (prison_check_ip6(inp->inp_cred, 1052 laddr) != 0) 1053 continue; 1054 } else { 1055 if (local_exact != NULL) 1056 continue; 1057 } 1058 1059 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { 1060 if (injail) 1061 return (inp); 1062 else 1063 local_exact = inp; 1064 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 1065 if (injail) 1066 jail_wild = inp; 1067 else 1068 local_wild = inp; 1069 } 1070 } /* LIST_FOREACH */ 1071 1072 if (jail_wild != NULL) 1073 return (jail_wild); 1074 if (local_exact != NULL) 1075 return (local_exact); 1076 if (local_wild != NULL) 1077 return (local_wild); 1078 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ 1079 1080 /* 1081 * Not found. 1082 */ 1083 return (NULL); 1084} 1085 1086/* 1087 * Lookup PCB in hash list, using pcbinfo tables. This variation locks the 1088 * hash list lock, and will return the inpcb locked (i.e., requires 1089 * INPLOOKUP_LOCKPCB). 
1090 */ 1091static struct inpcb * 1092in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 1093 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, 1094 struct ifnet *ifp) 1095{ 1096 struct inpcb *inp; 1097 1098 INP_HASH_RLOCK(pcbinfo); 1099 inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, 1100 (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); 1101 if (inp != NULL) { 1102 in_pcbref(inp); 1103 INP_HASH_RUNLOCK(pcbinfo); 1104 if (lookupflags & INPLOOKUP_WLOCKPCB) { 1105 INP_WLOCK(inp); 1106 if (in_pcbrele_wlocked(inp)) 1107 return (NULL); 1108 } else if (lookupflags & INPLOOKUP_RLOCKPCB) { 1109 INP_RLOCK(inp); 1110 if (in_pcbrele_rlocked(inp)) 1111 return (NULL); 1112 } else 1113 panic("%s: locking bug", __func__); 1114 } else 1115 INP_HASH_RUNLOCK(pcbinfo); 1116 return (inp); 1117} 1118 1119/* 1120 * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf 1121 * from which a pre-calculated hash value may be extracted. 1122 * 1123 * Possibly more of this logic should be in in6_pcbgroup.c. 
1124 */ 1125struct inpcb * 1126in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, 1127 struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) 1128{ 1129#if defined(PCBGROUP) 1130 struct inpcbgroup *pcbgroup; 1131#endif 1132 1133 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, 1134 ("%s: invalid lookup flags %d", __func__, lookupflags)); 1135 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, 1136 ("%s: LOCKPCB not set", __func__)); 1137 1138#if defined(PCBGROUP) 1139 if (in_pcbgroup_enabled(pcbinfo)) { 1140 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, 1141 fport); 1142 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, 1143 laddr, lport, lookupflags, ifp)); 1144 } 1145#endif 1146 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, 1147 lookupflags, ifp)); 1148} 1149 1150struct inpcb * 1151in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 1152 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, 1153 struct ifnet *ifp, struct mbuf *m) 1154{ 1155#ifdef PCBGROUP 1156 struct inpcbgroup *pcbgroup; 1157#endif 1158 1159 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, 1160 ("%s: invalid lookup flags %d", __func__, lookupflags)); 1161 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, 1162 ("%s: LOCKPCB not set", __func__)); 1163 1164#ifdef PCBGROUP 1165 if (in_pcbgroup_enabled(pcbinfo)) { 1166 pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), 1167 m->m_pkthdr.flowid); 1168 if (pcbgroup != NULL) 1169 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, 1170 fport, laddr, lport, lookupflags, ifp)); 1171 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, 1172 fport); 1173 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, 1174 laddr, lport, lookupflags, ifp)); 1175 } 1176#endif 1177 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, 1178 lookupflags, ifp)); 1179} 1180 1181void 
1182init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) 1183{ 1184 struct ip6_hdr *ip; 1185 1186 ip = mtod(m, struct ip6_hdr *); 1187 bzero(sin6, sizeof(*sin6)); 1188 sin6->sin6_len = sizeof(*sin6); 1189 sin6->sin6_family = AF_INET6; 1190 sin6->sin6_addr = ip->ip6_src; 1191 1192 (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ 1193 1194 return; 1195} 1196