in6_pcb.c revision 222748
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * Copyright (c) 2010-2011 Juniper Networks, Inc.
4 * All rights reserved.
5 *
6 * Portions of this software were developed by Robert N. M. Watson under
7 * contract to Juniper Networks, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
34 */
35
36/*-
37 * Copyright (c) 1982, 1986, 1991, 1993
38 *	The Regents of the University of California.  All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
65 */
66
67#include <sys/cdefs.h>
68__FBSDID("$FreeBSD: head/sys/netinet6/in6_pcb.c 222748 2011-06-06 12:55:02Z rwatson $");
69
70#include "opt_inet.h"
71#include "opt_inet6.h"
72#include "opt_ipsec.h"
73#include "opt_pcbgroup.h"
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/malloc.h>
78#include <sys/mbuf.h>
79#include <sys/domain.h>
80#include <sys/protosw.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/sockio.h>
84#include <sys/errno.h>
85#include <sys/time.h>
86#include <sys/priv.h>
87#include <sys/proc.h>
88#include <sys/jail.h>
89
90#include <vm/uma.h>
91
92#include <net/if.h>
93#include <net/if_types.h>
94#include <net/route.h>
95
96#include <netinet/in.h>
97#include <netinet/in_var.h>
98#include <netinet/in_systm.h>
99#include <netinet/tcp_var.h>
100#include <netinet/ip6.h>
101#include <netinet/ip_var.h>
102
103#include <netinet6/ip6_var.h>
104#include <netinet6/nd6.h>
105#include <netinet/in_pcb.h>
106#include <netinet6/in6_pcb.h>
107#include <netinet6/scope6_var.h>
108
/*
 * File-scope IPv6 address object with no explicit initializer, so it has
 * static storage duration and starts out all-zero.
 */
struct	in6_addr zeroin6_addr;
110
/*
 * Bind a local IPv6 address and/or port to an inpcb.
 *
 * inp  - PCB to bind.  Must be write-locked, and its pcbinfo hash lock
 *        must be held for writing (both are asserted below).
 * nam  - requested local address as a sockaddr_in6, or NULL when the
 *        caller does not request a specific address or port.
 * cred - credential used for the jail check (prison_local_ip6()), the
 *        reserved-port privilege check and the PCB lookups.
 *
 * Returns 0 on success.  Otherwise returns EADDRNOTAVAIL, EINVAL,
 * EAFNOSUPPORT, EACCES, EADDRINUSE or EAGAIN, or the error reported by
 * sa6_embedscope(), prison_local_ip6() or in6_pcbsetport().
 *
 * NOTE: when nam is non-NULL its contents may be modified (scope embedding,
 * and sin6_port is cleared for unicast non-wildcard addresses).
 */
int
in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
    struct ucred *cred)
{
	struct socket *so = inp->inp_socket;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short	lport = 0;
	int error, lookupflags = 0;
	int reuseport = (so->so_options & SO_REUSEPORT);

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	/* No IPv6 address is configured anywhere: nothing to bind to. */
	if (TAILQ_EMPTY(&V_in6_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	/* Refuse to rebind a PCB that already has a local port or address. */
	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		return (EINVAL);
	/*
	 * With neither SO_REUSEADDR nor SO_REUSEPORT set, the conflict
	 * lookups further down are done with INPLOOKUP_WILDCARD.
	 */
	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
		lookupflags = INPLOOKUP_WILDCARD;
	if (nam == NULL) {
		/*
		 * No address requested: only run the jail check on the
		 * PCB's own local address.
		 */
		if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);
	} else {
		sin6 = (struct sockaddr_in6 *)nam;
		if (nam->sa_len != sizeof(*sin6))
			return (EINVAL);
		/*
		 * family check.
		 */
		if (nam->sa_family != AF_INET6)
			return (EAFNOSUPPORT);

		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
			return(error);

		if ((error = prison_local_ip6(cred, &sin6->sin6_addr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);

		/* Remember the requested port; sin6_port may be cleared below. */
		lport = sin6->sin6_port;
		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			struct ifaddr *ifa;

			/*
			 * The port is zeroed in the caller's sockaddr before
			 * the interface-address lookup (presumably so that
			 * it does not take part in the comparison -- TODO
			 * confirm against ifa_ifwithaddr()).
			 */
			sin6->sin6_port = 0;		/* yech... */
			/*
			 * The address must belong to an interface unless
			 * INP_BINDANY is set on the PCB.
			 */
			if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
			    NULL &&
			    (inp->inp_flags & INP_BINDANY) == 0) {
				return (EADDRNOTAVAIL);
			}

			/*
			 * XXX: bind to an anycast address might accidentally
			 * cause sending a packet with anycast source address.
			 * We should allow to bind to a deprecated address, since
			 * the application dares to use it.
			 */
			if (ifa != NULL &&
			    ((struct in6_ifaddr *)ifa)->ia6_flags &
			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
				ifa_free(ifa);
				return (EADDRNOTAVAIL);
			}
			/* Drop the reference taken by ifa_ifwithaddr(). */
			if (ifa != NULL)
				ifa_free(ifa);
		}
		if (lport) {
			struct inpcb *t;

			/*
			 * Ports inside [V_ipport_reservedlow,
			 * V_ipport_reservedhigh] require
			 * PRIV_NETINET_RESERVEDPORT.
			 */
			/* GROSS */
			if (ntohs(lport) <= V_ipport_reservedhigh &&
			    ntohs(lport) >= V_ipport_reservedlow &&
			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
			    0))
				return (EACCES);
			/*
			 * Cross-user check: for non-multicast addresses, and
			 * only when the PCB's credential lacks
			 * PRIV_NETINET_REUSEPORT, refuse the bind if a
			 * matching PCB owned by a different uid exists.
			 */
			if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
			    priv_check_cred(inp->inp_cred,
			    PRIV_NETINET_REUSEPORT, 0) != 0) {
				t = in6_pcblookup_local(pcbinfo,
				    &sin6->sin6_addr, lport,
				    INPLOOKUP_WILDCARD, cred);
				if (t &&
				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
				    (so->so_type != SOCK_STREAM ||
				     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
				     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
				     (t->inp_socket->so_options & SO_REUSEPORT)
				      == 0) && (inp->inp_cred->cr_uid !=
				     t->inp_cred->cr_uid))
					return (EADDRINUSE);
#ifdef INET
				/*
				 * A wildcard bind on a socket that is not
				 * V6ONLY is also checked against the IPv4
				 * PCBs.
				 */
				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
				    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
					struct sockaddr_in sin;

					in6_sin6_2_sin(&sin, sin6);
					t = in_pcblookup_local(pcbinfo,
					    sin.sin_addr, lport,
					    INPLOOKUP_WILDCARD, cred);
					if (t &&
					    ((t->inp_flags &
					      INP_TIMEWAIT) == 0) &&
					    (so->so_type != SOCK_STREAM ||
					     ntohl(t->inp_faddr.s_addr) ==
					      INADDR_ANY) &&
					    (inp->inp_cred->cr_uid !=
					     t->inp_cred->cr_uid))
						return (EADDRINUSE);
				}
#endif
			}
			/*
			 * Generic conflict check: an existing match is only
			 * tolerated when it shares at least one option bit
			 * with 'reuseport'.  For TIME_WAIT PCBs the options
			 * are read from the tcptw (tw_so_options) instead of
			 * the socket.
			 */
			t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
			    lport, lookupflags, cred);
			if (t && (reuseport & ((t->inp_flags & INP_TIMEWAIT) ?
			    intotw(t)->tw_so_options :
			    t->inp_socket->so_options)) == 0)
				return (EADDRINUSE);
#ifdef INET
			/*
			 * Same generic check against the IPv4 PCBs for a
			 * wildcard bind on a socket that is not V6ONLY.
			 */
			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
				struct sockaddr_in sin;

				in6_sin6_2_sin(&sin, sin6);
				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
				    lport, lookupflags, cred);
				if (t && t->inp_flags & INP_TIMEWAIT) {
					if ((reuseport &
					    intotw(t)->tw_so_options) == 0 &&
					    (ntohl(t->inp_laddr.s_addr) !=
					     INADDR_ANY || ((inp->inp_vflag &
					     INP_IPV6PROTO) ==
					     (t->inp_vflag & INP_IPV6PROTO))))
						return (EADDRINUSE);
				}
				else if (t &&
				    (reuseport & t->inp_socket->so_options)
				    == 0 && (ntohl(t->inp_laddr.s_addr) !=
				    INADDR_ANY || INP_SOCKAF(so) ==
				     INP_SOCKAF(t->inp_socket)))
					return (EADDRINUSE);
			}
#endif
		}
		/* All checks passed: record the requested local address. */
		inp->in6p_laddr = sin6->sin6_addr;
	}
	if (lport == 0) {
		/* No port requested: let in6_pcbsetport() pick one. */
		if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) {
			/* Undo an address bind that may have occurred. */
			inp->in6p_laddr = in6addr_any;
			return (error);
		}
	} else {
		inp->inp_lport = lport;
		/* Roll back both address and port if hash insertion fails. */
		if (in_pcbinshash(inp) != 0) {
			inp->in6p_laddr = in6addr_any;
			inp->inp_lport = 0;
			return (EAGAIN);
		}
	}
	return (0);
}
284
285/*
286 *   Transform old in6_pcbconnect() into an inner subroutine for new
287 *   in6_pcbconnect(): Do some validity-checking on the remote
288 *   address (in mbuf 'nam') and then determine local host address
289 *   (i.e., which interface) to use to access that remote host.
290 *
291 *   This preserves definition of in6_pcbconnect(), while supporting a
292 *   slightly different version for T/TCP.  (This is more than
293 *   a bit of a kludge, but cleaning up the internal interfaces would
294 *   have forced minor changes in every protocol).
295 */
296int
297in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
298    struct in6_addr *plocal_addr6)
299{
300	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
301	int error = 0;
302	struct ifnet *ifp = NULL;
303	int scope_ambiguous = 0;
304	struct in6_addr in6a;
305
306	INP_WLOCK_ASSERT(inp);
307	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);	/* XXXRW: why? */
308
309	if (nam->sa_len != sizeof (*sin6))
310		return (EINVAL);
311	if (sin6->sin6_family != AF_INET6)
312		return (EAFNOSUPPORT);
313	if (sin6->sin6_port == 0)
314		return (EADDRNOTAVAIL);
315
316	if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
317		scope_ambiguous = 1;
318	if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
319		return(error);
320
321	if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) {
322		/*
323		 * If the destination address is UNSPECIFIED addr,
324		 * use the loopback addr, e.g ::1.
325		 */
326		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
327			sin6->sin6_addr = in6addr_loopback;
328	}
329	if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
330		return (error);
331
332	error = in6_selectsrc(sin6, inp->in6p_outputopts,
333	    inp, NULL, inp->inp_cred, &ifp, &in6a);
334	if (error)
335		return (error);
336
337	if (ifp && scope_ambiguous &&
338	    (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
339		return(error);
340	}
341
342	/*
343	 * Do not update this earlier, in case we return with an error.
344	 *
345	 * XXX: this in6_selectsrc result might replace the bound local
346	 * address with the address specified by setsockopt(IPV6_PKTINFO).
347	 * Is it the intended behavior?
348	 */
349	*plocal_addr6 = in6a;
350
351	/*
352	 * Don't do pcblookup call here; return interface in
353	 * plocal_addr6
354	 * and exit to caller, that will do the lookup.
355	 */
356
357	return (0);
358}
359
360/*
361 * Outer subroutine:
362 * Connect from a socket to a specified address.
363 * Both address and port must be specified in argument sin.
364 * If don't have a local address for this socket yet,
365 * then pick one.
366 */
367int
368in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam,
369    struct ucred *cred, struct mbuf *m)
370{
371	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
372	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
373	struct in6_addr addr6;
374	int error;
375
376	INP_WLOCK_ASSERT(inp);
377	INP_HASH_WLOCK_ASSERT(pcbinfo);
378
379	/*
380	 * Call inner routine, to assign local interface address.
381	 * in6_pcbladdr() may automatically fill in sin6_scope_id.
382	 */
383	if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
384		return (error);
385
386	if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
387			       sin6->sin6_port,
388			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
389			      ? &addr6 : &inp->in6p_laddr,
390			      inp->inp_lport, 0, NULL) != NULL) {
391		return (EADDRINUSE);
392	}
393	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
394		if (inp->inp_lport == 0) {
395			error = in6_pcbbind(inp, (struct sockaddr *)0, cred);
396			if (error)
397				return (error);
398		}
399		inp->in6p_laddr = addr6;
400	}
401	inp->in6p_faddr = sin6->sin6_addr;
402	inp->inp_fport = sin6->sin6_port;
403	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
404	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
405	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
406		inp->inp_flow |=
407		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
408
409	in_pcbrehash_mbuf(inp, m);
410
411	return (0);
412}
413
414int
415in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
416{
417
418	return (in6_pcbconnect_mbuf(inp, nam, cred, NULL));
419}
420
421void
422in6_pcbdisconnect(struct inpcb *inp)
423{
424
425	INP_WLOCK_ASSERT(inp);
426	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
427
428	bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
429	inp->inp_fport = 0;
430	/* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
431	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
432	in_pcbrehash(inp);
433}
434
435struct sockaddr *
436in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
437{
438	struct sockaddr_in6 *sin6;
439
440	sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK);
441	bzero(sin6, sizeof *sin6);
442	sin6->sin6_family = AF_INET6;
443	sin6->sin6_len = sizeof(*sin6);
444	sin6->sin6_port = port;
445	sin6->sin6_addr = *addr_p;
446	(void)sa6_recoverscope(sin6); /* XXX: should catch errors */
447
448	return (struct sockaddr *)sin6;
449}
450
451struct sockaddr *
452in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
453{
454	struct sockaddr_in sin;
455	struct sockaddr_in6 *sin6_p;
456
457	bzero(&sin, sizeof sin);
458	sin.sin_family = AF_INET;
459	sin.sin_len = sizeof(sin);
460	sin.sin_port = port;
461	sin.sin_addr = *addr_p;
462
463	sin6_p = malloc(sizeof *sin6_p, M_SONAME,
464		M_WAITOK);
465	in6_sin_2_v4mapsin6(&sin, sin6_p);
466
467	return (struct sockaddr *)sin6_p;
468}
469
470int
471in6_getsockaddr(struct socket *so, struct sockaddr **nam)
472{
473	register struct inpcb *inp;
474	struct in6_addr addr;
475	in_port_t port;
476
477	inp = sotoinpcb(so);
478	KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
479
480	INP_RLOCK(inp);
481	port = inp->inp_lport;
482	addr = inp->in6p_laddr;
483	INP_RUNLOCK(inp);
484
485	*nam = in6_sockaddr(port, &addr);
486	return 0;
487}
488
489int
490in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
491{
492	struct inpcb *inp;
493	struct in6_addr addr;
494	in_port_t port;
495
496	inp = sotoinpcb(so);
497	KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
498
499	INP_RLOCK(inp);
500	port = inp->inp_fport;
501	addr = inp->in6p_faddr;
502	INP_RUNLOCK(inp);
503
504	*nam = in6_sockaddr(port, &addr);
505	return 0;
506}
507
508int
509in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
510{
511	struct	inpcb *inp;
512	int	error;
513
514	inp = sotoinpcb(so);
515	KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
516
517#ifdef INET
518	if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
519		error = in_getsockaddr(so, nam);
520		if (error == 0)
521			in6_sin_2_v4mapsin6_in_sock(nam);
522	} else
523#endif
524	{
525		/* scope issues will be handled in in6_getsockaddr(). */
526		error = in6_getsockaddr(so, nam);
527	}
528
529	return error;
530}
531
532int
533in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
534{
535	struct	inpcb *inp;
536	int	error;
537
538	inp = sotoinpcb(so);
539	KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
540
541#ifdef INET
542	if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
543		error = in_getpeeraddr(so, nam);
544		if (error == 0)
545			in6_sin_2_v4mapsin6_in_sock(nam);
546	} else
547#endif
548	/* scope issues will be handled in in6_getpeeraddr(). */
549	error = in6_getpeeraddr(so, nam);
550
551	return error;
552}
553
554/*
555 * Pass some notification to all connections of a protocol
556 * associated with address dst.  The local address and/or port numbers
557 * may be specified to limit the search.  The "usual action" will be
558 * taken, depending on the ctlinput cmd.  The caller must filter any
559 * cmds that are uninteresting (e.g., no error in the map).
560 * Call the protocol specific routine (if any) to report
561 * any errors for each matching socket.
562 */
563void
564in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
565    u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
566    int cmd, void *cmdarg,
567    struct inpcb *(*notify)(struct inpcb *, int))
568{
569	struct inpcb *inp, *inp_temp;
570	struct sockaddr_in6 sa6_src, *sa6_dst;
571	u_short	fport = fport_arg, lport = lport_arg;
572	u_int32_t flowinfo;
573	int errno;
574
575	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
576		return;
577
578	sa6_dst = (struct sockaddr_in6 *)dst;
579	if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
580		return;
581
582	/*
583	 * note that src can be NULL when we get notify by local fragmentation.
584	 */
585	sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
586	flowinfo = sa6_src.sin6_flowinfo;
587
588	/*
589	 * Redirects go to all references to the destination,
590	 * and use in6_rtchange to invalidate the route cache.
591	 * Dead host indications: also use in6_rtchange to invalidate
592	 * the cache, and deliver the error to all the sockets.
593	 * Otherwise, if we have knowledge of the local port and address,
594	 * deliver only to that socket.
595	 */
596	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
597		fport = 0;
598		lport = 0;
599		bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
600
601		if (cmd != PRC_HOSTDEAD)
602			notify = in6_rtchange;
603	}
604	errno = inet6ctlerrmap[cmd];
605	INP_INFO_WLOCK(pcbinfo);
606	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
607		INP_WLOCK(inp);
608		if ((inp->inp_vflag & INP_IPV6) == 0) {
609			INP_WUNLOCK(inp);
610			continue;
611		}
612
613		/*
614		 * If the error designates a new path MTU for a destination
615		 * and the application (associated with this socket) wanted to
616		 * know the value, notify. Note that we notify for all
617		 * disconnected sockets if the corresponding application
618		 * wanted. This is because some UDP applications keep sending
619		 * sockets disconnected.
620		 * XXX: should we avoid to notify the value to TCP sockets?
621		 */
622		if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
623		    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
624		     IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
625			ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
626					(u_int32_t *)cmdarg);
627		}
628
629		/*
630		 * Detect if we should notify the error. If no source and
631		 * destination ports are specifed, but non-zero flowinfo and
632		 * local address match, notify the error. This is the case
633		 * when the error is delivered with an encrypted buffer
634		 * by ESP. Otherwise, just compare addresses and ports
635		 * as usual.
636		 */
637		if (lport == 0 && fport == 0 && flowinfo &&
638		    inp->inp_socket != NULL &&
639		    flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
640		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
641			goto do_notify;
642		else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
643					     &sa6_dst->sin6_addr) ||
644			 inp->inp_socket == 0 ||
645			 (lport && inp->inp_lport != lport) ||
646			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
647			  !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
648					      &sa6_src.sin6_addr)) ||
649			 (fport && inp->inp_fport != fport)) {
650			INP_WUNLOCK(inp);
651			continue;
652		}
653
654	  do_notify:
655		if (notify) {
656			if ((*notify)(inp, errno))
657				INP_WUNLOCK(inp);
658		} else
659			INP_WUNLOCK(inp);
660	}
661	INP_INFO_WUNLOCK(pcbinfo);
662}
663
664/*
665 * Lookup a PCB based on the local address and port.  Caller must hold the
666 * hash lock.  No inpcb locks or references are acquired.
667 */
668struct inpcb *
669in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
670    u_short lport, int lookupflags, struct ucred *cred)
671{
672	register struct inpcb *inp;
673	int matchwild = 3, wildcard;
674
675	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
676	    ("%s: invalid lookup flags %d", __func__, lookupflags));
677
678	INP_HASH_WLOCK_ASSERT(pcbinfo);
679
680	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
681		struct inpcbhead *head;
682		/*
683		 * Look for an unconnected (wildcard foreign addr) PCB that
684		 * matches the local address and port we're looking for.
685		 */
686		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
687		    0, pcbinfo->ipi_hashmask)];
688		LIST_FOREACH(inp, head, inp_hash) {
689			/* XXX inp locking */
690			if ((inp->inp_vflag & INP_IPV6) == 0)
691				continue;
692			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
693			    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
694			    inp->inp_lport == lport) {
695				/* Found. */
696				if (cred == NULL ||
697				    prison_equal_ip6(cred->cr_prison,
698					inp->inp_cred->cr_prison))
699					return (inp);
700			}
701		}
702		/*
703		 * Not found.
704		 */
705		return (NULL);
706	} else {
707		struct inpcbporthead *porthash;
708		struct inpcbport *phd;
709		struct inpcb *match = NULL;
710		/*
711		 * Best fit PCB lookup.
712		 *
713		 * First see if this local port is in use by looking on the
714		 * port hash list.
715		 */
716		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
717		    pcbinfo->ipi_porthashmask)];
718		LIST_FOREACH(phd, porthash, phd_hash) {
719			if (phd->phd_port == lport)
720				break;
721		}
722		if (phd != NULL) {
723			/*
724			 * Port is in use by one or more PCBs. Look for best
725			 * fit.
726			 */
727			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
728				wildcard = 0;
729				if (cred != NULL &&
730				    !prison_equal_ip6(cred->cr_prison,
731					inp->inp_cred->cr_prison))
732					continue;
733				/* XXX inp locking */
734				if ((inp->inp_vflag & INP_IPV6) == 0)
735					continue;
736				if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
737					wildcard++;
738				if (!IN6_IS_ADDR_UNSPECIFIED(
739					&inp->in6p_laddr)) {
740					if (IN6_IS_ADDR_UNSPECIFIED(laddr))
741						wildcard++;
742					else if (!IN6_ARE_ADDR_EQUAL(
743					    &inp->in6p_laddr, laddr))
744						continue;
745				} else {
746					if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
747						wildcard++;
748				}
749				if (wildcard < matchwild) {
750					match = inp;
751					matchwild = wildcard;
752					if (matchwild == 0)
753						break;
754				}
755			}
756		}
757		return (match);
758	}
759}
760
761void
762in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
763{
764	struct inpcb *in6p;
765	struct ip6_moptions *im6o;
766	int i, gap;
767
768	INP_INFO_RLOCK(pcbinfo);
769	LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
770		INP_WLOCK(in6p);
771		im6o = in6p->in6p_moptions;
772		if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
773			/*
774			 * Unselect the outgoing ifp for multicast if it
775			 * is being detached.
776			 */
777			if (im6o->im6o_multicast_ifp == ifp)
778				im6o->im6o_multicast_ifp = NULL;
779			/*
780			 * Drop multicast group membership if we joined
781			 * through the interface being detached.
782			 */
783			gap = 0;
784			for (i = 0; i < im6o->im6o_num_memberships; i++) {
785				if (im6o->im6o_membership[i]->in6m_ifp ==
786				    ifp) {
787					in6_mc_leave(im6o->im6o_membership[i],
788					    NULL);
789					gap++;
790				} else if (gap != 0) {
791					im6o->im6o_membership[i - gap] =
792					    im6o->im6o_membership[i];
793				}
794			}
795			im6o->im6o_num_memberships -= gap;
796		}
797		INP_WUNLOCK(in6p);
798	}
799	INP_INFO_RUNLOCK(pcbinfo);
800}
801
/*
 * Hook called when a higher level complains about service problems.
 * Historically this invalidated cached routing information; the function
 * is now a no-op and does not touch 'in6p'.
 */
void
in6_losing(struct inpcb *in6p)
{

	/*
	 * We don't store route pointers in the routing table anymore
	 */
}
817
/*
 * Notification callback for routing changes.  Nothing is flushed here
 * any longer: 'errno' is ignored and 'inp' is handed back unchanged.
 */
struct inpcb *
in6_rtchange(struct inpcb *inp, int errno)
{

	/*
	 * We don't store route pointers in the routing table anymore
	 */
	return (inp);
}
830
#ifdef PCBGROUP
/*
 * Lookup PCB in hash list, using pcbgroup tables.
 *
 * pcbinfo     - protocol PCB database; its wildcard table (ipi_wildbase)
 *               is used for the wildcard pass.
 * pcbgroup    - group whose hash table is searched for the exact match.
 *               The group lock is taken here and held until the result
 *               has been referenced.
 * faddr/fport_arg, laddr/lport_arg
 *             - foreign and local address/port to match.
 * lookupflags - INPLOOKUP_WILDCARD enables the wildcard pass.  Exactly one
 *               of INPLOOKUP_WLOCKPCB / INPLOOKUP_RLOCKPCB must be set to
 *               select how the result is locked; anything else panics once
 *               a match has been found.
 * ifp         - not used by this function.
 *
 * Returns the matching PCB, locked as requested, or NULL if nothing
 * matched or if in_pcbrele_wlocked()/in_pcbrele_rlocked() returned
 * non-zero after the lock was acquired.
 */
static struct inpcb *
in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
    struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
    u_int lport_arg, int lookupflags, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp, *tmpinp;
	u_short fport = fport_arg, lport = lport_arg;
	int faith;

	if (faithprefix_p != NULL)
		faith = (*faithprefix_p)(laddr);
	else
		faith = 0;

	/*
	 * First look for an exact match.
	 */
	tmpinp = NULL;
	INP_GROUP_LOCK(pcbgroup);
	head = &pcbgroup->ipg_hashbase[
	    INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
	    pcbgroup->ipg_hashmask)];
	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
		/* XXX inp locking */
		if ((inp->inp_vflag & INP_IPV6) == 0)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * XXX We should be able to directly return
			 * the inp here, without any checks.
			 * Well unless both bound with SO_REUSEPORT?
			 */
			/* A jailed match wins immediately. */
			if (prison_flag(inp->inp_cred, PR_IP6))
				goto found;
			/* Otherwise remember the first non-jailed match. */
			if (tmpinp == NULL)
				tmpinp = inp;
		}
	}
	if (tmpinp != NULL) {
		inp = tmpinp;
		goto found;
	}

	/*
	 * Then look for a wildcard match, if requested.
	 */
	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
		struct inpcb *local_wild = NULL, *local_exact = NULL;
		struct inpcb *jail_wild = NULL;
		int injail;

		/*
		 * Order of socket selection - we always prefer jails.
		 *      1. jailed, non-wild.
		 *      2. jailed, wild.
		 *      3. non-jailed, non-wild.
		 *      4. non-jailed, wild.
		 */
		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
		    0, pcbinfo->ipi_wildmask)];
		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
			/* XXX inp locking */
			if ((inp->inp_vflag & INP_IPV6) == 0)
				continue;

			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
			    inp->inp_lport != lport) {
				continue;
			}

			/* XXX inp locking */
			if (faith && (inp->inp_flags & INP_FAITH) == 0)
				continue;

			injail = prison_flag(inp->inp_cred, PR_IP6);
			if (injail) {
				/* Jailed PCBs must be allowed to use laddr. */
				if (prison_check_ip6(inp->inp_cred,
				    laddr) != 0)
					continue;
			} else {
				/*
				 * A non-jailed exact match was already
				 * seen; further non-jailed PCBs cannot
				 * rank higher.
				 */
				if (local_exact != NULL)
					continue;
			}

			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
				if (injail)
					goto found;
				else
					local_exact = inp;
			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
				if (injail)
					jail_wild = inp;
				else
					local_wild = inp;
			}
		} /* LIST_FOREACH */

		/*
		 * Rank 1 was returned from inside the loop; fall back
		 * through ranks 2, 3 and 4 in that order.
		 */
		inp = jail_wild;
		if (inp == NULL)
			inp = local_exact;
		if (inp == NULL)
			inp = local_wild;
		if (inp != NULL)
			goto found;
	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
	INP_GROUP_UNLOCK(pcbgroup);
	return (NULL);

found:
	/*
	 * Take a reference before dropping the group lock, then acquire the
	 * requested PCB lock and release the reference again.  A non-zero
	 * return from in_pcbrele_*locked() is reported to the caller as
	 * "not found".
	 */
	in_pcbref(inp);
	INP_GROUP_UNLOCK(pcbgroup);
	if (lookupflags & INPLOOKUP_WLOCKPCB) {
		INP_WLOCK(inp);
		if (in_pcbrele_wlocked(inp))
			return (NULL);
	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
		INP_RLOCK(inp);
		if (in_pcbrele_rlocked(inp))
			return (NULL);
	} else
		panic("%s: locking bug", __func__);
	return (inp);
}
#endif /* PCBGROUP */
965
966/*
967 * Lookup PCB in hash list.
968 */
969struct inpcb *
970in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
971    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
972    int lookupflags, struct ifnet *ifp)
973{
974	struct inpcbhead *head;
975	struct inpcb *inp, *tmpinp;
976	u_short fport = fport_arg, lport = lport_arg;
977	int faith;
978
979	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
980	    ("%s: invalid lookup flags %d", __func__, lookupflags));
981
982	INP_HASH_LOCK_ASSERT(pcbinfo);
983
984	if (faithprefix_p != NULL)
985		faith = (*faithprefix_p)(laddr);
986	else
987		faith = 0;
988
989	/*
990	 * First look for an exact match.
991	 */
992	tmpinp = NULL;
993	head = &pcbinfo->ipi_hashbase[
994	    INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
995	    pcbinfo->ipi_hashmask)];
996	LIST_FOREACH(inp, head, inp_hash) {
997		/* XXX inp locking */
998		if ((inp->inp_vflag & INP_IPV6) == 0)
999			continue;
1000		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
1001		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
1002		    inp->inp_fport == fport &&
1003		    inp->inp_lport == lport) {
1004			/*
1005			 * XXX We should be able to directly return
1006			 * the inp here, without any checks.
1007			 * Well unless both bound with SO_REUSEPORT?
1008			 */
1009			if (prison_flag(inp->inp_cred, PR_IP6))
1010				return (inp);
1011			if (tmpinp == NULL)
1012				tmpinp = inp;
1013		}
1014	}
1015	if (tmpinp != NULL)
1016		return (tmpinp);
1017
1018	/*
1019	 * Then look for a wildcard match, if requested.
1020	 */
1021	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
1022		struct inpcb *local_wild = NULL, *local_exact = NULL;
1023		struct inpcb *jail_wild = NULL;
1024		int injail;
1025
1026		/*
1027		 * Order of socket selection - we always prefer jails.
1028		 *      1. jailed, non-wild.
1029		 *      2. jailed, wild.
1030		 *      3. non-jailed, non-wild.
1031		 *      4. non-jailed, wild.
1032		 */
1033		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1034		    0, pcbinfo->ipi_hashmask)];
1035		LIST_FOREACH(inp, head, inp_hash) {
1036			/* XXX inp locking */
1037			if ((inp->inp_vflag & INP_IPV6) == 0)
1038				continue;
1039
1040			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
1041			    inp->inp_lport != lport) {
1042				continue;
1043			}
1044
1045			/* XXX inp locking */
1046			if (faith && (inp->inp_flags & INP_FAITH) == 0)
1047				continue;
1048
1049			injail = prison_flag(inp->inp_cred, PR_IP6);
1050			if (injail) {
1051				if (prison_check_ip6(inp->inp_cred,
1052				    laddr) != 0)
1053					continue;
1054			} else {
1055				if (local_exact != NULL)
1056					continue;
1057			}
1058
1059			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
1060				if (injail)
1061					return (inp);
1062				else
1063					local_exact = inp;
1064			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1065				if (injail)
1066					jail_wild = inp;
1067				else
1068					local_wild = inp;
1069			}
1070		} /* LIST_FOREACH */
1071
1072		if (jail_wild != NULL)
1073			return (jail_wild);
1074		if (local_exact != NULL)
1075			return (local_exact);
1076		if (local_wild != NULL)
1077			return (local_wild);
1078	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
1079
1080	/*
1081	 * Not found.
1082	 */
1083	return (NULL);
1084}
1085
1086/*
1087 * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
1088 * hash list lock, and will return the inpcb locked (i.e., requires
1089 * INPLOOKUP_LOCKPCB).
1090 */
1091static struct inpcb *
1092in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1093    u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1094    struct ifnet *ifp)
1095{
1096	struct inpcb *inp;
1097
1098	INP_HASH_RLOCK(pcbinfo);
1099	inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
1100	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
1101	if (inp != NULL) {
1102		in_pcbref(inp);
1103		INP_HASH_RUNLOCK(pcbinfo);
1104		if (lookupflags & INPLOOKUP_WLOCKPCB) {
1105			INP_WLOCK(inp);
1106			if (in_pcbrele_wlocked(inp))
1107				return (NULL);
1108		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
1109			INP_RLOCK(inp);
1110			if (in_pcbrele_rlocked(inp))
1111				return (NULL);
1112		} else
1113			panic("%s: locking bug", __func__);
1114	} else
1115		INP_HASH_RUNLOCK(pcbinfo);
1116	return (inp);
1117}
1118
1119/*
1120 * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
1121 * from which a pre-calculated hash value may be extracted.
1122 *
1123 * Possibly more of this logic should be in in6_pcbgroup.c.
1124 */
1125struct inpcb *
1126in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
1127    struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
1128{
1129#if defined(PCBGROUP)
1130	struct inpcbgroup *pcbgroup;
1131#endif
1132
1133	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1134	    ("%s: invalid lookup flags %d", __func__, lookupflags));
1135	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1136	    ("%s: LOCKPCB not set", __func__));
1137
1138#if defined(PCBGROUP)
1139	if (in_pcbgroup_enabled(pcbinfo)) {
1140		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
1141		    fport);
1142		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
1143		    laddr, lport, lookupflags, ifp));
1144	}
1145#endif
1146	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1147	    lookupflags, ifp));
1148}
1149
1150struct inpcb *
1151in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1152    u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1153    struct ifnet *ifp, struct mbuf *m)
1154{
1155#ifdef PCBGROUP
1156	struct inpcbgroup *pcbgroup;
1157#endif
1158
1159	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1160	    ("%s: invalid lookup flags %d", __func__, lookupflags));
1161	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1162	    ("%s: LOCKPCB not set", __func__));
1163
1164#ifdef PCBGROUP
1165	if (in_pcbgroup_enabled(pcbinfo)) {
1166		pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
1167		    m->m_pkthdr.flowid);
1168		if (pcbgroup != NULL)
1169			return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
1170			    fport, laddr, lport, lookupflags, ifp));
1171		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
1172		    fport);
1173		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
1174		    laddr, lport, lookupflags, ifp));
1175	}
1176#endif
1177	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1178	    lookupflags, ifp));
1179}
1180
1181void
1182init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
1183{
1184	struct ip6_hdr *ip;
1185
1186	ip = mtod(m, struct ip6_hdr *);
1187	bzero(sin6, sizeof(*sin6));
1188	sin6->sin6_len = sizeof(*sin6);
1189	sin6->sin6_family = AF_INET6;
1190	sin6->sin6_addr = ip->ip6_src;
1191
1192	(void)sa6_recoverscope(sin6); /* XXX: should catch errors... */
1193
1194	return;
1195}
1196