1/*	$OpenBSD: in6_pcb.c,v 1.144 2024/04/12 16:07:09 bluhm Exp $	*/
2
3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
34 *
35 * NRL grants permission for redistribution and use in source and binary
36 * forms, with or without modification, of the software and documentation
37 * created at NRL provided that the following conditions are met:
38 *
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgements:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 *	This product includes software developed at the Information
49 *	Technology Division, US Naval Research Laboratory.
50 * 4. Neither the name of the NRL nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 *
66 * The views and conclusions contained in the software and documentation
67 * are those of the authors and should not be interpreted as representing
68 * official policies, either expressed or implied, of the US Naval
69 * Research Laboratory (NRL).
70 */
71
72/*
73 * Copyright (c) 1982, 1986, 1990, 1993, 1995
74 *	Regents of the University of California.  All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 *    notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 *    notice, this list of conditions and the following disclaimer in the
83 *    documentation and/or other materials provided with the distribution.
84 * 3. Neither the name of the University nor the names of its contributors
85 *    may be used to endorse or promote products derived from this software
86 *    without specific prior written permission.
87 *
88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
91 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
98 * SUCH DAMAGE.
99 *
100 */
101
102#include "pf.h"
103#include "stoeplitz.h"
104
105#include <sys/param.h>
106#include <sys/systm.h>
107#include <sys/mbuf.h>
108#include <sys/protosw.h>
109#include <sys/socket.h>
110#include <sys/socketvar.h>
111
112#include <net/if.h>
113#include <net/if_var.h>
114#include <net/pfvar.h>
115
116#include <netinet/in.h>
117#include <netinet6/in6_var.h>
118#include <netinet/ip.h>
119#include <netinet/ip_var.h>
120#include <netinet6/ip6_var.h>
121#include <netinet/in_pcb.h>
122
123#if NSTOEPLITZ > 0
124#include <net/toeplitz.h>
125#endif
126
127const struct in6_addr zeroin6_addr;
128
129struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int,
130    const struct in6_addr *, u_short, const struct in6_addr *, u_short);
131
132struct inpcb * in6_pcblookup_lock(struct inpcbtable *, const struct in6_addr *,
133    u_int, const struct in6_addr *, u_int, u_int, int);
134
135uint64_t
136in6_pcbhash(struct inpcbtable *table, u_int rdomain,
137    const struct in6_addr *faddr, u_short fport,
138    const struct in6_addr *laddr, u_short lport)
139{
140	SIPHASH_CTX ctx;
141	u_int32_t nrdom = htonl(rdomain);
142
143	SipHash24_Init(&ctx, &table->inpt_key);
144	SipHash24_Update(&ctx, &nrdom, sizeof(nrdom));
145	SipHash24_Update(&ctx, faddr, sizeof(*faddr));
146	SipHash24_Update(&ctx, &fport, sizeof(fport));
147	SipHash24_Update(&ctx, laddr, sizeof(*laddr));
148	SipHash24_Update(&ctx, &lport, sizeof(lport));
149	return SipHash24_End(&ctx);
150}
151
152int
153in6_pcbaddrisavail_lock(const struct inpcb *inp, struct sockaddr_in6 *sin6,
154    int wild, struct proc *p, int lock)
155{
156	struct socket *so = inp->inp_socket;
157	struct inpcbtable *table = inp->inp_table;
158	u_short lport = sin6->sin6_port;
159	int reuseport = (so->so_options & SO_REUSEPORT);
160
161	wild |= INPLOOKUP_IPV6;
162	/* KAME hack: embed scopeid */
163	if (in6_embedscope(&sin6->sin6_addr, sin6,
164	    inp->inp_outputopts6, inp->inp_moptions6) != 0)
165		return (EINVAL);
166	/* this must be cleared for ifa_ifwithaddr() */
167	sin6->sin6_scope_id = 0;
168	/* reject IPv4 mapped address, we have no support for it */
169	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
170		return (EADDRNOTAVAIL);
171
172	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
173		/*
174		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
175		 * allow complete duplication of binding if
176		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
177		 * and a multicast address is bound on both
178		 * new and duplicated sockets.
179		 */
180		if (so->so_options & (SO_REUSEADDR|SO_REUSEPORT))
181			reuseport = SO_REUSEADDR | SO_REUSEPORT;
182	} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
183		struct ifaddr *ifa = NULL;
184
185		sin6->sin6_port = 0;  /*
186				       * Yechhhh, because of upcoming
187				       * call to ifa_ifwithaddr(), which
188				       * does bcmp's over the PORTS as
189				       * well.  (What about flow?)
190				       */
191		sin6->sin6_flowinfo = 0;
192		if (!(so->so_options & SO_BINDANY) &&
193		    (ifa = ifa_ifwithaddr(sin6tosa(sin6),
194		    inp->inp_rtableid)) == NULL)
195			return (EADDRNOTAVAIL);
196		sin6->sin6_port = lport;
197
198		/*
199		 * bind to an anycast address might accidentally
200		 * cause sending a packet with an anycast source
201		 * address, so we forbid it.
202		 *
203		 * We should allow to bind to a deprecated address,
204		 * since the application dare to use it.
205		 * But, can we assume that they are careful enough
206		 * to check if the address is deprecated or not?
207		 * Maybe, as a safeguard, we should have a setsockopt
208		 * flag to control the bind(2) behavior against
209		 * deprecated addresses (default: forbid bind(2)).
210		 */
211		if (ifa && ifatoia6(ifa)->ia6_flags & (IN6_IFF_ANYCAST|
212		    IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED|IN6_IFF_DETACHED))
213			return (EADDRNOTAVAIL);
214	}
215	if (lport) {
216		struct inpcb *t;
217		int error = 0;
218
219		if (so->so_euid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
220			t = in_pcblookup_local_lock(table, &sin6->sin6_addr,
221			    lport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6,
222			    inp->inp_rtableid, lock);
223			if (t && (so->so_euid != t->inp_socket->so_euid))
224				error = EADDRINUSE;
225			if (lock == IN_PCBLOCK_GRAB)
226				in_pcbunref(t);
227			if (error)
228				return (error);
229		}
230		t = in_pcblookup_local_lock(table, &sin6->sin6_addr, lport,
231		    wild, inp->inp_rtableid, lock);
232		if (t && (reuseport & t->inp_socket->so_options) == 0)
233			error = EADDRINUSE;
234		if (lock == IN_PCBLOCK_GRAB)
235			in_pcbunref(t);
236		if (error)
237			return (error);
238	}
239	return (0);
240}
241
242int
243in6_pcbaddrisavail(const struct inpcb *inp, struct sockaddr_in6 *sin6,
244    int wild, struct proc *p)
245{
246	return in6_pcbaddrisavail_lock(inp, sin6, wild, p, IN_PCBLOCK_GRAB);
247}
248
249/*
250 * Connect from a socket to a specified address.
251 * Both address and port must be specified in argument sin6.
252 * Eventually, flow labels will have to be dealt with here, as well.
253 *
254 * If don't have a local address for this socket yet,
255 * then pick one.
256 */
257int
258in6_pcbconnect(struct inpcb *inp, struct mbuf *nam)
259{
260	struct inpcbtable *table = inp->inp_table;
261	const struct in6_addr *in6a;
262	struct sockaddr_in6 *sin6;
263	struct inpcb *t;
264	int error;
265	struct sockaddr_in6 tmp;
266
267	KASSERT(ISSET(inp->inp_flags, INP_IPV6));
268
269	if ((error = in6_nam2sin6(nam, &sin6)))
270		return (error);
271	if (sin6->sin6_port == 0)
272		return (EADDRNOTAVAIL);
273	/* reject IPv4 mapped address, we have no support for it */
274	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
275		return (EADDRNOTAVAIL);
276
277	/* protect *sin6 from overwrites */
278	tmp = *sin6;
279	sin6 = &tmp;
280
281	/* KAME hack: embed scopeid */
282	if (in6_embedscope(&sin6->sin6_addr, sin6,
283	    inp->inp_outputopts6, inp->inp_moptions6) != 0)
284		return (EINVAL);
285	/* this must be cleared for ifa_ifwithaddr() */
286	sin6->sin6_scope_id = 0;
287
288	/* Source address selection. */
289	/*
290	 * XXX: in6_selectsrc might replace the bound local address
291	 * with the address specified by setsockopt(IPV6_PKTINFO).
292	 * Is it the intended behavior?
293	 */
294	error = in6_pcbselsrc(&in6a, sin6, inp, inp->inp_outputopts6);
295	if (error)
296		return (error);
297
298	inp->inp_ipv6.ip6_hlim = (u_int8_t)in6_selecthlim(inp);
299
300	/* keep lookup, modification, and rehash in sync */
301	mtx_enter(&table->inpt_mtx);
302
303	t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
304	    sin6->sin6_port,
305	    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) ? in6a : &inp->inp_laddr6,
306	    inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD);
307	if (t != NULL) {
308		mtx_leave(&table->inpt_mtx);
309		return (EADDRINUSE);
310	}
311
312	KASSERT(IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) || inp->inp_lport);
313
314	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
315		if (inp->inp_lport == 0) {
316			error = in_pcbbind_locked(inp, NULL, in6a, curproc);
317			if (error) {
318				mtx_leave(&table->inpt_mtx);
319				return (error);
320			}
321			t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
322			    sin6->sin6_port, in6a, inp->inp_lport,
323			    inp->inp_rtableid, IN_PCBLOCK_HOLD);
324			if (t != NULL) {
325				inp->inp_lport = 0;
326				mtx_leave(&table->inpt_mtx);
327				return (EADDRINUSE);
328			}
329		}
330		inp->inp_laddr6 = *in6a;
331	}
332	inp->inp_faddr6 = sin6->sin6_addr;
333	inp->inp_fport = sin6->sin6_port;
334	in_pcbrehash(inp);
335
336	mtx_leave(&table->inpt_mtx);
337
338	inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
339	if (ip6_auto_flowlabel)
340		inp->inp_flowinfo |=
341		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
342#if NSTOEPLITZ > 0
343	inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
344	    &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
345#endif
346	return (0);
347}
348
349/*
350 * Get the local address/port, and put it in a sockaddr_in6.
351 * This services the getsockname(2) call.
352 */
353void
354in6_setsockaddr(struct inpcb *inp, struct mbuf *nam)
355{
356	struct sockaddr_in6 *sin6;
357
358	nam->m_len = sizeof(struct sockaddr_in6);
359	sin6 = mtod(nam,struct sockaddr_in6 *);
360
361	bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
362	sin6->sin6_family = AF_INET6;
363	sin6->sin6_len = sizeof(struct sockaddr_in6);
364	sin6->sin6_port = inp->inp_lport;
365	sin6->sin6_addr = inp->inp_laddr6;
366	/* KAME hack: recover scopeid */
367	in6_recoverscope(sin6, &inp->inp_laddr6);
368}
369
370/*
371 * Get the foreign address/port, and put it in a sockaddr_in6.
372 * This services the getpeername(2) call.
373 */
374void
375in6_setpeeraddr(struct inpcb *inp, struct mbuf *nam)
376{
377	struct sockaddr_in6 *sin6;
378
379	nam->m_len = sizeof(struct sockaddr_in6);
380	sin6 = mtod(nam,struct sockaddr_in6 *);
381
382	bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
383	sin6->sin6_family = AF_INET6;
384	sin6->sin6_len = sizeof(struct sockaddr_in6);
385	sin6->sin6_port = inp->inp_fport;
386	sin6->sin6_addr = inp->inp_faddr6;
387	/* KAME hack: recover scopeid */
388	in6_recoverscope(sin6, &inp->inp_faddr6);
389}
390
/*
 * Socket-level entry point: store the local address of so's PCB in nam.
 * Always returns 0.
 */
int
in6_sockaddr(struct socket *so, struct mbuf *nam)
{
	in6_setsockaddr(sotoinpcb(so), nam);
	return (0);
}
401
/*
 * Socket-level entry point: store the foreign address of so's PCB in
 * nam.  Always returns 0.
 */
int
in6_peeraddr(struct socket *so, struct mbuf *nam)
{
	in6_setpeeraddr(sotoinpcb(so), nam);
	return (0);
}
412
413/*
414 * Pass some notification to all connections of a protocol
415 * associated with address dst.  The local address and/or port numbers
416 * may be specified to limit the search.  The "usual action" will be
417 * taken, depending on the ctlinput cmd.  The caller must filter any
418 * cmds that are uninteresting (e.g., no error in the map).
419 * Call the protocol specific routine (if any) to report
420 * any errors for each matching socket.
421 *
422 * Also perform input-side security policy check
423 *    once PCB to be notified has been located.
424 */
425void
426in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst,
427    uint fport_arg, const struct sockaddr_in6 *src, uint lport_arg,
428    u_int rtable, int cmd, void *cmdarg, void (*notify)(struct inpcb *, int))
429{
430	SIMPLEQ_HEAD(, inpcb) inpcblist;
431	struct inpcb *inp;
432	u_short fport = fport_arg, lport = lport_arg;
433	struct sockaddr_in6 sa6_src;
434	int errno;
435	u_int32_t flowinfo;
436	u_int rdomain;
437
438	if ((unsigned)cmd >= PRC_NCMDS)
439		return;
440
441	if (IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr))
442		return;
443	if (IN6_IS_ADDR_V4MAPPED(&dst->sin6_addr)) {
444#ifdef DIAGNOSTIC
445		printf("%s: Huh?  Thought we never got "
446		       "called with mapped!\n", __func__);
447#endif
448		return;
449	}
450
451	/*
452	 * note that src can be NULL when we get notify by local fragmentation.
453	 */
454	sa6_src = (src == NULL) ? sa6_any : *src;
455	flowinfo = sa6_src.sin6_flowinfo;
456
457	/*
458	 * Redirects go to all references to the destination,
459	 * and use in_rtchange to invalidate the route cache.
460	 * Dead host indications: also use in_rtchange to invalidate
461	 * the cache, and deliver the error to all the sockets.
462	 * Otherwise, if we have knowledge of the local port and address,
463	 * deliver only to that socket.
464	 */
465	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
466		fport = 0;
467		lport = 0;
468		sa6_src.sin6_addr = in6addr_any;
469
470		if (cmd != PRC_HOSTDEAD)
471			notify = in_rtchange;
472	}
473	errno = inet6ctlerrmap[cmd];
474	if (notify == NULL)
475		return;
476
477	SIMPLEQ_INIT(&inpcblist);
478	rdomain = rtable_l2(rtable);
479	rw_enter_write(&table->inpt_notify);
480	mtx_enter(&table->inpt_mtx);
481	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
482		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
483
484		/*
485		 * Under the following condition, notify of redirects
486		 * to the pcb, without making address matches against inpcb.
487		 * - redirect notification is arrived.
488		 * - the inpcb is unconnected.
489		 * - the inpcb is caching !RTF_HOST routing entry.
490		 * - the ICMPv6 notification is from the gateway cached in the
491		 *   inpcb.  i.e. ICMPv6 notification is from nexthop gateway
492		 *   the inpcb used very recently.
493		 *
494		 * This is to improve interaction between netbsd/openbsd
495		 * redirect handling code, and inpcb route cache code.
496		 * without the clause, !RTF_HOST routing entry (which carries
497		 * gateway used by inpcb right before the ICMPv6 redirect)
498		 * will be cached forever in unconnected inpcb.
499		 *
500		 * There still is a question regarding to what is TRT:
501		 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be
502		 *   generated on packet output.  inpcb will always cache
503		 *   RTF_HOST routing entry so there's no need for the clause
504		 *   (ICMPv6 redirect will update RTF_HOST routing entry,
505		 *   and inpcb is caching it already).
506		 *   However, bsdi/freebsd are vulnerable to local DoS attacks
507		 *   due to the cloned routing entries.
508		 * - Specwise, "destination cache" is mentioned in RFC2461.
509		 *   Jinmei says that it implies bsdi/freebsd behavior, itojun
510		 *   is not really convinced.
511		 * - Having hiwat/lowat on # of cloned host route (redirect/
512		 *   pmtud) may be a good idea.  netbsd/openbsd has it.  see
513		 *   icmp6_mtudisc_update().
514		 */
515		if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) &&
516		    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) &&
517		    inp->inp_route.ro_rt &&
518		    !(inp->inp_route.ro_rt->rt_flags & RTF_HOST) &&
519		    IN6_ARE_ADDR_EQUAL(&inp->inp_route.ro_dstsin6.sin6_addr,
520		    &dst->sin6_addr)) {
521			goto do_notify;
522		}
523
524		/*
525		 * Detect if we should notify the error. If no source and
526		 * destination ports are specified, but non-zero flowinfo and
527		 * local address match, notify the error. This is the case
528		 * when the error is delivered with an encrypted buffer
529		 * by ESP. Otherwise, just compare addresses and ports
530		 * as usual.
531		 */
532		if (lport == 0 && fport == 0 && flowinfo &&
533		    flowinfo == (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK) &&
534		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, &sa6_src.sin6_addr))
535			goto do_notify;
536		else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
537					     &dst->sin6_addr) ||
538			 rtable_l2(inp->inp_rtableid) != rdomain ||
539			 (lport && inp->inp_lport != lport) ||
540			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
541			  !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6,
542					      &sa6_src.sin6_addr)) ||
543			 (fport && inp->inp_fport != fport)) {
544			continue;
545		}
546	  do_notify:
547		in_pcbref(inp);
548		SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
549	}
550	mtx_leave(&table->inpt_mtx);
551
552	while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
553		SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
554		(*notify)(inp, errno);
555		in_pcbunref(inp);
556	}
557	rw_exit_write(&table->inpt_notify);
558}
559
560struct rtentry *
561in6_pcbrtentry(struct inpcb *inp)
562{
563	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
564		return (NULL);
565	return (route6_mpath(&inp->inp_route, &inp->inp_faddr6,
566	    &inp->inp_laddr6, inp->inp_rtableid));
567}
568
569struct inpcb *
570in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain,
571    const struct in6_addr *faddr, u_short fport,
572    const struct in6_addr *laddr, u_short lport)
573{
574	struct inpcbhead *head;
575	struct inpcb *inp;
576
577	NET_ASSERT_LOCKED();
578	MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
579
580	head = &table->inpt_hashtbl[hash & table->inpt_mask];
581	LIST_FOREACH(inp, head, inp_hash) {
582		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
583
584		if (inp->inp_fport == fport && inp->inp_lport == lport &&
585		    IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
586		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
587		    rtable_l2(inp->inp_rtableid) == rdomain) {
588			break;
589		}
590	}
591	if (inp != NULL) {
592		/*
593		 * Move this PCB to the head of hash chain so that
594		 * repeated accesses are quicker.  This is analogous to
595		 * the historic single-entry PCB cache.
596		 */
597		if (inp != LIST_FIRST(head)) {
598			LIST_REMOVE(inp, inp_hash);
599			LIST_INSERT_HEAD(head, inp, inp_hash);
600		}
601	}
602	return (inp);
603}
604
605struct inpcb *
606in6_pcblookup_lock(struct inpcbtable *table, const struct in6_addr *faddr,
607    u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable,
608    int lock)
609{
610	struct inpcb *inp;
611	uint64_t hash;
612	u_int rdomain;
613
614	rdomain = rtable_l2(rtable);
615	hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport);
616
617	if (lock == IN_PCBLOCK_GRAB) {
618		mtx_enter(&table->inpt_mtx);
619	} else {
620		KASSERT(lock == IN_PCBLOCK_HOLD);
621		MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
622	}
623	inp = in6_pcbhash_lookup(table, hash, rdomain,
624	    faddr, fport, laddr, lport);
625	if (lock == IN_PCBLOCK_GRAB) {
626		in_pcbref(inp);
627		mtx_leave(&table->inpt_mtx);
628	}
629
630#ifdef DIAGNOSTIC
631	if (inp == NULL && in_pcbnotifymiss) {
632		printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n",
633		    __func__, ntohs(fport), ntohs(lport), rdomain);
634	}
635#endif
636	return (inp);
637}
638
639struct inpcb *
640in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr,
641    u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable)
642{
643	return in6_pcblookup_lock(table, faddr, fport, laddr, lport, rtable,
644	    IN_PCBLOCK_GRAB);
645}
646
647struct inpcb *
648in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
649    u_int lport, struct mbuf *m, u_int rtable)
650{
651	const struct in6_addr *key1, *key2;
652	struct inpcb *inp;
653	uint64_t hash;
654	u_int rdomain;
655
656	key1 = laddr;
657	key2 = &zeroin6_addr;
658#if NPF > 0
659	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
660		struct pf_divert *divert;
661
662		divert = pf_find_divert(m);
663		KASSERT(divert != NULL);
664		switch (divert->type) {
665		case PF_DIVERT_TO:
666			key1 = key2 = &divert->addr.v6;
667			lport = divert->port;
668			break;
669		case PF_DIVERT_REPLY:
670			return (NULL);
671		default:
672			panic("%s: unknown divert type %d, mbuf %p, divert %p",
673			    __func__, divert->type, m, divert);
674		}
675	} else if (m && m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST) {
676		/*
677		 * Redirected connections should not be treated the same
678		 * as connections directed to ::1 since localhost
679		 * can only be accessed from the host itself.
680		 */
681		key1 = &zeroin6_addr;
682		key2 = laddr;
683	}
684#endif
685
686	rdomain = rtable_l2(rtable);
687	hash = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport);
688
689	mtx_enter(&table->inpt_mtx);
690	inp = in6_pcbhash_lookup(table, hash, rdomain,
691	    &zeroin6_addr, 0, key1, lport);
692	if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
693		hash = in6_pcbhash(table, rdomain,
694		    &zeroin6_addr, 0, key2, lport);
695		inp = in6_pcbhash_lookup(table, hash, rdomain,
696		    &zeroin6_addr, 0, key2, lport);
697	}
698	in_pcbref(inp);
699	mtx_leave(&table->inpt_mtx);
700
701#ifdef DIAGNOSTIC
702	if (inp == NULL && in_pcbnotifymiss) {
703		printf("%s: laddr= lport=%d rdom=%u\n",
704		    __func__, ntohs(lport), rdomain);
705	}
706#endif
707	return (inp);
708}
709