Deleted Added
full compact
in_pcb.c (215317) in_pcb.c (215701)
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2007-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
32 */
33
34#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2007-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/netinet/in_pcb.c 215317 2010-11-14 20:38:11Z dim $");
35__FBSDID("$FreeBSD: head/sys/netinet/in_pcb.c 215701 2010-11-22 19:32:54Z dim $");
36
37#include "opt_ddb.h"
38#include "opt_ipsec.h"
39#include "opt_inet6.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/domain.h>
46#include <sys/protosw.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/jail.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54
55#ifdef DDB
56#include <ddb/ddb.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <net/if.h>
62#include <net/if_types.h>
63#include <net/route.h>
64#include <net/vnet.h>
65
66#include <netinet/in.h>
67#include <netinet/in_pcb.h>
68#include <netinet/in_var.h>
69#include <netinet/ip_var.h>
70#include <netinet/tcp_var.h>
71#include <netinet/udp.h>
72#include <netinet/udp_var.h>
73#ifdef INET6
74#include <netinet/ip6.h>
75#include <netinet6/ip6_var.h>
76#endif /* INET6 */
77
78
79#ifdef IPSEC
80#include <netipsec/ipsec.h>
81#include <netipsec/key.h>
82#endif /* IPSEC */
83
84#include <security/mac/mac_framework.h>
85
86/*
87 * These configure the range of local port addresses assigned to
88 * "unspecified" outgoing connections/packets/whatever.
89 */
90VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */
91VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */
92VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */
93VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */
94VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */
95VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */
96
97/*
98 * Reserved ports accessible only to root. There are significant
99 * security considerations that must be accounted for when changing these,
100 * but the security benefits can be great. Please be careful.
101 */
102VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */
103VNET_DEFINE(int, ipport_reservedlow);
104
105/* Variables dealing with random ephemeral port allocation. */
106VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */
107VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
108VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
109VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
110VNET_DEFINE(int, ipport_tcpallocs);
36
37#include "opt_ddb.h"
38#include "opt_ipsec.h"
39#include "opt_inet6.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/domain.h>
46#include <sys/protosw.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/jail.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54
55#ifdef DDB
56#include <ddb/ddb.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <net/if.h>
62#include <net/if_types.h>
63#include <net/route.h>
64#include <net/vnet.h>
65
66#include <netinet/in.h>
67#include <netinet/in_pcb.h>
68#include <netinet/in_var.h>
69#include <netinet/ip_var.h>
70#include <netinet/tcp_var.h>
71#include <netinet/udp.h>
72#include <netinet/udp_var.h>
73#ifdef INET6
74#include <netinet/ip6.h>
75#include <netinet6/ip6_var.h>
76#endif /* INET6 */
77
78
79#ifdef IPSEC
80#include <netipsec/ipsec.h>
81#include <netipsec/key.h>
82#endif /* IPSEC */
83
84#include <security/mac/mac_framework.h>
85
86/*
87 * These configure the range of local port addresses assigned to
88 * "unspecified" outgoing connections/packets/whatever.
89 */
90VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */
91VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */
92VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */
93VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */
94VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */
95VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */
96
97/*
98 * Reserved ports accessible only to root. There are significant
99 * security considerations that must be accounted for when changing these,
100 * but the security benefits can be great. Please be careful.
101 */
102VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */
103VNET_DEFINE(int, ipport_reservedlow);
104
105/* Variables dealing with random ephemeral port allocation. */
106VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */
107VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
108VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
109VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
110VNET_DEFINE(int, ipport_tcpallocs);
111STATIC_VNET_DEFINE(int, ipport_tcplastcount);
111static VNET_DEFINE(int, ipport_tcplastcount);
112
113#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
114
/*
 * Clamp the lvalue 'var' into the inclusive range [min, max]: values below
 * 'min' are raised to 'min', values above 'max' are lowered to 'max'.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the unbraced body of an
 * if/else.  Note that 'var' is evaluated more than once, so it must not
 * have side effects.
 */
#define	RANGECHK(var, min, max) do {					\
	if ((var) < (min))						\
		(var) = (min);						\
	else if ((var) > (max))						\
		(var) = (max);						\
} while (0)
118
119static void in_pcbremlists(struct inpcb *inp);
120
121static int
122sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
123{
124 int error;
125
126#ifdef VIMAGE
127 error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
128#else
129 error = sysctl_handle_int(oidp, arg1, arg2, req);
130#endif
131 if (error == 0) {
132 RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
133 RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
134 RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
135 RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
136 RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
137 RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
138 }
139 return (error);
140}
141
142#undef RANGECHK
143
144SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
145
146SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
147 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
148 &sysctl_net_ipport_check, "I", "");
149SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
150 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
151 &sysctl_net_ipport_check, "I", "");
152SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
153 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
154 &sysctl_net_ipport_check, "I", "");
155SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
156 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
157 &sysctl_net_ipport_check, "I", "");
158SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
159 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
160 &sysctl_net_ipport_check, "I", "");
161SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
162 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
163 &sysctl_net_ipport_check, "I", "");
164SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
165 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
166SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
167 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
168SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
169 &VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
170SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
171 &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
172 "allocations before switching to a sequental one");
173SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
174 &VNET_NAME(ipport_randomtime), 0,
175 "Minimum time to keep sequental port "
176 "allocation before switching to a random one");
177
178/*
179 * in_pcb.c: manage the Protocol Control Blocks.
180 *
181 * NOTE: It is assumed that most of these functions will be called with
182 * the pcbinfo lock held, and often, the inpcb lock held, as these utility
183 * functions often modify hash chains or addresses in pcbs.
184 */
185
186/*
187 * Initialize an inpcbinfo -- we should be able to reduce the number of
188 * arguments in time.
189 */
190void
191in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
192 struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
193 char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
194 uint32_t inpcbzone_flags)
195{
196
197 INP_INFO_LOCK_INIT(pcbinfo, name);
198#ifdef VIMAGE
199 pcbinfo->ipi_vnet = curvnet;
200#endif
201 pcbinfo->ipi_listhead = listhead;
202 LIST_INIT(pcbinfo->ipi_listhead);
203 pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
204 &pcbinfo->ipi_hashmask);
205 pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
206 &pcbinfo->ipi_porthashmask);
207 pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
208 NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
209 inpcbzone_flags);
210 uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
211}
212
213/*
214 * Destroy an inpcbinfo.
215 */
216void
217in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
218{
219
220 hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
221 hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
222 pcbinfo->ipi_porthashmask);
223 uma_zdestroy(pcbinfo->ipi_zone);
224 INP_INFO_LOCK_DESTROY(pcbinfo);
225}
226
227/*
228 * Allocate a PCB and associate it with the socket.
229 * On success return with the PCB locked.
230 */
231int
232in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
233{
234 struct inpcb *inp;
235 int error;
236
237 INP_INFO_WLOCK_ASSERT(pcbinfo);
238 error = 0;
239 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
240 if (inp == NULL)
241 return (ENOBUFS);
242 bzero(inp, inp_zero_size);
243 inp->inp_pcbinfo = pcbinfo;
244 inp->inp_socket = so;
245 inp->inp_cred = crhold(so->so_cred);
246 inp->inp_inc.inc_fibnum = so->so_fibnum;
247#ifdef MAC
248 error = mac_inpcb_init(inp, M_NOWAIT);
249 if (error != 0)
250 goto out;
251 mac_inpcb_create(so, inp);
252#endif
253#ifdef IPSEC
254 error = ipsec_init_policy(so, &inp->inp_sp);
255 if (error != 0) {
256#ifdef MAC
257 mac_inpcb_destroy(inp);
258#endif
259 goto out;
260 }
261#endif /*IPSEC*/
262#ifdef INET6
263 if (INP_SOCKAF(so) == AF_INET6) {
264 inp->inp_vflag |= INP_IPV6PROTO;
265 if (V_ip6_v6only)
266 inp->inp_flags |= IN6P_IPV6_V6ONLY;
267 }
268#endif
269 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
270 pcbinfo->ipi_count++;
271 so->so_pcb = (caddr_t)inp;
272#ifdef INET6
273 if (V_ip6_auto_flowlabel)
274 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
275#endif
276 INP_WLOCK(inp);
277 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
278 inp->inp_refcount = 1; /* Reference from the inpcbinfo */
279#if defined(IPSEC) || defined(MAC)
280out:
281 if (error != 0) {
282 crfree(inp->inp_cred);
283 uma_zfree(pcbinfo->ipi_zone, inp);
284 }
285#endif
286 return (error);
287}
288
289int
290in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
291{
292 int anonport, error;
293
294 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
295 INP_WLOCK_ASSERT(inp);
296
297 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
298 return (EINVAL);
299 anonport = inp->inp_lport == 0 && (nam == NULL ||
300 ((struct sockaddr_in *)nam)->sin_port == 0);
301 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
302 &inp->inp_lport, cred);
303 if (error)
304 return (error);
305 if (in_pcbinshash(inp) != 0) {
306 inp->inp_laddr.s_addr = INADDR_ANY;
307 inp->inp_lport = 0;
308 return (EAGAIN);
309 }
310 if (anonport)
311 inp->inp_flags |= INP_ANONPORT;
312 return (0);
313}
314
315/*
316 * Set up a bind operation on a PCB, performing port allocation
317 * as required, but do not actually modify the PCB. Callers can
318 * either complete the bind by setting inp_laddr/inp_lport and
319 * calling in_pcbinshash(), or they can just use the resulting
320 * port and address to authorise the sending of a once-off packet.
321 *
322 * On error, the values of *laddrp and *lportp are not changed.
323 */
324int
325in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
326 u_short *lportp, struct ucred *cred)
327{
328 struct socket *so = inp->inp_socket;
329 unsigned short *lastport;
330 struct sockaddr_in *sin;
331 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
332 struct in_addr laddr;
333 u_short lport = 0;
334 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
335 int error;
336 int dorandom;
337
338 /*
339 * Because no actual state changes occur here, a global write lock on
340 * the pcbinfo isn't required.
341 */
342 INP_INFO_LOCK_ASSERT(pcbinfo);
343 INP_LOCK_ASSERT(inp);
344
345 if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
346 return (EADDRNOTAVAIL);
347 laddr.s_addr = *laddrp;
348 if (nam != NULL && laddr.s_addr != INADDR_ANY)
349 return (EINVAL);
350 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
351 wild = INPLOOKUP_WILDCARD;
352 if (nam == NULL) {
353 if ((error = prison_local_ip4(cred, &laddr)) != 0)
354 return (error);
355 } else {
356 sin = (struct sockaddr_in *)nam;
357 if (nam->sa_len != sizeof (*sin))
358 return (EINVAL);
359#ifdef notdef
360 /*
361 * We should check the family, but old programs
362 * incorrectly fail to initialize it.
363 */
364 if (sin->sin_family != AF_INET)
365 return (EAFNOSUPPORT);
366#endif
367 error = prison_local_ip4(cred, &sin->sin_addr);
368 if (error)
369 return (error);
370 if (sin->sin_port != *lportp) {
371 /* Don't allow the port to change. */
372 if (*lportp != 0)
373 return (EINVAL);
374 lport = sin->sin_port;
375 }
376 /* NB: lport is left as 0 if the port isn't being changed. */
377 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
378 /*
379 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
380 * allow complete duplication of binding if
381 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
382 * and a multicast address is bound on both
383 * new and duplicated sockets.
384 */
385 if (so->so_options & SO_REUSEADDR)
386 reuseport = SO_REUSEADDR|SO_REUSEPORT;
387 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
388 sin->sin_port = 0; /* yech... */
389 bzero(&sin->sin_zero, sizeof(sin->sin_zero));
390 /*
391 * Is the address a local IP address?
392 * If INP_BINDANY is set, then the socket may be bound
393 * to any endpoint address, local or not.
394 */
395 if ((inp->inp_flags & INP_BINDANY) == 0 &&
396 ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
397 return (EADDRNOTAVAIL);
398 }
399 laddr = sin->sin_addr;
400 if (lport) {
401 struct inpcb *t;
402 struct tcptw *tw;
403
404 /* GROSS */
405 if (ntohs(lport) <= V_ipport_reservedhigh &&
406 ntohs(lport) >= V_ipport_reservedlow &&
407 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
408 0))
409 return (EACCES);
410 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
411 priv_check_cred(inp->inp_cred,
412 PRIV_NETINET_REUSEPORT, 0) != 0) {
413 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
414 lport, INPLOOKUP_WILDCARD, cred);
415 /*
416 * XXX
417 * This entire block sorely needs a rewrite.
418 */
419 if (t &&
420 ((t->inp_flags & INP_TIMEWAIT) == 0) &&
421 (so->so_type != SOCK_STREAM ||
422 ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
423 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
424 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
425 (t->inp_socket->so_options &
426 SO_REUSEPORT) == 0) &&
427 (inp->inp_cred->cr_uid !=
428 t->inp_cred->cr_uid))
429 return (EADDRINUSE);
430 }
431 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
432 lport, wild, cred);
433 if (t && (t->inp_flags & INP_TIMEWAIT)) {
434 /*
435 * XXXRW: If an incpb has had its timewait
436 * state recycled, we treat the address as
437 * being in use (for now). This is better
438 * than a panic, but not desirable.
439 */
440 tw = intotw(inp);
441 if (tw == NULL ||
442 (reuseport & tw->tw_so_options) == 0)
443 return (EADDRINUSE);
444 } else if (t &&
445 (reuseport & t->inp_socket->so_options) == 0) {
446#ifdef INET6
447 if (ntohl(sin->sin_addr.s_addr) !=
448 INADDR_ANY ||
449 ntohl(t->inp_laddr.s_addr) !=
450 INADDR_ANY ||
451 INP_SOCKAF(so) ==
452 INP_SOCKAF(t->inp_socket))
453#endif
454 return (EADDRINUSE);
455 }
456 }
457 }
458 if (*lportp != 0)
459 lport = *lportp;
460 if (lport == 0) {
461 u_short first, last, aux;
462 int count;
463
464 if (inp->inp_flags & INP_HIGHPORT) {
465 first = V_ipport_hifirstauto; /* sysctl */
466 last = V_ipport_hilastauto;
467 lastport = &pcbinfo->ipi_lasthi;
468 } else if (inp->inp_flags & INP_LOWPORT) {
469 error = priv_check_cred(cred,
470 PRIV_NETINET_RESERVEDPORT, 0);
471 if (error)
472 return error;
473 first = V_ipport_lowfirstauto; /* 1023 */
474 last = V_ipport_lowlastauto; /* 600 */
475 lastport = &pcbinfo->ipi_lastlow;
476 } else {
477 first = V_ipport_firstauto; /* sysctl */
478 last = V_ipport_lastauto;
479 lastport = &pcbinfo->ipi_lastport;
480 }
481 /*
482 * For UDP, use random port allocation as long as the user
483 * allows it. For TCP (and as of yet unknown) connections,
484 * use random port allocation only if the user allows it AND
485 * ipport_tick() allows it.
486 */
487 if (V_ipport_randomized &&
488 (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
489 dorandom = 1;
490 else
491 dorandom = 0;
492 /*
493 * It makes no sense to do random port allocation if
494 * we have the only port available.
495 */
496 if (first == last)
497 dorandom = 0;
498 /* Make sure to not include UDP packets in the count. */
499 if (pcbinfo != &V_udbinfo)
500 V_ipport_tcpallocs++;
501 /*
502 * Instead of having two loops further down counting up or down
503 * make sure that first is always <= last and go with only one
504 * code path implementing all logic.
505 */
506 if (first > last) {
507 aux = first;
508 first = last;
509 last = aux;
510 }
511
512 if (dorandom)
513 *lastport = first +
514 (arc4random() % (last - first));
515
516 count = last - first;
517
518 do {
519 if (count-- < 0) /* completely used? */
520 return (EADDRNOTAVAIL);
521 ++*lastport;
522 if (*lastport < first || *lastport > last)
523 *lastport = first;
524 lport = htons(*lastport);
525 } while (in_pcblookup_local(pcbinfo, laddr,
526 lport, wild, cred));
527 }
528 *laddrp = laddr.s_addr;
529 *lportp = lport;
530 return (0);
531}
532
533/*
534 * Connect from a socket to a specified address.
535 * Both address and port must be specified in argument sin.
536 * If don't have a local address for this socket yet,
537 * then pick one.
538 */
539int
540in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
541{
542 u_short lport, fport;
543 in_addr_t laddr, faddr;
544 int anonport, error;
545
546 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
547 INP_WLOCK_ASSERT(inp);
548
549 lport = inp->inp_lport;
550 laddr = inp->inp_laddr.s_addr;
551 anonport = (lport == 0);
552 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
553 NULL, cred);
554 if (error)
555 return (error);
556
557 /* Do the initial binding of the local address if required. */
558 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
559 inp->inp_lport = lport;
560 inp->inp_laddr.s_addr = laddr;
561 if (in_pcbinshash(inp) != 0) {
562 inp->inp_laddr.s_addr = INADDR_ANY;
563 inp->inp_lport = 0;
564 return (EAGAIN);
565 }
566 }
567
568 /* Commit the remaining changes. */
569 inp->inp_lport = lport;
570 inp->inp_laddr.s_addr = laddr;
571 inp->inp_faddr.s_addr = faddr;
572 inp->inp_fport = fport;
573 in_pcbrehash(inp);
574
575 if (anonport)
576 inp->inp_flags |= INP_ANONPORT;
577 return (0);
578}
579
580/*
581 * Do proper source address selection on an unbound socket in case
582 * of connect. Take jails into account as well.
583 */
584static int
585in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
586 struct ucred *cred)
587{
588 struct ifaddr *ifa;
589 struct sockaddr *sa;
590 struct sockaddr_in *sin;
591 struct route sro;
592 int error;
593
594 KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
595
596 /*
597 * Bypass source address selection and use the primary jail IP
598 * if requested.
599 */
600 if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
601 return (0);
602
603 error = 0;
604 bzero(&sro, sizeof(sro));
605
606 sin = (struct sockaddr_in *)&sro.ro_dst;
607 sin->sin_family = AF_INET;
608 sin->sin_len = sizeof(struct sockaddr_in);
609 sin->sin_addr.s_addr = faddr->s_addr;
610
611 /*
612 * If route is known our src addr is taken from the i/f,
613 * else punt.
614 *
615 * Find out route to destination.
616 */
617 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
618 in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
619
620 /*
621 * If we found a route, use the address corresponding to
622 * the outgoing interface.
623 *
624 * Otherwise assume faddr is reachable on a directly connected
625 * network and try to find a corresponding interface to take
626 * the source address from.
627 */
628 if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
629 struct in_ifaddr *ia;
630 struct ifnet *ifp;
631
632 ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
633 if (ia == NULL)
634 ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0));
635 if (ia == NULL) {
636 error = ENETUNREACH;
637 goto done;
638 }
639
640 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
641 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
642 ifa_free(&ia->ia_ifa);
643 goto done;
644 }
645
646 ifp = ia->ia_ifp;
647 ifa_free(&ia->ia_ifa);
648 ia = NULL;
649 IF_ADDR_LOCK(ifp);
650 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
651
652 sa = ifa->ifa_addr;
653 if (sa->sa_family != AF_INET)
654 continue;
655 sin = (struct sockaddr_in *)sa;
656 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
657 ia = (struct in_ifaddr *)ifa;
658 break;
659 }
660 }
661 if (ia != NULL) {
662 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
663 IF_ADDR_UNLOCK(ifp);
664 goto done;
665 }
666 IF_ADDR_UNLOCK(ifp);
667
668 /* 3. As a last resort return the 'default' jail address. */
669 error = prison_get_ip4(cred, laddr);
670 goto done;
671 }
672
673 /*
674 * If the outgoing interface on the route found is not
675 * a loopback interface, use the address from that interface.
676 * In case of jails do those three steps:
677 * 1. check if the interface address belongs to the jail. If so use it.
678 * 2. check if we have any address on the outgoing interface
679 * belonging to this jail. If so use it.
680 * 3. as a last resort return the 'default' jail address.
681 */
682 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
683 struct in_ifaddr *ia;
684 struct ifnet *ifp;
685
686 /* If not jailed, use the default returned. */
687 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
688 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
689 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
690 goto done;
691 }
692
693 /* Jailed. */
694 /* 1. Check if the iface address belongs to the jail. */
695 sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
696 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
697 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
698 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
699 goto done;
700 }
701
702 /*
703 * 2. Check if we have any address on the outgoing interface
704 * belonging to this jail.
705 */
706 ia = NULL;
707 ifp = sro.ro_rt->rt_ifp;
708 IF_ADDR_LOCK(ifp);
709 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
710 sa = ifa->ifa_addr;
711 if (sa->sa_family != AF_INET)
712 continue;
713 sin = (struct sockaddr_in *)sa;
714 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
715 ia = (struct in_ifaddr *)ifa;
716 break;
717 }
718 }
719 if (ia != NULL) {
720 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
721 IF_ADDR_UNLOCK(ifp);
722 goto done;
723 }
724 IF_ADDR_UNLOCK(ifp);
725
726 /* 3. As a last resort return the 'default' jail address. */
727 error = prison_get_ip4(cred, laddr);
728 goto done;
729 }
730
731 /*
732 * The outgoing interface is marked with 'loopback net', so a route
733 * to ourselves is here.
734 * Try to find the interface of the destination address and then
735 * take the address from there. That interface is not necessarily
736 * a loopback interface.
737 * In case of jails, check that it is an address of the jail
738 * and if we cannot find, fall back to the 'default' jail address.
739 */
740 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
741 struct sockaddr_in sain;
742 struct in_ifaddr *ia;
743
744 bzero(&sain, sizeof(struct sockaddr_in));
745 sain.sin_family = AF_INET;
746 sain.sin_len = sizeof(struct sockaddr_in);
747 sain.sin_addr.s_addr = faddr->s_addr;
748
749 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
750 if (ia == NULL)
751 ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
752 if (ia == NULL)
753 ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
754
755 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
756 if (ia == NULL) {
757 error = ENETUNREACH;
758 goto done;
759 }
760 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
761 ifa_free(&ia->ia_ifa);
762 goto done;
763 }
764
765 /* Jailed. */
766 if (ia != NULL) {
767 struct ifnet *ifp;
768
769 ifp = ia->ia_ifp;
770 ifa_free(&ia->ia_ifa);
771 ia = NULL;
772 IF_ADDR_LOCK(ifp);
773 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
774
775 sa = ifa->ifa_addr;
776 if (sa->sa_family != AF_INET)
777 continue;
778 sin = (struct sockaddr_in *)sa;
779 if (prison_check_ip4(cred,
780 &sin->sin_addr) == 0) {
781 ia = (struct in_ifaddr *)ifa;
782 break;
783 }
784 }
785 if (ia != NULL) {
786 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
787 IF_ADDR_UNLOCK(ifp);
788 goto done;
789 }
790 IF_ADDR_UNLOCK(ifp);
791 }
792
793 /* 3. As a last resort return the 'default' jail address. */
794 error = prison_get_ip4(cred, laddr);
795 goto done;
796 }
797
798done:
799 if (sro.ro_rt != NULL)
800 RTFREE(sro.ro_rt);
801 return (error);
802}
803
804/*
805 * Set up for a connect from a socket to the specified address.
806 * On entry, *laddrp and *lportp should contain the current local
807 * address and port for the PCB; these are updated to the values
808 * that should be placed in inp_laddr and inp_lport to complete
809 * the connect.
810 *
811 * On success, *faddrp and *fportp will be set to the remote address
812 * and port. These are not updated in the error case.
813 *
814 * If the operation fails because the connection already exists,
815 * *oinpp will be set to the PCB of that connection so that the
816 * caller can decide to override it. In all other cases, *oinpp
817 * is set to NULL.
818 */
819int
820in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
821 in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
822 struct inpcb **oinpp, struct ucred *cred)
823{
824 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
825 struct in_ifaddr *ia;
826 struct inpcb *oinp;
827 struct in_addr laddr, faddr;
828 u_short lport, fport;
829 int error;
830
831 /*
832 * Because a global state change doesn't actually occur here, a read
833 * lock is sufficient.
834 */
835 INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
836 INP_LOCK_ASSERT(inp);
837
838 if (oinpp != NULL)
839 *oinpp = NULL;
840 if (nam->sa_len != sizeof (*sin))
841 return (EINVAL);
842 if (sin->sin_family != AF_INET)
843 return (EAFNOSUPPORT);
844 if (sin->sin_port == 0)
845 return (EADDRNOTAVAIL);
846 laddr.s_addr = *laddrp;
847 lport = *lportp;
848 faddr = sin->sin_addr;
849 fport = sin->sin_port;
850
851 if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
852 /*
853 * If the destination address is INADDR_ANY,
854 * use the primary local address.
855 * If the supplied address is INADDR_BROADCAST,
856 * and the primary interface supports broadcast,
857 * choose the broadcast address for that interface.
858 */
859 if (faddr.s_addr == INADDR_ANY) {
860 IN_IFADDR_RLOCK();
861 faddr =
862 IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
863 IN_IFADDR_RUNLOCK();
864 if (cred != NULL &&
865 (error = prison_get_ip4(cred, &faddr)) != 0)
866 return (error);
867 } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
868 IN_IFADDR_RLOCK();
869 if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
870 IFF_BROADCAST)
871 faddr = satosin(&TAILQ_FIRST(
872 &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
873 IN_IFADDR_RUNLOCK();
874 }
875 }
876 if (laddr.s_addr == INADDR_ANY) {
877 /*
878 * If the destination address is multicast and an outgoing
879 * interface has been set as a multicast option, use the
880 * address of that interface as our source address.
881 */
882 if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
883 inp->inp_moptions != NULL) {
884 struct ip_moptions *imo;
885 struct ifnet *ifp;
886
887 imo = inp->inp_moptions;
888 if (imo->imo_multicast_ifp != NULL) {
889 ifp = imo->imo_multicast_ifp;
890 IN_IFADDR_RLOCK();
891 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
892 if (ia->ia_ifp == ifp)
893 break;
894 if (ia == NULL) {
895 IN_IFADDR_RUNLOCK();
896 return (EADDRNOTAVAIL);
897 }
898 laddr = ia->ia_addr.sin_addr;
899 IN_IFADDR_RUNLOCK();
900 }
901 } else {
902 error = in_pcbladdr(inp, &faddr, &laddr, cred);
903 if (error)
904 return (error);
905 }
906 }
907 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
908 0, NULL);
909 if (oinp != NULL) {
910 if (oinpp != NULL)
911 *oinpp = oinp;
912 return (EADDRINUSE);
913 }
914 if (lport == 0) {
915 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
916 cred);
917 if (error)
918 return (error);
919 }
920 *laddrp = laddr.s_addr;
921 *lportp = lport;
922 *faddrp = faddr.s_addr;
923 *fportp = fport;
924 return (0);
925}
926
927void
928in_pcbdisconnect(struct inpcb *inp)
929{
930
931 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
932 INP_WLOCK_ASSERT(inp);
933
934 inp->inp_faddr.s_addr = INADDR_ANY;
935 inp->inp_fport = 0;
936 in_pcbrehash(inp);
937}
938
939/*
940 * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
941 * For most protocols, this will be invoked immediately prior to calling
942 * in_pcbfree(). However, with TCP the inpcb may significantly outlive the
943 * socket, in which case in_pcbfree() is deferred.
944 */
945void
946in_pcbdetach(struct inpcb *inp)
947{
948
949 KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
950
951 inp->inp_socket->so_pcb = NULL;
952 inp->inp_socket = NULL;
953}
954
955/*
956 * in_pcbfree_internal() frees an inpcb that has been detached from its
957 * socket, and whose reference count has reached 0. It will also remove the
958 * inpcb from any global lists it might remain on.
959 */
960static void
961in_pcbfree_internal(struct inpcb *inp)
962{
963 struct inpcbinfo *ipi = inp->inp_pcbinfo;
964
965 KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
966 KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));
967
968 INP_INFO_WLOCK_ASSERT(ipi);
969 INP_WLOCK_ASSERT(inp);
970
971#ifdef IPSEC
972 if (inp->inp_sp != NULL)
973 ipsec_delete_pcbpolicy(inp);
974#endif /* IPSEC */
975 inp->inp_gencnt = ++ipi->ipi_gencnt;
976 in_pcbremlists(inp);
977#ifdef INET6
978 if (inp->inp_vflag & INP_IPV6PROTO) {
979 ip6_freepcbopts(inp->in6p_outputopts);
980 if (inp->in6p_moptions != NULL)
981 ip6_freemoptions(inp->in6p_moptions);
982 }
983#endif
984 if (inp->inp_options)
985 (void)m_free(inp->inp_options);
986 if (inp->inp_moptions != NULL)
987 inp_freemoptions(inp->inp_moptions);
988 inp->inp_vflag = 0;
989 crfree(inp->inp_cred);
990
991#ifdef MAC
992 mac_inpcb_destroy(inp);
993#endif
994 INP_WUNLOCK(inp);
995 uma_zfree(ipi->ipi_zone, inp);
996}
997
998/*
999 * in_pcbref() bumps the reference count on an inpcb in order to maintain
1000 * stability of an inpcb pointer despite the inpcb lock being released. This
1001 * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
1002 * but where the inpcb lock is already held.
1003 *
1004 * While the inpcb will not be freed, releasing the inpcb lock means that the
1005 * connection's state may change, so the caller should be careful to
1006 * revalidate any cached state on reacquiring the lock. Drop the reference
1007 * using in_pcbrele().
1008 */
1009void
1010in_pcbref(struct inpcb *inp)
1011{
1012
1013 INP_WLOCK_ASSERT(inp);
1014
1015 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
1016
1017 inp->inp_refcount++;
1018}
1019
1020/*
1021 * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
1022 * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
1023 * return a flag indicating whether or not the inpcb remains valid. If it is
1024 * valid, we return with the inpcb lock held.
1025 */
1026int
1027in_pcbrele(struct inpcb *inp)
1028{
1029#ifdef INVARIANTS
1030 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1031#endif
1032
1033 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
1034
1035 INP_INFO_WLOCK_ASSERT(ipi);
1036 INP_WLOCK_ASSERT(inp);
1037
1038 inp->inp_refcount--;
1039 if (inp->inp_refcount > 0)
1040 return (0);
1041 in_pcbfree_internal(inp);
1042 return (1);
1043}
1044
1045/*
1046 * Unconditionally schedule an inpcb to be freed by decrementing its
1047 * reference count, which should occur only after the inpcb has been detached
1048 * from its socket. If another thread holds a temporary reference (acquired
1049 * using in_pcbref()) then the free is deferred until that reference is
1050 * released using in_pcbrele(), but the inpcb is still unlocked.
1051 */
1052void
1053in_pcbfree(struct inpcb *inp)
1054{
1055#ifdef INVARIANTS
1056 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1057#endif
1058
1059 KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
1060 __func__));
1061
1062 INP_INFO_WLOCK_ASSERT(ipi);
1063 INP_WLOCK_ASSERT(inp);
1064
1065 if (!in_pcbrele(inp))
1066 INP_WUNLOCK(inp);
1067}
1068
1069/*
1070 * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
1071 * port reservation, and preventing it from being returned by inpcb lookups.
1072 *
1073 * It is used by TCP to mark an inpcb as unused and avoid future packet
1074 * delivery or event notification when a socket remains open but TCP has
1075 * closed. This might occur as a result of a shutdown()-initiated TCP close
1076 * or a RST on the wire, and allows the port binding to be reused while still
1077 * maintaining the invariant that so_pcb always points to a valid inpcb until
1078 * in_pcbdetach().
1079 *
1080 * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
1081 * lists, but can lead to confusing netstat output, as open sockets with
1082 * closed TCP connections will no longer appear to have their bound port
1083 * number. An explicit flag would be better, as it would allow us to leave
1084 * the port number intact after the connection is dropped.
1085 *
1086 * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
1087 * in_pcbnotifyall() and in_pcbpurgeif0()?
1088 */
1089void
1090in_pcbdrop(struct inpcb *inp)
1091{
1092
1093 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
1094 INP_WLOCK_ASSERT(inp);
1095
1096 inp->inp_flags |= INP_DROPPED;
1097 if (inp->inp_flags & INP_INHASHLIST) {
1098 struct inpcbport *phd = inp->inp_phd;
1099
1100 LIST_REMOVE(inp, inp_hash);
1101 LIST_REMOVE(inp, inp_portlist);
1102 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1103 LIST_REMOVE(phd, phd_hash);
1104 free(phd, M_PCB);
1105 }
1106 inp->inp_flags &= ~INP_INHASHLIST;
1107 }
1108}
1109
1110/*
1111 * Common routines to return the socket addresses associated with inpcbs.
1112 */
1113struct sockaddr *
1114in_sockaddr(in_port_t port, struct in_addr *addr_p)
1115{
1116 struct sockaddr_in *sin;
1117
1118 sin = malloc(sizeof *sin, M_SONAME,
1119 M_WAITOK | M_ZERO);
1120 sin->sin_family = AF_INET;
1121 sin->sin_len = sizeof(*sin);
1122 sin->sin_addr = *addr_p;
1123 sin->sin_port = port;
1124
1125 return (struct sockaddr *)sin;
1126}
1127
1128int
1129in_getsockaddr(struct socket *so, struct sockaddr **nam)
1130{
1131 struct inpcb *inp;
1132 struct in_addr addr;
1133 in_port_t port;
1134
1135 inp = sotoinpcb(so);
1136 KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
1137
1138 INP_RLOCK(inp);
1139 port = inp->inp_lport;
1140 addr = inp->inp_laddr;
1141 INP_RUNLOCK(inp);
1142
1143 *nam = in_sockaddr(port, &addr);
1144 return 0;
1145}
1146
1147int
1148in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1149{
1150 struct inpcb *inp;
1151 struct in_addr addr;
1152 in_port_t port;
1153
1154 inp = sotoinpcb(so);
1155 KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
1156
1157 INP_RLOCK(inp);
1158 port = inp->inp_fport;
1159 addr = inp->inp_faddr;
1160 INP_RUNLOCK(inp);
1161
1162 *nam = in_sockaddr(port, &addr);
1163 return 0;
1164}
1165
1166void
1167in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
1168 struct inpcb *(*notify)(struct inpcb *, int))
1169{
1170 struct inpcb *inp, *inp_temp;
1171
1172 INP_INFO_WLOCK(pcbinfo);
1173 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
1174 INP_WLOCK(inp);
1175#ifdef INET6
1176 if ((inp->inp_vflag & INP_IPV4) == 0) {
1177 INP_WUNLOCK(inp);
1178 continue;
1179 }
1180#endif
1181 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1182 inp->inp_socket == NULL) {
1183 INP_WUNLOCK(inp);
1184 continue;
1185 }
1186 if ((*notify)(inp, errno))
1187 INP_WUNLOCK(inp);
1188 }
1189 INP_INFO_WUNLOCK(pcbinfo);
1190}
1191
1192void
1193in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
1194{
1195 struct inpcb *inp;
1196 struct ip_moptions *imo;
1197 int i, gap;
1198
1199 INP_INFO_RLOCK(pcbinfo);
1200 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1201 INP_WLOCK(inp);
1202 imo = inp->inp_moptions;
1203 if ((inp->inp_vflag & INP_IPV4) &&
1204 imo != NULL) {
1205 /*
1206 * Unselect the outgoing interface if it is being
1207 * detached.
1208 */
1209 if (imo->imo_multicast_ifp == ifp)
1210 imo->imo_multicast_ifp = NULL;
1211
1212 /*
1213 * Drop multicast group membership if we joined
1214 * through the interface being detached.
1215 */
1216 for (i = 0, gap = 0; i < imo->imo_num_memberships;
1217 i++) {
1218 if (imo->imo_membership[i]->inm_ifp == ifp) {
1219 in_delmulti(imo->imo_membership[i]);
1220 gap++;
1221 } else if (gap != 0)
1222 imo->imo_membership[i - gap] =
1223 imo->imo_membership[i];
1224 }
1225 imo->imo_num_memberships -= gap;
1226 }
1227 INP_WUNLOCK(inp);
1228 }
1229 INP_INFO_RUNLOCK(pcbinfo);
1230}
1231
1232/*
1233 * Lookup a PCB based on the local address and port.
1234 */
1235#define INP_LOOKUP_MAPPED_PCB_COST 3
1236struct inpcb *
1237in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1238 u_short lport, int wild_okay, struct ucred *cred)
1239{
1240 struct inpcb *inp;
1241#ifdef INET6
1242 int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
1243#else
1244 int matchwild = 3;
1245#endif
1246 int wildcard;
1247
1248 INP_INFO_LOCK_ASSERT(pcbinfo);
1249
1250 if (!wild_okay) {
1251 struct inpcbhead *head;
1252 /*
1253 * Look for an unconnected (wildcard foreign addr) PCB that
1254 * matches the local address and port we're looking for.
1255 */
1256 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1257 0, pcbinfo->ipi_hashmask)];
1258 LIST_FOREACH(inp, head, inp_hash) {
1259#ifdef INET6
1260 /* XXX inp locking */
1261 if ((inp->inp_vflag & INP_IPV4) == 0)
1262 continue;
1263#endif
1264 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1265 inp->inp_laddr.s_addr == laddr.s_addr &&
1266 inp->inp_lport == lport) {
1267 /*
1268 * Found?
1269 */
1270 if (cred == NULL ||
1271 prison_equal_ip4(cred->cr_prison,
1272 inp->inp_cred->cr_prison))
1273 return (inp);
1274 }
1275 }
1276 /*
1277 * Not found.
1278 */
1279 return (NULL);
1280 } else {
1281 struct inpcbporthead *porthash;
1282 struct inpcbport *phd;
1283 struct inpcb *match = NULL;
1284 /*
1285 * Best fit PCB lookup.
1286 *
1287 * First see if this local port is in use by looking on the
1288 * port hash list.
1289 */
1290 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
1291 pcbinfo->ipi_porthashmask)];
1292 LIST_FOREACH(phd, porthash, phd_hash) {
1293 if (phd->phd_port == lport)
1294 break;
1295 }
1296 if (phd != NULL) {
1297 /*
1298 * Port is in use by one or more PCBs. Look for best
1299 * fit.
1300 */
1301 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1302 wildcard = 0;
1303 if (cred != NULL &&
1304 !prison_equal_ip4(inp->inp_cred->cr_prison,
1305 cred->cr_prison))
1306 continue;
1307#ifdef INET6
1308 /* XXX inp locking */
1309 if ((inp->inp_vflag & INP_IPV4) == 0)
1310 continue;
1311 /*
1312 * We never select the PCB that has
1313 * INP_IPV6 flag and is bound to :: if
1314 * we have another PCB which is bound
1315 * to 0.0.0.0. If a PCB has the
1316 * INP_IPV6 flag, then we set its cost
1317 * higher than IPv4 only PCBs.
1318 *
1319 * Note that the case only happens
1320 * when a socket is bound to ::, under
1321 * the condition that the use of the
1322 * mapped address is allowed.
1323 */
1324 if ((inp->inp_vflag & INP_IPV6) != 0)
1325 wildcard += INP_LOOKUP_MAPPED_PCB_COST;
1326#endif
1327 if (inp->inp_faddr.s_addr != INADDR_ANY)
1328 wildcard++;
1329 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1330 if (laddr.s_addr == INADDR_ANY)
1331 wildcard++;
1332 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1333 continue;
1334 } else {
1335 if (laddr.s_addr != INADDR_ANY)
1336 wildcard++;
1337 }
1338 if (wildcard < matchwild) {
1339 match = inp;
1340 matchwild = wildcard;
1341 if (matchwild == 0)
1342 break;
1343 }
1344 }
1345 }
1346 return (match);
1347 }
1348}
1349#undef INP_LOOKUP_MAPPED_PCB_COST
1350
1351/*
1352 * Lookup PCB in hash list.
1353 */
1354struct inpcb *
1355in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1356 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1357 struct ifnet *ifp)
1358{
1359 struct inpcbhead *head;
1360 struct inpcb *inp, *tmpinp;
1361 u_short fport = fport_arg, lport = lport_arg;
1362
1363 INP_INFO_LOCK_ASSERT(pcbinfo);
1364
1365 /*
1366 * First look for an exact match.
1367 */
1368 tmpinp = NULL;
1369 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1370 pcbinfo->ipi_hashmask)];
1371 LIST_FOREACH(inp, head, inp_hash) {
1372#ifdef INET6
1373 /* XXX inp locking */
1374 if ((inp->inp_vflag & INP_IPV4) == 0)
1375 continue;
1376#endif
1377 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1378 inp->inp_laddr.s_addr == laddr.s_addr &&
1379 inp->inp_fport == fport &&
1380 inp->inp_lport == lport) {
1381 /*
1382 * XXX We should be able to directly return
1383 * the inp here, without any checks.
1384 * Well unless both bound with SO_REUSEPORT?
1385 */
1386 if (prison_flag(inp->inp_cred, PR_IP4))
1387 return (inp);
1388 if (tmpinp == NULL)
1389 tmpinp = inp;
1390 }
1391 }
1392 if (tmpinp != NULL)
1393 return (tmpinp);
1394
1395 /*
1396 * Then look for a wildcard match, if requested.
1397 */
1398 if (wildcard == INPLOOKUP_WILDCARD) {
1399 struct inpcb *local_wild = NULL, *local_exact = NULL;
1400#ifdef INET6
1401 struct inpcb *local_wild_mapped = NULL;
1402#endif
1403 struct inpcb *jail_wild = NULL;
1404 int injail;
1405
1406 /*
1407 * Order of socket selection - we always prefer jails.
1408 * 1. jailed, non-wild.
1409 * 2. jailed, wild.
1410 * 3. non-jailed, non-wild.
1411 * 4. non-jailed, wild.
1412 */
1413
1414 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1415 0, pcbinfo->ipi_hashmask)];
1416 LIST_FOREACH(inp, head, inp_hash) {
1417#ifdef INET6
1418 /* XXX inp locking */
1419 if ((inp->inp_vflag & INP_IPV4) == 0)
1420 continue;
1421#endif
1422 if (inp->inp_faddr.s_addr != INADDR_ANY ||
1423 inp->inp_lport != lport)
1424 continue;
1425
1426 /* XXX inp locking */
1427 if (ifp && ifp->if_type == IFT_FAITH &&
1428 (inp->inp_flags & INP_FAITH) == 0)
1429 continue;
1430
1431 injail = prison_flag(inp->inp_cred, PR_IP4);
1432 if (injail) {
1433 if (prison_check_ip4(inp->inp_cred,
1434 &laddr) != 0)
1435 continue;
1436 } else {
1437 if (local_exact != NULL)
1438 continue;
1439 }
1440
1441 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1442 if (injail)
1443 return (inp);
1444 else
1445 local_exact = inp;
1446 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1447#ifdef INET6
1448 /* XXX inp locking, NULL check */
1449 if (inp->inp_vflag & INP_IPV6PROTO)
1450 local_wild_mapped = inp;
1451 else
1452#endif /* INET6 */
1453 if (injail)
1454 jail_wild = inp;
1455 else
1456 local_wild = inp;
1457 }
1458 } /* LIST_FOREACH */
1459 if (jail_wild != NULL)
1460 return (jail_wild);
1461 if (local_exact != NULL)
1462 return (local_exact);
1463 if (local_wild != NULL)
1464 return (local_wild);
1465#ifdef INET6
1466 if (local_wild_mapped != NULL)
1467 return (local_wild_mapped);
1468#endif /* defined(INET6) */
1469 } /* if (wildcard == INPLOOKUP_WILDCARD) */
1470
1471 return (NULL);
1472}
1473
1474/*
1475 * Insert PCB onto various hash lists.
1476 */
1477int
1478in_pcbinshash(struct inpcb *inp)
1479{
1480 struct inpcbhead *pcbhash;
1481 struct inpcbporthead *pcbporthash;
1482 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1483 struct inpcbport *phd;
1484 u_int32_t hashkey_faddr;
1485
1486 INP_INFO_WLOCK_ASSERT(pcbinfo);
1487 INP_WLOCK_ASSERT(inp);
1488 KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
1489 ("in_pcbinshash: INP_INHASHLIST"));
1490
1491#ifdef INET6
1492 if (inp->inp_vflag & INP_IPV6)
1493 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1494 else
1495#endif /* INET6 */
1496 hashkey_faddr = inp->inp_faddr.s_addr;
1497
1498 pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1499 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1500
1501 pcbporthash = &pcbinfo->ipi_porthashbase[
1502 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
1503
1504 /*
1505 * Go through port list and look for a head for this lport.
1506 */
1507 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1508 if (phd->phd_port == inp->inp_lport)
1509 break;
1510 }
1511 /*
1512 * If none exists, malloc one and tack it on.
1513 */
1514 if (phd == NULL) {
1515 phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1516 if (phd == NULL) {
1517 return (ENOBUFS); /* XXX */
1518 }
1519 phd->phd_port = inp->inp_lport;
1520 LIST_INIT(&phd->phd_pcblist);
1521 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1522 }
1523 inp->inp_phd = phd;
1524 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1525 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1526 inp->inp_flags |= INP_INHASHLIST;
1527 return (0);
1528}
1529
1530/*
1531 * Move PCB to the proper hash bucket when { faddr, fport } have been
1532 * changed. NOTE: This does not handle the case of the lport changing (the
1533 * hashed port list would have to be updated as well), so the lport must
1534 * not change after in_pcbinshash() has been called.
1535 */
1536void
1537in_pcbrehash(struct inpcb *inp)
1538{
1539 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1540 struct inpcbhead *head;
1541 u_int32_t hashkey_faddr;
1542
1543 INP_INFO_WLOCK_ASSERT(pcbinfo);
1544 INP_WLOCK_ASSERT(inp);
1545 KASSERT(inp->inp_flags & INP_INHASHLIST,
1546 ("in_pcbrehash: !INP_INHASHLIST"));
1547
1548#ifdef INET6
1549 if (inp->inp_vflag & INP_IPV6)
1550 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1551 else
1552#endif /* INET6 */
1553 hashkey_faddr = inp->inp_faddr.s_addr;
1554
1555 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1556 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1557
1558 LIST_REMOVE(inp, inp_hash);
1559 LIST_INSERT_HEAD(head, inp, inp_hash);
1560}
1561
1562/*
1563 * Remove PCB from various lists.
1564 */
1565static void
1566in_pcbremlists(struct inpcb *inp)
1567{
1568 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1569
1570 INP_INFO_WLOCK_ASSERT(pcbinfo);
1571 INP_WLOCK_ASSERT(inp);
1572
1573 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1574 if (inp->inp_flags & INP_INHASHLIST) {
1575 struct inpcbport *phd = inp->inp_phd;
1576
1577 LIST_REMOVE(inp, inp_hash);
1578 LIST_REMOVE(inp, inp_portlist);
1579 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1580 LIST_REMOVE(phd, phd_hash);
1581 free(phd, M_PCB);
1582 }
1583 inp->inp_flags &= ~INP_INHASHLIST;
1584 }
1585 LIST_REMOVE(inp, inp_list);
1586 pcbinfo->ipi_count--;
1587}
1588
/*
 * A set label operation has occurred at the socket layer, propagate the
 * label change into the in_pcb for the socket.  The inpcb write lock is
 * taken before the socket lock.  Without MAC this is a no-op.
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
#endif
}
1609
1610/*
1611 * ipport_tick runs once per second, determining if random port allocation
1612 * should be continued. If more than ipport_randomcps ports have been
1613 * allocated in the last second, then we return to sequential port
1614 * allocation. We return to random allocation only once we drop below
1615 * ipport_randomcps for at least ipport_randomtime seconds.
1616 */
1617void
1618ipport_tick(void *xtp)
1619{
1620 VNET_ITERATOR_DECL(vnet_iter);
1621
1622 VNET_LIST_RLOCK_NOSLEEP();
1623 VNET_FOREACH(vnet_iter) {
1624 CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
1625 if (V_ipport_tcpallocs <=
1626 V_ipport_tcplastcount + V_ipport_randomcps) {
1627 if (V_ipport_stoprandom > 0)
1628 V_ipport_stoprandom--;
1629 } else
1630 V_ipport_stoprandom = V_ipport_randomtime;
1631 V_ipport_tcplastcount = V_ipport_tcpallocs;
1632 CURVNET_RESTORE();
1633 }
1634 VNET_LIST_RUNLOCK_NOSLEEP();
1635 callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
1636}
1637
/*
 * Function wrapper around the INP_WLOCK() macro: write-lock the inpcb.
 */
void
inp_wlock(struct inpcb *inp)
{
	INP_WLOCK(inp);
}
1644
/*
 * Function wrapper around the INP_WUNLOCK() macro: drop the inpcb write
 * lock.
 */
void
inp_wunlock(struct inpcb *inp)
{
	INP_WUNLOCK(inp);
}
1651
/*
 * Function wrapper around the INP_RLOCK() macro: read-lock the inpcb.
 */
void
inp_rlock(struct inpcb *inp)
{
	INP_RLOCK(inp);
}
1658
/*
 * Function wrapper around the INP_RUNLOCK() macro: drop the inpcb read lock.
 */
void
inp_runlock(struct inpcb *inp)
{
	INP_RUNLOCK(inp);
}
1665
1666#ifdef INVARIANTS
/*
 * Function wrapper around INP_WLOCK_ASSERT(); only built with INVARIANTS.
 */
void
inp_lock_assert(struct inpcb *inp)
{
	INP_WLOCK_ASSERT(inp);
}
1673
/*
 * Function wrapper around INP_UNLOCK_ASSERT(); only built with INVARIANTS.
 */
void
inp_unlock_assert(struct inpcb *inp)
{
	INP_UNLOCK_ASSERT(inp);
}
1680#endif
1681
1682void
1683inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
1684{
1685 struct inpcb *inp;
1686
1687 INP_INFO_RLOCK(&V_tcbinfo);
1688 LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
1689 INP_WLOCK(inp);
1690 func(inp, arg);
1691 INP_WUNLOCK(inp);
1692 }
1693 INP_INFO_RUNLOCK(&V_tcbinfo);
1694}
1695
1696struct socket *
1697inp_inpcbtosocket(struct inpcb *inp)
1698{
1699
1700 INP_WLOCK_ASSERT(inp);
1701 return (inp->inp_socket);
1702}
1703
1704struct tcpcb *
1705inp_inpcbtotcpcb(struct inpcb *inp)
1706{
1707
1708 INP_WLOCK_ASSERT(inp);
1709 return ((struct tcpcb *)inp->inp_ppcb);
1710}
1711
1712int
1713inp_ip_tos_get(const struct inpcb *inp)
1714{
1715
1716 return (inp->inp_ip_tos);
1717}
1718
1719void
1720inp_ip_tos_set(struct inpcb *inp, int val)
1721{
1722
1723 inp->inp_ip_tos = val;
1724}
1725
1726void
1727inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
1728 uint32_t *faddr, uint16_t *fp)
1729{
1730
1731 INP_LOCK_ASSERT(inp);
1732 *laddr = inp->inp_laddr.s_addr;
1733 *faddr = inp->inp_faddr.s_addr;
1734 *lp = inp->inp_lport;
1735 *fp = inp->inp_fport;
1736}
1737
/*
 * Function wrapper around the sotoinpcb() conversion.
 */
struct inpcb *
so_sotoinpcb(struct socket *so)
{
	return (sotoinpcb(so));
}
1744
/*
 * Function wrapper around the sototcpcb() conversion.
 */
struct tcpcb *
so_sototcpcb(struct socket *so)
{
	return (sototcpcb(so));
}
1751
1752#ifdef DDB
/*
 * Emit 'indent' spaces on the debugger console; nothing is printed when
 * indent is zero or negative.
 */
static void
db_print_indent(int indent)
{
	int remaining;

	for (remaining = indent; remaining > 0; remaining--)
		db_printf(" ");
}
1761
1762static void
1763db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
1764{
1765 char faddr_str[48], laddr_str[48];
1766
1767 db_print_indent(indent);
1768 db_printf("%s at %p\n", name, inc);
1769
1770 indent += 2;
1771
1772#ifdef INET6
1773 if (inc->inc_flags & INC_ISIPV6) {
1774 /* IPv6. */
1775 ip6_sprintf(laddr_str, &inc->inc6_laddr);
1776 ip6_sprintf(faddr_str, &inc->inc6_faddr);
1777 } else {
1778#endif
1779 /* IPv4. */
1780 inet_ntoa_r(inc->inc_laddr, laddr_str);
1781 inet_ntoa_r(inc->inc_faddr, faddr_str);
1782#ifdef INET6
1783 }
1784#endif
1785 db_print_indent(indent);
1786 db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
1787 ntohs(inc->inc_lport));
1788 db_print_indent(indent);
1789 db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
1790 ntohs(inc->inc_fport));
1791}
1792
1793static void
1794db_print_inpflags(int inp_flags)
1795{
1796 int comma;
1797
1798 comma = 0;
1799 if (inp_flags & INP_RECVOPTS) {
1800 db_printf("%sINP_RECVOPTS", comma ? ", " : "");
1801 comma = 1;
1802 }
1803 if (inp_flags & INP_RECVRETOPTS) {
1804 db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
1805 comma = 1;
1806 }
1807 if (inp_flags & INP_RECVDSTADDR) {
1808 db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
1809 comma = 1;
1810 }
1811 if (inp_flags & INP_HDRINCL) {
1812 db_printf("%sINP_HDRINCL", comma ? ", " : "");
1813 comma = 1;
1814 }
1815 if (inp_flags & INP_HIGHPORT) {
1816 db_printf("%sINP_HIGHPORT", comma ? ", " : "");
1817 comma = 1;
1818 }
1819 if (inp_flags & INP_LOWPORT) {
1820 db_printf("%sINP_LOWPORT", comma ? ", " : "");
1821 comma = 1;
1822 }
1823 if (inp_flags & INP_ANONPORT) {
1824 db_printf("%sINP_ANONPORT", comma ? ", " : "");
1825 comma = 1;
1826 }
1827 if (inp_flags & INP_RECVIF) {
1828 db_printf("%sINP_RECVIF", comma ? ", " : "");
1829 comma = 1;
1830 }
1831 if (inp_flags & INP_MTUDISC) {
1832 db_printf("%sINP_MTUDISC", comma ? ", " : "");
1833 comma = 1;
1834 }
1835 if (inp_flags & INP_FAITH) {
1836 db_printf("%sINP_FAITH", comma ? ", " : "");
1837 comma = 1;
1838 }
1839 if (inp_flags & INP_RECVTTL) {
1840 db_printf("%sINP_RECVTTL", comma ? ", " : "");
1841 comma = 1;
1842 }
1843 if (inp_flags & INP_DONTFRAG) {
1844 db_printf("%sINP_DONTFRAG", comma ? ", " : "");
1845 comma = 1;
1846 }
1847 if (inp_flags & IN6P_IPV6_V6ONLY) {
1848 db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
1849 comma = 1;
1850 }
1851 if (inp_flags & IN6P_PKTINFO) {
1852 db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
1853 comma = 1;
1854 }
1855 if (inp_flags & IN6P_HOPLIMIT) {
1856 db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
1857 comma = 1;
1858 }
1859 if (inp_flags & IN6P_HOPOPTS) {
1860 db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
1861 comma = 1;
1862 }
1863 if (inp_flags & IN6P_DSTOPTS) {
1864 db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
1865 comma = 1;
1866 }
1867 if (inp_flags & IN6P_RTHDR) {
1868 db_printf("%sIN6P_RTHDR", comma ? ", " : "");
1869 comma = 1;
1870 }
1871 if (inp_flags & IN6P_RTHDRDSTOPTS) {
1872 db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
1873 comma = 1;
1874 }
1875 if (inp_flags & IN6P_TCLASS) {
1876 db_printf("%sIN6P_TCLASS", comma ? ", " : "");
1877 comma = 1;
1878 }
1879 if (inp_flags & IN6P_AUTOFLOWLABEL) {
1880 db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
1881 comma = 1;
1882 }
1883 if (inp_flags & INP_TIMEWAIT) {
1884 db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
1885 comma = 1;
1886 }
1887 if (inp_flags & INP_ONESBCAST) {
1888 db_printf("%sINP_ONESBCAST", comma ? ", " : "");
1889 comma = 1;
1890 }
1891 if (inp_flags & INP_DROPPED) {
1892 db_printf("%sINP_DROPPED", comma ? ", " : "");
1893 comma = 1;
1894 }
1895 if (inp_flags & INP_SOCKREF) {
1896 db_printf("%sINP_SOCKREF", comma ? ", " : "");
1897 comma = 1;
1898 }
1899 if (inp_flags & IN6P_RFC2292) {
1900 db_printf("%sIN6P_RFC2292", comma ? ", " : "");
1901 comma = 1;
1902 }
1903 if (inp_flags & IN6P_MTU) {
1904 db_printf("IN6P_MTU%s", comma ? ", " : "");
1905 comma = 1;
1906 }
1907}
1908
1909static void
1910db_print_inpvflag(u_char inp_vflag)
1911{
1912 int comma;
1913
1914 comma = 0;
1915 if (inp_vflag & INP_IPV4) {
1916 db_printf("%sINP_IPV4", comma ? ", " : "");
1917 comma = 1;
1918 }
1919 if (inp_vflag & INP_IPV6) {
1920 db_printf("%sINP_IPV6", comma ? ", " : "");
1921 comma = 1;
1922 }
1923 if (inp_vflag & INP_IPV6PROTO) {
1924 db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
1925 comma = 1;
1926 }
1927}
1928
1929static void
1930db_print_inpcb(struct inpcb *inp, const char *name, int indent)
1931{
1932
1933 db_print_indent(indent);
1934 db_printf("%s at %p\n", name, inp);
1935
1936 indent += 2;
1937
1938 db_print_indent(indent);
1939 db_printf("inp_flow: 0x%x\n", inp->inp_flow);
1940
1941 db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
1942
1943 db_print_indent(indent);
1944 db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
1945 inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
1946
1947 db_print_indent(indent);
1948 db_printf("inp_label: %p inp_flags: 0x%x (",
1949 inp->inp_label, inp->inp_flags);
1950 db_print_inpflags(inp->inp_flags);
1951 db_printf(")\n");
1952
1953 db_print_indent(indent);
1954 db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
1955 inp->inp_vflag);
1956 db_print_inpvflag(inp->inp_vflag);
1957 db_printf(")\n");
1958
1959 db_print_indent(indent);
1960 db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
1961 inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
1962
1963 db_print_indent(indent);
1964#ifdef INET6
1965 if (inp->inp_vflag & INP_IPV6) {
1966 db_printf("in6p_options: %p in6p_outputopts: %p "
1967 "in6p_moptions: %p\n", inp->in6p_options,
1968 inp->in6p_outputopts, inp->in6p_moptions);
1969 db_printf("in6p_icmp6filt: %p in6p_cksum %d "
1970 "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
1971 inp->in6p_hops);
1972 } else
1973#endif
1974 {
1975 db_printf("inp_ip_tos: %d inp_ip_options: %p "
1976 "inp_ip_moptions: %p\n", inp->inp_ip_tos,
1977 inp->inp_options, inp->inp_moptions);
1978 }
1979
1980 db_print_indent(indent);
1981 db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
1982 (uintmax_t)inp->inp_gencnt);
1983}
1984
1985DB_SHOW_COMMAND(inpcb, db_show_inpcb)
1986{
1987 struct inpcb *inp;
1988
1989 if (!have_addr) {
1990 db_printf("usage: show inpcb <addr>\n");
1991 return;
1992 }
1993 inp = (struct inpcb *)addr;
1994
1995 db_print_inpcb(inp, "inpcb", 0);
1996}
1997#endif
112
113#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
114
115#define RANGECHK(var, min, max) \
116 if ((var) < (min)) { (var) = (min); } \
117 else if ((var) > (max)) { (var) = (max); }
118
119static void in_pcbremlists(struct inpcb *inp);
120
121static int
122sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
123{
124 int error;
125
126#ifdef VIMAGE
127 error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
128#else
129 error = sysctl_handle_int(oidp, arg1, arg2, req);
130#endif
131 if (error == 0) {
132 RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
133 RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
134 RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
135 RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
136 RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
137 RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
138 }
139 return (error);
140}
141
142#undef RANGECHK
143
144SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
145
146SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
147 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
148 &sysctl_net_ipport_check, "I", "");
149SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
150 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
151 &sysctl_net_ipport_check, "I", "");
152SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
153 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
154 &sysctl_net_ipport_check, "I", "");
155SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
156 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
157 &sysctl_net_ipport_check, "I", "");
158SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
159 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
160 &sysctl_net_ipport_check, "I", "");
161SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
162 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
163 &sysctl_net_ipport_check, "I", "");
164SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
165 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
166SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
167 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
168SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
169 &VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
170SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
171 &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
172 "allocations before switching to a sequental one");
173SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
174 &VNET_NAME(ipport_randomtime), 0,
175 "Minimum time to keep sequental port "
176 "allocation before switching to a random one");
177
178/*
179 * in_pcb.c: manage the Protocol Control Blocks.
180 *
181 * NOTE: It is assumed that most of these functions will be called with
182 * the pcbinfo lock held, and often, the inpcb lock held, as these utility
183 * functions often modify hash chains or addresses in pcbs.
184 */
185
186/*
187 * Initialize an inpcbinfo -- we should be able to reduce the number of
188 * arguments in time.
189 */
190void
191in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
192 struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
193 char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
194 uint32_t inpcbzone_flags)
195{
196
197 INP_INFO_LOCK_INIT(pcbinfo, name);
198#ifdef VIMAGE
199 pcbinfo->ipi_vnet = curvnet;
200#endif
201 pcbinfo->ipi_listhead = listhead;
202 LIST_INIT(pcbinfo->ipi_listhead);
203 pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
204 &pcbinfo->ipi_hashmask);
205 pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
206 &pcbinfo->ipi_porthashmask);
207 pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
208 NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
209 inpcbzone_flags);
210 uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
211}
212
213/*
214 * Destroy an inpcbinfo.
215 */
216void
217in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
218{
219
220 hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
221 hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
222 pcbinfo->ipi_porthashmask);
223 uma_zdestroy(pcbinfo->ipi_zone);
224 INP_INFO_LOCK_DESTROY(pcbinfo);
225}
226
/*
 * Allocate a PCB and associate it with the socket.
 * On success return with the PCB locked.
 *
 * The caller must hold the pcbinfo write lock.  Returns 0 on success,
 * ENOBUFS if the zone allocation fails, or the error reported by
 * mac_inpcb_init() / ipsec_init_policy() when MAC / IPSEC are compiled in.
 * On a failure after allocation the credential reference and the inpcb are
 * released again and the socket is left untouched.
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
{
	struct inpcb *inp;
	int error;

	INP_INFO_WLOCK_ASSERT(pcbinfo);
	error = 0;
	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
	if (inp == NULL)
		return (ENOBUFS);
	/* Only the leading inp_zero_size bytes are cleared. */
	bzero(inp, inp_zero_size);
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/* Take our own reference on the socket's credential. */
	inp->inp_cred = crhold(so->so_cred);
	inp->inp_inc.inc_fibnum = so->so_fibnum;
#ifdef MAC
	error = mac_inpcb_init(inp, M_NOWAIT);
	if (error != 0)
		goto out;
	mac_inpcb_create(so, inp);
#endif
#ifdef IPSEC
	error = ipsec_init_policy(so, &inp->inp_sp);
	if (error != 0) {
#ifdef MAC
		/* Undo the MAC label set up above before bailing out. */
		mac_inpcb_destroy(inp);
#endif
		goto out;
	}
#endif /*IPSEC*/
#ifdef INET6
	if (INP_SOCKAF(so) == AF_INET6) {
		inp->inp_vflag |= INP_IPV6PROTO;
		if (V_ip6_v6only)
			inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}
#endif
	/* Nothing can fail past this point: publish the PCB. */
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	so->so_pcb = (caddr_t)inp;
#ifdef INET6
	if (V_ip6_auto_flowlabel)
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
	INP_WLOCK(inp);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_refcount = 1;	/* Reference from the inpcbinfo */
#if defined(IPSEC) || defined(MAC)
out:
	/* Shared exit: on the success path error is 0 and nothing is freed. */
	if (error != 0) {
		crfree(inp->inp_cred);
		uma_zfree(pcbinfo->ipi_zone, inp);
	}
#endif
	return (error);
}
288
289int
290in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
291{
292 int anonport, error;
293
294 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
295 INP_WLOCK_ASSERT(inp);
296
297 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
298 return (EINVAL);
299 anonport = inp->inp_lport == 0 && (nam == NULL ||
300 ((struct sockaddr_in *)nam)->sin_port == 0);
301 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
302 &inp->inp_lport, cred);
303 if (error)
304 return (error);
305 if (in_pcbinshash(inp) != 0) {
306 inp->inp_laddr.s_addr = INADDR_ANY;
307 inp->inp_lport = 0;
308 return (EAGAIN);
309 }
310 if (anonport)
311 inp->inp_flags |= INP_ANONPORT;
312 return (0);
313}
314
315/*
316 * Set up a bind operation on a PCB, performing port allocation
317 * as required, but do not actually modify the PCB. Callers can
318 * either complete the bind by setting inp_laddr/inp_lport and
319 * calling in_pcbinshash(), or they can just use the resulting
320 * port and address to authorise the sending of a once-off packet.
321 *
322 * On error, the values of *laddrp and *lportp are not changed.
323 */
324int
325in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
326 u_short *lportp, struct ucred *cred)
327{
328 struct socket *so = inp->inp_socket;
329 unsigned short *lastport;
330 struct sockaddr_in *sin;
331 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
332 struct in_addr laddr;
333 u_short lport = 0;
334 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
335 int error;
336 int dorandom;
337
338 /*
339 * Because no actual state changes occur here, a global write lock on
340 * the pcbinfo isn't required.
341 */
342 INP_INFO_LOCK_ASSERT(pcbinfo);
343 INP_LOCK_ASSERT(inp);
344
345 if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
346 return (EADDRNOTAVAIL);
347 laddr.s_addr = *laddrp;
348 if (nam != NULL && laddr.s_addr != INADDR_ANY)
349 return (EINVAL);
350 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
351 wild = INPLOOKUP_WILDCARD;
352 if (nam == NULL) {
353 if ((error = prison_local_ip4(cred, &laddr)) != 0)
354 return (error);
355 } else {
356 sin = (struct sockaddr_in *)nam;
357 if (nam->sa_len != sizeof (*sin))
358 return (EINVAL);
359#ifdef notdef
360 /*
361 * We should check the family, but old programs
362 * incorrectly fail to initialize it.
363 */
364 if (sin->sin_family != AF_INET)
365 return (EAFNOSUPPORT);
366#endif
367 error = prison_local_ip4(cred, &sin->sin_addr);
368 if (error)
369 return (error);
370 if (sin->sin_port != *lportp) {
371 /* Don't allow the port to change. */
372 if (*lportp != 0)
373 return (EINVAL);
374 lport = sin->sin_port;
375 }
376 /* NB: lport is left as 0 if the port isn't being changed. */
377 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
378 /*
379 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
380 * allow complete duplication of binding if
381 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
382 * and a multicast address is bound on both
383 * new and duplicated sockets.
384 */
385 if (so->so_options & SO_REUSEADDR)
386 reuseport = SO_REUSEADDR|SO_REUSEPORT;
387 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
388 sin->sin_port = 0; /* yech... */
389 bzero(&sin->sin_zero, sizeof(sin->sin_zero));
390 /*
391 * Is the address a local IP address?
392 * If INP_BINDANY is set, then the socket may be bound
393 * to any endpoint address, local or not.
394 */
395 if ((inp->inp_flags & INP_BINDANY) == 0 &&
396 ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
397 return (EADDRNOTAVAIL);
398 }
399 laddr = sin->sin_addr;
400 if (lport) {
401 struct inpcb *t;
402 struct tcptw *tw;
403
404 /* GROSS */
405 if (ntohs(lport) <= V_ipport_reservedhigh &&
406 ntohs(lport) >= V_ipport_reservedlow &&
407 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
408 0))
409 return (EACCES);
410 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
411 priv_check_cred(inp->inp_cred,
412 PRIV_NETINET_REUSEPORT, 0) != 0) {
413 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
414 lport, INPLOOKUP_WILDCARD, cred);
415 /*
416 * XXX
417 * This entire block sorely needs a rewrite.
418 */
419 if (t &&
420 ((t->inp_flags & INP_TIMEWAIT) == 0) &&
421 (so->so_type != SOCK_STREAM ||
422 ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
423 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
424 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
425 (t->inp_socket->so_options &
426 SO_REUSEPORT) == 0) &&
427 (inp->inp_cred->cr_uid !=
428 t->inp_cred->cr_uid))
429 return (EADDRINUSE);
430 }
431 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
432 lport, wild, cred);
433 if (t && (t->inp_flags & INP_TIMEWAIT)) {
434 /*
435 * XXXRW: If an incpb has had its timewait
436 * state recycled, we treat the address as
437 * being in use (for now). This is better
438 * than a panic, but not desirable.
439 */
440 tw = intotw(inp);
441 if (tw == NULL ||
442 (reuseport & tw->tw_so_options) == 0)
443 return (EADDRINUSE);
444 } else if (t &&
445 (reuseport & t->inp_socket->so_options) == 0) {
446#ifdef INET6
447 if (ntohl(sin->sin_addr.s_addr) !=
448 INADDR_ANY ||
449 ntohl(t->inp_laddr.s_addr) !=
450 INADDR_ANY ||
451 INP_SOCKAF(so) ==
452 INP_SOCKAF(t->inp_socket))
453#endif
454 return (EADDRINUSE);
455 }
456 }
457 }
458 if (*lportp != 0)
459 lport = *lportp;
460 if (lport == 0) {
461 u_short first, last, aux;
462 int count;
463
464 if (inp->inp_flags & INP_HIGHPORT) {
465 first = V_ipport_hifirstauto; /* sysctl */
466 last = V_ipport_hilastauto;
467 lastport = &pcbinfo->ipi_lasthi;
468 } else if (inp->inp_flags & INP_LOWPORT) {
469 error = priv_check_cred(cred,
470 PRIV_NETINET_RESERVEDPORT, 0);
471 if (error)
472 return error;
473 first = V_ipport_lowfirstauto; /* 1023 */
474 last = V_ipport_lowlastauto; /* 600 */
475 lastport = &pcbinfo->ipi_lastlow;
476 } else {
477 first = V_ipport_firstauto; /* sysctl */
478 last = V_ipport_lastauto;
479 lastport = &pcbinfo->ipi_lastport;
480 }
481 /*
482 * For UDP, use random port allocation as long as the user
483 * allows it. For TCP (and as of yet unknown) connections,
484 * use random port allocation only if the user allows it AND
485 * ipport_tick() allows it.
486 */
487 if (V_ipport_randomized &&
488 (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
489 dorandom = 1;
490 else
491 dorandom = 0;
492 /*
493 * It makes no sense to do random port allocation if
494 * we have the only port available.
495 */
496 if (first == last)
497 dorandom = 0;
498 /* Make sure to not include UDP packets in the count. */
499 if (pcbinfo != &V_udbinfo)
500 V_ipport_tcpallocs++;
501 /*
502 * Instead of having two loops further down counting up or down
503 * make sure that first is always <= last and go with only one
504 * code path implementing all logic.
505 */
506 if (first > last) {
507 aux = first;
508 first = last;
509 last = aux;
510 }
511
512 if (dorandom)
513 *lastport = first +
514 (arc4random() % (last - first));
515
516 count = last - first;
517
518 do {
519 if (count-- < 0) /* completely used? */
520 return (EADDRNOTAVAIL);
521 ++*lastport;
522 if (*lastport < first || *lastport > last)
523 *lastport = first;
524 lport = htons(*lastport);
525 } while (in_pcblookup_local(pcbinfo, laddr,
526 lport, wild, cred));
527 }
528 *laddrp = laddr.s_addr;
529 *lportp = lport;
530 return (0);
531}
532
533/*
534 * Connect from a socket to a specified address.
535 * Both address and port must be specified in argument sin.
536 * If don't have a local address for this socket yet,
537 * then pick one.
538 */
539int
540in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
541{
542 u_short lport, fport;
543 in_addr_t laddr, faddr;
544 int anonport, error;
545
546 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
547 INP_WLOCK_ASSERT(inp);
548
549 lport = inp->inp_lport;
550 laddr = inp->inp_laddr.s_addr;
551 anonport = (lport == 0);
552 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
553 NULL, cred);
554 if (error)
555 return (error);
556
557 /* Do the initial binding of the local address if required. */
558 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
559 inp->inp_lport = lport;
560 inp->inp_laddr.s_addr = laddr;
561 if (in_pcbinshash(inp) != 0) {
562 inp->inp_laddr.s_addr = INADDR_ANY;
563 inp->inp_lport = 0;
564 return (EAGAIN);
565 }
566 }
567
568 /* Commit the remaining changes. */
569 inp->inp_lport = lport;
570 inp->inp_laddr.s_addr = laddr;
571 inp->inp_faddr.s_addr = faddr;
572 inp->inp_fport = fport;
573 in_pcbrehash(inp);
574
575 if (anonport)
576 inp->inp_flags |= INP_ANONPORT;
577 return (0);
578}
579
580/*
581 * Do proper source address selection on an unbound socket in case
582 * of connect. Take jails into account as well.
583 */
584static int
585in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
586 struct ucred *cred)
587{
588 struct ifaddr *ifa;
589 struct sockaddr *sa;
590 struct sockaddr_in *sin;
591 struct route sro;
592 int error;
593
594 KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
595
596 /*
597 * Bypass source address selection and use the primary jail IP
598 * if requested.
599 */
600 if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
601 return (0);
602
603 error = 0;
604 bzero(&sro, sizeof(sro));
605
606 sin = (struct sockaddr_in *)&sro.ro_dst;
607 sin->sin_family = AF_INET;
608 sin->sin_len = sizeof(struct sockaddr_in);
609 sin->sin_addr.s_addr = faddr->s_addr;
610
611 /*
612 * If route is known our src addr is taken from the i/f,
613 * else punt.
614 *
615 * Find out route to destination.
616 */
617 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
618 in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
619
620 /*
621 * If we found a route, use the address corresponding to
622 * the outgoing interface.
623 *
624 * Otherwise assume faddr is reachable on a directly connected
625 * network and try to find a corresponding interface to take
626 * the source address from.
627 */
628 if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
629 struct in_ifaddr *ia;
630 struct ifnet *ifp;
631
632 ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
633 if (ia == NULL)
634 ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0));
635 if (ia == NULL) {
636 error = ENETUNREACH;
637 goto done;
638 }
639
640 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
641 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
642 ifa_free(&ia->ia_ifa);
643 goto done;
644 }
645
646 ifp = ia->ia_ifp;
647 ifa_free(&ia->ia_ifa);
648 ia = NULL;
649 IF_ADDR_LOCK(ifp);
650 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
651
652 sa = ifa->ifa_addr;
653 if (sa->sa_family != AF_INET)
654 continue;
655 sin = (struct sockaddr_in *)sa;
656 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
657 ia = (struct in_ifaddr *)ifa;
658 break;
659 }
660 }
661 if (ia != NULL) {
662 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
663 IF_ADDR_UNLOCK(ifp);
664 goto done;
665 }
666 IF_ADDR_UNLOCK(ifp);
667
668 /* 3. As a last resort return the 'default' jail address. */
669 error = prison_get_ip4(cred, laddr);
670 goto done;
671 }
672
673 /*
674 * If the outgoing interface on the route found is not
675 * a loopback interface, use the address from that interface.
676 * In case of jails do those three steps:
677 * 1. check if the interface address belongs to the jail. If so use it.
678 * 2. check if we have any address on the outgoing interface
679 * belonging to this jail. If so use it.
680 * 3. as a last resort return the 'default' jail address.
681 */
682 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
683 struct in_ifaddr *ia;
684 struct ifnet *ifp;
685
686 /* If not jailed, use the default returned. */
687 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
688 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
689 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
690 goto done;
691 }
692
693 /* Jailed. */
694 /* 1. Check if the iface address belongs to the jail. */
695 sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
696 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
697 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
698 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
699 goto done;
700 }
701
702 /*
703 * 2. Check if we have any address on the outgoing interface
704 * belonging to this jail.
705 */
706 ia = NULL;
707 ifp = sro.ro_rt->rt_ifp;
708 IF_ADDR_LOCK(ifp);
709 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
710 sa = ifa->ifa_addr;
711 if (sa->sa_family != AF_INET)
712 continue;
713 sin = (struct sockaddr_in *)sa;
714 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
715 ia = (struct in_ifaddr *)ifa;
716 break;
717 }
718 }
719 if (ia != NULL) {
720 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
721 IF_ADDR_UNLOCK(ifp);
722 goto done;
723 }
724 IF_ADDR_UNLOCK(ifp);
725
726 /* 3. As a last resort return the 'default' jail address. */
727 error = prison_get_ip4(cred, laddr);
728 goto done;
729 }
730
731 /*
732 * The outgoing interface is marked with 'loopback net', so a route
733 * to ourselves is here.
734 * Try to find the interface of the destination address and then
735 * take the address from there. That interface is not necessarily
736 * a loopback interface.
737 * In case of jails, check that it is an address of the jail
738 * and if we cannot find, fall back to the 'default' jail address.
739 */
740 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
741 struct sockaddr_in sain;
742 struct in_ifaddr *ia;
743
744 bzero(&sain, sizeof(struct sockaddr_in));
745 sain.sin_family = AF_INET;
746 sain.sin_len = sizeof(struct sockaddr_in);
747 sain.sin_addr.s_addr = faddr->s_addr;
748
749 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
750 if (ia == NULL)
751 ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
752 if (ia == NULL)
753 ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
754
755 if (cred == NULL || !prison_flag(cred, PR_IP4)) {
756 if (ia == NULL) {
757 error = ENETUNREACH;
758 goto done;
759 }
760 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
761 ifa_free(&ia->ia_ifa);
762 goto done;
763 }
764
765 /* Jailed. */
766 if (ia != NULL) {
767 struct ifnet *ifp;
768
769 ifp = ia->ia_ifp;
770 ifa_free(&ia->ia_ifa);
771 ia = NULL;
772 IF_ADDR_LOCK(ifp);
773 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
774
775 sa = ifa->ifa_addr;
776 if (sa->sa_family != AF_INET)
777 continue;
778 sin = (struct sockaddr_in *)sa;
779 if (prison_check_ip4(cred,
780 &sin->sin_addr) == 0) {
781 ia = (struct in_ifaddr *)ifa;
782 break;
783 }
784 }
785 if (ia != NULL) {
786 laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
787 IF_ADDR_UNLOCK(ifp);
788 goto done;
789 }
790 IF_ADDR_UNLOCK(ifp);
791 }
792
793 /* 3. As a last resort return the 'default' jail address. */
794 error = prison_get_ip4(cred, laddr);
795 goto done;
796 }
797
798done:
799 if (sro.ro_rt != NULL)
800 RTFREE(sro.ro_rt);
801 return (error);
802}
803
/*
 * Set up for a connect from a socket to the specified address.
 * On entry, *laddrp and *lportp should contain the current local
 * address and port for the PCB; these are updated to the values
 * that should be placed in inp_laddr and inp_lport to complete
 * the connect.
 *
 * On success, *faddrp and *fportp will be set to the remote address
 * and port. These are not updated in the error case.
 *
 * If the operation fails because the connection already exists,
 * *oinpp will be set to the PCB of that connection so that the
 * caller can decide to override it. In all other cases, *oinpp
 * is set to NULL.  oinpp itself may be NULL if the caller is not
 * interested.
 *
 * Returns 0 on success; EINVAL for a wrongly sized sockaddr, EAFNOSUPPORT
 * for a non-AF_INET family, EADDRNOTAVAIL for a zero destination port or a
 * multicast interface without an address, EADDRINUSE for an existing
 * connection, or an error propagated from prison_get_ip4(), in_pcbladdr()
 * or in_pcbbind_setup().
 */
int
in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
    in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
    struct inpcb **oinpp, struct ucred *cred)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
	struct in_ifaddr *ia;
	struct inpcb *oinp;
	struct in_addr laddr, faddr;
	u_short lport, fport;
	int error;

	/*
	 * Because a global state change doesn't actually occur here, a read
	 * lock is sufficient.
	 */
	INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
	INP_LOCK_ASSERT(inp);

	if (oinpp != NULL)
		*oinpp = NULL;
	if (nam->sa_len != sizeof (*sin))
		return (EINVAL);
	if (sin->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (sin->sin_port == 0)
		return (EADDRNOTAVAIL);
	/* Work on local copies; outputs are only written on success. */
	laddr.s_addr = *laddrp;
	lport = *lportp;
	faddr = sin->sin_addr;
	fport = sin->sin_port;

	if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
		/*
		 * If the destination address is INADDR_ANY,
		 * use the primary local address.
		 * If the supplied address is INADDR_BROADCAST,
		 * and the primary interface supports broadcast,
		 * choose the broadcast address for that interface.
		 */
		if (faddr.s_addr == INADDR_ANY) {
			IN_IFADDR_RLOCK();
			faddr =
			    IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
			IN_IFADDR_RUNLOCK();
			if (cred != NULL &&
			    (error = prison_get_ip4(cred, &faddr)) != 0)
				return (error);
		} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
			IN_IFADDR_RLOCK();
			if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
			    IFF_BROADCAST)
				faddr = satosin(&TAILQ_FIRST(
				    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
			IN_IFADDR_RUNLOCK();
		}
	}
	if (laddr.s_addr == INADDR_ANY) {
		/*
		 * If the destination address is multicast and an outgoing
		 * interface has been set as a multicast option, use the
		 * address of that interface as our source address.
		 */
		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
		    inp->inp_moptions != NULL) {
			struct ip_moptions *imo;
			struct ifnet *ifp;

			imo = inp->inp_moptions;
			if (imo->imo_multicast_ifp != NULL) {
				ifp = imo->imo_multicast_ifp;
				IN_IFADDR_RLOCK();
				/* First address configured on that ifp. */
				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
					if (ia->ia_ifp == ifp)
						break;
				if (ia == NULL) {
					IN_IFADDR_RUNLOCK();
					return (EADDRNOTAVAIL);
				}
				laddr = ia->ia_addr.sin_addr;
				IN_IFADDR_RUNLOCK();
			}
			/*
			 * NOTE(review): with multicast options present but
			 * no outgoing interface set, laddr stays INADDR_ANY
			 * here and in_pcbladdr() is not consulted.
			 */
		} else {
			error = in_pcbladdr(inp, &faddr, &laddr, cred);
			if (error)
				return (error);
		}
	}
	/* Refuse if a PCB with this exact 4-tuple already exists. */
	oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
	    0, NULL);
	if (oinp != NULL) {
		if (oinpp != NULL)
			*oinpp = oinp;
		return (EADDRINUSE);
	}
	/* No local port yet: have in_pcbbind_setup() pick one. */
	if (lport == 0) {
		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
		    cred);
		if (error)
			return (error);
	}
	*laddrp = laddr.s_addr;
	*lportp = lport;
	*faddrp = faddr.s_addr;
	*fportp = fport;
	return (0);
}
926
927void
928in_pcbdisconnect(struct inpcb *inp)
929{
930
931 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
932 INP_WLOCK_ASSERT(inp);
933
934 inp->inp_faddr.s_addr = INADDR_ANY;
935 inp->inp_fport = 0;
936 in_pcbrehash(inp);
937}
938
939/*
940 * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
941 * For most protocols, this will be invoked immediately prior to calling
942 * in_pcbfree(). However, with TCP the inpcb may significantly outlive the
943 * socket, in which case in_pcbfree() is deferred.
944 */
945void
946in_pcbdetach(struct inpcb *inp)
947{
948
949 KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
950
951 inp->inp_socket->so_pcb = NULL;
952 inp->inp_socket = NULL;
953}
954
/*
 * in_pcbfree_internal() frees an inpcb that has been detached from its
 * socket, and whose reference count has reached 0.  It will also remove the
 * inpcb from any global lists it might remain on.
 *
 * Called with the pcbinfo write lock and the inpcb write lock held; the
 * inpcb lock is released here immediately before the memory is returned to
 * the zone, so the caller must not touch inp afterwards.
 */
static void
in_pcbfree_internal(struct inpcb *inp)
{
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
	KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));

	INP_INFO_WLOCK_ASSERT(ipi);
	INP_WLOCK_ASSERT(inp);

#ifdef IPSEC
	if (inp->inp_sp != NULL)
		ipsec_delete_pcbpolicy(inp);
#endif /* IPSEC */
	/* Bump the generation count so the change is observable. */
	inp->inp_gencnt = ++ipi->ipi_gencnt;
	in_pcbremlists(inp);
#ifdef INET6
	if (inp->inp_vflag & INP_IPV6PROTO) {
		ip6_freepcbopts(inp->in6p_outputopts);
		if (inp->in6p_moptions != NULL)
			ip6_freemoptions(inp->in6p_moptions);
	}
#endif
	/* Release IPv4 options and multicast options, if any. */
	if (inp->inp_options)
		(void)m_free(inp->inp_options);
	if (inp->inp_moptions != NULL)
		inp_freemoptions(inp->inp_moptions);
	inp->inp_vflag = 0;
	/* Drop the credential reference taken in in_pcballoc(). */
	crfree(inp->inp_cred);

#ifdef MAC
	mac_inpcb_destroy(inp);
#endif
	INP_WUNLOCK(inp);
	uma_zfree(ipi->ipi_zone, inp);
}
997
998/*
999 * in_pcbref() bumps the reference count on an inpcb in order to maintain
1000 * stability of an inpcb pointer despite the inpcb lock being released. This
1001 * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
1002 * but where the inpcb lock is already held.
1003 *
1004 * While the inpcb will not be freed, releasing the inpcb lock means that the
1005 * connection's state may change, so the caller should be careful to
1006 * revalidate any cached state on reacquiring the lock. Drop the reference
1007 * using in_pcbrele().
1008 */
1009void
1010in_pcbref(struct inpcb *inp)
1011{
1012
1013 INP_WLOCK_ASSERT(inp);
1014
1015 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
1016
1017 inp->inp_refcount++;
1018}
1019
1020/*
1021 * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
1022 * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
1023 * return a flag indicating whether or not the inpcb remains valid. If it is
1024 * valid, we return with the inpcb lock held.
1025 */
1026int
1027in_pcbrele(struct inpcb *inp)
1028{
1029#ifdef INVARIANTS
1030 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1031#endif
1032
1033 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
1034
1035 INP_INFO_WLOCK_ASSERT(ipi);
1036 INP_WLOCK_ASSERT(inp);
1037
1038 inp->inp_refcount--;
1039 if (inp->inp_refcount > 0)
1040 return (0);
1041 in_pcbfree_internal(inp);
1042 return (1);
1043}
1044
1045/*
1046 * Unconditionally schedule an inpcb to be freed by decrementing its
1047 * reference count, which should occur only after the inpcb has been detached
1048 * from its socket. If another thread holds a temporary reference (acquired
1049 * using in_pcbref()) then the free is deferred until that reference is
1050 * released using in_pcbrele(), but the inpcb is still unlocked.
1051 */
1052void
1053in_pcbfree(struct inpcb *inp)
1054{
1055#ifdef INVARIANTS
1056 struct inpcbinfo *ipi = inp->inp_pcbinfo;
1057#endif
1058
1059 KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
1060 __func__));
1061
1062 INP_INFO_WLOCK_ASSERT(ipi);
1063 INP_WLOCK_ASSERT(inp);
1064
1065 if (!in_pcbrele(inp))
1066 INP_WUNLOCK(inp);
1067}
1068
1069/*
1070 * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
1071 * port reservation, and preventing it from being returned by inpcb lookups.
1072 *
1073 * It is used by TCP to mark an inpcb as unused and avoid future packet
1074 * delivery or event notification when a socket remains open but TCP has
1075 * closed. This might occur as a result of a shutdown()-initiated TCP close
1076 * or a RST on the wire, and allows the port binding to be reused while still
1077 * maintaining the invariant that so_pcb always points to a valid inpcb until
1078 * in_pcbdetach().
1079 *
1080 * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
1081 * lists, but can lead to confusing netstat output, as open sockets with
1082 * closed TCP connections will no longer appear to have their bound port
1083 * number. An explicit flag would be better, as it would allow us to leave
1084 * the port number intact after the connection is dropped.
1085 *
1086 * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
1087 * in_pcbnotifyall() and in_pcbpurgeif0()?
1088 */
1089void
1090in_pcbdrop(struct inpcb *inp)
1091{
1092
1093 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
1094 INP_WLOCK_ASSERT(inp);
1095
1096 inp->inp_flags |= INP_DROPPED;
1097 if (inp->inp_flags & INP_INHASHLIST) {
1098 struct inpcbport *phd = inp->inp_phd;
1099
1100 LIST_REMOVE(inp, inp_hash);
1101 LIST_REMOVE(inp, inp_portlist);
1102 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1103 LIST_REMOVE(phd, phd_hash);
1104 free(phd, M_PCB);
1105 }
1106 inp->inp_flags &= ~INP_INHASHLIST;
1107 }
1108}
1109
1110/*
1111 * Common routines to return the socket addresses associated with inpcbs.
1112 */
1113struct sockaddr *
1114in_sockaddr(in_port_t port, struct in_addr *addr_p)
1115{
1116 struct sockaddr_in *sin;
1117
1118 sin = malloc(sizeof *sin, M_SONAME,
1119 M_WAITOK | M_ZERO);
1120 sin->sin_family = AF_INET;
1121 sin->sin_len = sizeof(*sin);
1122 sin->sin_addr = *addr_p;
1123 sin->sin_port = port;
1124
1125 return (struct sockaddr *)sin;
1126}
1127
1128int
1129in_getsockaddr(struct socket *so, struct sockaddr **nam)
1130{
1131 struct inpcb *inp;
1132 struct in_addr addr;
1133 in_port_t port;
1134
1135 inp = sotoinpcb(so);
1136 KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
1137
1138 INP_RLOCK(inp);
1139 port = inp->inp_lport;
1140 addr = inp->inp_laddr;
1141 INP_RUNLOCK(inp);
1142
1143 *nam = in_sockaddr(port, &addr);
1144 return 0;
1145}
1146
1147int
1148in_getpeeraddr(struct socket *so, struct sockaddr **nam)
1149{
1150 struct inpcb *inp;
1151 struct in_addr addr;
1152 in_port_t port;
1153
1154 inp = sotoinpcb(so);
1155 KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
1156
1157 INP_RLOCK(inp);
1158 port = inp->inp_fport;
1159 addr = inp->inp_faddr;
1160 INP_RUNLOCK(inp);
1161
1162 *nam = in_sockaddr(port, &addr);
1163 return 0;
1164}
1165
1166void
1167in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
1168 struct inpcb *(*notify)(struct inpcb *, int))
1169{
1170 struct inpcb *inp, *inp_temp;
1171
1172 INP_INFO_WLOCK(pcbinfo);
1173 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
1174 INP_WLOCK(inp);
1175#ifdef INET6
1176 if ((inp->inp_vflag & INP_IPV4) == 0) {
1177 INP_WUNLOCK(inp);
1178 continue;
1179 }
1180#endif
1181 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1182 inp->inp_socket == NULL) {
1183 INP_WUNLOCK(inp);
1184 continue;
1185 }
1186 if ((*notify)(inp, errno))
1187 INP_WUNLOCK(inp);
1188 }
1189 INP_INFO_WUNLOCK(pcbinfo);
1190}
1191
/*
 * Purge references to interface ifp from the IPv4 multicast options of every
 * inpcb in pcbinfo: the outgoing multicast interface is unselected if it is
 * ifp, and group memberships joined through ifp are dropped.  The list is
 * walked under the pcbinfo read lock with each inpcb write-locked in turn.
 */
void
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
	struct inpcb *inp;
	struct ip_moptions *imo;
	int i, gap;

	INP_INFO_RLOCK(pcbinfo);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		INP_WLOCK(inp);
		imo = inp->inp_moptions;
		if ((inp->inp_vflag & INP_IPV4) &&
		    imo != NULL) {
			/*
			 * Unselect the outgoing interface if it is being
			 * detached.
			 */
			if (imo->imo_multicast_ifp == ifp)
				imo->imo_multicast_ifp = NULL;

			/*
			 * Drop multicast group membership if we joined
			 * through the interface being detached.
			 *
			 * 'gap' counts the entries removed so far; surviving
			 * entries are shifted down by that amount so the
			 * membership array stays dense.
			 */
			for (i = 0, gap = 0; i < imo->imo_num_memberships;
			    i++) {
				if (imo->imo_membership[i]->inm_ifp == ifp) {
					in_delmulti(imo->imo_membership[i]);
					gap++;
				} else if (gap != 0)
					imo->imo_membership[i - gap] =
					    imo->imo_membership[i];
			}
			imo->imo_num_memberships -= gap;
		}
		INP_WUNLOCK(inp);
	}
	INP_INFO_RUNLOCK(pcbinfo);
}
1231
/*
 * Lookup a PCB based on the local address and port.
 *
 * Parameters:
 *   pcbinfo    Protocol PCB database to search (must be locked).
 *   laddr      Local address to match.
 *   lport      Local port to match; compared directly against inp_lport.
 *   wild_okay  If zero, only an exact match on an unconnected PCB is
 *              returned.  If non-zero, a best-fit search is made over all
 *              PCBs using the port, preferring the fewest wildcard
 *              mismatches.
 *   cred       If non-NULL, only PCBs whose prison compares equal (via
 *              prison_equal_ip4()) to cred's prison are considered.
 *
 * Returns the matching inpcb, or NULL if there is none.
 *
 * INP_LOOKUP_MAPPED_PCB_COST is the extra wildcard cost charged to PCBs
 * that also carry INP_IPV6, so that IPv4-only PCBs are preferred.
 */
#define INP_LOOKUP_MAPPED_PCB_COST	3
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_short lport, int wild_okay, struct ucred *cred)
{
	struct inpcb *inp;
	/* Start above the worst possible score so any candidate wins. */
#ifdef INET6
	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
#else
	int matchwild = 3;
#endif
	int wildcard;

	INP_INFO_LOCK_ASSERT(pcbinfo);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
		    0, pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
			/* XXX inp locking */
			if ((inp->inp_vflag & INP_IPV4) == 0)
				continue;
#endif
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found?
				 */
				if (cred == NULL ||
				    prison_equal_ip4(cred->cr_prison,
					inp->inp_cred->cr_prison))
					return (inp);
			}
		}
		/*
		 * Not found.
		 */
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				/* Per-candidate mismatch score; lower wins. */
				wildcard = 0;
				if (cred != NULL &&
				    !prison_equal_ip4(inp->inp_cred->cr_prison,
					cred->cr_prison))
					continue;
#ifdef INET6
				/* XXX inp locking */
				if ((inp->inp_vflag & INP_IPV4) == 0)
					continue;
				/*
				 * We never select the PCB that has
				 * INP_IPV6 flag and is bound to :: if
				 * we have another PCB which is bound
				 * to 0.0.0.0.  If a PCB has the
				 * INP_IPV6 flag, then we set its cost
				 * higher than IPv4 only PCBs.
				 *
				 * Note that the case only happens
				 * when a socket is bound to ::, under
				 * the condition that the use of the
				 * mapped address is allowed.
				 */
				if ((inp->inp_vflag & INP_IPV6) != 0)
					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
#endif
				/* A connected PCB is a less exact match. */
				if (inp->inp_faddr.s_addr != INADDR_ANY)
					wildcard++;
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY)
						wildcard++;
					else if (inp->inp_laddr.s_addr != laddr.s_addr)
						continue;
				} else {
					if (laddr.s_addr != INADDR_ANY)
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					/* Exact match: cannot do better. */
					if (matchwild == 0)
						break;
				}
			}
		}
		return (match);
	}
}
#undef INP_LOOKUP_MAPPED_PCB_COST
1350
1351/*
1352 * Lookup PCB in hash list.
1353 */
1354struct inpcb *
1355in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1356 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1357 struct ifnet *ifp)
1358{
1359 struct inpcbhead *head;
1360 struct inpcb *inp, *tmpinp;
1361 u_short fport = fport_arg, lport = lport_arg;
1362
1363 INP_INFO_LOCK_ASSERT(pcbinfo);
1364
1365 /*
1366 * First look for an exact match.
1367 */
1368 tmpinp = NULL;
1369 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1370 pcbinfo->ipi_hashmask)];
1371 LIST_FOREACH(inp, head, inp_hash) {
1372#ifdef INET6
1373 /* XXX inp locking */
1374 if ((inp->inp_vflag & INP_IPV4) == 0)
1375 continue;
1376#endif
1377 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1378 inp->inp_laddr.s_addr == laddr.s_addr &&
1379 inp->inp_fport == fport &&
1380 inp->inp_lport == lport) {
1381 /*
1382 * XXX We should be able to directly return
1383 * the inp here, without any checks.
1384 * Well unless both bound with SO_REUSEPORT?
1385 */
1386 if (prison_flag(inp->inp_cred, PR_IP4))
1387 return (inp);
1388 if (tmpinp == NULL)
1389 tmpinp = inp;
1390 }
1391 }
1392 if (tmpinp != NULL)
1393 return (tmpinp);
1394
1395 /*
1396 * Then look for a wildcard match, if requested.
1397 */
1398 if (wildcard == INPLOOKUP_WILDCARD) {
1399 struct inpcb *local_wild = NULL, *local_exact = NULL;
1400#ifdef INET6
1401 struct inpcb *local_wild_mapped = NULL;
1402#endif
1403 struct inpcb *jail_wild = NULL;
1404 int injail;
1405
1406 /*
1407 * Order of socket selection - we always prefer jails.
1408 * 1. jailed, non-wild.
1409 * 2. jailed, wild.
1410 * 3. non-jailed, non-wild.
1411 * 4. non-jailed, wild.
1412 */
1413
1414 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1415 0, pcbinfo->ipi_hashmask)];
1416 LIST_FOREACH(inp, head, inp_hash) {
1417#ifdef INET6
1418 /* XXX inp locking */
1419 if ((inp->inp_vflag & INP_IPV4) == 0)
1420 continue;
1421#endif
1422 if (inp->inp_faddr.s_addr != INADDR_ANY ||
1423 inp->inp_lport != lport)
1424 continue;
1425
1426 /* XXX inp locking */
1427 if (ifp && ifp->if_type == IFT_FAITH &&
1428 (inp->inp_flags & INP_FAITH) == 0)
1429 continue;
1430
1431 injail = prison_flag(inp->inp_cred, PR_IP4);
1432 if (injail) {
1433 if (prison_check_ip4(inp->inp_cred,
1434 &laddr) != 0)
1435 continue;
1436 } else {
1437 if (local_exact != NULL)
1438 continue;
1439 }
1440
1441 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1442 if (injail)
1443 return (inp);
1444 else
1445 local_exact = inp;
1446 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1447#ifdef INET6
1448 /* XXX inp locking, NULL check */
1449 if (inp->inp_vflag & INP_IPV6PROTO)
1450 local_wild_mapped = inp;
1451 else
1452#endif /* INET6 */
1453 if (injail)
1454 jail_wild = inp;
1455 else
1456 local_wild = inp;
1457 }
1458 } /* LIST_FOREACH */
1459 if (jail_wild != NULL)
1460 return (jail_wild);
1461 if (local_exact != NULL)
1462 return (local_exact);
1463 if (local_wild != NULL)
1464 return (local_wild);
1465#ifdef INET6
1466 if (local_wild_mapped != NULL)
1467 return (local_wild_mapped);
1468#endif /* defined(INET6) */
1469 } /* if (wildcard == INPLOOKUP_WILDCARD) */
1470
1471 return (NULL);
1472}
1473
1474/*
1475 * Insert PCB onto various hash lists.
1476 */
1477int
1478in_pcbinshash(struct inpcb *inp)
1479{
1480 struct inpcbhead *pcbhash;
1481 struct inpcbporthead *pcbporthash;
1482 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1483 struct inpcbport *phd;
1484 u_int32_t hashkey_faddr;
1485
1486 INP_INFO_WLOCK_ASSERT(pcbinfo);
1487 INP_WLOCK_ASSERT(inp);
1488 KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
1489 ("in_pcbinshash: INP_INHASHLIST"));
1490
1491#ifdef INET6
1492 if (inp->inp_vflag & INP_IPV6)
1493 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1494 else
1495#endif /* INET6 */
1496 hashkey_faddr = inp->inp_faddr.s_addr;
1497
1498 pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1499 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1500
1501 pcbporthash = &pcbinfo->ipi_porthashbase[
1502 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
1503
1504 /*
1505 * Go through port list and look for a head for this lport.
1506 */
1507 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1508 if (phd->phd_port == inp->inp_lport)
1509 break;
1510 }
1511 /*
1512 * If none exists, malloc one and tack it on.
1513 */
1514 if (phd == NULL) {
1515 phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1516 if (phd == NULL) {
1517 return (ENOBUFS); /* XXX */
1518 }
1519 phd->phd_port = inp->inp_lport;
1520 LIST_INIT(&phd->phd_pcblist);
1521 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1522 }
1523 inp->inp_phd = phd;
1524 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1525 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1526 inp->inp_flags |= INP_INHASHLIST;
1527 return (0);
1528}
1529
1530/*
1531 * Move PCB to the proper hash bucket when { faddr, fport } have been
1532 * changed. NOTE: This does not handle the case of the lport changing (the
1533 * hashed port list would have to be updated as well), so the lport must
1534 * not change after in_pcbinshash() has been called.
1535 */
1536void
1537in_pcbrehash(struct inpcb *inp)
1538{
1539 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1540 struct inpcbhead *head;
1541 u_int32_t hashkey_faddr;
1542
1543 INP_INFO_WLOCK_ASSERT(pcbinfo);
1544 INP_WLOCK_ASSERT(inp);
1545 KASSERT(inp->inp_flags & INP_INHASHLIST,
1546 ("in_pcbrehash: !INP_INHASHLIST"));
1547
1548#ifdef INET6
1549 if (inp->inp_vflag & INP_IPV6)
1550 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1551 else
1552#endif /* INET6 */
1553 hashkey_faddr = inp->inp_faddr.s_addr;
1554
1555 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1556 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1557
1558 LIST_REMOVE(inp, inp_hash);
1559 LIST_INSERT_HEAD(head, inp, inp_hash);
1560}
1561
1562/*
1563 * Remove PCB from various lists.
1564 */
1565static void
1566in_pcbremlists(struct inpcb *inp)
1567{
1568 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1569
1570 INP_INFO_WLOCK_ASSERT(pcbinfo);
1571 INP_WLOCK_ASSERT(inp);
1572
1573 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1574 if (inp->inp_flags & INP_INHASHLIST) {
1575 struct inpcbport *phd = inp->inp_phd;
1576
1577 LIST_REMOVE(inp, inp_hash);
1578 LIST_REMOVE(inp, inp_portlist);
1579 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1580 LIST_REMOVE(phd, phd_hash);
1581 free(phd, M_PCB);
1582 }
1583 inp->inp_flags &= ~INP_INHASHLIST;
1584 }
1585 LIST_REMOVE(inp, inp_list);
1586 pcbinfo->ipi_count--;
1587}
1588
/*
 * A set label operation has occurred at the socket layer, propagate the
 * label change into the in_pcb for the socket.
 *
 * Compiles to an empty function unless MAC is defined.  The inp write
 * lock is taken first, then the socket lock, around the call to
 * mac_inpcb_sosetlabel().
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
#endif
}
1609
1610/*
1611 * ipport_tick runs once per second, determining if random port allocation
1612 * should be continued. If more than ipport_randomcps ports have been
1613 * allocated in the last second, then we return to sequential port
1614 * allocation. We return to random allocation only once we drop below
1615 * ipport_randomcps for at least ipport_randomtime seconds.
1616 */
1617void
1618ipport_tick(void *xtp)
1619{
1620 VNET_ITERATOR_DECL(vnet_iter);
1621
1622 VNET_LIST_RLOCK_NOSLEEP();
1623 VNET_FOREACH(vnet_iter) {
1624 CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
1625 if (V_ipport_tcpallocs <=
1626 V_ipport_tcplastcount + V_ipport_randomcps) {
1627 if (V_ipport_stoprandom > 0)
1628 V_ipport_stoprandom--;
1629 } else
1630 V_ipport_stoprandom = V_ipport_randomtime;
1631 V_ipport_tcplastcount = V_ipport_tcpallocs;
1632 CURVNET_RESTORE();
1633 }
1634 VNET_LIST_RUNLOCK_NOSLEEP();
1635 callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
1636}
1637
/*
 * Function form of the INP_WLOCK() macro: write-lock the given PCB.
 */
void
inp_wlock(struct inpcb *inp)
{
	INP_WLOCK(inp);
}
1644
/*
 * Function form of the INP_WUNLOCK() macro: drop the write lock on the
 * given PCB.
 */
void
inp_wunlock(struct inpcb *inp)
{
	INP_WUNLOCK(inp);
}
1651
/*
 * Function form of the INP_RLOCK() macro: read-lock the given PCB.
 */
void
inp_rlock(struct inpcb *inp)
{
	INP_RLOCK(inp);
}
1658
/*
 * Function form of the INP_RUNLOCK() macro: drop the read lock on the
 * given PCB.
 */
void
inp_runlock(struct inpcb *inp)
{
	INP_RUNLOCK(inp);
}
1665
1666#ifdef INVARIANTS
/*
 * Function form of INP_WLOCK_ASSERT().  Note that despite the generic
 * name, it is the write lock that is asserted.
 */
void
inp_lock_assert(struct inpcb *inp)
{
	INP_WLOCK_ASSERT(inp);
}
1673
/*
 * Function form of INP_UNLOCK_ASSERT() for the given PCB.
 */
void
inp_unlock_assert(struct inpcb *inp)
{
	INP_UNLOCK_ASSERT(inp);
}
1680#endif
1681
1682void
1683inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
1684{
1685 struct inpcb *inp;
1686
1687 INP_INFO_RLOCK(&V_tcbinfo);
1688 LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
1689 INP_WLOCK(inp);
1690 func(inp, arg);
1691 INP_WUNLOCK(inp);
1692 }
1693 INP_INFO_RUNLOCK(&V_tcbinfo);
1694}
1695
1696struct socket *
1697inp_inpcbtosocket(struct inpcb *inp)
1698{
1699
1700 INP_WLOCK_ASSERT(inp);
1701 return (inp->inp_socket);
1702}
1703
1704struct tcpcb *
1705inp_inpcbtotcpcb(struct inpcb *inp)
1706{
1707
1708 INP_WLOCK_ASSERT(inp);
1709 return ((struct tcpcb *)inp->inp_ppcb);
1710}
1711
1712int
1713inp_ip_tos_get(const struct inpcb *inp)
1714{
1715
1716 return (inp->inp_ip_tos);
1717}
1718
1719void
1720inp_ip_tos_set(struct inpcb *inp, int val)
1721{
1722
1723 inp->inp_ip_tos = val;
1724}
1725
1726void
1727inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
1728 uint32_t *faddr, uint16_t *fp)
1729{
1730
1731 INP_LOCK_ASSERT(inp);
1732 *laddr = inp->inp_laddr.s_addr;
1733 *faddr = inp->inp_faddr.s_addr;
1734 *lp = inp->inp_lport;
1735 *fp = inp->inp_fport;
1736}
1737
/*
 * Function form of the sotoinpcb() macro: return the PCB for a socket.
 */
struct inpcb *
so_sotoinpcb(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	return (inp);
}
1744
/*
 * Function form of the sototcpcb() macro: return the TCP control block
 * for a socket.
 */
struct tcpcb *
so_sototcpcb(struct socket *so)
{
	struct tcpcb *tp;

	tp = sototcpcb(so);
	return (tp);
}
1751
1752#ifdef DDB
/*
 * Emit `indent' indentation units on the debugger console.  Nothing is
 * printed for a zero or negative indent.
 */
static void
db_print_indent(int indent)
{
	while (indent > 0) {
		db_printf(" ");
		indent--;
	}
}
1761
1762static void
1763db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
1764{
1765 char faddr_str[48], laddr_str[48];
1766
1767 db_print_indent(indent);
1768 db_printf("%s at %p\n", name, inc);
1769
1770 indent += 2;
1771
1772#ifdef INET6
1773 if (inc->inc_flags & INC_ISIPV6) {
1774 /* IPv6. */
1775 ip6_sprintf(laddr_str, &inc->inc6_laddr);
1776 ip6_sprintf(faddr_str, &inc->inc6_faddr);
1777 } else {
1778#endif
1779 /* IPv4. */
1780 inet_ntoa_r(inc->inc_laddr, laddr_str);
1781 inet_ntoa_r(inc->inc_faddr, faddr_str);
1782#ifdef INET6
1783 }
1784#endif
1785 db_print_indent(indent);
1786 db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
1787 ntohs(inc->inc_lport));
1788 db_print_indent(indent);
1789 db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
1790 ntohs(inc->inc_fport));
1791}
1792
1793static void
1794db_print_inpflags(int inp_flags)
1795{
1796 int comma;
1797
1798 comma = 0;
1799 if (inp_flags & INP_RECVOPTS) {
1800 db_printf("%sINP_RECVOPTS", comma ? ", " : "");
1801 comma = 1;
1802 }
1803 if (inp_flags & INP_RECVRETOPTS) {
1804 db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
1805 comma = 1;
1806 }
1807 if (inp_flags & INP_RECVDSTADDR) {
1808 db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
1809 comma = 1;
1810 }
1811 if (inp_flags & INP_HDRINCL) {
1812 db_printf("%sINP_HDRINCL", comma ? ", " : "");
1813 comma = 1;
1814 }
1815 if (inp_flags & INP_HIGHPORT) {
1816 db_printf("%sINP_HIGHPORT", comma ? ", " : "");
1817 comma = 1;
1818 }
1819 if (inp_flags & INP_LOWPORT) {
1820 db_printf("%sINP_LOWPORT", comma ? ", " : "");
1821 comma = 1;
1822 }
1823 if (inp_flags & INP_ANONPORT) {
1824 db_printf("%sINP_ANONPORT", comma ? ", " : "");
1825 comma = 1;
1826 }
1827 if (inp_flags & INP_RECVIF) {
1828 db_printf("%sINP_RECVIF", comma ? ", " : "");
1829 comma = 1;
1830 }
1831 if (inp_flags & INP_MTUDISC) {
1832 db_printf("%sINP_MTUDISC", comma ? ", " : "");
1833 comma = 1;
1834 }
1835 if (inp_flags & INP_FAITH) {
1836 db_printf("%sINP_FAITH", comma ? ", " : "");
1837 comma = 1;
1838 }
1839 if (inp_flags & INP_RECVTTL) {
1840 db_printf("%sINP_RECVTTL", comma ? ", " : "");
1841 comma = 1;
1842 }
1843 if (inp_flags & INP_DONTFRAG) {
1844 db_printf("%sINP_DONTFRAG", comma ? ", " : "");
1845 comma = 1;
1846 }
1847 if (inp_flags & IN6P_IPV6_V6ONLY) {
1848 db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
1849 comma = 1;
1850 }
1851 if (inp_flags & IN6P_PKTINFO) {
1852 db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
1853 comma = 1;
1854 }
1855 if (inp_flags & IN6P_HOPLIMIT) {
1856 db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
1857 comma = 1;
1858 }
1859 if (inp_flags & IN6P_HOPOPTS) {
1860 db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
1861 comma = 1;
1862 }
1863 if (inp_flags & IN6P_DSTOPTS) {
1864 db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
1865 comma = 1;
1866 }
1867 if (inp_flags & IN6P_RTHDR) {
1868 db_printf("%sIN6P_RTHDR", comma ? ", " : "");
1869 comma = 1;
1870 }
1871 if (inp_flags & IN6P_RTHDRDSTOPTS) {
1872 db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
1873 comma = 1;
1874 }
1875 if (inp_flags & IN6P_TCLASS) {
1876 db_printf("%sIN6P_TCLASS", comma ? ", " : "");
1877 comma = 1;
1878 }
1879 if (inp_flags & IN6P_AUTOFLOWLABEL) {
1880 db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
1881 comma = 1;
1882 }
1883 if (inp_flags & INP_TIMEWAIT) {
1884 db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
1885 comma = 1;
1886 }
1887 if (inp_flags & INP_ONESBCAST) {
1888 db_printf("%sINP_ONESBCAST", comma ? ", " : "");
1889 comma = 1;
1890 }
1891 if (inp_flags & INP_DROPPED) {
1892 db_printf("%sINP_DROPPED", comma ? ", " : "");
1893 comma = 1;
1894 }
1895 if (inp_flags & INP_SOCKREF) {
1896 db_printf("%sINP_SOCKREF", comma ? ", " : "");
1897 comma = 1;
1898 }
1899 if (inp_flags & IN6P_RFC2292) {
1900 db_printf("%sIN6P_RFC2292", comma ? ", " : "");
1901 comma = 1;
1902 }
1903 if (inp_flags & IN6P_MTU) {
1904 db_printf("IN6P_MTU%s", comma ? ", " : "");
1905 comma = 1;
1906 }
1907}
1908
1909static void
1910db_print_inpvflag(u_char inp_vflag)
1911{
1912 int comma;
1913
1914 comma = 0;
1915 if (inp_vflag & INP_IPV4) {
1916 db_printf("%sINP_IPV4", comma ? ", " : "");
1917 comma = 1;
1918 }
1919 if (inp_vflag & INP_IPV6) {
1920 db_printf("%sINP_IPV6", comma ? ", " : "");
1921 comma = 1;
1922 }
1923 if (inp_vflag & INP_IPV6PROTO) {
1924 db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
1925 comma = 1;
1926 }
1927}
1928
1929static void
1930db_print_inpcb(struct inpcb *inp, const char *name, int indent)
1931{
1932
1933 db_print_indent(indent);
1934 db_printf("%s at %p\n", name, inp);
1935
1936 indent += 2;
1937
1938 db_print_indent(indent);
1939 db_printf("inp_flow: 0x%x\n", inp->inp_flow);
1940
1941 db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
1942
1943 db_print_indent(indent);
1944 db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
1945 inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
1946
1947 db_print_indent(indent);
1948 db_printf("inp_label: %p inp_flags: 0x%x (",
1949 inp->inp_label, inp->inp_flags);
1950 db_print_inpflags(inp->inp_flags);
1951 db_printf(")\n");
1952
1953 db_print_indent(indent);
1954 db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
1955 inp->inp_vflag);
1956 db_print_inpvflag(inp->inp_vflag);
1957 db_printf(")\n");
1958
1959 db_print_indent(indent);
1960 db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
1961 inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
1962
1963 db_print_indent(indent);
1964#ifdef INET6
1965 if (inp->inp_vflag & INP_IPV6) {
1966 db_printf("in6p_options: %p in6p_outputopts: %p "
1967 "in6p_moptions: %p\n", inp->in6p_options,
1968 inp->in6p_outputopts, inp->in6p_moptions);
1969 db_printf("in6p_icmp6filt: %p in6p_cksum %d "
1970 "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
1971 inp->in6p_hops);
1972 } else
1973#endif
1974 {
1975 db_printf("inp_ip_tos: %d inp_ip_options: %p "
1976 "inp_ip_moptions: %p\n", inp->inp_ip_tos,
1977 inp->inp_options, inp->inp_moptions);
1978 }
1979
1980 db_print_indent(indent);
1981 db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
1982 (uintmax_t)inp->inp_gencnt);
1983}
1984
1985DB_SHOW_COMMAND(inpcb, db_show_inpcb)
1986{
1987 struct inpcb *inp;
1988
1989 if (!have_addr) {
1990 db_printf("usage: show inpcb <addr>\n");
1991 return;
1992 }
1993 inp = (struct inpcb *)addr;
1994
1995 db_print_inpcb(inp, "inpcb", 0);
1996}
1997#endif