1/* $OpenBSD: in_pcb.h,v 1.157 2024/04/19 10:13:58 bluhm Exp $ */ 2/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ 3 4/* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33/* 34 * Copyright (c) 1982, 1986, 1990, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 62 */ 63 64#ifndef _NETINET_IN_PCB_H_ 65#define _NETINET_IN_PCB_H_ 66 67#include <sys/queue.h> 68#include <sys/mutex.h> 69#include <sys/rwlock.h> 70#include <sys/refcnt.h> 71#include <netinet/ip6.h> 72#include <netinet/icmp6.h> 73#include <netinet/ip_ipsp.h> 74 75#include <crypto/siphash.h> 76 77/* 78 * Locks used to protect struct members in this file: 79 * I immutable after creation 80 * N net lock 81 * t inpt_mtx pcb table mutex 82 * y inpt_notify pcb table rwlock for notify 83 * p inpcb_mtx pcb mutex 84 * L pf_inp_mtx link pf to inp mutex 85 * s so_lock socket rwlock 86 */ 87 88/* 89 * The pcb table mutex guarantees that all inpcb are consistent and 90 * that bind(2) and connect(2) create unique combinations of 91 * laddr/faddr/lport/fport/rtalbleid. This mutex is used to protect 92 * both address consistency and inpcb lookup during protocol input. 93 * All writes to inp_[lf]addr take table mutex. A per socket lock is 94 * needed, so that socket layer input have a consistent view at these 95 * values. 96 * 97 * In soconnect() and sosend() pcb mutex cannot be used. They eventually 98 * can call IP output which takes pf lock which is a sleeping lock. 99 * Also connect(2) does a route lookup for source selection. There 100 * route resolve happens, which creates a route, which sends a route 101 * message, which needs route lock, which is a rw-lock. 102 * 103 * On the other hand a mutex should be used in protocol input. It 104 * does not make sense to do a process switch per packet. Better spin 105 * until the packet can be processed. 106 * 107 * So there are three locks. Table mutex is for writing inp_[lf]addr/port 108 * and lookup, socket rw-lock to separate sockets in system calls, and 109 * pcb mutex to protect socket receive buffer. Changing inp_[lf]addr/port 110 * takes both per socket rw-lock and global table mutex. Protocol 111 * input only reads inp_[lf]addr/port during lookup and is safe. 
System 112 * call only reads when holding socket rw-lock and is safe. The socket 113 * layer needs pcb mutex only in soreceive(). 114 * 115 * Function pru_lock() grabs the pcb mutex and its existence indicates 116 * that a protocol is MP safe. Otherwise the exclusive net lock is 117 * used. 118 */ 119 120struct pf_state_key; 121 122union inpaddru { 123 struct in_addr iau_addr; 124 struct in6_addr iau_addr6; 125}; 126 127/* 128 * Common structure pcb for internet protocol implementation. 129 * Here are stored pointers to local and foreign host table 130 * entries, local and foreign socket numbers, and pointers 131 * up (to a socket structure) and down (to a protocol-specific) 132 * control block. 133 */ 134struct inpcb { 135 LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */ 136 LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */ 137 TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ 138 SIMPLEQ_ENTRY(inpcb) inp_notify; /* [y] notify or udp append */ 139 struct inpcbtable *inp_table; /* [I] inet queue/hash table */ 140 union inpaddru inp_faddru; /* [t] Foreign address. */ 141 union inpaddru inp_laddru; /* [t] Local address. */ 142#define inp_faddr inp_faddru.iau_addr 143#define inp_faddr6 inp_faddru.iau_addr6 144#define inp_laddr inp_laddru.iau_addr 145#define inp_laddr6 inp_laddru.iau_addr6 146 u_int16_t inp_fport; /* [t] foreign port */ 147 u_int16_t inp_lport; /* [t] local port */ 148 struct socket *inp_socket; /* [I] back pointer to socket */ 149 caddr_t inp_ppcb; /* pointer to per-protocol pcb */ 150 struct route inp_route; /* cached route */ 151 struct refcnt inp_refcnt; /* refcount PCB, delay memory free */ 152 struct mutex inp_mtx; /* protect PCB and socket members */ 153 int inp_flags; /* generic IP/datagram flags */ 154 union { /* Header prototype. 
*/ 155 struct ip hu_ip; 156 struct ip6_hdr hu_ipv6; 157 } inp_hu; 158#define inp_ip inp_hu.hu_ip 159#define inp_ipv6 inp_hu.hu_ipv6 160 union { 161 struct mbuf *inp_options; /* IPv4 options */ 162 struct ip6_pktopts *inp_outputopts6; /* IPv6 options */ 163 }; 164 int inp_hops; 165 union { 166 struct ip_moptions *mou_mo; 167 struct ip6_moptions *mou_mo6; 168 } inp_mou; 169#define inp_moptions inp_mou.mou_mo /* [N] IPv4 multicast options */ 170#define inp_moptions6 inp_mou.mou_mo6 /* [N] IPv6 multicast options */ 171 struct ipsec_level inp_seclevel; /* [N] IPsec level of socket */ 172 u_char inp_ip_minttl; /* minimum TTL or drop */ 173#define inp_ip6_minhlim inp_ip_minttl /* minimum Hop Limit or drop */ 174#define inp_flowinfo inp_hu.hu_ipv6.ip6_flow 175 176 int inp_cksum6; 177 struct icmp6_filter *inp_icmp6filt; 178 struct pf_state_key *inp_pf_sk; /* [L] */ 179 struct mbuf *(*inp_upcall)(void *, struct mbuf *, 180 struct ip *, struct ip6_hdr *, void *, int); 181 void *inp_upcall_arg; 182 u_int inp_rtableid; /* [t] */ 183 int inp_pipex; /* pipex indication */ 184 uint16_t inp_flowid; /* [s] */ 185}; 186 187LIST_HEAD(inpcbhead, inpcb); 188 189struct inpcbtable { 190 struct mutex inpt_mtx; /* protect queue and hash */ 191 struct rwlock inpt_notify; /* protect inp_notify list */ 192 TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */ 193 struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */ 194 struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */ 195 SIPHASH_KEY inpt_key, inpt_lkey; /* [I] secrets for hashes */ 196 u_long inpt_mask, inpt_lmask; /* [t] hash masks */ 197 int inpt_count, inpt_size; /* [t] queue count, hash size */ 198}; 199 200/* flags in inp_flags: */ 201#define INP_RECVOPTS 0x001 /* receive incoming IP options */ 202#define INP_RECVRETOPTS 0x002 /* receive IP options for reply */ 203#define INP_RECVDSTADDR 0x004 /* receive IP dst address */ 204 205#define INP_RXDSTOPTS INP_RECVOPTS 206#define INP_RXHOPOPTS INP_RECVRETOPTS 
/*
 * NOTE(review): INP_RXSRCRT and INP_HOPLIMIT carry the same numeric
 * values as INP_HIGHPORT (0x010) and INP_LOWPORT (0x020) defined just
 * below.  Whether that overlap is intentional is not visible from this
 * header -- confirm before testing either pair on the same inp_flags word.
 */
#define	INP_RXINFO	INP_RECVDSTADDR
#define	INP_RXSRCRT	0x010
#define	INP_HOPLIMIT	0x020

#define	INP_HDRINCL	0x008	/* user supplies entire IP header */
#define	INP_HIGHPORT	0x010	/* user wants "high" port binding */
#define	INP_LOWPORT	0x020	/* user wants "low" port binding */
#define	INP_RECVIF	0x080	/* receive incoming interface */
#define	INP_RECVTTL	0x040	/* receive incoming IP TTL */
#define	INP_RECVDSTPORT	0x200	/* receive IP dst addr before rdr */
#define	INP_RECVRTABLE	0x400	/* receive routing table */
#define	INP_IPSECFLOWINFO 0x800	/* receive IPsec flow info */

/* Union of the inp_flags bits listed in this mask. */
#define	INP_CONTROLOPTS	(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR| \
	    INP_RXSRCRT|INP_HOPLIMIT|INP_RECVIF|INP_RECVTTL|INP_RECVDSTPORT| \
	    INP_RECVRTABLE)

/*
 * These flags' values should be determined by either the transport
 * protocol at PRU_BIND, PRU_LISTEN, PRU_CONNECT, etc, or by in_pcb*().
 */
#define	INP_IPV6	0x100	/* socket, proto, domain, family is PF_INET6 */

/*
 * Flags in inp_flags for IPV6
 */
#define IN6P_HIGHPORT		INP_HIGHPORT	/* user wants "high" port */
#define IN6P_LOWPORT		INP_LOWPORT	/* user wants "low" port */
#define IN6P_RECVDSTPORT	INP_RECVDSTPORT	/* receive IP dst addr before rdr */
#define IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
#define IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
#define IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
#define IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
#define IN6P_RTHDR		0x100000 /* receive routing header */
#define IN6P_TCLASS		0x400000 /* receive traffic class value */
#define IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */

#define IN6P_ANONPORT		0x4000000 /* port chosen for user */
#define IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU		0x80000000 /* receive path MTU */

#define IN6P_MINMTU		0x20000000 /* use minimum MTU */

/* Union of the IPv6 inp_flags bits listed in this mask. */
#define IN6P_CONTROLOPTS	(IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
				 IN6P_DSTOPTS|IN6P_RTHDR|\
				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
				 IN6P_MTU|IN6P_RECVDSTPORT)

#define	INPLOOKUP_WILDCARD	1
#define	INPLOOKUP_SETLOCAL	2
#define	INPLOOKUP_IPV6		4

/* Cast a socket's so_pcb pointer to the inpcb it refers to. */
#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)

/*
 * macros for handling bitmap of ports not to allocate dynamically
 *
 * The map is an array of u_int32_t words with one bit per port number
 * (65536 bits in total).  DP_SET, DP_CLR and DP_ISSET expand their
 * port argument twice, so it must be free of side effects.
 */
#define	DP_MAPBITS	(sizeof(u_int32_t) * NBBY)
#define	DP_MAPSIZE	(howmany(65536, DP_MAPBITS))
#define	DP_SET(m, p)	((m)[(p) / DP_MAPBITS] |= (1U << ((p) % DP_MAPBITS)))
#define	DP_CLR(m, p)	((m)[(p) / DP_MAPBITS] &= ~(1U << ((p) % DP_MAPBITS)))
#define	DP_ISSET(m, p)	((m)[(p) / DP_MAPBITS] & (1U << ((p) % DP_MAPBITS)))

/*
 * default values for baddynamicports [see ip_init()]
 * Each list is terminated by a 0 entry.
 */
#define	DEFBADDYNAMICPORTS_TCP	{ \
	587, 749, 750, 751, 853, 871, 2049, \
	6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, \
	0 }
#define	DEFBADDYNAMICPORTS_UDP	{ 623, 664, 749, 750, 751, 2049, \
	3784, 3785, 7784, /* BFD/S-BFD ports */ \
	0 }

#define	DEFROOTONLYPORTS_TCP	{ \
	2049, \
	0 }
#define	DEFROOTONLYPORTS_UDP	{ \
	2049, \
	0 }

/* One port bitmap per protocol, sized to cover every 16-bit port. */
struct baddynamicports {
	u_int32_t tcp[DP_MAPSIZE];
	u_int32_t udp[DP_MAPSIZE];
};

#ifdef _KERNEL

#define IN_PCBLOCK_HOLD	1
#define IN_PCBLOCK_GRAB	2

extern struct inpcbtable rawcbtable, rawin6pcbtable;
extern struct baddynamicports baddynamicports;
extern struct baddynamicports rootonlyports;
extern int in_pcbnotifymiss;

void	 in_init(void);
void	 in_losing(struct inpcb *);
int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *,
	    struct proc *);
int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
	    struct proc *);
int	 in_pcbconnect(struct inpcb *, struct mbuf *);
void	 in_pcbdetach(struct inpcb *);
struct inpcb *
	 in_pcbref(struct inpcb *);
void	 in_pcbunref(struct inpcb *);
void	 in_pcbdisconnect(struct inpcb *);
struct inpcb *
	 in_pcblookup(struct inpcbtable *, struct in_addr,
	    u_int, struct in_addr, u_int, u_int);
struct inpcb *
	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
	    struct mbuf *, u_int);
#ifdef INET6
uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *,
	    u_short, const struct in6_addr *, u_short);
struct inpcb *
	 in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
	    u_int, const struct in6_addr *, u_int, u_int);
struct inpcb *
	 in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
	    struct mbuf *, u_int);
int	 in6_pcbaddrisavail_lock(const struct inpcb *, struct sockaddr_in6 *,
	    int, struct proc *, int);
int	 in6_pcbaddrisavail(const struct inpcb *, struct sockaddr_in6 *, int,
	    struct proc *);
int	 in6_pcbconnect(struct inpcb *, struct mbuf *);
void	 in6_setsockaddr(struct inpcb *, struct mbuf *);
void	 in6_setpeeraddr(struct inpcb *, struct mbuf *);
int	 in6_sockaddr(struct socket *, struct mbuf *);
int	 in6_peeraddr(struct socket *, struct mbuf *);
#endif /* INET6 */
void	 in_pcbinit(struct inpcbtable *, int);
struct inpcb *
	 in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
	    u_int, int);
void	 in_pcbnotifyall(struct inpcbtable *, const struct sockaddr_in *,
	    u_int, int, void (*)(struct inpcb *, int));
void	 in_pcbrehash(struct inpcb *);
void	 in_rtchange(struct inpcb *, int);
void	 in_setpeeraddr(struct inpcb *, struct mbuf *);
void	 in_setsockaddr(struct inpcb *, struct mbuf *);
int	 in_sockaddr(struct socket *, struct mbuf *);
int	 in_peeraddr(struct socket *, struct mbuf *);
int	 in_baddynamic(u_int16_t, u_int16_t);
int	 in_rootonly(u_int16_t, u_int16_t);
int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
struct rtentry *
	in_pcbrtentry(struct inpcb *);

/* INET6 stuff */
struct rtentry *
	in6_pcbrtentry(struct inpcb *);
void	in6_pcbnotify(struct inpcbtable *, const struct sockaddr_in6 *,
	u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
	void (*)(struct inpcb *, int));
int	in6_selecthlim(const struct inpcb *);
int	in_pcbset_rtableid(struct inpcb *, u_int);
void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
void	in_pcbunset_faddr(struct inpcb *);
void	in_pcbunset_laddr(struct inpcb *);

#endif /* _KERNEL */
#endif /* _NETINET_IN_PCB_H_ */