in_pcb.h revision 180536
1/*- 2 * Copyright (c) 1982, 1986, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 30 * $FreeBSD: head/sys/netinet/in_pcb.h 180536 2008-07-15 15:38:47Z rwatson $ 31 */ 32 33#ifndef _NETINET_IN_PCB_H_ 34#define _NETINET_IN_PCB_H_ 35 36#include <sys/queue.h> 37#include <sys/_lock.h> 38#include <sys/_mutex.h> 39#include <sys/_rwlock.h> 40 41#include <net/route.h> 42 43#ifdef _KERNEL 44#include <sys/rwlock.h> 45#endif 46 47#define in6pcb inpcb /* for KAME src sync over BSD*'s */ 48#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ 49struct inpcbpolicy; 50 51/* 52 * Struct inpcb is the ommon structure pcb for the Internet Protocol 53 * implementation. 54 * 55 * Pointers to local and foreign host table entries, local and foreign socket 56 * numbers, and pointers up (to a socket structure) and down (to a 57 * protocol-specific control block) are stored here. 58 */ 59LIST_HEAD(inpcbhead, inpcb); 60LIST_HEAD(inpcbporthead, inpcbport); 61typedef u_quad_t inp_gen_t; 62 63/* 64 * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. 65 * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing 66 * the following structure. 67 */ 68struct in_addr_4in6 { 69 u_int32_t ia46_pad32[3]; 70 struct in_addr ia46_addr4; 71}; 72 73/* 74 * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has 75 * some extra padding to accomplish this. 76 */ 77struct in_endpoints { 78 u_int16_t ie_fport; /* foreign port */ 79 u_int16_t ie_lport; /* local port */ 80 /* protocol dependent part, local and foreign addr */ 81 union { 82 /* foreign host table entry */ 83 struct in_addr_4in6 ie46_foreign; 84 struct in6_addr ie6_foreign; 85 } ie_dependfaddr; 86 union { 87 /* local host table entry */ 88 struct in_addr_4in6 ie46_local; 89 struct in6_addr ie6_local; 90 } ie_dependladdr; 91#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 92#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 93#define ie6_faddr ie_dependfaddr.ie6_foreign 94#define ie6_laddr ie_dependladdr.ie6_local 95}; 96 97/* 98 * XXX The defines for inc_* are hacks and should be changed to direct 99 * references. 100 */ 101struct in_conninfo { 102 u_int8_t inc_flags; 103 u_int8_t inc_len; 104 u_int16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */ 105 /* protocol dependent part */ 106 struct in_endpoints inc_ie; 107}; 108#define inc_isipv6 inc_flags /* temp compatability */ 109#define inc_fport inc_ie.ie_fport 110#define inc_lport inc_ie.ie_lport 111#define inc_faddr inc_ie.ie_faddr 112#define inc_laddr inc_ie.ie_laddr 113#define inc6_faddr inc_ie.ie6_faddr 114#define inc6_laddr inc_ie.ie6_laddr 115 116struct icmp6_filter; 117 118/*- 119 * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 120 * and IPv6 sockets. In the case of TCP, further per-connection state is 121 * hung off of inp_ppcb most of the time. Almost all fields of struct inpcb 122 * are static after creation or protected by a per-inpcb rwlock, inp_lock. A 123 * few fields also require the global pcbinfo lock for the inpcb to be held, 124 * when modified, such as the global connection lists and hashes, as well as 125 * binding information (which affects which hash a connection is on). This 126 * model means that connections can be looked up without holding the 127 * per-connection lock, which is important for performance when attempting to 128 * find the connection for a packet given its IP and port tuple. Writing to 129 * these fields that write locks be held on both the inpcb and global locks. 130 * 131 * Key: 132 * (c) - Constant after initialization 133 * (i) - Protected by the inpcb lock 134 * (p) - Protected by the pcbinfo lock for the inpcb 135 * (s) - Protected by another subsystem's locks 136 * (x) - Undefined locking 137 * 138 * A few other notes: 139 * 140 * When a read lock is held, stability of the field is guaranteed; to write 141 * to a field, a write lock must generally be held. 142 * 143 * netinet/netinet6-layer code should not assume that the inp_socket pointer 144 * is safe to dereference without inp_lock being held, even for protocols 145 * other than TCP (where the inpcb persists during TIMEWAIT even after the 146 * socket has been freed), or there may be close(2)-related races. 147 * 148 * The inp_vflag field is overloaded, and would otherwise ideally be (c). 149 */ 150struct inpcb { 151 LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ 152 LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ 153 void *inp_ppcb; /* (i) pointer to per-protocol pcb */ 154 struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ 155 struct socket *inp_socket; /* (i) back pointer to socket */ 156 157 u_int32_t inp_flow; /* (i) IPv6 flow information */ 158 int inp_flags; /* (i) generic IP/datagram flags */ 159 160 u_char inp_vflag; /* (i) IP version flag (v4/v6) */ 161#define INP_IPV4 0x1 162#define INP_IPV6 0x2 163#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ 164#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */ 165#define INP_ONESBCAST 0x10 /* send all-ones broadcast */ 166#define INP_DROPPED 0x20 /* protocol drop flag */ 167#define INP_SOCKREF 0x40 /* strong socket reference */ 168 u_char inp_ip_ttl; /* (i) time to live proto */ 169 u_char inp_ip_p; /* (c) protocol proto */ 170 u_char inp_ip_minttl; /* (i) minimum TTL or drop */ 171 uint32_t inp_ispare1; /* (x) connection id / queue id */ 172 void *inp_pspare[2]; /* (x) rtentry / general use */ 173 174 /* Local and foreign ports, local and foreign addr. */ 175 struct in_conninfo inp_inc; 176 177 /* (i/p) list for PCB's local port */ 178 struct label *inp_label; /* (i) MAC label */ 179 struct inpcbpolicy *inp_sp; /* (s) for IPSEC */ 180 181 /* Protocol-dependent part; options. */ 182 struct { 183 u_char inp4_ip_tos; /* (i) type of service proto */ 184 struct mbuf *inp4_options; /* (i) IP options */ 185 struct ip_moptions *inp4_moptions; /* (i) IP multicast options */ 186 } inp_depend4; 187#define inp_fport inp_inc.inc_fport 188#define inp_lport inp_inc.inc_lport 189#define inp_faddr inp_inc.inc_faddr 190#define inp_laddr inp_inc.inc_laddr 191#define inp_ip_tos inp_depend4.inp4_ip_tos 192#define inp_options inp_depend4.inp4_options 193#define inp_moptions inp_depend4.inp4_moptions 194 struct { 195 /* (i) IP options */ 196 struct mbuf *inp6_options; 197 /* (i) IP6 options for outgoing packets */ 198 struct ip6_pktopts *inp6_outputopts; 199 /* (i) IP multicast options */ 200 struct ip6_moptions *inp6_moptions; 201 /* (i) ICMPv6 code type filter */ 202 struct icmp6_filter *inp6_icmp6filt; 203 /* (i) IPV6_CHECKSUM setsockopt */ 204 int inp6_cksum; 205 short inp6_hops; 206 } inp_depend6; 207 LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */ 208 struct inpcbport *inp_phd; /* (i/p) head of this list */ 209#define inp_zero_size offsetof(struct inpcb, inp_gencnt) 210 inp_gen_t inp_gencnt; /* (c) generation count of this instance */ 211 struct rwlock inp_lock; 212 213#define in6p_faddr inp_inc.inc6_faddr 214#define in6p_laddr inp_inc.inc6_laddr 215#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ 216#define in6p_ip6_nxt inp_ip_p 217#define in6p_flowinfo inp_flow 218#define in6p_vflag inp_vflag 219#define in6p_options inp_depend6.inp6_options 220#define in6p_outputopts inp_depend6.inp6_outputopts 221#define in6p_moptions inp_depend6.inp6_moptions 222#define in6p_icmp6filt inp_depend6.inp6_icmp6filt 223#define in6p_cksum inp_depend6.inp6_cksum 224#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ 225#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ 226#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ 227#define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ 228#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ 229}; 230/* 231 * The range of the generation count, as used in this implementation, is 9e19. 232 * We would have to create 300 billion connections per second for this number 233 * to roll over in a year. This seems sufficiently unlikely that we simply 234 * don't concern ourselves with that possibility. 235 */ 236 237/* 238 * Interface exported to userland by various protocols which use inpcbs. Hack 239 * alert -- only define if struct xsocket is in scope. 240 */ 241#ifdef _SYS_SOCKETVAR_H_ 242struct xinpcb { 243 size_t xi_len; /* length of this structure */ 244 struct inpcb xi_inp; 245 struct xsocket xi_socket; 246 u_quad_t xi_alignment_hack; 247}; 248 249struct xinpgen { 250 size_t xig_len; /* length of this structure */ 251 u_int xig_count; /* number of PCBs at this time */ 252 inp_gen_t xig_gen; /* generation count at this time */ 253 so_gen_t xig_sogen; /* socket generation count at this time */ 254}; 255#endif /* _SYS_SOCKETVAR_H_ */ 256 257struct inpcbport { 258 LIST_ENTRY(inpcbport) phd_hash; 259 struct inpcbhead phd_pcblist; 260 u_short phd_port; 261}; 262 263/* 264 * Global data structure for each high-level protocol (UDP, TCP, ...) in both 265 * IPv4 and IPv6. Holds inpcb lists and information for managing them. 266 */ 267struct inpcbinfo { 268 /* 269 * Global list of inpcbs on the protocol. 270 */ 271 struct inpcbhead *ipi_listhead; 272 u_int ipi_count; 273 274 /* 275 * Global hash of inpcbs, hashed by local and foreign addresses and 276 * port numbers. 277 */ 278 struct inpcbhead *ipi_hashbase; 279 u_long ipi_hashmask; 280 281 /* 282 * Global hash of inpcbs, hashed by only local port number. 283 */ 284 struct inpcbporthead *ipi_porthashbase; 285 u_long ipi_porthashmask; 286 287 /* 288 * Fields associated with port lookup and allocation. 289 */ 290 u_short ipi_lastport; 291 u_short ipi_lastlow; 292 u_short ipi_lasthi; 293 294 /* 295 * UMA zone from which inpcbs are allocated for this protocol. 296 */ 297 struct uma_zone *ipi_zone; 298 299 /* 300 * Generation count--incremented each time a connection is allocated 301 * or freed. 302 */ 303 u_quad_t ipi_gencnt; 304 struct rwlock ipi_lock; 305 306 /* 307 * vimage 1 308 * general use 1 309 */ 310 void *ipi_pspare[2]; 311}; 312 313#define INP_LOCK_INIT(inp, d, t) \ 314 rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) 315#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) 316#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock) 317#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock) 318#define INP_TRY_RLOCK(inp) rw_try_rlock(&(inp)->inp_lock) 319#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock) 320#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock) 321#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock) 322#define INP_LOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_LOCKED) 323#define INP_RLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_RLOCKED) 324#define INP_WLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_WLOCKED) 325#define INP_UNLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_UNLOCKED) 326 327#ifdef _KERNEL 328/* 329 * These locking functions are for inpcb consumers outside of sys/netinet, 330 * more specifically, they were added for the benefit of TOE drivers. The 331 * macros are reserved for use by the stack. 332 */ 333void inp_wlock(struct inpcb *); 334void inp_wunlock(struct inpcb *); 335void inp_rlock(struct inpcb *); 336void inp_runlock(struct inpcb *); 337 338#ifdef INVARIANTS 339void inp_lock_assert(struct inpcb *); 340void inp_unlock_assert(struct inpcb *); 341#else 342static __inline void 343inp_lock_assert(struct inpcb *inp __unused) 344{ 345} 346 347static __inline void 348inp_unlock_assert(struct inpcb *inp __unused) 349{ 350} 351 352#endif 353#endif /* _KERNEL */ 354 355 356#define INP_INFO_LOCK_INIT(ipi, d) \ 357 rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE) 358#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock) 359#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock) 360#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock) 361#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock) 362#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock) 363#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock) 364#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock) 365#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED) 366#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED) 367#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED) 368#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED) 369 370#define INP_PCBHASH(faddr, lport, fport, mask) \ 371 (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) 372#define INP_PCBPORTHASH(lport, mask) \ 373 (ntohs((lport)) & (mask)) 374 375/* flags in inp_flags: */ 376#define INP_RECVOPTS 0x01 /* receive incoming IP options */ 377#define INP_RECVRETOPTS 0x02 /* receive IP options for reply */ 378#define INP_RECVDSTADDR 0x04 /* receive IP dst address */ 379#define INP_HDRINCL 0x08 /* user supplies entire IP header */ 380#define INP_HIGHPORT 0x10 /* user wants "high" port binding */ 381#define INP_LOWPORT 0x20 /* user wants "low" port binding */ 382#define INP_ANONPORT 0x40 /* port chosen for user */ 383#define INP_RECVIF 0x80 /* receive incoming interface */ 384#define INP_MTUDISC 0x100 /* user can do MTU discovery */ 385#define INP_FAITH 0x200 /* accept FAITH'ed connections */ 386#define INP_RECVTTL 0x400 /* receive incoming IP TTL */ 387#define INP_DONTFRAG 0x800 /* don't fragment packet */ 388 389#define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */ 390 391#define IN6P_PKTINFO 0x010000 /* receive IP6 dst and I/F */ 392#define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ 393#define IN6P_HOPOPTS 0x040000 /* receive hop-by-hop options */ 394#define IN6P_DSTOPTS 0x080000 /* receive dst options after rthdr */ 395#define IN6P_RTHDR 0x100000 /* receive routing header */ 396#define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */ 397#define IN6P_TCLASS 0x400000 /* receive traffic class value */ 398#define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ 399#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ 400#define IN6P_MTU 0x80000000 /* receive path MTU */ 401 402#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ 403 INP_RECVIF|INP_RECVTTL|\ 404 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ 405 IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ 406 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ 407 IN6P_MTU) 408#define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\ 409 IN6P_TCLASS|IN6P_AUTOFLOWLABEL) 410 411 /* for KAME src sync over BSD*'s */ 412#define IN6P_HIGHPORT INP_HIGHPORT 413#define IN6P_LOWPORT INP_LOWPORT 414#define IN6P_ANONPORT INP_ANONPORT 415#define IN6P_RECVIF INP_RECVIF 416#define IN6P_MTUDISC INP_MTUDISC 417#define IN6P_FAITH INP_FAITH 418#define IN6P_CONTROLOPTS INP_CONTROLOPTS 419 /* 420 * socket AF version is {newer than,or include} 421 * actual datagram AF version 422 */ 423 424#define INPLOOKUP_WILDCARD 1 425#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) 426#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ 427 428#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family 429 430#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) 431 432#ifdef _KERNEL 433extern int ipport_reservedhigh; 434extern int ipport_reservedlow; 435extern int ipport_lowfirstauto; 436extern int ipport_lowlastauto; 437extern int ipport_firstauto; 438extern int ipport_lastauto; 439extern int ipport_hifirstauto; 440extern int ipport_hilastauto; 441extern struct callout ipport_tick_callout; 442 443void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); 444int in_pcballoc(struct socket *, struct inpcbinfo *); 445int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); 446int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, 447 u_short *, struct ucred *); 448int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); 449int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, 450 u_short *, in_addr_t *, u_short *, struct inpcb **, 451 struct ucred *); 452void in_pcbdetach(struct inpcb *); 453void in_pcbdisconnect(struct inpcb *); 454void in_pcbdrop(struct inpcb *); 455void in_pcbfree(struct inpcb *); 456int in_pcbinshash(struct inpcb *); 457struct inpcb * 458 in_pcblookup_local(struct inpcbinfo *, 459 struct in_addr, u_short, int, struct ucred *); 460struct inpcb * 461 in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, 462 struct in_addr, u_int, int, struct ifnet *); 463void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, 464 int, struct inpcb *(*)(struct inpcb *, int)); 465void in_pcbrehash(struct inpcb *); 466void in_pcbsetsolabel(struct socket *so); 467int in_getpeeraddr(struct socket *so, struct sockaddr **nam); 468int in_getsockaddr(struct socket *so, struct sockaddr **nam); 469struct sockaddr * 470 in_sockaddr(in_port_t port, struct in_addr *addr); 471void in_pcbsosetlabel(struct socket *so); 472void in_pcbremlists(struct inpcb *inp); 473void ipport_tick(void *xtp); 474 475/* 476 * Debugging routines compiled in when DDB is present. 477 */ 478void db_print_inpcb(struct inpcb *inp, const char *name, int indent); 479 480#endif /* _KERNEL */ 481 482#endif /* !_NETINET_IN_PCB_H_ */ 483