in_pcb.h revision 180536
1/*-
2 * Copyright (c) 1982, 1986, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
30 * $FreeBSD: head/sys/netinet/in_pcb.h 180536 2008-07-15 15:38:47Z rwatson $
31 */
32
33#ifndef _NETINET_IN_PCB_H_
34#define _NETINET_IN_PCB_H_
35
36#include <sys/queue.h>
37#include <sys/_lock.h>
38#include <sys/_mutex.h>
39#include <sys/_rwlock.h>
40
41#include <net/route.h>
42
43#ifdef _KERNEL
44#include <sys/rwlock.h>
45#endif
46
/*
 * KAME source compatibility: the IPv6-flavoured names are plain aliases for
 * the shared IPv4/IPv6 structure tag and field used throughout this header.
 */
#define	in6pcb		inpcb	/* for KAME src sync over BSD*'s */
#define	in6p_sp		inp_sp	/* for KAME src sync over BSD*'s */
/* Forward declaration; this header only stores a pointer to it (inp_sp). */
struct inpcbpolicy;
50
/*
 * Struct inpcb is the common pcb structure for the Internet Protocol
 * implementation.
 *
 * Pointers to local and foreign host table entries, local and foreign socket
 * numbers, and pointers up (to a socket structure) and down (to a
 * protocol-specific control block) are stored here.
 */
/* List-head types (sys/queue.h) for lists of inpcbs and of inpcbports. */
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
/* Type of the generation counters (inp_gencnt, xig_gen). */
typedef	u_quad_t	inp_gen_t;
62
63/*
64 * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
65 * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
66 * the following structure.
67 */
/*
 * IPv4 address preceded by three 32-bit pad words.  It is used as a union
 * member alongside struct in6_addr in struct in_endpoints, so the IPv4
 * address shares storage with the tail of the IPv6 address.
 */
struct in_addr_4in6 {
	u_int32_t	ia46_pad32[3];	/* padding ahead of the IPv4 address */
	struct	in_addr	ia46_addr4;	/* the IPv4 address itself */
};
72
73/*
74 * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.  in_conninfo has
75 * some extra padding to accomplish this.
76 */
/*
 * Local and foreign ports and addresses of a connection.  Each address is a
 * union giving an IPv4 (in_addr_4in6) and an IPv6 (in6_addr) view of the same
 * storage; the ie_* / ie6_* macros below name the commonly used members.
 */
struct in_endpoints {
	/*
	 * NOTE(review): ports are presumably kept in network byte order
	 * (INP_PCBHASH applies ntohs() to its port arguments) -- confirm.
	 */
	u_int16_t	ie_fport;		/* foreign port */
	u_int16_t	ie_lport;		/* local port */
	/* protocol dependent part, local and foreign addr */
	union {
		/* foreign host table entry */
		struct	in_addr_4in6 ie46_foreign;
		struct	in6_addr ie6_foreign;
	} ie_dependfaddr;
	union {
		/* local host table entry */
		struct	in_addr_4in6 ie46_local;
		struct	in6_addr ie6_local;
	} ie_dependladdr;
/* Shorthand accessors: IPv4 foreign/local address, IPv6 foreign/local address. */
#define	ie_faddr	ie_dependfaddr.ie46_foreign.ia46_addr4
#define	ie_laddr	ie_dependladdr.ie46_local.ia46_addr4
#define	ie6_faddr	ie_dependfaddr.ie6_foreign
#define	ie6_laddr	ie_dependladdr.ie6_local
};
96
97/*
98 * XXX The defines for inc_* are hacks and should be changed to direct
99 * references.
100 */
/*
 * Connection identification: a small header (flags, length, FIB number)
 * followed by the endpoint ports and addresses.
 */
struct in_conninfo {
	u_int8_t	inc_flags;	/* also reachable as inc_isipv6 (below) */
	u_int8_t	inc_len;	/* NOTE(review): meaning not evident from this header */
	u_int16_t	inc_fibnum;	/* XXX was pad, 16 bits is plenty */
	/* protocol dependent part */
	struct	in_endpoints inc_ie;
};
#define inc_isipv6	inc_flags	/* temp compatibility */
/* Shorthand for the members of the embedded struct in_endpoints. */
#define	inc_fport	inc_ie.ie_fport
#define	inc_lport	inc_ie.ie_lport
#define	inc_faddr	inc_ie.ie_faddr
#define	inc_laddr	inc_ie.ie_laddr
#define	inc6_faddr	inc_ie.ie6_faddr
#define	inc6_laddr	inc_ie.ie6_laddr
115
/* Forward declaration; struct inpcb only holds a pointer (inp6_icmp6filt). */
struct	icmp6_filter;
117
/*-
 * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
 * and IPv6 sockets.  In the case of TCP, further per-connection state is
 * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
 * are static after creation or protected by a per-inpcb rwlock, inp_lock.  A
 * few fields also require the global pcbinfo lock for the inpcb to be held,
 * when modified, such as the global connection lists and hashes, as well as
 * binding information (which affects which hash a connection is on).  This
 * model means that connections can be looked up without holding the
 * per-connection lock, which is important for performance when attempting to
 * find the connection for a packet given its IP and port tuple.  Writing to
 * these fields requires that write locks be held on both the inpcb and
 * global locks.
130 *
131 * Key:
132 * (c) - Constant after initialization
133 * (i) - Protected by the inpcb lock
134 * (p) - Protected by the pcbinfo lock for the inpcb
135 * (s) - Protected by another subsystem's locks
136 * (x) - Undefined locking
137 *
138 * A few other notes:
139 *
140 * When a read lock is held, stability of the field is guaranteed; to write
141 * to a field, a write lock must generally be held.
142 *
143 * netinet/netinet6-layer code should not assume that the inp_socket pointer
144 * is safe to dereference without inp_lock being held, even for protocols
145 * other than TCP (where the inpcb persists during TIMEWAIT even after the
146 * socket has been freed), or there may be close(2)-related races.
147 *
148 * The inp_vflag field is overloaded, and would otherwise ideally be (c).
149 */
struct inpcb {
	LIST_ENTRY(inpcb) inp_hash;	/* (i/p) hash list */
	LIST_ENTRY(inpcb) inp_list;	/* (i/p) list for all PCBs for proto */
	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
	struct	socket *inp_socket;	/* (i)  back pointer to socket */

	u_int32_t	inp_flow;	/* (i) IPv6 flow information */
	int	inp_flags;		/* (i) generic IP/datagram flags */

	u_char	inp_vflag;		/* (i) IP version flag (v4/v6) */
/* Bit values stored in inp_vflag. */
#define	INP_IPV4	0x1
#define	INP_IPV6	0x2
#define	INP_IPV6PROTO	0x4		/* opened under IPv6 protocol */
#define	INP_TIMEWAIT	0x8		/* .. probably doesn't go here */
#define	INP_ONESBCAST	0x10		/* send all-ones broadcast */
#define	INP_DROPPED	0x20		/* protocol drop flag */
#define	INP_SOCKREF	0x40		/* strong socket reference */
	u_char	inp_ip_ttl;		/* (i) time to live proto */
	u_char	inp_ip_p;		/* (c) protocol proto */
	u_char	inp_ip_minttl;		/* (i) minimum TTL or drop */
	uint32_t inp_ispare1;		/* (x) connection id / queue id */
	void	*inp_pspare[2];		/* (x) rtentry / general use */

	/* Local and foreign ports, local and foreign addr. */
	struct	in_conninfo inp_inc;

	struct	label *inp_label;	/* (i) MAC label */
	struct	inpcbpolicy *inp_sp;    /* (s) for IPSEC */

	/* Protocol-dependent part; options. */
	struct {
		u_char	inp4_ip_tos;		/* (i) type of service proto */
		struct	mbuf *inp4_options;	/* (i) IP options */
		struct	ip_moptions *inp4_moptions; /* (i) IP multicast options */
	} inp_depend4;
/* Shorthand for the endpoint members of inp_inc and the IPv4 option fields. */
#define	inp_fport	inp_inc.inc_fport
#define	inp_lport	inp_inc.inc_lport
#define	inp_faddr	inp_inc.inc_faddr
#define	inp_laddr	inp_inc.inc_laddr
#define	inp_ip_tos	inp_depend4.inp4_ip_tos
#define	inp_options	inp_depend4.inp4_options
#define	inp_moptions	inp_depend4.inp4_moptions
	struct {
		/* (i) IP options */
		struct	mbuf *inp6_options;
		/* (i) IP6 options for outgoing packets */
		struct	ip6_pktopts *inp6_outputopts;
		/* (i) IP multicast options */
		struct	ip6_moptions *inp6_moptions;
		/* (i) ICMPv6 code type filter */
		struct	icmp6_filter *inp6_icmp6filt;
		/* (i) IPV6_CHECKSUM setsockopt */
		int	inp6_cksum;
		/* default hop limit (exposed as in6p_hops below) */
		short	inp6_hops;
	} inp_depend6;
	LIST_ENTRY(inpcb) inp_portlist;	/* (i/p) list for PCB's local port */
	struct	inpcbport *inp_phd;	/* (i/p) head of this list */
/*
 * Byte offset of inp_gencnt, i.e. the size of everything that precedes it.
 * NOTE(review): presumably the length of the leading region that is zeroed
 * when a pcb is set up -- confirm against the allocator.
 */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
	inp_gen_t	inp_gencnt;	/* (c) generation count of this instance */
	struct rwlock	inp_lock;	/* per-inpcb rwlock, see INP_*LOCK() */

/* IPv6-flavoured aliases for the members above. */
#define	in6p_faddr	inp_inc.inc6_faddr
#define	in6p_laddr	inp_inc.inc6_laddr
#define	in6p_hops	inp_depend6.inp6_hops	/* default hop limit */
#define	in6p_ip6_nxt	inp_ip_p
#define	in6p_flowinfo	inp_flow
#define	in6p_vflag	inp_vflag
#define	in6p_options	inp_depend6.inp6_options
#define	in6p_outputopts	inp_depend6.inp6_outputopts
#define	in6p_moptions	inp_depend6.inp6_moptions
#define	in6p_icmp6filt	inp_depend6.inp6_icmp6filt
#define	in6p_cksum	inp_depend6.inp6_cksum
#define	in6p_flags	inp_flags  /* for KAME src sync over BSD*'s */
#define	in6p_socket	inp_socket  /* for KAME src sync over BSD*'s */
#define	in6p_lport	inp_lport  /* for KAME src sync over BSD*'s */
#define	in6p_fport	inp_fport  /* for KAME src sync over BSD*'s */
#define	in6p_ppcb	inp_ppcb  /* for KAME src sync over BSD*'s */
};
/*
 * The range of the generation count, as used in this implementation, is
 * about 1.8e19 (2^64; inp_gen_t is a u_quad_t).  We would have to create
 * roughly 580 billion connections per second for this number to roll over in
 * a year.  This seems sufficiently unlikely that we simply don't concern
 * ourselves with that possibility.
 */
236
237/*
238 * Interface exported to userland by various protocols which use inpcbs.  Hack
239 * alert -- only define if struct xsocket is in scope.
240 */
241#ifdef _SYS_SOCKETVAR_H_
/*
 * Per-pcb record exported to userland: a length field, then a copy of the
 * inpcb and of the associated xsocket.
 */
struct	xinpcb {
	size_t	xi_len;		/* length of this structure */
	struct	inpcb xi_inp;	/* the inpcb, embedded by value */
	struct	xsocket xi_socket;	/* the socket's exported form */
	/* NOTE(review): presumably forces 64-bit alignment/size -- confirm */
	u_quad_t	xi_alignment_hack;
};
248
/*
 * Summary record exported to userland: pcb count plus the pcb and socket
 * generation counts at the time the record was produced.
 */
struct	xinpgen {
	size_t	xig_len;	/* length of this structure */
	u_int	xig_count;	/* number of PCBs at this time */
	inp_gen_t xig_gen;	/* generation count at this time */
	so_gen_t xig_sogen;	/* socket generation count at this time */
};
255#endif /* _SYS_SOCKETVAR_H_ */
256
/*
 * One entry per local port in the port hash (see ipi_porthashbase): it
 * carries the port number and heads a list of inpcbs.
 */
struct inpcbport {
	LIST_ENTRY(inpcbport) phd_hash;	/* linkage on an inpcbporthead chain */
	struct inpcbhead phd_pcblist;	/* inpcbs attached to this entry */
	u_short phd_port;		/* the port this entry stands for */
};
262
263/*
264 * Global data structure for each high-level protocol (UDP, TCP, ...) in both
265 * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
266 */
struct inpcbinfo {
	/*
	 * Global list of inpcbs on the protocol.
	 */
	struct inpcbhead	*ipi_listhead;
	u_int			 ipi_count;	/* NOTE(review): presumably the number of inpcbs on the list -- confirm */

	/*
	 * Global hash of inpcbs, hashed by local and foreign addresses and
	 * port numbers.
	 */
	struct inpcbhead	*ipi_hashbase;
	u_long			 ipi_hashmask;	/* mask argument for INP_PCBHASH() */

	/*
	 * Global hash of inpcbs, hashed by only local port number.
	 */
	struct inpcbporthead	*ipi_porthashbase;
	u_long			 ipi_porthashmask; /* mask argument for INP_PCBPORTHASH() */

	/*
	 * Fields associated with port lookup and allocation.
	 */
	u_short			 ipi_lastport;
	u_short			 ipi_lastlow;
	u_short			 ipi_lasthi;

	/*
	 * UMA zone from which inpcbs are allocated for this protocol.
	 */
	struct	uma_zone	*ipi_zone;

	/*
	 * Generation count--incremented each time a connection is allocated
	 * or freed.
	 */
	u_quad_t		 ipi_gencnt;
	struct rwlock		 ipi_lock;	/* the pcbinfo lock, see INP_INFO_*LOCK() */

	/*
	 * Spare pointers: one reserved for vimage, one for general use.
	 */
	void 			*ipi_pspare[2];
};
312
/*
 * Per-inpcb lock operations; each is a direct wrapper around the rwlock
 * primitive of the same kind applied to (inp)->inp_lock.
 *
 * INP_LOCK_INIT: the lock is created recursable and with duplicate
 * acquisitions allowed (RW_RECURSE | RW_DUPOK).  Only `t' reaches
 * rw_init_flags(); the `d' argument is accepted but not used by the
 * expansion.
 */
#define INP_LOCK_INIT(inp, d, t) \
	rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE |  RW_DUPOK)
#define INP_LOCK_DESTROY(inp)	rw_destroy(&(inp)->inp_lock)
#define INP_RLOCK(inp)		rw_rlock(&(inp)->inp_lock)
#define INP_WLOCK(inp)		rw_wlock(&(inp)->inp_lock)
/* The TRY variants yield the result of rw_try_rlock()/rw_try_wlock(). */
#define INP_TRY_RLOCK(inp)	rw_try_rlock(&(inp)->inp_lock)
#define INP_TRY_WLOCK(inp)	rw_try_wlock(&(inp)->inp_lock)
#define INP_RUNLOCK(inp)	rw_runlock(&(inp)->inp_lock)
#define INP_WUNLOCK(inp)	rw_wunlock(&(inp)->inp_lock)
/* Assertions: held in any mode / read-held / write-held / not held. */
#define INP_LOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_LOCKED)
#define	INP_RLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_RLOCKED)
#define	INP_WLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_WLOCKED)
#define	INP_UNLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
326
327#ifdef _KERNEL
328/*
329 * These locking functions are for inpcb consumers outside of sys/netinet,
330 * more specifically, they were added for the benefit of TOE drivers. The
331 * macros are reserved for use by the stack.
332 */
/*
 * Function forms of the inpcb lock operations.
 * NOTE(review): presumably these acquire/release inp_lock in write and read
 * mode respectively; the definitions are not in this header.
 */
void inp_wlock(struct inpcb *);
void inp_wunlock(struct inpcb *);
void inp_rlock(struct inpcb *);
void inp_runlock(struct inpcb *);
337
338#ifdef INVARIANTS
339void inp_lock_assert(struct inpcb *);
340void inp_unlock_assert(struct inpcb *);
341#else
342static __inline void
343inp_lock_assert(struct inpcb *inp __unused)
344{
345}
346
347static __inline void
348inp_unlock_assert(struct inpcb *inp __unused)
349{
350}
351
352#endif
353#endif /* _KERNEL */
354
355
/*
 * pcbinfo lock operations; each is a direct wrapper around the rwlock
 * primitive of the same kind applied to (ipi)->ipi_lock.  The lock is
 * created recursable (RW_RECURSE), with `d' passed through to
 * rw_init_flags().
 */
#define INP_INFO_LOCK_INIT(ipi, d) \
	rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi)  rw_destroy(&(ipi)->ipi_lock)
#define INP_INFO_RLOCK(ipi)	rw_rlock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCK(ipi)	rw_wlock(&(ipi)->ipi_lock)
/* The TRY variants yield the result of rw_try_rlock()/rw_try_wlock(). */
#define INP_INFO_TRY_RLOCK(ipi)	rw_try_rlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi)	rw_try_wlock(&(ipi)->ipi_lock)
#define INP_INFO_RUNLOCK(ipi)	rw_runlock(&(ipi)->ipi_lock)
#define INP_INFO_WUNLOCK(ipi)	rw_wunlock(&(ipi)->ipi_lock)
/* Assertions: held in any mode / read-held / write-held / not held. */
#define	INP_INFO_LOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
#define INP_INFO_RLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
#define INP_INFO_WLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
369
/*
 * Hash helpers.  Ports go through ntohs() before being mixed in, and the
 * result is reduced with the caller-supplied mask.
 *
 * INP_PCBPORTHASH: hash of a local port alone.
 * INP_PCBHASH: folds the upper 16 bits of the foreign address onto the
 * lower ones and mixes in the XOR of the two ports.
 */
#define INP_PCBPORTHASH(lport, mask) \
	((mask) & ntohs((lport)))
#define INP_PCBHASH(faddr, lport, fport, mask) \
	((mask) & (ntohs((lport) ^ (fport)) ^ ((faddr) >> 16) ^ (faddr)))
374
/*
 * Bits kept in inp_flags.  The INP_* values occupy the low bits; the IN6P_*
 * values share the same word and start at bit 15.
 */
#define	INP_RECVOPTS		0x00000001 /* receive incoming IP options */
#define	INP_RECVRETOPTS		0x00000002 /* receive IP options for reply */
#define	INP_RECVDSTADDR		0x00000004 /* receive IP dst address */
#define	INP_HDRINCL		0x00000008 /* user supplies entire IP header */
#define	INP_HIGHPORT		0x00000010 /* user wants "high" port binding */
#define	INP_LOWPORT		0x00000020 /* user wants "low" port binding */
#define	INP_ANONPORT		0x00000040 /* port chosen for user */
#define	INP_RECVIF		0x00000080 /* receive incoming interface */
#define	INP_MTUDISC		0x00000100 /* user can do MTU discovery */
#define	INP_FAITH		0x00000200 /* accept FAITH'ed connections */
#define	INP_RECVTTL		0x00000400 /* receive incoming IP TTL */
#define	INP_DONTFRAG		0x00000800 /* don't fragment packet */

#define	IN6P_IPV6_V6ONLY	0x00008000 /* restrict AF_INET6 socket for v6 */

#define	IN6P_PKTINFO		0x00010000 /* receive IP6 dst and I/F */
#define	IN6P_HOPLIMIT		0x00020000 /* receive hoplimit */
#define	IN6P_HOPOPTS		0x00040000 /* receive hop-by-hop options */
#define	IN6P_DSTOPTS		0x00080000 /* receive dst options after rthdr */
#define	IN6P_RTHDR		0x00100000 /* receive routing header */
#define	IN6P_RTHDRDSTOPTS	0x00200000 /* receive dstoptions before rthdr */
#define	IN6P_TCLASS		0x00400000 /* receive traffic class value */
#define	IN6P_AUTOFLOWLABEL	0x00800000 /* attach flowlabel automatically */
#define	IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
#define	IN6P_MTU		0x80000000 /* receive path MTU */

/* Union of the IPv4 "receive ..." bits and the IPv6 bits from PKTINFO up. */
#define	INP_CONTROLOPTS		(INP_RECVOPTS | \
				 INP_RECVRETOPTS | \
				 INP_RECVDSTADDR | \
				 INP_RECVIF | \
				 INP_RECVTTL | \
				 IN6P_PKTINFO | \
				 IN6P_HOPLIMIT | \
				 IN6P_HOPOPTS | \
				 IN6P_DSTOPTS | \
				 IN6P_RTHDR | \
				 IN6P_RTHDRDSTOPTS | \
				 IN6P_TCLASS | \
				 IN6P_AUTOFLOWLABEL | \
				 IN6P_RFC2292 | \
				 IN6P_MTU)
/* Subset of the IPv6 option bits grouped under the "unmappable" name. */
#define	INP_UNMAPPABLEOPTS	(IN6P_HOPOPTS | \
				 IN6P_DSTOPTS | \
				 IN6P_RTHDR | \
				 IN6P_TCLASS | \
				 IN6P_AUTOFLOWLABEL)
410
411 /* for KAME src sync over BSD*'s */
/* IPv6-flavoured names; each is the same value as its INP_* counterpart. */
#define	IN6P_HIGHPORT		INP_HIGHPORT
#define	IN6P_LOWPORT		INP_LOWPORT
#define	IN6P_ANONPORT		INP_ANONPORT
#define	IN6P_RECVIF		INP_RECVIF
#define	IN6P_MTUDISC		INP_MTUDISC
#define	IN6P_FAITH		INP_FAITH
#define	IN6P_CONTROLOPTS INP_CONTROLOPTS
419	/*
420	 * socket AF version is {newer than,or include}
421	 * actual datagram AF version
422	 */
423
/*
 * NOTE(review): presumably a flag for the in_pcblookup_*() routines that
 * permits wildcard matches -- confirm against their implementation.
 */
#define	INPLOOKUP_WILDCARD	1
/* View a socket's so_pcb pointer as a struct inpcb pointer. */
#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
#define	sotoin6pcb(so)	sotoinpcb(so) /* for KAME src sync over BSD*'s */
427
/*
 * INP_SOCKAF(so): address family of the socket, read through
 * so->so_proto->pr_domain->dom_family.
 *
 * INP_CHECK_SOCKAF(so, af): non-zero when the socket's address family
 * equals `af'.
 *
 * Arguments and expansions are fully parenthesized so that expression
 * arguments (e.g. `sp + 1', `cond ? a : b') are grouped as the caller wrote
 * them instead of being re-parsed against `->' and `=='.
 */
#define	INP_SOCKAF(so) ((so)->so_proto->pr_domain->dom_family)

#define	INP_CHECK_SOCKAF(so, af)	(INP_SOCKAF(so) == (af))
431
432#ifdef _KERNEL
/*
 * Port-range variables and the port tick callout, defined elsewhere in the
 * kernel.  NOTE(review): going by the names these bound the reserved, low,
 * default and high automatic port ranges -- confirm against the definitions.
 */
extern int	ipport_reservedhigh;
extern int	ipport_reservedlow;
extern int	ipport_lowfirstauto;
extern int	ipport_lowlastauto;
extern int	ipport_firstauto;
extern int	ipport_lastauto;
extern int	ipport_hifirstauto;
extern int	ipport_hilastauto;
extern struct callout ipport_tick_callout;
442
/*
 * Kernel entry points operating on inpcbs and inpcbinfos.  Only the
 * prototypes live here; see the implementation for locking requirements
 * and return-value conventions.
 */
void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int	in_pcballoc(struct socket *, struct inpcbinfo *);
int	in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int	in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
	    u_short *, struct ucred *);
int	in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int	in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
	    u_short *, in_addr_t *, u_short *, struct inpcb **,
	    struct ucred *);
void	in_pcbdetach(struct inpcb *);
void	in_pcbdisconnect(struct inpcb *);
void	in_pcbdrop(struct inpcb *);
void	in_pcbfree(struct inpcb *);
int	in_pcbinshash(struct inpcb *);
struct inpcb *
	in_pcblookup_local(struct inpcbinfo *,
	    struct in_addr, u_short, int, struct ucred *);
struct inpcb *
	in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
	    struct in_addr, u_int, int, struct ifnet *);
void	in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
	    int, struct inpcb *(*)(struct inpcb *, int));
void	in_pcbrehash(struct inpcb *);
/*
 * NOTE(review): in_pcbsetsolabel() here and in_pcbsosetlabel() further down
 * differ only in word order; confirm that both are really defined and that
 * one is not a stale prototype.
 */
void	in_pcbsetsolabel(struct socket *so);
int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
int	in_getsockaddr(struct socket *so, struct sockaddr **nam);
struct sockaddr *
	in_sockaddr(in_port_t port, struct in_addr *addr);
void	in_pcbsosetlabel(struct socket *so);
void	in_pcbremlists(struct inpcb *inp);
void	ipport_tick(void *xtp);
474
/*
 * Debugging routines compiled in when DDB is present.
 *
 * The prototype itself is visible to every _KERNEL consumer of this header;
 * only the definition depends on DDB.
 */
void	db_print_inpcb(struct inpcb *inp, const char *name, int indent);
479
480#endif /* _KERNEL */
481
482#endif /* !_NETINET_IN_PCB_H_ */
483