1/*	$OpenBSD: in_pcb.h,v 1.157 2024/04/19 10:13:58 bluhm Exp $	*/
2/*	$NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
62 */
63
64#ifndef _NETINET_IN_PCB_H_
65#define _NETINET_IN_PCB_H_
66
67#include <sys/queue.h>
68#include <sys/mutex.h>
69#include <sys/rwlock.h>
70#include <sys/refcnt.h>
71#include <netinet/ip6.h>
72#include <netinet/icmp6.h>
73#include <netinet/ip_ipsp.h>
74
75#include <crypto/siphash.h>
76
77/*
78 * Locks used to protect struct members in this file:
79 *	I	immutable after creation
80 *	N	net lock
81 *	t	inpt_mtx		pcb table mutex
82 *	y	inpt_notify		pcb table rwlock for notify
 *	p	inp_mtx			pcb mutex
84 *	L	pf_inp_mtx		link pf to inp mutex
85 *	s	so_lock			socket rwlock
86 */
87
88/*
 * The pcb table mutex guarantees that all inpcb are consistent and
 * that bind(2) and connect(2) create unique combinations of
 * laddr/faddr/lport/fport/rtableid.  This mutex is used to protect
 * both address consistency and inpcb lookup during protocol input.
 * All writes to inp_[lf]addr take the table mutex.  A per socket lock
 * is needed, so that socket layer input has a consistent view of these
 * values.
96 *
 * In soconnect() and sosend() the pcb mutex cannot be used.  They
 * eventually can call IP output, which takes the pf lock, which is a
 * sleeping lock.  Also connect(2) does a route lookup for source
 * selection.  There route resolution happens, which creates a route,
 * which sends a route message, which needs the route lock, which is a
 * rw-lock.
102 *
103 * On the other hand a mutex should be used in protocol input.  It
104 * does not make sense to do a process switch per packet.  Better spin
105 * until the packet can be processed.
106 *
107 * So there are three locks.  Table mutex is for writing inp_[lf]addr/port
108 * and lookup, socket rw-lock to separate sockets in system calls, and
109 * pcb mutex to protect socket receive buffer.  Changing inp_[lf]addr/port
110 * takes both per socket rw-lock and global table mutex.  Protocol
111 * input only reads inp_[lf]addr/port during lookup and is safe.  System
112 * call only reads when holding socket rw-lock and is safe.  The socket
113 * layer needs pcb mutex only in soreceive().
114 *
115 * Function pru_lock() grabs the pcb mutex and its existence indicates
116 * that a protocol is MP safe.  Otherwise the exclusive net lock is
117 * used.
118 */
119
120struct pf_state_key;
121
/*
 * Storage for one internet address.  The IPv4 and the IPv6 form share
 * the same memory; struct inpcb uses this union for both its foreign
 * and its local address.
 */
union inpaddru {
	struct in_addr iau_addr;	/* IPv4 form */
	struct in6_addr iau_addr6;	/* IPv6 form */
};
126
127/*
128 * Common structure pcb for internet protocol implementation.
129 * Here are stored pointers to local and foreign host table
130 * entries, local and foreign socket numbers, and pointers
131 * up (to a socket structure) and down (to a protocol-specific)
132 * control block.
133 */
struct inpcb {
	/*
	 * Bracketed letters in the member comments name the lock that
	 * protects the member; see the lock legend near the top of this
	 * file.  Members without a letter carry no annotation here.
	 */
	LIST_ENTRY(inpcb) inp_hash;		/* [t] local and foreign hash */
	LIST_ENTRY(inpcb) inp_lhash;		/* [t] local port hash */
	TAILQ_ENTRY(inpcb) inp_queue;		/* [t] inet PCB queue */
	SIMPLEQ_ENTRY(inpcb) inp_notify;	/* [y] notify or udp append */
	struct	  inpcbtable *inp_table;	/* [I] inet queue/hash table */
	union	  inpaddru inp_faddru;		/* [t] Foreign address. */
	union	  inpaddru inp_laddru;		/* [t] Local address. */
/* Shorthands selecting the IPv4 or IPv6 member of the address unions. */
#define	inp_faddr	inp_faddru.iau_addr
#define	inp_faddr6	inp_faddru.iau_addr6
#define	inp_laddr	inp_laddru.iau_addr
#define	inp_laddr6	inp_laddru.iau_addr6
	u_int16_t inp_fport;		/* [t] foreign port */
	u_int16_t inp_lport;		/* [t] local port */
	struct	  socket *inp_socket;	/* [I] back pointer to socket */
	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
	struct    route inp_route;	/* cached route */
	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
	struct	  mutex inp_mtx;	/* protect PCB and socket members */
	int	  inp_flags;		/* generic IP/datagram flags (INP_, IN6P_) */
	union {				/* Header prototype. */
		struct ip hu_ip;
		struct ip6_hdr hu_ipv6;
	} inp_hu;
/* Shorthands for the IPv4 and IPv6 header prototype. */
#define	inp_ip		inp_hu.hu_ip
#define	inp_ipv6	inp_hu.hu_ipv6
	/*
	 * Anonymous union: the two option pointers overlay each other and
	 * are accessed directly as members of struct inpcb.
	 */
	union {
		struct	mbuf *inp_options;		/* IPv4 options */
		struct	ip6_pktopts *inp_outputopts6;	/* IPv6 options */
	};
	int inp_hops;	/* NOTE(review): presumably hop limit; confirm in users */
	union {
		struct ip_moptions *mou_mo;
		struct ip6_moptions *mou_mo6;
	} inp_mou;
#define inp_moptions inp_mou.mou_mo	/* [N] IPv4 multicast options */
#define inp_moptions6 inp_mou.mou_mo6	/* [N] IPv6 multicast options */
	struct	ipsec_level   inp_seclevel;	/* [N] IPsec level of socket */
	u_char	inp_ip_minttl;		/* minimum TTL or drop */
#define inp_ip6_minhlim inp_ip_minttl	/* minimum Hop Limit or drop */
/* The ip6_flow field of the IPv6 header prototype. */
#define	inp_flowinfo	inp_hu.hu_ipv6.ip6_flow

	int	inp_cksum6;	/* NOTE(review): meaning not visible here; confirm */
	struct	icmp6_filter *inp_icmp6filt;	/* pointer to an ICMPv6 filter */
	struct	pf_state_key *inp_pf_sk; /* [L] pf state key linked to this pcb */
	/*
	 * Function pointer taking an mbuf plus IPv4 and IPv6 header
	 * pointers and returning an mbuf pointer.
	 * NOTE(review): the meaning of the void * and int arguments is not
	 * visible in this header; see the code that sets inp_upcall.
	 */
	struct	mbuf *(*inp_upcall)(void *, struct mbuf *,
		    struct ip *, struct ip6_hdr *, void *, int);
	void	*inp_upcall_arg;	/* presumably handed to inp_upcall; confirm */
	u_int	inp_rtableid;		/* [t] routing table id */
	int	inp_pipex;		/* pipex indication */
	uint16_t inp_flowid;		/* [s] */
};
186
/*
 * List head type for lists of inpcb; inpt_hashtbl and inpt_lhashtbl in
 * struct inpcbtable point to objects of this type.
 */
LIST_HEAD(inpcbhead, inpcb);

/*
 * A table of inpcbs: one queue of entries plus two hashes, one keyed on
 * local and foreign values and one on the local port.  Each hash has
 * its own secret key and mask.  Bracketed letters refer to the lock
 * legend near the top of this file.
 */
struct inpcbtable {
	struct mutex inpt_mtx;			/* protect queue and hash */
	struct rwlock inpt_notify;		/* protect inp_notify list */
	TAILQ_HEAD(inpthead, inpcb) inpt_queue;	/* [t] inet PCB queue */
	struct	inpcbhead *inpt_hashtbl;	/* [t] local and foreign hash */
	struct	inpcbhead *inpt_lhashtbl;	/* [t] local port hash */
	SIPHASH_KEY inpt_key, inpt_lkey;	/* [I] secrets for hashes */
	u_long	inpt_mask, inpt_lmask;		/* [t] hash masks */
	int	inpt_count, inpt_size;		/* [t] queue count, hash size */
};
199
/*
 * flags in inp_flags:
 *
 * Some names below share a bit value: INP_RXSRCRT and INP_HIGHPORT are
 * both 0x010, INP_HOPLIMIT and INP_LOWPORT are both 0x020.
 */
#define	INP_RECVOPTS	0x001	/* receive incoming IP options */
#define	INP_RECVRETOPTS	0x002	/* receive IP options for reply */
#define	INP_RECVDSTADDR	0x004	/* receive IP dst address */

/*
 * Alternative names for the three bits above, plus two further bits.
 * NOTE(review): presumably the IPv6 reading of these bits; confirm.
 */
#define	INP_RXDSTOPTS	INP_RECVOPTS
#define	INP_RXHOPOPTS	INP_RECVRETOPTS
#define	INP_RXINFO	INP_RECVDSTADDR
#define	INP_RXSRCRT	0x010
#define	INP_HOPLIMIT	0x020

#define	INP_HDRINCL	0x008	/* user supplies entire IP header */
#define	INP_HIGHPORT	0x010	/* user wants "high" port binding */
#define	INP_LOWPORT	0x020	/* user wants "low" port binding */
#define	INP_RECVIF	0x080	/* receive incoming interface */
#define	INP_RECVTTL	0x040	/* receive incoming IP TTL */
#define	INP_RECVDSTPORT	0x200	/* receive IP dst addr before rdr */
#define	INP_RECVRTABLE	0x400	/* receive routing table */
#define	INP_IPSECFLOWINFO 0x800	/* receive IPsec flow info */

/*
 * Mask of the INP_RECV* and INP_RX* bits and INP_HOPLIMIT.  INP_HDRINCL
 * and INP_IPSECFLOWINFO are not part of it.  Because of the shared
 * values noted above, the mask also covers the bit positions of
 * INP_HIGHPORT and INP_LOWPORT.
 */
#define	INP_CONTROLOPTS	(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR| \
	    INP_RXSRCRT|INP_HOPLIMIT|INP_RECVIF|INP_RECVTTL|INP_RECVDSTPORT| \
	    INP_RECVRTABLE)

/*
 * These flags' values should be determined by either the transport
 * protocol at PRU_BIND, PRU_LISTEN, PRU_CONNECT, etc, or by in_pcb*().
 */
#define INP_IPV6	0x100	/* socket, proto, domain, family is PF_INET6 */
229
/*
 * Flags in inp_flags for IPV6
 *
 * The first three names reuse INP_* values; the remaining ones use bits
 * from 0x010000 upwards.  IN6P_MTU is 0x80000000, while inp_flags in
 * struct inpcb is declared as int.
 */
#define IN6P_HIGHPORT		INP_HIGHPORT	/* user wants "high" port */
#define IN6P_LOWPORT		INP_LOWPORT	/* user wants "low" port */
#define IN6P_RECVDSTPORT	INP_RECVDSTPORT	/* receive IP dst addr before rdr */
#define IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
#define IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
#define IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
#define IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
#define IN6P_RTHDR		0x100000 /* receive routing header */
#define IN6P_TCLASS		0x400000 /* receive traffic class value */
#define IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */

#define IN6P_ANONPORT		0x4000000 /* port chosen for user */
#define IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU		0x80000000 /* receive path MTU */

#define IN6P_MINMTU		0x20000000 /* use minimum MTU */

/*
 * Mask of the IPv6 option bits listed in it.  IN6P_HIGHPORT,
 * IN6P_LOWPORT, IN6P_ANONPORT and IN6P_MINMTU are not part of the mask.
 */
#define IN6P_CONTROLOPTS	(IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
				 IN6P_DSTOPTS|IN6P_RTHDR|\
				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
				 IN6P_MTU|IN6P_RECVDSTPORT)
254
/*
 * Lookup flag bits; the three values are distinct powers of two.
 * NOTE(review): the functions consuming them are not visible in this
 * header; confirm against the pcb lookup code.
 */
#define	INPLOOKUP_WILDCARD	1
#define	INPLOOKUP_SETLOCAL	2
#define	INPLOOKUP_IPV6		4

/* Cast the so_pcb pointer of socket `so' to struct inpcb *. */
#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
260
/*
 * Macros for handling the bitmap of ports not to allocate dynamically.
 *
 * A map `m' is an array of DP_MAPSIZE words of type u_int32_t with one
 * bit per port number: port `p' lives in word p / DP_MAPBITS at bit
 * position p % DP_MAPBITS.  The argument `p' is expanded twice in
 * DP_SET, DP_CLR and DP_ISSET, so it must not have side effects.
 * DP_ISSET yields the masked word, i.e. non-zero when the bit is set.
 */
#define	DP_MAPBITS	(NBBY * sizeof(u_int32_t))
#define	DP_MAPSIZE	(howmany(65536, DP_MAPBITS))
#define	DP_SET(m, p)							\
	((m)[(p) / DP_MAPBITS] |= (1U << ((p) % DP_MAPBITS)))
#define	DP_CLR(m, p)							\
	((m)[(p) / DP_MAPBITS] &= ~(1U << ((p) % DP_MAPBITS)))
#define	DP_ISSET(m, p)							\
	((m)[(p) / DP_MAPBITS] & (1U << ((p) % DP_MAPBITS)))
267
/*
 * Default values for baddynamicports [see ip_init()].
 *
 * Each macro expands to a brace-enclosed initializer list of port
 * numbers whose last entry is 0.  The DEFROOTONLYPORTS_* lists have the
 * same shape (presumably the defaults for rootonlyports; confirm in
 * ip_init()).
 */
#define	DEFBADDYNAMICPORTS_TCP	{					\
	587, 749, 750, 751, 853, 871, 2049,				\
	6000, 6001, 6002, 6003, 6004, 6005,				\
	6006, 6007, 6008, 6009, 6010,					\
	0								\
}
#define	DEFBADDYNAMICPORTS_UDP	{					\
	623, 664, 749, 750, 751, 2049,					\
	3784, 3785, 7784,	/* BFD/S-BFD ports */			\
	0								\
}

#define	DEFROOTONLYPORTS_TCP	{					\
	2049,								\
	0								\
}
#define	DEFROOTONLYPORTS_UDP	{					\
	2049,								\
	0								\
}
283
/*
 * A pair of port bitmaps, one for TCP and one for UDP.  Each array has
 * DP_MAPSIZE words and is meant to be accessed with DP_SET, DP_CLR and
 * DP_ISSET.
 */
struct baddynamicports {
	u_int32_t tcp[DP_MAPSIZE];	/* bitmap of TCP ports */
	u_int32_t udp[DP_MAPSIZE];	/* bitmap of UDP ports */
};
288
289#ifdef _KERNEL
290
/*
 * Lock mode values.
 * NOTE(review): presumably the trailing int argument of the *_lock()
 * functions declared in this header (in_pcblookup_local_lock(),
 * in6_pcbaddrisavail_lock()); confirm against their definitions.
 */
#define IN_PCBLOCK_HOLD	1
#define IN_PCBLOCK_GRAB	2

/* Global objects defined outside this header. */
extern struct inpcbtable rawcbtable, rawin6pcbtable;
extern struct baddynamicports baddynamicports;
extern struct baddynamicports rootonlyports;
extern int in_pcbnotifymiss;
298
/*
 * Kernel function prototypes.  Only declarations live here; parameters
 * are unnamed, so see the definitions for the meaning of each argument.
 */
void	 in_init(void);
void	 in_losing(struct inpcb *);
int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *,
	    struct proc *);
int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
	    struct proc *);
int	 in_pcbconnect(struct inpcb *, struct mbuf *);
void	 in_pcbdetach(struct inpcb *);
struct inpcb *
	 in_pcbref(struct inpcb *);
void	 in_pcbunref(struct inpcb *);
void	 in_pcbdisconnect(struct inpcb *);
struct inpcb *
	 in_pcblookup(struct inpcbtable *, struct in_addr,
			       u_int, struct in_addr, u_int, u_int);
struct inpcb *
	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
	    struct mbuf *, u_int);
/* IPv6 variants, declared only when INET6 is defined. */
#ifdef INET6
uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *,
	    u_short, const struct in6_addr *, u_short);
struct inpcb *
	 in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
	    u_int, const struct in6_addr *, u_int, u_int);
struct inpcb *
	 in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
	    struct mbuf *, u_int);
int	 in6_pcbaddrisavail_lock(const struct inpcb *, struct sockaddr_in6 *,
	    int, struct proc *, int);
int	 in6_pcbaddrisavail(const struct inpcb *, struct sockaddr_in6 *, int,
	    struct proc *);
int	 in6_pcbconnect(struct inpcb *, struct mbuf *);
void	 in6_setsockaddr(struct inpcb *, struct mbuf *);
void	 in6_setpeeraddr(struct inpcb *, struct mbuf *);
int	 in6_sockaddr(struct socket *, struct mbuf *);
int	 in6_peeraddr(struct socket *, struct mbuf *);
#endif /* INET6 */
/* The declarations below are not conditional on INET6. */
void	 in_pcbinit(struct inpcbtable *, int);
struct inpcb *
	 in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
	    u_int, int);
void	 in_pcbnotifyall(struct inpcbtable *, const struct sockaddr_in *,
	    u_int, int, void (*)(struct inpcb *, int));
void	 in_pcbrehash(struct inpcb *);
void	 in_rtchange(struct inpcb *, int);
void	 in_setpeeraddr(struct inpcb *, struct mbuf *);
void	 in_setsockaddr(struct inpcb *, struct mbuf *);
int	 in_sockaddr(struct socket *, struct mbuf *);
int	 in_peeraddr(struct socket *, struct mbuf *);
int	 in_baddynamic(u_int16_t, u_int16_t);
int	 in_rootonly(u_int16_t, u_int16_t);
int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
struct rtentry *
	in_pcbrtentry(struct inpcb *);
355
/*
 * INET6 stuff.  Unlike the in6_* prototypes earlier in this header,
 * these three are declared outside of #ifdef INET6.
 */
struct rtentry *
	in6_pcbrtentry(struct inpcb *);
void	in6_pcbnotify(struct inpcbtable *, const struct sockaddr_in6 *,
	u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
	void (*)(struct inpcb *, int));
int	in6_selecthlim(const struct inpcb *);
/* in_pcbset_* and in_pcbunset_* helpers operating on one inpcb. */
int	in_pcbset_rtableid(struct inpcb *, u_int);
void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
void	in_pcbunset_faddr(struct inpcb *);
void	in_pcbunset_laddr(struct inpcb *);
367
368#endif /* _KERNEL */
369#endif /* _NETINET_IN_PCB_H_ */
370