Deleted Added
full compact
udp_usrreq.c (265909) udp_usrreq.c (269699)
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2008 Robert N. M. Watson
5 * Copyright (c) 2010-2011 Juniper Networks, Inc.
6 * Copyright (c) 2014 Kevin Lo
7 * All rights reserved.
8 *
9 * Portions of this software were developed by Robert N. M. Watson under
10 * contract to Juniper Networks, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
37 */
38
39#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2008 Robert N. M. Watson
5 * Copyright (c) 2010-2011 Juniper Networks, Inc.
6 * Copyright (c) 2014 Kevin Lo
7 * All rights reserved.
8 *
9 * Portions of this software were developed by Robert N. M. Watson under
10 * contract to Juniper Networks, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: head/sys/netinet/udp_usrreq.c 265909 2014-05-12 09:46:48Z tuexen $");
40__FBSDID("$FreeBSD: head/sys/netinet/udp_usrreq.c 269699 2014-08-08 01:57:15Z kevlo $");
41
42#include "opt_ipfw.h"
43#include "opt_inet.h"
44#include "opt_inet6.h"
45#include "opt_ipsec.h"
46
47#include <sys/param.h>
48#include <sys/domain.h>
49#include <sys/eventhandler.h>
50#include <sys/jail.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/protosw.h>
58#include <sys/sdt.h>
59#include <sys/signalvar.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/sx.h>
63#include <sys/sysctl.h>
64#include <sys/syslog.h>
65#include <sys/systm.h>
66
67#include <vm/uma.h>
68
69#include <net/if.h>
70#include <net/if_var.h>
71#include <net/route.h>
72
73#include <netinet/in.h>
74#include <netinet/in_kdtrace.h>
75#include <netinet/in_pcb.h>
76#include <netinet/in_systm.h>
77#include <netinet/in_var.h>
78#include <netinet/ip.h>
79#ifdef INET6
80#include <netinet/ip6.h>
81#endif
82#include <netinet/ip_icmp.h>
83#include <netinet/icmp_var.h>
84#include <netinet/ip_var.h>
85#include <netinet/ip_options.h>
86#ifdef INET6
87#include <netinet6/ip6_var.h>
88#endif
89#include <netinet/udp.h>
90#include <netinet/udp_var.h>
91#include <netinet/udplite.h>
92
93#ifdef IPSEC
94#include <netipsec/ipsec.h>
95#include <netipsec/esp.h>
96#endif
97
98#include <machine/in_cksum.h>
99
100#include <security/mac/mac_framework.h>
101
102/*
103 * UDP and UDP-Lite protocols implementation.
104 * Per RFC 768, August, 1980.
105 * Per RFC 3828, July, 2004.
106 */
107
108/*
109 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums
110 * removes the only data integrity mechanism for packets and malformed
111 * packets that would otherwise be discarded due to bad checksums, and may
112 * cause problems (especially for NFS data blocks).
113 */
114VNET_DEFINE(int, udp_cksum) = 1;
115SYSCTL_VNET_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
116 &VNET_NAME(udp_cksum), 0, "compute udp checksum");
117
118int udp_log_in_vain = 0;
119SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
120 &udp_log_in_vain, 0, "Log all incoming UDP packets");
121
122VNET_DEFINE(int, udp_blackhole) = 0;
123SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
124 &VNET_NAME(udp_blackhole), 0,
125 "Do not send port unreachables for refused connects");
126
127u_long udp_sendspace = 9216; /* really max datagram size */
128 /* 40 1K datagrams */
129SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
130 &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
131
132u_long udp_recvspace = 40 * (1024 +
133#ifdef INET6
134 sizeof(struct sockaddr_in6)
135#else
136 sizeof(struct sockaddr_in)
137#endif
138 );
139
140SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
141 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
142
143VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
144VNET_DEFINE(struct inpcbinfo, udbinfo);
145VNET_DEFINE(struct inpcbhead, ulitecb);
146VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
147static VNET_DEFINE(uma_zone_t, udpcb_zone);
148#define V_udpcb_zone VNET(udpcb_zone)
149
150#ifndef UDBHASHSIZE
151#define UDBHASHSIZE 128
152#endif
153
154VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
155VNET_PCPUSTAT_SYSINIT(udpstat);
156SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
157 udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
158
159#ifdef VIMAGE
160VNET_PCPUSTAT_SYSUNINIT(udpstat);
161#endif /* VIMAGE */
162#ifdef INET
163static void udp_detach(struct socket *so);
164static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
165 struct mbuf *, struct thread *);
166#endif
167
168#ifdef IPSEC
169#ifdef IPSEC_NAT_T
170#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
171#ifdef INET
172static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
173#endif
174#endif /* IPSEC_NAT_T */
175#endif /* IPSEC */
176
177static void
178udp_zone_change(void *tag)
179{
180
181 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
182 uma_zone_set_max(V_udpcb_zone, maxsockets);
183}
184
185static int
186udp_inpcb_init(void *mem, int size, int flags)
187{
188 struct inpcb *inp;
189
190 inp = mem;
191 INP_LOCK_INIT(inp, "inp", "udpinp");
192 return (0);
193}
194
195static int
196udplite_inpcb_init(void *mem, int size, int flags)
197{
198 struct inpcb *inp;
199
200 inp = mem;
201 INP_LOCK_INIT(inp, "inp", "udpliteinp");
202 return (0);
203}
204
205void
206udp_init(void)
207{
208
209 in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
210 "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
211 IPI_HASHFIELDS_2TUPLE);
212 V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
213 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
214 uma_zone_set_max(V_udpcb_zone, maxsockets);
215 uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
216 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
217 EVENTHANDLER_PRI_ANY);
218}
219
220void
221udplite_init(void)
222{
223
224 in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
225 UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
226 UMA_ZONE_NOFREE, IPI_HASHFIELDS_2TUPLE);
227}
228
229/*
230 * Kernel module interface for updating udpstat. The argument is an index
231 * into udpstat treated as an array of u_long. While this encodes the
232 * general layout of udpstat into the caller, it doesn't encode its location,
233 * so that future changes to add, for example, per-CPU stats support won't
234 * cause binary compatibility problems for kernel modules.
235 */
236void
237kmod_udpstat_inc(int statnum)
238{
239
240 counter_u64_add(VNET(udpstat)[statnum], 1);
241}
242
243int
244udp_newudpcb(struct inpcb *inp)
245{
246 struct udpcb *up;
247
248 up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
249 if (up == NULL)
250 return (ENOBUFS);
251 inp->inp_ppcb = up;
252 return (0);
253}
254
255void
256udp_discardcb(struct udpcb *up)
257{
258
259 uma_zfree(V_udpcb_zone, up);
260}
261
262#ifdef VIMAGE
263void
264udp_destroy(void)
265{
266
267 in_pcbinfo_destroy(&V_udbinfo);
268 uma_zdestroy(V_udpcb_zone);
269}
270
271void
272udplite_destroy(void)
273{
274
275 in_pcbinfo_destroy(&V_ulitecbinfo);
276}
277#endif
278
279#ifdef INET
280/*
281 * Subroutine of udp_input(), which appends the provided mbuf chain to the
282 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
283 * contains the source address. If the socket ends up being an IPv6 socket,
284 * udp_append() will convert to a sockaddr_in6 before passing the address
285 * into the socket code.
286 */
287static void
288udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
289 struct sockaddr_in *udp_in)
290{
291 struct sockaddr *append_sa;
292 struct socket *so;
293 struct mbuf *opts = 0;
294#ifdef INET6
295 struct sockaddr_in6 udp_in6;
296#endif
297 struct udpcb *up;
298
299 INP_LOCK_ASSERT(inp);
300
301 /*
302 * Engage the tunneling protocol.
303 */
304 up = intoudpcb(inp);
305 if (up->u_tun_func != NULL) {
306 (*up->u_tun_func)(n, off, inp);
307 return;
308 }
309
310 if (n == NULL)
311 return;
312
313 off += sizeof(struct udphdr);
314
315#ifdef IPSEC
316 /* Check AH/ESP integrity. */
317 if (ipsec4_in_reject(n, inp)) {
318 m_freem(n);
319 IPSECSTAT_INC(ips_in_polvio);
320 return;
321 }
322#ifdef IPSEC_NAT_T
323 up = intoudpcb(inp);
324 KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
325 if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */
326 n = udp4_espdecap(inp, n, off);
327 if (n == NULL) /* Consumed. */
328 return;
329 }
330#endif /* IPSEC_NAT_T */
331#endif /* IPSEC */
332#ifdef MAC
333 if (mac_inpcb_check_deliver(inp, n) != 0) {
334 m_freem(n);
335 return;
336 }
337#endif /* MAC */
338 if (inp->inp_flags & INP_CONTROLOPTS ||
339 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
340#ifdef INET6
341 if (inp->inp_vflag & INP_IPV6)
342 (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
343 else
344#endif /* INET6 */
345 ip_savecontrol(inp, &opts, ip, n);
346 }
347#ifdef INET6
348 if (inp->inp_vflag & INP_IPV6) {
349 bzero(&udp_in6, sizeof(udp_in6));
350 udp_in6.sin6_len = sizeof(udp_in6);
351 udp_in6.sin6_family = AF_INET6;
352 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
353 append_sa = (struct sockaddr *)&udp_in6;
354 } else
355#endif /* INET6 */
356 append_sa = (struct sockaddr *)udp_in;
357 m_adj(n, off);
358
359 so = inp->inp_socket;
360 SOCKBUF_LOCK(&so->so_rcv);
361 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
362 SOCKBUF_UNLOCK(&so->so_rcv);
363 m_freem(n);
364 if (opts)
365 m_freem(opts);
366 UDPSTAT_INC(udps_fullsock);
367 } else
368 sorwakeup_locked(so);
369}
370
41
42#include "opt_ipfw.h"
43#include "opt_inet.h"
44#include "opt_inet6.h"
45#include "opt_ipsec.h"
46
47#include <sys/param.h>
48#include <sys/domain.h>
49#include <sys/eventhandler.h>
50#include <sys/jail.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/protosw.h>
58#include <sys/sdt.h>
59#include <sys/signalvar.h>
60#include <sys/socket.h>
61#include <sys/socketvar.h>
62#include <sys/sx.h>
63#include <sys/sysctl.h>
64#include <sys/syslog.h>
65#include <sys/systm.h>
66
67#include <vm/uma.h>
68
69#include <net/if.h>
70#include <net/if_var.h>
71#include <net/route.h>
72
73#include <netinet/in.h>
74#include <netinet/in_kdtrace.h>
75#include <netinet/in_pcb.h>
76#include <netinet/in_systm.h>
77#include <netinet/in_var.h>
78#include <netinet/ip.h>
79#ifdef INET6
80#include <netinet/ip6.h>
81#endif
82#include <netinet/ip_icmp.h>
83#include <netinet/icmp_var.h>
84#include <netinet/ip_var.h>
85#include <netinet/ip_options.h>
86#ifdef INET6
87#include <netinet6/ip6_var.h>
88#endif
89#include <netinet/udp.h>
90#include <netinet/udp_var.h>
91#include <netinet/udplite.h>
92
93#ifdef IPSEC
94#include <netipsec/ipsec.h>
95#include <netipsec/esp.h>
96#endif
97
98#include <machine/in_cksum.h>
99
100#include <security/mac/mac_framework.h>
101
102/*
103 * UDP and UDP-Lite protocols implementation.
104 * Per RFC 768, August, 1980.
105 * Per RFC 3828, July, 2004.
106 */
107
108/*
109 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums
110 * removes the only data integrity mechanism for packets and malformed
111 * packets that would otherwise be discarded due to bad checksums, and may
112 * cause problems (especially for NFS data blocks).
113 */
114VNET_DEFINE(int, udp_cksum) = 1;
115SYSCTL_VNET_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
116 &VNET_NAME(udp_cksum), 0, "compute udp checksum");
117
118int udp_log_in_vain = 0;
119SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
120 &udp_log_in_vain, 0, "Log all incoming UDP packets");
121
122VNET_DEFINE(int, udp_blackhole) = 0;
123SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
124 &VNET_NAME(udp_blackhole), 0,
125 "Do not send port unreachables for refused connects");
126
127u_long udp_sendspace = 9216; /* really max datagram size */
128 /* 40 1K datagrams */
129SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
130 &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
131
132u_long udp_recvspace = 40 * (1024 +
133#ifdef INET6
134 sizeof(struct sockaddr_in6)
135#else
136 sizeof(struct sockaddr_in)
137#endif
138 );
139
140SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
141 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
142
143VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
144VNET_DEFINE(struct inpcbinfo, udbinfo);
145VNET_DEFINE(struct inpcbhead, ulitecb);
146VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
147static VNET_DEFINE(uma_zone_t, udpcb_zone);
148#define V_udpcb_zone VNET(udpcb_zone)
149
150#ifndef UDBHASHSIZE
151#define UDBHASHSIZE 128
152#endif
153
154VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
155VNET_PCPUSTAT_SYSINIT(udpstat);
156SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
157 udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
158
159#ifdef VIMAGE
160VNET_PCPUSTAT_SYSUNINIT(udpstat);
161#endif /* VIMAGE */
162#ifdef INET
163static void udp_detach(struct socket *so);
164static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
165 struct mbuf *, struct thread *);
166#endif
167
168#ifdef IPSEC
169#ifdef IPSEC_NAT_T
170#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
171#ifdef INET
172static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
173#endif
174#endif /* IPSEC_NAT_T */
175#endif /* IPSEC */
176
177static void
178udp_zone_change(void *tag)
179{
180
181 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
182 uma_zone_set_max(V_udpcb_zone, maxsockets);
183}
184
185static int
186udp_inpcb_init(void *mem, int size, int flags)
187{
188 struct inpcb *inp;
189
190 inp = mem;
191 INP_LOCK_INIT(inp, "inp", "udpinp");
192 return (0);
193}
194
195static int
196udplite_inpcb_init(void *mem, int size, int flags)
197{
198 struct inpcb *inp;
199
200 inp = mem;
201 INP_LOCK_INIT(inp, "inp", "udpliteinp");
202 return (0);
203}
204
205void
206udp_init(void)
207{
208
209 in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
210 "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
211 IPI_HASHFIELDS_2TUPLE);
212 V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
213 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
214 uma_zone_set_max(V_udpcb_zone, maxsockets);
215 uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
216 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
217 EVENTHANDLER_PRI_ANY);
218}
219
220void
221udplite_init(void)
222{
223
224 in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
225 UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
226 UMA_ZONE_NOFREE, IPI_HASHFIELDS_2TUPLE);
227}
228
229/*
230 * Kernel module interface for updating udpstat. The argument is an index
231 * into udpstat treated as an array of u_long. While this encodes the
232 * general layout of udpstat into the caller, it doesn't encode its location,
233 * so that future changes to add, for example, per-CPU stats support won't
234 * cause binary compatibility problems for kernel modules.
235 */
236void
237kmod_udpstat_inc(int statnum)
238{
239
240 counter_u64_add(VNET(udpstat)[statnum], 1);
241}
242
243int
244udp_newudpcb(struct inpcb *inp)
245{
246 struct udpcb *up;
247
248 up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
249 if (up == NULL)
250 return (ENOBUFS);
251 inp->inp_ppcb = up;
252 return (0);
253}
254
255void
256udp_discardcb(struct udpcb *up)
257{
258
259 uma_zfree(V_udpcb_zone, up);
260}
261
262#ifdef VIMAGE
263void
264udp_destroy(void)
265{
266
267 in_pcbinfo_destroy(&V_udbinfo);
268 uma_zdestroy(V_udpcb_zone);
269}
270
271void
272udplite_destroy(void)
273{
274
275 in_pcbinfo_destroy(&V_ulitecbinfo);
276}
277#endif
278
279#ifdef INET
280/*
281 * Subroutine of udp_input(), which appends the provided mbuf chain to the
282 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
283 * contains the source address. If the socket ends up being an IPv6 socket,
284 * udp_append() will convert to a sockaddr_in6 before passing the address
285 * into the socket code.
286 */
287static void
288udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
289 struct sockaddr_in *udp_in)
290{
291 struct sockaddr *append_sa;
292 struct socket *so;
293 struct mbuf *opts = 0;
294#ifdef INET6
295 struct sockaddr_in6 udp_in6;
296#endif
297 struct udpcb *up;
298
299 INP_LOCK_ASSERT(inp);
300
301 /*
302 * Engage the tunneling protocol.
303 */
304 up = intoudpcb(inp);
305 if (up->u_tun_func != NULL) {
306 (*up->u_tun_func)(n, off, inp);
307 return;
308 }
309
310 if (n == NULL)
311 return;
312
313 off += sizeof(struct udphdr);
314
315#ifdef IPSEC
316 /* Check AH/ESP integrity. */
317 if (ipsec4_in_reject(n, inp)) {
318 m_freem(n);
319 IPSECSTAT_INC(ips_in_polvio);
320 return;
321 }
322#ifdef IPSEC_NAT_T
323 up = intoudpcb(inp);
324 KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
325 if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */
326 n = udp4_espdecap(inp, n, off);
327 if (n == NULL) /* Consumed. */
328 return;
329 }
330#endif /* IPSEC_NAT_T */
331#endif /* IPSEC */
332#ifdef MAC
333 if (mac_inpcb_check_deliver(inp, n) != 0) {
334 m_freem(n);
335 return;
336 }
337#endif /* MAC */
338 if (inp->inp_flags & INP_CONTROLOPTS ||
339 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
340#ifdef INET6
341 if (inp->inp_vflag & INP_IPV6)
342 (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
343 else
344#endif /* INET6 */
345 ip_savecontrol(inp, &opts, ip, n);
346 }
347#ifdef INET6
348 if (inp->inp_vflag & INP_IPV6) {
349 bzero(&udp_in6, sizeof(udp_in6));
350 udp_in6.sin6_len = sizeof(udp_in6);
351 udp_in6.sin6_family = AF_INET6;
352 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
353 append_sa = (struct sockaddr *)&udp_in6;
354 } else
355#endif /* INET6 */
356 append_sa = (struct sockaddr *)udp_in;
357 m_adj(n, off);
358
359 so = inp->inp_socket;
360 SOCKBUF_LOCK(&so->so_rcv);
361 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
362 SOCKBUF_UNLOCK(&so->so_rcv);
363 m_freem(n);
364 if (opts)
365 m_freem(opts);
366 UDPSTAT_INC(udps_fullsock);
367 } else
368 sorwakeup_locked(so);
369}
370
371void
372udp_input(struct mbuf *m, int off)
371int
372udp_input(struct mbuf **mp, int *offp, int proto)
373{
373{
374 int iphlen = off;
375 struct ip *ip;
376 struct udphdr *uh;
377 struct ifnet *ifp;
378 struct inpcb *inp;
379 uint16_t len, ip_len;
380 struct inpcbinfo *pcbinfo;
381 struct ip save_ip;
382 struct sockaddr_in udp_in;
374 struct ip *ip;
375 struct udphdr *uh;
376 struct ifnet *ifp;
377 struct inpcb *inp;
378 uint16_t len, ip_len;
379 struct inpcbinfo *pcbinfo;
380 struct ip save_ip;
381 struct sockaddr_in udp_in;
382 struct mbuf *m;
383 struct m_tag *fwd_tag;
383 struct m_tag *fwd_tag;
384 int cscov_partial;
385 uint8_t pr;
384 int cscov_partial, iphlen;
386
385
386 m = *mp;
387 iphlen = *offp;
387 ifp = m->m_pkthdr.rcvif;
388 ifp = m->m_pkthdr.rcvif;
389 *mp = NULL;
388 UDPSTAT_INC(udps_ipackets);
389
390 /*
391 * Strip IP options, if any; should skip this, make available to
392 * user, and use on returned packets, but we don't yet have a way to
393 * check the checksum with options still present.
394 */
395 if (iphlen > sizeof (struct ip)) {
396 ip_stripoptions(m);
397 iphlen = sizeof(struct ip);
398 }
399
400 /*
401 * Get IP and UDP header together in first mbuf.
402 */
403 ip = mtod(m, struct ip *);
404 if (m->m_len < iphlen + sizeof(struct udphdr)) {
405 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
406 UDPSTAT_INC(udps_hdrops);
390 UDPSTAT_INC(udps_ipackets);
391
392 /*
393 * Strip IP options, if any; should skip this, make available to
394 * user, and use on returned packets, but we don't yet have a way to
395 * check the checksum with options still present.
396 */
397 if (iphlen > sizeof (struct ip)) {
398 ip_stripoptions(m);
399 iphlen = sizeof(struct ip);
400 }
401
402 /*
403 * Get IP and UDP header together in first mbuf.
404 */
405 ip = mtod(m, struct ip *);
406 if (m->m_len < iphlen + sizeof(struct udphdr)) {
407 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
408 UDPSTAT_INC(udps_hdrops);
407 return;
409 return (IPPROTO_DONE);
408 }
409 ip = mtod(m, struct ip *);
410 }
411 uh = (struct udphdr *)((caddr_t)ip + iphlen);
410 }
411 ip = mtod(m, struct ip *);
412 }
413 uh = (struct udphdr *)((caddr_t)ip + iphlen);
412 pr = ip->ip_p;
413 cscov_partial = (pr == IPPROTO_UDPLITE) ? 1 : 0;
414 cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
414
415 /*
416 * Destination port of 0 is illegal, based on RFC768.
417 */
418 if (uh->uh_dport == 0)
419 goto badunlocked;
420
421 /*
422 * Construct sockaddr format source address. Stuff source address
423 * and datagram in user buffer.
424 */
425 bzero(&udp_in, sizeof(udp_in));
426 udp_in.sin_len = sizeof(udp_in);
427 udp_in.sin_family = AF_INET;
428 udp_in.sin_port = uh->uh_sport;
429 udp_in.sin_addr = ip->ip_src;
430
431 /*
432 * Make mbuf data length reflect UDP length. If not enough data to
433 * reflect UDP length, drop.
434 */
435 len = ntohs((u_short)uh->uh_ulen);
436 ip_len = ntohs(ip->ip_len) - iphlen;
415
416 /*
417 * Destination port of 0 is illegal, based on RFC768.
418 */
419 if (uh->uh_dport == 0)
420 goto badunlocked;
421
422 /*
423 * Construct sockaddr format source address. Stuff source address
424 * and datagram in user buffer.
425 */
426 bzero(&udp_in, sizeof(udp_in));
427 udp_in.sin_len = sizeof(udp_in);
428 udp_in.sin_family = AF_INET;
429 udp_in.sin_port = uh->uh_sport;
430 udp_in.sin_addr = ip->ip_src;
431
432 /*
433 * Make mbuf data length reflect UDP length. If not enough data to
434 * reflect UDP length, drop.
435 */
436 len = ntohs((u_short)uh->uh_ulen);
437 ip_len = ntohs(ip->ip_len) - iphlen;
437 if (pr == IPPROTO_UDPLITE && len == 0) {
438 if (proto == IPPROTO_UDPLITE && len == 0) {
438 /* Zero means checksum over the complete packet. */
439 len = ip_len;
440 cscov_partial = 0;
441 }
442 if (ip_len != len) {
443 if (len > ip_len || len < sizeof(struct udphdr)) {
444 UDPSTAT_INC(udps_badlen);
445 goto badunlocked;
446 }
439 /* Zero means checksum over the complete packet. */
440 len = ip_len;
441 cscov_partial = 0;
442 }
443 if (ip_len != len) {
444 if (len > ip_len || len < sizeof(struct udphdr)) {
445 UDPSTAT_INC(udps_badlen);
446 goto badunlocked;
447 }
447 if (pr == IPPROTO_UDP)
448 if (proto == IPPROTO_UDP)
448 m_adj(m, len - ip_len);
449 }
450
451 /*
452 * Save a copy of the IP header in case we want to restore it for
453 * sending an ICMP error message in response.
454 */
455 if (!V_udp_blackhole)
456 save_ip = *ip;
457 else
458 memset(&save_ip, 0, sizeof(save_ip));
459
460 /*
461 * Checksum extended UDP header and data.
462 */
463 if (uh->uh_sum) {
464 u_short uh_sum;
465
466 if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
467 !cscov_partial) {
468 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
469 uh_sum = m->m_pkthdr.csum_data;
470 else
471 uh_sum = in_pseudo(ip->ip_src.s_addr,
472 ip->ip_dst.s_addr, htonl((u_short)len +
449 m_adj(m, len - ip_len);
450 }
451
452 /*
453 * Save a copy of the IP header in case we want to restore it for
454 * sending an ICMP error message in response.
455 */
456 if (!V_udp_blackhole)
457 save_ip = *ip;
458 else
459 memset(&save_ip, 0, sizeof(save_ip));
460
461 /*
462 * Checksum extended UDP header and data.
463 */
464 if (uh->uh_sum) {
465 u_short uh_sum;
466
467 if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
468 !cscov_partial) {
469 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
470 uh_sum = m->m_pkthdr.csum_data;
471 else
472 uh_sum = in_pseudo(ip->ip_src.s_addr,
473 ip->ip_dst.s_addr, htonl((u_short)len +
473 m->m_pkthdr.csum_data + pr));
474 m->m_pkthdr.csum_data + proto));
474 uh_sum ^= 0xffff;
475 } else {
476 char b[9];
477
478 bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
479 bzero(((struct ipovly *)ip)->ih_x1, 9);
475 uh_sum ^= 0xffff;
476 } else {
477 char b[9];
478
479 bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
480 bzero(((struct ipovly *)ip)->ih_x1, 9);
480 ((struct ipovly *)ip)->ih_len = (pr == IPPROTO_UDP) ?
481 ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
481 uh->uh_ulen : htons(ip_len);
482 uh_sum = in_cksum(m, len + sizeof (struct ip));
483 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
484 }
485 if (uh_sum) {
486 UDPSTAT_INC(udps_badsum);
487 m_freem(m);
482 uh->uh_ulen : htons(ip_len);
483 uh_sum = in_cksum(m, len + sizeof (struct ip));
484 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
485 }
486 if (uh_sum) {
487 UDPSTAT_INC(udps_badsum);
488 m_freem(m);
488 return;
489 return (IPPROTO_DONE);
489 }
490 } else
491 UDPSTAT_INC(udps_nosum);
492
490 }
491 } else
492 UDPSTAT_INC(udps_nosum);
493
493 pcbinfo = get_inpcbinfo(pr);
494 pcbinfo = get_inpcbinfo(proto);
494 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
495 in_broadcast(ip->ip_dst, ifp)) {
496 struct inpcb *last;
497 struct inpcbhead *pcblist;
498 struct ip_moptions *imo;
499
500 INP_INFO_RLOCK(pcbinfo);
495 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
496 in_broadcast(ip->ip_dst, ifp)) {
497 struct inpcb *last;
498 struct inpcbhead *pcblist;
499 struct ip_moptions *imo;
500
501 INP_INFO_RLOCK(pcbinfo);
501 pcblist = get_pcblist(pr);
502 pcblist = get_pcblist(proto);
502 last = NULL;
503 LIST_FOREACH(inp, pcblist, inp_list) {
504 if (inp->inp_lport != uh->uh_dport)
505 continue;
506#ifdef INET6
507 if ((inp->inp_vflag & INP_IPV4) == 0)
508 continue;
509#endif
510 if (inp->inp_laddr.s_addr != INADDR_ANY &&
511 inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
512 continue;
513 if (inp->inp_faddr.s_addr != INADDR_ANY &&
514 inp->inp_faddr.s_addr != ip->ip_src.s_addr)
515 continue;
516 if (inp->inp_fport != 0 &&
517 inp->inp_fport != uh->uh_sport)
518 continue;
519
520 INP_RLOCK(inp);
521
522 /*
523 * XXXRW: Because we weren't holding either the inpcb
524 * or the hash lock when we checked for a match
525 * before, we should probably recheck now that the
526 * inpcb lock is held.
527 */
528
529 /*
530 * Handle socket delivery policy for any-source
531 * and source-specific multicast. [RFC3678]
532 */
533 imo = inp->inp_moptions;
534 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
535 struct sockaddr_in group;
536 int blocked;
537 if (imo == NULL) {
538 INP_RUNLOCK(inp);
539 continue;
540 }
541 bzero(&group, sizeof(struct sockaddr_in));
542 group.sin_len = sizeof(struct sockaddr_in);
543 group.sin_family = AF_INET;
544 group.sin_addr = ip->ip_dst;
545
546 blocked = imo_multi_filter(imo, ifp,
547 (struct sockaddr *)&group,
548 (struct sockaddr *)&udp_in);
549 if (blocked != MCAST_PASS) {
550 if (blocked == MCAST_NOTGMEMBER)
551 IPSTAT_INC(ips_notmember);
552 if (blocked == MCAST_NOTSMEMBER ||
553 blocked == MCAST_MUTED)
554 UDPSTAT_INC(udps_filtermcast);
555 INP_RUNLOCK(inp);
556 continue;
557 }
558 }
559 if (last != NULL) {
560 struct mbuf *n;
561
562 n = m_copy(m, 0, M_COPYALL);
563 udp_append(last, ip, n, iphlen, &udp_in);
564 INP_RUNLOCK(last);
565 }
566 last = inp;
567 /*
568 * Don't look for additional matches if this one does
569 * not have either the SO_REUSEPORT or SO_REUSEADDR
570 * socket options set. This heuristic avoids
571 * searching through all pcbs in the common case of a
572 * non-shared port. It assumes that an application
573 * will never clear these options after setting them.
574 */
575 if ((last->inp_socket->so_options &
576 (SO_REUSEPORT|SO_REUSEADDR)) == 0)
577 break;
578 }
579
580 if (last == NULL) {
581 /*
582 * No matching pcb found; discard datagram. (No need
583 * to send an ICMP Port Unreachable for a broadcast
584 * or multicast datagram.)
585 */
586 UDPSTAT_INC(udps_noportbcast);
587 if (inp)
588 INP_RUNLOCK(inp);
589 INP_INFO_RUNLOCK(pcbinfo);
590 goto badunlocked;
591 }
592 udp_append(last, ip, m, iphlen, &udp_in);
593 INP_RUNLOCK(last);
594 INP_INFO_RUNLOCK(pcbinfo);
503 last = NULL;
504 LIST_FOREACH(inp, pcblist, inp_list) {
505 if (inp->inp_lport != uh->uh_dport)
506 continue;
507#ifdef INET6
508 if ((inp->inp_vflag & INP_IPV4) == 0)
509 continue;
510#endif
511 if (inp->inp_laddr.s_addr != INADDR_ANY &&
512 inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
513 continue;
514 if (inp->inp_faddr.s_addr != INADDR_ANY &&
515 inp->inp_faddr.s_addr != ip->ip_src.s_addr)
516 continue;
517 if (inp->inp_fport != 0 &&
518 inp->inp_fport != uh->uh_sport)
519 continue;
520
521 INP_RLOCK(inp);
522
523 /*
524 * XXXRW: Because we weren't holding either the inpcb
525 * or the hash lock when we checked for a match
526 * before, we should probably recheck now that the
527 * inpcb lock is held.
528 */
529
530 /*
531 * Handle socket delivery policy for any-source
532 * and source-specific multicast. [RFC3678]
533 */
534 imo = inp->inp_moptions;
535 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
536 struct sockaddr_in group;
537 int blocked;
538 if (imo == NULL) {
539 INP_RUNLOCK(inp);
540 continue;
541 }
542 bzero(&group, sizeof(struct sockaddr_in));
543 group.sin_len = sizeof(struct sockaddr_in);
544 group.sin_family = AF_INET;
545 group.sin_addr = ip->ip_dst;
546
547 blocked = imo_multi_filter(imo, ifp,
548 (struct sockaddr *)&group,
549 (struct sockaddr *)&udp_in);
550 if (blocked != MCAST_PASS) {
551 if (blocked == MCAST_NOTGMEMBER)
552 IPSTAT_INC(ips_notmember);
553 if (blocked == MCAST_NOTSMEMBER ||
554 blocked == MCAST_MUTED)
555 UDPSTAT_INC(udps_filtermcast);
556 INP_RUNLOCK(inp);
557 continue;
558 }
559 }
560 if (last != NULL) {
561 struct mbuf *n;
562
563 n = m_copy(m, 0, M_COPYALL);
564 udp_append(last, ip, n, iphlen, &udp_in);
565 INP_RUNLOCK(last);
566 }
567 last = inp;
568 /*
569 * Don't look for additional matches if this one does
570 * not have either the SO_REUSEPORT or SO_REUSEADDR
571 * socket options set. This heuristic avoids
572 * searching through all pcbs in the common case of a
573 * non-shared port. It assumes that an application
574 * will never clear these options after setting them.
575 */
576 if ((last->inp_socket->so_options &
577 (SO_REUSEPORT|SO_REUSEADDR)) == 0)
578 break;
579 }
580
581 if (last == NULL) {
582 /*
583 * No matching pcb found; discard datagram. (No need
584 * to send an ICMP Port Unreachable for a broadcast
585 * or multicast datagram.)
586 */
587 UDPSTAT_INC(udps_noportbcast);
588 if (inp)
589 INP_RUNLOCK(inp);
590 INP_INFO_RUNLOCK(pcbinfo);
591 goto badunlocked;
592 }
593 udp_append(last, ip, m, iphlen, &udp_in);
594 INP_RUNLOCK(last);
595 INP_INFO_RUNLOCK(pcbinfo);
595 return;
596 return (IPPROTO_DONE);
596 }
597
598 /*
599 * Locate pcb for datagram.
600 */
601
602 /*
603 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
604 */
605 if ((m->m_flags & M_IP_NEXTHOP) &&
606 (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
607 struct sockaddr_in *next_hop;
608
609 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
610
611 /*
612 * Transparently forwarded. Pretend to be the destination.
613 * Already got one like this?
614 */
615 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
616 ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
617 if (!inp) {
618 /*
619 * It's new. Try to find the ambushing socket.
620 * Because we've rewritten the destination address,
621 * any hardware-generated hash is ignored.
622 */
623 inp = in_pcblookup(pcbinfo, ip->ip_src,
624 uh->uh_sport, next_hop->sin_addr,
625 next_hop->sin_port ? htons(next_hop->sin_port) :
626 uh->uh_dport, INPLOOKUP_WILDCARD |
627 INPLOOKUP_RLOCKPCB, ifp);
628 }
629 /* Remove the tag from the packet. We don't need it anymore. */
630 m_tag_delete(m, fwd_tag);
631 m->m_flags &= ~M_IP_NEXTHOP;
632 } else
633 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
634 ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
635 INPLOOKUP_RLOCKPCB, ifp, m);
636 if (inp == NULL) {
637 if (udp_log_in_vain) {
638 char buf[4*sizeof "123"];
639
640 strcpy(buf, inet_ntoa(ip->ip_dst));
641 log(LOG_INFO,
642 "Connection attempt to UDP %s:%d from %s:%d\n",
643 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
644 ntohs(uh->uh_sport));
645 }
646 UDPSTAT_INC(udps_noport);
647 if (m->m_flags & (M_BCAST | M_MCAST)) {
648 UDPSTAT_INC(udps_noportbcast);
649 goto badunlocked;
650 }
651 if (V_udp_blackhole)
652 goto badunlocked;
653 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
654 goto badunlocked;
655 *ip = save_ip;
656 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
597 }
598
599 /*
600 * Locate pcb for datagram.
601 */
602
603 /*
604 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
605 */
606 if ((m->m_flags & M_IP_NEXTHOP) &&
607 (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
608 struct sockaddr_in *next_hop;
609
610 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
611
612 /*
613 * Transparently forwarded. Pretend to be the destination.
614 * Already got one like this?
615 */
616 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
617 ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
618 if (!inp) {
619 /*
620 * It's new. Try to find the ambushing socket.
621 * Because we've rewritten the destination address,
622 * any hardware-generated hash is ignored.
623 */
624 inp = in_pcblookup(pcbinfo, ip->ip_src,
625 uh->uh_sport, next_hop->sin_addr,
626 next_hop->sin_port ? htons(next_hop->sin_port) :
627 uh->uh_dport, INPLOOKUP_WILDCARD |
628 INPLOOKUP_RLOCKPCB, ifp);
629 }
630 /* Remove the tag from the packet. We don't need it anymore. */
631 m_tag_delete(m, fwd_tag);
632 m->m_flags &= ~M_IP_NEXTHOP;
633 } else
634 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
635 ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
636 INPLOOKUP_RLOCKPCB, ifp, m);
637 if (inp == NULL) {
638 if (udp_log_in_vain) {
639 char buf[4*sizeof "123"];
640
641 strcpy(buf, inet_ntoa(ip->ip_dst));
642 log(LOG_INFO,
643 "Connection attempt to UDP %s:%d from %s:%d\n",
644 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
645 ntohs(uh->uh_sport));
646 }
647 UDPSTAT_INC(udps_noport);
648 if (m->m_flags & (M_BCAST | M_MCAST)) {
649 UDPSTAT_INC(udps_noportbcast);
650 goto badunlocked;
651 }
652 if (V_udp_blackhole)
653 goto badunlocked;
654 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
655 goto badunlocked;
656 *ip = save_ip;
657 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
657 return;
658 return (IPPROTO_DONE);
658 }
659
660 /*
661 * Check the minimum TTL for socket.
662 */
663 INP_RLOCK_ASSERT(inp);
664 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
665 INP_RUNLOCK(inp);
666 m_freem(m);
659 }
660
661 /*
662 * Check the minimum TTL for socket.
663 */
664 INP_RLOCK_ASSERT(inp);
665 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
666 INP_RUNLOCK(inp);
667 m_freem(m);
667 return;
668 return (IPPROTO_DONE);
668 }
669 if (cscov_partial) {
670 struct udpcb *up;
671
672 up = intoudpcb(inp);
673 if (up->u_rxcslen > len) {
674 INP_RUNLOCK(inp);
675 m_freem(m);
669 }
670 if (cscov_partial) {
671 struct udpcb *up;
672
673 up = intoudpcb(inp);
674 if (up->u_rxcslen > len) {
675 INP_RUNLOCK(inp);
676 m_freem(m);
676 return;
677 return (IPPROTO_DONE);
677 }
678 }
679
680 UDP_PROBE(receive, NULL, inp, ip, inp, uh);
681 udp_append(inp, ip, m, iphlen, &udp_in);
682 INP_RUNLOCK(inp);
678 }
679 }
680
681 UDP_PROBE(receive, NULL, inp, ip, inp, uh);
682 udp_append(inp, ip, m, iphlen, &udp_in);
683 INP_RUNLOCK(inp);
683 return;
684 return (IPPROTO_DONE);
684
685badunlocked:
686 m_freem(m);
685
686badunlocked:
687 m_freem(m);
688 return (IPPROTO_DONE);
687}
688#endif /* INET */
689
690/*
691 * Notify a udp user of an asynchronous error; just wake up so that they can
692 * collect error status.
693 */
694struct inpcb *
695udp_notify(struct inpcb *inp, int errno)
696{
697
698 /*
699 * While udp_ctlinput() always calls udp_notify() with a read lock
700 * when invoking it directly, in_pcbnotifyall() currently uses write
701 * locks due to sharing code with TCP. For now, accept either a read
702 * or a write lock, but a read lock is sufficient.
703 */
704 INP_LOCK_ASSERT(inp);
705
706 inp->inp_socket->so_error = errno;
707 sorwakeup(inp->inp_socket);
708 sowwakeup(inp->inp_socket);
709 return (inp);
710}
711
712#ifdef INET
713static void
714udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
715 struct inpcbinfo *pcbinfo)
716{
717 struct ip *ip = vip;
718 struct udphdr *uh;
719 struct in_addr faddr;
720 struct inpcb *inp;
721
722 faddr = ((struct sockaddr_in *)sa)->sin_addr;
723 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
724 return;
725
726 /*
727 * Redirects don't need to be handled up here.
728 */
729 if (PRC_IS_REDIRECT(cmd))
730 return;
731
732 /*
733 * Hostdead is ugly because it goes linearly through all PCBs.
734 *
735 * XXX: We never get this from ICMP, otherwise it makes an excellent
736 * DoS attack on machines with many connections.
737 */
738 if (cmd == PRC_HOSTDEAD)
739 ip = NULL;
740 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
741 return;
742 if (ip != NULL) {
743 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
744 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
745 ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
746 if (inp != NULL) {
747 INP_RLOCK_ASSERT(inp);
748 if (inp->inp_socket != NULL) {
749 udp_notify(inp, inetctlerrmap[cmd]);
750 }
751 INP_RUNLOCK(inp);
752 }
753 } else
754 in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
755 udp_notify);
756}
757void
758udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
759{
760
761 return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
762}
763
764void
765udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
766{
767
768 return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
769}
770#endif /* INET */
771
772static int
773udp_pcblist(SYSCTL_HANDLER_ARGS)
774{
775 int error, i, n;
776 struct inpcb *inp, **inp_list;
777 inp_gen_t gencnt;
778 struct xinpgen xig;
779
780 /*
781 * The process of preparing the PCB list is too time-consuming and
782 * resource-intensive to repeat twice on every request.
783 */
784 if (req->oldptr == 0) {
785 n = V_udbinfo.ipi_count;
786 n += imax(n / 8, 10);
787 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
788 return (0);
789 }
790
791 if (req->newptr != 0)
792 return (EPERM);
793
794 /*
795 * OK, now we're committed to doing something.
796 */
797 INP_INFO_RLOCK(&V_udbinfo);
798 gencnt = V_udbinfo.ipi_gencnt;
799 n = V_udbinfo.ipi_count;
800 INP_INFO_RUNLOCK(&V_udbinfo);
801
802 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
803 + n * sizeof(struct xinpcb));
804 if (error != 0)
805 return (error);
806
807 xig.xig_len = sizeof xig;
808 xig.xig_count = n;
809 xig.xig_gen = gencnt;
810 xig.xig_sogen = so_gencnt;
811 error = SYSCTL_OUT(req, &xig, sizeof xig);
812 if (error)
813 return (error);
814
815 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
816 if (inp_list == 0)
817 return (ENOMEM);
818
819 INP_INFO_RLOCK(&V_udbinfo);
820 for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
821 inp = LIST_NEXT(inp, inp_list)) {
822 INP_WLOCK(inp);
823 if (inp->inp_gencnt <= gencnt &&
824 cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
825 in_pcbref(inp);
826 inp_list[i++] = inp;
827 }
828 INP_WUNLOCK(inp);
829 }
830 INP_INFO_RUNLOCK(&V_udbinfo);
831 n = i;
832
833 error = 0;
834 for (i = 0; i < n; i++) {
835 inp = inp_list[i];
836 INP_RLOCK(inp);
837 if (inp->inp_gencnt <= gencnt) {
838 struct xinpcb xi;
839
840 bzero(&xi, sizeof(xi));
841 xi.xi_len = sizeof xi;
842 /* XXX should avoid extra copy */
843 bcopy(inp, &xi.xi_inp, sizeof *inp);
844 if (inp->inp_socket)
845 sotoxsocket(inp->inp_socket, &xi.xi_socket);
846 xi.xi_inp.inp_gencnt = inp->inp_gencnt;
847 INP_RUNLOCK(inp);
848 error = SYSCTL_OUT(req, &xi, sizeof xi);
849 } else
850 INP_RUNLOCK(inp);
851 }
852 INP_INFO_WLOCK(&V_udbinfo);
853 for (i = 0; i < n; i++) {
854 inp = inp_list[i];
855 INP_RLOCK(inp);
856 if (!in_pcbrele_rlocked(inp))
857 INP_RUNLOCK(inp);
858 }
859 INP_INFO_WUNLOCK(&V_udbinfo);
860
861 if (!error) {
862 /*
863 * Give the user an updated idea of our state. If the
864 * generation differs from what we told her before, she knows
865 * that something happened while we were processing this
866 * request, and it might be necessary to retry.
867 */
868 INP_INFO_RLOCK(&V_udbinfo);
869 xig.xig_gen = V_udbinfo.ipi_gencnt;
870 xig.xig_sogen = so_gencnt;
871 xig.xig_count = V_udbinfo.ipi_count;
872 INP_INFO_RUNLOCK(&V_udbinfo);
873 error = SYSCTL_OUT(req, &xig, sizeof xig);
874 }
875 free(inp_list, M_TEMP);
876 return (error);
877}
878
879SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
880 CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
881 udp_pcblist, "S,xinpcb", "List of active UDP sockets");
882
883#ifdef INET
884static int
885udp_getcred(SYSCTL_HANDLER_ARGS)
886{
887 struct xucred xuc;
888 struct sockaddr_in addrs[2];
889 struct inpcb *inp;
890 int error;
891
892 error = priv_check(req->td, PRIV_NETINET_GETCRED);
893 if (error)
894 return (error);
895 error = SYSCTL_IN(req, addrs, sizeof(addrs));
896 if (error)
897 return (error);
898 inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
899 addrs[0].sin_addr, addrs[0].sin_port,
900 INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
901 if (inp != NULL) {
902 INP_RLOCK_ASSERT(inp);
903 if (inp->inp_socket == NULL)
904 error = ENOENT;
905 if (error == 0)
906 error = cr_canseeinpcb(req->td->td_ucred, inp);
907 if (error == 0)
908 cru2x(inp->inp_cred, &xuc);
909 INP_RUNLOCK(inp);
910 } else
911 error = ENOENT;
912 if (error == 0)
913 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
914 return (error);
915}
916
917SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
918 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
919 udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
920#endif /* INET */
921
/*
 * Socket option handler for UDP and UDP-Lite sockets.
 *
 * Options whose level is not the socket's own protocol are forwarded to
 * ip6_ctloutput() or ip_ctloutput().  At the protocol level the function
 * handles UDP_ENCAP and, for UDP-Lite sockets only, UDPLITE_SEND_CSCOV and
 * UDPLITE_RECV_CSCOV; every other option yields ENOPROTOOPT.
 *
 * The inpcb write lock is taken on entry and is always released before
 * sooptcopyin()/sooptcopyout() or the IP-level handlers are called; every
 * path through the two handled directions returns with the lock dropped.
 *
 * Returns 0 on success or an errno value (EINVAL, ENOPROTOOPT, or the
 * error from the copy/IP-level routines).
 */
922int
923udp_ctloutput(struct socket *so, struct sockopt *sopt)
924{
925 struct inpcb *inp;
926 struct udpcb *up;
927 int isudplite, error, optval;
928
929 error = 0;
930 isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
931 inp = sotoinpcb(so);
932 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
933 INP_WLOCK(inp);
 /* Not a UDP/UDP-Lite level option: let the IP layer deal with it. */
934 if (sopt->sopt_level != so->so_proto->pr_protocol) {
935#ifdef INET6
936 if (INP_CHECK_SOCKAF(so, AF_INET6)) {
937 INP_WUNLOCK(inp);
938 error = ip6_ctloutput(so, sopt);
939 }
940#endif
941#if defined(INET) && defined(INET6)
942 else
943#endif
944#ifdef INET
945 {
946 INP_WUNLOCK(inp);
947 error = ip_ctloutput(so, sopt);
948 }
949#endif
950 return (error);
951 }
952
953 switch (sopt->sopt_dir) {
954 case SOPT_SET:
955 switch (sopt->sopt_name) {
956 case UDP_ENCAP:
 /* Drop the lock around the copy-in, then re-acquire it. */
957 INP_WUNLOCK(inp);
958 error = sooptcopyin(sopt, &optval, sizeof optval,
959 sizeof optval);
960 if (error)
961 break;
962 inp = sotoinpcb(so);
963 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
964 INP_WLOCK(inp);
965#ifdef IPSEC_NAT_T
966 up = intoudpcb(inp);
967 KASSERT(up != NULL, ("%s: up == NULL", __func__));
968#endif
 /*
  * Without IPSEC_NAT_T only the value 0 is accepted (and is a no-op);
  * any other value falls through to EINVAL.
  */
969 switch (optval) {
970 case 0:
971 /* Clear all UDP encap. */
972#ifdef IPSEC_NAT_T
973 up->u_flags &= ~UF_ESPINUDP_ALL;
974#endif
975 break;
976#ifdef IPSEC_NAT_T
977 case UDP_ENCAP_ESPINUDP:
978 case UDP_ENCAP_ESPINUDP_NON_IKE:
 /* Clear any previous encap mode, then set the requested one. */
979 up->u_flags &= ~UF_ESPINUDP_ALL;
980 if (optval == UDP_ENCAP_ESPINUDP)
981 up->u_flags |= UF_ESPINUDP;
982 else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
983 up->u_flags |= UF_ESPINUDP_NON_IKE;
984 break;
985#endif
986 default:
987 error = EINVAL;
988 break;
989 }
990 INP_WUNLOCK(inp);
991 break;
992 case UDPLITE_SEND_CSCOV:
993 case UDPLITE_RECV_CSCOV:
 /* Checksum coverage options exist only on UDP-Lite sockets. */
994 if (!isudplite) {
995 INP_WUNLOCK(inp);
996 error = ENOPROTOOPT;
997 break;
998 }
999 INP_WUNLOCK(inp);
1000 error = sooptcopyin(sopt, &optval, sizeof(optval),
1001 sizeof(optval));
1002 if (error != 0)
1003 break;
1004 inp = sotoinpcb(so);
1005 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1006 INP_WLOCK(inp);
1007 up = intoudpcb(inp);
1008 KASSERT(up != NULL, ("%s: up == NULL", __func__));
 /* Zero is accepted; any other value below 8 is rejected. */
1009 if (optval != 0 && optval < 8) {
1010 INP_WUNLOCK(inp);
1011 error = EINVAL;
1012 break;
1013 }
1014 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1015 up->u_txcslen = optval;
1016 else
1017 up->u_rxcslen = optval;
1018 INP_WUNLOCK(inp);
1019 break;
1020 default:
1021 INP_WUNLOCK(inp);
1022 error = ENOPROTOOPT;
1023 break;
1024 }
1025 break;
1026 case SOPT_GET:
1027 switch (sopt->sopt_name) {
1028#ifdef IPSEC_NAT_T
1029 case UDP_ENCAP:
 /* Snapshot the flags under the lock, copy out after dropping it. */
1030 up = intoudpcb(inp);
1031 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1032 optval = up->u_flags & UF_ESPINUDP_ALL;
1033 INP_WUNLOCK(inp);
1034 error = sooptcopyout(sopt, &optval, sizeof optval);
1035 break;
1036#endif
1037 case UDPLITE_SEND_CSCOV:
1038 case UDPLITE_RECV_CSCOV:
1039 if (!isudplite) {
1040 INP_WUNLOCK(inp);
1041 error = ENOPROTOOPT;
1042 break;
1043 }
1044 up = intoudpcb(inp);
1045 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1046 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1047 optval = up->u_txcslen;
1048 else
1049 optval = up->u_rxcslen;
1050 INP_WUNLOCK(inp);
1051 error = sooptcopyout(sopt, &optval, sizeof(optval));
1052 break;
1053 default:
1054 INP_WUNLOCK(inp);
1055 error = ENOPROTOOPT;
1056 break;
1057 }
1058 break;
1059 }
1060 return (error);
1061}
1062
1063#ifdef INET
/*
 * Values stored in udp_output()'s unlock_udbinfo to remember which pcbinfo
 * hash lock (if any) was taken and therefore has to be released.
 * UH_WLOCKED additionally means the inpcb itself is write-locked rather
 * than read-locked.
 */
1064#define UH_WLOCKED 2
1065#define UH_RLOCKED 1
1066#define UH_UNLOCKED 0
/*
 * Build and transmit one UDP or UDP-Lite datagram for an IPv4 socket.
 *
 * inp     - PCB of the sending socket.
 * m       - payload; freed here on every error path, otherwise handed to
 *           ip_output().
 * addr    - destination (struct sockaddr_in) for an unconnected send, or
 *           NULL to use the connected foreign address.
 * control - optional control mbuf; IP_SENDSRCADDR and IP_TOS cmsgs at level
 *           IPPROTO_IP are honoured.  Always freed here.
 * td      - sending thread; its credentials are used for bind/connect
 *           setup and jail checks.
 *
 * Returns 0 or an errno value: EMSGSIZE, EINVAL, ENOPROTOOPT, EISCONN,
 * ENOTCONN, EAGAIN, ENOBUFS, or whatever the helper routines and
 * ip_output() report.
 */
1067static int
1068udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
1069 struct mbuf *control, struct thread *td)
1070{
1071 struct udpiphdr *ui;
1072 int len = m->m_pkthdr.len;
1073 struct in_addr faddr, laddr;
1074 struct cmsghdr *cm;
1075 struct inpcbinfo *pcbinfo;
1076 struct sockaddr_in *sin, src;
1077 int cscov_partial = 0;
1078 int error = 0;
1079 int ipflags;
1080 u_short fport, lport;
1081 int unlock_udbinfo;
1082 u_char tos;
1083 uint8_t pr;
1084 uint16_t cscov = 0;
1085
1086 /*
1087 * udp_output() may need to temporarily bind or connect the current
1088 * inpcb. As such, we don't know up front whether we will need the
1089 * pcbinfo lock or not. Do any work to decide what is needed up
1090 * front before acquiring any locks.
1091 */
1092 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
1093 if (control)
1094 m_freem(control);
1095 m_freem(m);
1096 return (EMSGSIZE);
1097 }
1098
 /* sin_family == 0 means "no IP_SENDSRCADDR supplied"; tested below. */
1099 src.sin_family = 0;
1100 INP_RLOCK(inp);
1101 tos = inp->inp_ip_tos;
1102 if (control != NULL) {
1103 /*
1104 * XXX: Currently, we assume all the optional information is
1105 * stored in a single mbuf.
1106 */
1107 if (control->m_next) {
1108 INP_RUNLOCK(inp);
1109 m_freem(control);
1110 m_freem(m);
1111 return (EINVAL);
1112 }
 /*
  * Walk the cmsgs in place.  cm is assigned at the top of every
  * iteration; the step expression then advances the mbuf past the
  * cmsg that was just examined.
  */
1113 for (; control->m_len > 0;
1114 control->m_data += CMSG_ALIGN(cm->cmsg_len),
1115 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
1116 cm = mtod(control, struct cmsghdr *);
1117 if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
1118 || cm->cmsg_len > control->m_len) {
1119 error = EINVAL;
1120 break;
1121 }
 /* Silently skip control messages for other levels. */
1122 if (cm->cmsg_level != IPPROTO_IP)
1123 continue;
1124
1125 switch (cm->cmsg_type) {
1126 case IP_SENDSRCADDR:
1127 if (cm->cmsg_len !=
1128 CMSG_LEN(sizeof(struct in_addr))) {
1129 error = EINVAL;
1130 break;
1131 }
1132 bzero(&src, sizeof(src));
1133 src.sin_family = AF_INET;
1134 src.sin_len = sizeof(src);
1135 src.sin_port = inp->inp_lport;
1136 src.sin_addr =
1137 *(struct in_addr *)CMSG_DATA(cm);
1138 break;
1139
1140 case IP_TOS:
1141 if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
1142 error = EINVAL;
1143 break;
1144 }
1145 tos = *(u_char *)CMSG_DATA(cm);
1146 break;
1147
1148 default:
1149 error = ENOPROTOOPT;
1150 break;
1151 }
 /* The breaks above only leave the switch; leave the loop here. */
1152 if (error)
1153 break;
1154 }
1155 m_freem(control);
1156 }
1157 if (error) {
1158 INP_RUNLOCK(inp);
1159 m_freem(m);
1160 return (error);
1161 }
1162
1163 /*
1164 * Depending on whether or not the application has bound or connected
1165 * the socket, we may have to do varying levels of work. The optimal
1166 * case is for a connected UDP socket, as a global lock isn't
1167 * required at all.
1168 *
1169 * In order to decide which we need, we require stability of the
1170 * inpcb binding, which we ensure by acquiring a read lock on the
1171 * inpcb. This doesn't strictly follow the lock order, so we play
1172 * the trylock and retry game; note that we may end up with more
1173 * conservative locks than required the second time around, so later
1174 * assertions have to accept that. Further analysis of the number of
1175 * misses under contention is required.
1176 *
1177 * XXXRW: Check that hash locking update here is correct.
1178 */
1179 pr = inp->inp_socket->so_proto->pr_protocol;
1180 pcbinfo = get_inpcbinfo(pr);
1181 sin = (struct sockaddr_in *)addr;
1182 if (sin != NULL &&
1183 (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 /*
  * Completely unbound socket with an explicit destination: a port
  * may be committed below, so switch to write locks.  The inpcb is
  * unlocked for a moment between the two calls.
  * NOTE(review): the binding tested above could change in that
  * window -- confirm the later checks cope with this.
  */
1184 INP_RUNLOCK(inp);
1185 INP_WLOCK(inp);
1186 INP_HASH_WLOCK(pcbinfo);
1187 unlock_udbinfo = UH_WLOCKED;
1188 } else if ((sin != NULL && (
1189 (sin->sin_addr.s_addr == INADDR_ANY) ||
1190 (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
1191 (inp->inp_laddr.s_addr == INADDR_ANY) ||
1192 (inp->inp_lport == 0))) ||
1193 (src.sin_family == AF_INET)) {
 /* Setup helpers will be called but nothing is committed. */
1194 INP_HASH_RLOCK(pcbinfo);
1195 unlock_udbinfo = UH_RLOCKED;
1196 } else
1197 unlock_udbinfo = UH_UNLOCKED;
1198
1199 /*
1200 * If the IP_SENDSRCADDR control message was specified, override the
1201 * source address for this datagram. Its use is invalidated if the
1202 * address thus specified is incomplete or clobbers other inpcbs.
1203 */
1204 laddr = inp->inp_laddr;
1205 lport = inp->inp_lport;
1206 if (src.sin_family == AF_INET) {
1207 INP_HASH_LOCK_ASSERT(pcbinfo);
1208 if ((lport == 0) ||
1209 (laddr.s_addr == INADDR_ANY &&
1210 src.sin_addr.s_addr == INADDR_ANY)) {
1211 error = EINVAL;
1212 goto release;
1213 }
1214 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
1215 &laddr.s_addr, &lport, td->td_ucred);
1216 if (error)
1217 goto release;
1218 }
1219
1220 /*
1221 * If a UDP socket has been connected, then a local address/port will
1222 * have been selected and bound.
1223 *
1224 * If a UDP socket has not been connected to, then an explicit
1225 * destination address must be used, in which case a local
1226 * address/port may not have been selected and bound.
1227 */
1228 if (sin != NULL) {
1229 INP_LOCK_ASSERT(inp);
 /* An explicit destination is refused on a connected socket. */
1230 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1231 error = EISCONN;
1232 goto release;
1233 }
1234
1235 /*
1236 * Jail may rewrite the destination address, so let it do
1237 * that before we use it.
1238 */
1239 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1240 if (error)
1241 goto release;
1242
1243 /*
1244 * If a local address or port hasn't yet been selected, or if
1245 * the destination address needs to be rewritten due to using
1246 * a special INADDR_ constant, invoke in_pcbconnect_setup()
1247 * to do the heavy lifting. Once a port is selected, we
1248 * commit the binding back to the socket; we also commit the
1249 * binding of the address if in jail.
1250 *
1251 * If we already have a valid binding and we're not
1252 * requesting a destination address rewrite, use a fast path.
1253 */
1254 if (inp->inp_laddr.s_addr == INADDR_ANY ||
1255 inp->inp_lport == 0 ||
1256 sin->sin_addr.s_addr == INADDR_ANY ||
1257 sin->sin_addr.s_addr == INADDR_BROADCAST) {
1258 INP_HASH_LOCK_ASSERT(pcbinfo);
1259 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
1260 &lport, &faddr.s_addr, &fport, NULL,
1261 td->td_ucred);
1262 if (error)
1263 goto release;
1264
1265 /*
1266 * XXXRW: Why not commit the port if the address is
1267 * !INADDR_ANY?
1268 */
1269 /* Commit the local port if newly assigned. */
1270 if (inp->inp_laddr.s_addr == INADDR_ANY &&
1271 inp->inp_lport == 0) {
1272 INP_WLOCK_ASSERT(inp);
1273 INP_HASH_WLOCK_ASSERT(pcbinfo);
1274 /*
1275 * Remember addr if jailed, to prevent
1276 * rebinding.
1277 */
1278 if (prison_flag(td->td_ucred, PR_IP4))
1279 inp->inp_laddr = laddr;
1280 inp->inp_lport = lport;
 /* Undo the port assignment if hashing the PCB fails. */
1281 if (in_pcbinshash(inp) != 0) {
1282 inp->inp_lport = 0;
1283 error = EAGAIN;
1284 goto release;
1285 }
1286 inp->inp_flags |= INP_ANONPORT;
1287 }
1288 } else {
 /* Fast path: use the caller's destination unchanged. */
1289 faddr = sin->sin_addr;
1290 fport = sin->sin_port;
1291 }
1292 } else {
 /* No destination given: the socket must already be connected. */
1293 INP_LOCK_ASSERT(inp);
1294 faddr = inp->inp_faddr;
1295 fport = inp->inp_fport;
1296 if (faddr.s_addr == INADDR_ANY) {
1297 error = ENOTCONN;
1298 goto release;
1299 }
1300 }
1301
1302 /*
1303 * Calculate data length and get a mbuf for UDP, IP, and possible
1304 * link-layer headers. Immediate slide the data pointer back forward
1305 * since we won't use that space at this layer.
1306 */
1307 M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
1308 if (m == NULL) {
1309 error = ENOBUFS;
1310 goto release;
1311 }
1312 m->m_data += max_linkhdr;
1313 m->m_len -= max_linkhdr;
1314 m->m_pkthdr.len -= max_linkhdr;
1315
1316 /*
1317 * Fill in mbuf with extended UDP header and addresses and length put
1318 * into network format.
1319 */
1320 ui = mtod(m, struct udpiphdr *);
1321 bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
1322 ui->ui_pr = pr;
1323 ui->ui_src = laddr;
1324 ui->ui_dst = faddr;
1325 ui->ui_sport = lport;
1326 ui->ui_dport = fport;
1327 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1328 if (pr == IPPROTO_UDPLITE) {
1329 struct udpcb *up;
1330 uint16_t plen;
1331
1332 up = intoudpcb(inp);
1333 cscov = up->u_txcslen;
1334 plen = (u_short)len + sizeof(struct udphdr);
 /* Coverage reaching the whole packet is sent as 0 (full). */
1335 if (cscov >= plen)
1336 cscov = 0;
1337 ui->ui_len = htons(plen);
 /* For UDP-Lite the ui_ulen field is filled with the coverage. */
1338 ui->ui_ulen = htons(cscov);
1339 /*
1340 * For UDP-Lite, checksum coverage length of zero means
1341 * the entire UDPLite packet is covered by the checksum.
1342 */
1343 cscov_partial = (cscov == 0) ? 0 : 1;
1344 } else
1345 ui->ui_v = IPVERSION << 4;
1346
1347 /*
1348 * Set the Don't Fragment bit in the IP header.
1349 */
1350 if (inp->inp_flags & INP_DONTFRAG) {
1351 struct ip *ip;
1352
1353 ip = (struct ip *)&ui->ui_i;
1354 ip->ip_off |= htons(IP_DF);
1355 }
1356
 /* Translate socket/PCB options into ip_output() flags. */
1357 ipflags = 0;
1358 if (inp->inp_socket->so_options & SO_DONTROUTE)
1359 ipflags |= IP_ROUTETOIF;
1360 if (inp->inp_socket->so_options & SO_BROADCAST)
1361 ipflags |= IP_ALLOWBROADCAST;
1362 if (inp->inp_flags & INP_ONESBCAST)
1363 ipflags |= IP_SENDONES;
1364
1365#ifdef MAC
1366 mac_inpcb_create_mbuf(inp, m);
1367#endif
1368
1369 /*
1370 * Set up checksum and output datagram.
1371 */
1372 ui->ui_sum = 0;
1373 if (pr == IPPROTO_UDPLITE) {
1374 if (inp->inp_flags & INP_ONESBCAST)
1375 faddr.s_addr = INADDR_BROADCAST;
 /*
  * UDP-Lite checksums are computed here with in_cksum(); a result
  * of zero is replaced by 0xffff.
  */
1376 if (cscov_partial) {
1377 if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
1378 ui->ui_sum = 0xffff;
1379 } else {
1380 if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
1381 ui->ui_sum = 0xffff;
1382 }
1383 } else if (V_udp_cksum) {
1384 if (inp->inp_flags & INP_ONESBCAST)
1385 faddr.s_addr = INADDR_BROADCAST;
 /*
  * Plain UDP: only the pseudo-header sum is stored here; CSUM_UDP
  * and csum_data are set on the mbuf, presumably so a later stage
  * completes the checksum -- confirm against ip_output().
  */
1386 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
1387 htons((u_short)len + sizeof(struct udphdr) + pr));
1388 m->m_pkthdr.csum_flags = CSUM_UDP;
1389 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1390 }
1391 ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
1392 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
1393 ((struct ip *)ui)->ip_tos = tos; /* XXX */
1394 UDPSTAT_INC(udps_opackets);
1395
 /* The hash lock is released before ip_output(); the inpcb lock after. */
1396 if (unlock_udbinfo == UH_WLOCKED)
1397 INP_HASH_WUNLOCK(pcbinfo);
1398 else if (unlock_udbinfo == UH_RLOCKED)
1399 INP_HASH_RUNLOCK(pcbinfo);
1400 UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
1401 error = ip_output(m, inp->inp_options, NULL, ipflags,
1402 inp->inp_moptions, inp);
1403 if (unlock_udbinfo == UH_WLOCKED)
1404 INP_WUNLOCK(inp);
1405 else
1406 INP_RUNLOCK(inp);
1407 return (error);
1408
 /* Error exit: drop whichever locks are held and free the data mbuf. */
1409release:
1410 if (unlock_udbinfo == UH_WLOCKED) {
1411 INP_HASH_WUNLOCK(pcbinfo);
1412 INP_WUNLOCK(inp);
1413 } else if (unlock_udbinfo == UH_RLOCKED) {
1414 INP_HASH_RUNLOCK(pcbinfo);
1415 INP_RUNLOCK(inp);
1416 } else
1417 INP_RUNLOCK(inp);
1418 m_freem(m);
1419 return (error);
1420}
1421
1422
1423#if defined(IPSEC) && defined(IPSEC_NAT_T)
1424/*
1425 * Potentially decap ESP in UDP frame. Check for an ESP header
1426 * and optional marker; if present, strip the UDP header and
1427 * push the result through IPSec.
 *
 * inp - PCB of the receiving socket; must be read-locked and have one of
 *       the UF_ESPINUDP_ALL flags set.
 * m   - received packet, starting at the IP header.
 * off - offset from the start of the packet to the UDP payload (the UDP
 *       header occupies the sizeof(struct udphdr) bytes before it).
1428 *
1429 * Returns mbuf to be processed (potentially re-allocated) or
1430 * NULL if consumed and/or processed.
1431 */
1432static struct mbuf *
1433udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
1434{
1435 size_t minlen, payload, skip, iphlen;
1436 caddr_t data;
1437 struct udpcb *up;
1438 struct m_tag *tag;
1439 struct udphdr *udphdr;
1440 struct ip *ip;
1441
1442 INP_RLOCK_ASSERT(inp);
1443
1444 /*
1445 * Pull up data so the longest case is contiguous:
1446 * IP/UDP hdr + non ESP marker + ESP hdr.
1447 */
1448 minlen = off + sizeof(uint64_t) + sizeof(struct esp);
 /* Never ask for more than the packet actually holds. */
1449 if (minlen > m->m_pkthdr.len)
1450 minlen = m->m_pkthdr.len;
1451 if ((m = m_pullup(m, minlen)) == NULL) {
1452 IPSECSTAT_INC(ips_in_inval);
1453 return (NULL); /* Bypass caller processing. */
1454 }
1455 data = mtod(m, caddr_t); /* Points to ip header. */
1456 payload = m->m_len - off; /* Size of payload. */
1457
1458 if (payload == 1 && data[off] == '\xff')
1459 return (m); /* NB: keepalive packet, no decap. */
1460
1461 up = intoudpcb(inp);
1462 KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
1463 KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
1464 ("u_flags 0x%x", up->u_flags));
1465
1466 /*
1467 * Check that the payload is large enough to hold an
1468 * ESP header and compute the amount of data to remove.
1469 *
1470 * NB: the caller has already done a pullup for us.
1471 * XXX can we assume alignment and eliminate bcopys?
1472 */
1473 if (up->u_flags & UF_ESPINUDP_NON_IKE) {
1474 /*
1475 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
1476 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
1477 * possible AH mode non-IKE marker+non-ESP marker
1478 * from draft-ietf-ipsec-udp-encaps-00.txt.
1479 */
1480 uint64_t marker;
1481
1482 if (payload <= sizeof(uint64_t) + sizeof(struct esp))
1483 return (m); /* NB: no decap. */
1484 bcopy(data + off, &marker, sizeof(uint64_t));
1485 if (marker != 0) /* Non-IKE marker. */
1486 return (m); /* NB: no decap. */
 /* Strip both the 8-byte zero marker and the UDP header. */
1487 skip = sizeof(uint64_t) + sizeof(struct udphdr);
1488 } else {
1489 uint32_t spi;
1490
1491 if (payload <= sizeof(struct esp)) {
1492 IPSECSTAT_INC(ips_in_inval);
1493 m_freem(m);
1494 return (NULL); /* Discard. */
1495 }
1496 bcopy(data + off, &spi, sizeof(uint32_t));
1497 if (spi == 0) /* Non-ESP marker. */
1498 return (m); /* NB: no decap. */
 /* Only the UDP header has to go. */
1499 skip = sizeof(struct udphdr);
1500 }
1501
1502 /*
1503 * Setup a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
1504 * the UDP ports. This is required if we want to select
1505 * the right SPD for multiple hosts behind same NAT.
1506 *
1507 * NB: ports are maintained in network byte order everywhere
1508 * in the NAT-T code.
1509 */
1510 tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
1511 2 * sizeof(uint16_t), M_NOWAIT);
1512 if (tag == NULL) {
1513 IPSECSTAT_INC(ips_in_nomem);
1514 m_freem(m);
1515 return (NULL); /* Discard. */
1516 }
1517 iphlen = off - sizeof(struct udphdr);
1518 udphdr = (struct udphdr *)(data + iphlen);
 /* Tag payload layout: [0] = source port, [1] = destination port. */
1519 ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
1520 ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
1521 m_tag_prepend(m, tag);
1522
1523 /*
1524 * Remove the UDP header (and possibly the non ESP marker)
1525 * IP header length is iphlen
1526 * Before:
1527 * <--- off --->
1528 * +----+------+-----+
1529 * | IP | UDP | ESP |
1530 * +----+------+-----+
1531 * <-skip->
1532 * After:
1533 * +----+-----+
1534 * | IP | ESP |
1535 * +----+-----+
1536 * <-skip->
1537 */
 /*
  * Slide the IP header forward by skip bytes (the regions may overlap),
  * then trim skip bytes off the front of the mbuf.
  */
1538 ovbcopy(data, data + skip, iphlen);
1539 m_adj(m, skip);
1540
 /* Fix up the relocated IP header for the shorter ESP packet. */
1541 ip = mtod(m, struct ip *);
1542 ip->ip_len = htons(ntohs(ip->ip_len) - skip);
1543 ip->ip_p = IPPROTO_ESP;
1544
1545 /*
1546 * We cannot yet update the cksums so clear any
1547 * h/w cksum flags as they are no longer valid.
1548 */
1549 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
1550 m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
1551
1552 (void) ipsec4_common_input(m, iphlen, ip->ip_p);
1553 return (NULL); /* NB: consumed, bypass processing. */
1554}
1555#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
1556
1557static void
1558udp_abort(struct socket *so)
1559{
1560 struct inpcb *inp;
1561 struct inpcbinfo *pcbinfo;
1562
1563 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1564 inp = sotoinpcb(so);
1565 KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
1566 INP_WLOCK(inp);
1567 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1568 INP_HASH_WLOCK(pcbinfo);
1569 in_pcbdisconnect(inp);
1570 inp->inp_laddr.s_addr = INADDR_ANY;
1571 INP_HASH_WUNLOCK(pcbinfo);
1572 soisdisconnected(so);
1573 }
1574 INP_WUNLOCK(inp);
1575}
1576
1577static int
1578udp_attach(struct socket *so, int proto, struct thread *td)
1579{
1580 struct inpcb *inp;
1581 struct inpcbinfo *pcbinfo;
1582 int error;
1583
1584 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1585 inp = sotoinpcb(so);
1586 KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
1587 error = soreserve(so, udp_sendspace, udp_recvspace);
1588 if (error)
1589 return (error);
1590 INP_INFO_WLOCK(pcbinfo);
1591 error = in_pcballoc(so, pcbinfo);
1592 if (error) {
1593 INP_INFO_WUNLOCK(pcbinfo);
1594 return (error);
1595 }
1596
1597 inp = sotoinpcb(so);
1598 inp->inp_vflag |= INP_IPV4;
1599 inp->inp_ip_ttl = V_ip_defttl;
1600
1601 error = udp_newudpcb(inp);
1602 if (error) {
1603 in_pcbdetach(inp);
1604 in_pcbfree(inp);
1605 INP_INFO_WUNLOCK(pcbinfo);
1606 return (error);
1607 }
1608
1609 INP_WUNLOCK(inp);
1610 INP_INFO_WUNLOCK(pcbinfo);
1611 return (0);
1612}
1613#endif /* INET */
1614
1615int
1616udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
1617{
1618 struct inpcb *inp;
1619 struct udpcb *up;
1620
1621 KASSERT(so->so_type == SOCK_DGRAM,
1622 ("udp_set_kernel_tunneling: !dgram"));
1623 inp = sotoinpcb(so);
1624 KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
1625 INP_WLOCK(inp);
1626 up = intoudpcb(inp);
1627 if (up->u_tun_func != NULL) {
1628 INP_WUNLOCK(inp);
1629 return (EBUSY);
1630 }
1631 up->u_tun_func = f;
1632 INP_WUNLOCK(inp);
1633 return (0);
1634}
1635
1636#ifdef INET
1637static int
1638udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1639{
1640 struct inpcb *inp;
1641 struct inpcbinfo *pcbinfo;
1642 int error;
1643
1644 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1645 inp = sotoinpcb(so);
1646 KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1647 INP_WLOCK(inp);
1648 INP_HASH_WLOCK(pcbinfo);
1649 error = in_pcbbind(inp, nam, td->td_ucred);
1650 INP_HASH_WUNLOCK(pcbinfo);
1651 INP_WUNLOCK(inp);
1652 return (error);
1653}
1654
1655static void
1656udp_close(struct socket *so)
1657{
1658 struct inpcb *inp;
1659 struct inpcbinfo *pcbinfo;
1660
1661 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1662 inp = sotoinpcb(so);
1663 KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1664 INP_WLOCK(inp);
1665 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1666 INP_HASH_WLOCK(pcbinfo);
1667 in_pcbdisconnect(inp);
1668 inp->inp_laddr.s_addr = INADDR_ANY;
1669 INP_HASH_WUNLOCK(pcbinfo);
1670 soisdisconnected(so);
1671 }
1672 INP_WUNLOCK(inp);
1673}
1674
1675static int
1676udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1677{
1678 struct inpcb *inp;
1679 struct inpcbinfo *pcbinfo;
1680 struct sockaddr_in *sin;
1681 int error;
1682
1683 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1684 inp = sotoinpcb(so);
1685 KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1686 INP_WLOCK(inp);
1687 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1688 INP_WUNLOCK(inp);
1689 return (EISCONN);
1690 }
1691 sin = (struct sockaddr_in *)nam;
1692 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1693 if (error != 0) {
1694 INP_WUNLOCK(inp);
1695 return (error);
1696 }
1697 INP_HASH_WLOCK(pcbinfo);
1698 error = in_pcbconnect(inp, nam, td->td_ucred);
1699 INP_HASH_WUNLOCK(pcbinfo);
1700 if (error == 0)
1701 soisconnected(so);
1702 INP_WUNLOCK(inp);
1703 return (error);
1704}
1705
1706static void
1707udp_detach(struct socket *so)
1708{
1709 struct inpcb *inp;
1710 struct inpcbinfo *pcbinfo;
1711 struct udpcb *up;
1712
1713 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1714 inp = sotoinpcb(so);
1715 KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1716 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1717 ("udp_detach: not disconnected"));
1718 INP_INFO_WLOCK(pcbinfo);
1719 INP_WLOCK(inp);
1720 up = intoudpcb(inp);
1721 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1722 inp->inp_ppcb = NULL;
1723 in_pcbdetach(inp);
1724 in_pcbfree(inp);
1725 INP_INFO_WUNLOCK(pcbinfo);
1726 udp_discardcb(up);
1727}
1728
1729static int
1730udp_disconnect(struct socket *so)
1731{
1732 struct inpcb *inp;
1733 struct inpcbinfo *pcbinfo;
1734
1735 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1736 inp = sotoinpcb(so);
1737 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1738 INP_WLOCK(inp);
1739 if (inp->inp_faddr.s_addr == INADDR_ANY) {
1740 INP_WUNLOCK(inp);
1741 return (ENOTCONN);
1742 }
1743 INP_HASH_WLOCK(pcbinfo);
1744 in_pcbdisconnect(inp);
1745 inp->inp_laddr.s_addr = INADDR_ANY;
1746 INP_HASH_WUNLOCK(pcbinfo);
1747 SOCK_LOCK(so);
1748 so->so_state &= ~SS_ISCONNECTED; /* XXX */
1749 SOCK_UNLOCK(so);
1750 INP_WUNLOCK(inp);
1751 return (0);
1752}
1753
1754static int
1755udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1756 struct mbuf *control, struct thread *td)
1757{
1758 struct inpcb *inp;
1759
1760 inp = sotoinpcb(so);
1761 KASSERT(inp != NULL, ("udp_send: inp == NULL"));
1762 return (udp_output(inp, m, addr, control, td));
1763}
1764#endif /* INET */
1765
1766int
1767udp_shutdown(struct socket *so)
1768{
1769 struct inpcb *inp;
1770
1771 inp = sotoinpcb(so);
1772 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1773 INP_WLOCK(inp);
1774 socantsendmore(so);
1775 INP_WUNLOCK(inp);
1776 return (0);
1777}
1778
1779#ifdef INET
1780struct pr_usrreqs udp_usrreqs = {
1781 .pru_abort = udp_abort,
1782 .pru_attach = udp_attach,
1783 .pru_bind = udp_bind,
1784 .pru_connect = udp_connect,
1785 .pru_control = in_control,
1786 .pru_detach = udp_detach,
1787 .pru_disconnect = udp_disconnect,
1788 .pru_peeraddr = in_getpeeraddr,
1789 .pru_send = udp_send,
1790 .pru_soreceive = soreceive_dgram,
1791 .pru_sosend = sosend_dgram,
1792 .pru_shutdown = udp_shutdown,
1793 .pru_sockaddr = in_getsockaddr,
1794 .pru_sosetlabel = in_pcbsosetlabel,
1795 .pru_close = udp_close,
1796};
1797#endif /* INET */
689}
690#endif /* INET */
691
692/*
693 * Notify a udp user of an asynchronous error; just wake up so that they can
694 * collect error status.
695 */
696struct inpcb *
697udp_notify(struct inpcb *inp, int errno)
698{
699
700 /*
701 * While udp_ctlinput() always calls udp_notify() with a read lock
702 * when invoking it directly, in_pcbnotifyall() currently uses write
703 * locks due to sharing code with TCP. For now, accept either a read
704 * or a write lock, but a read lock is sufficient.
705 */
706 INP_LOCK_ASSERT(inp);
707
708 inp->inp_socket->so_error = errno;
709 sorwakeup(inp->inp_socket);
710 sowwakeup(inp->inp_socket);
711 return (inp);
712}
713
714#ifdef INET
715static void
716udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
717 struct inpcbinfo *pcbinfo)
718{
719 struct ip *ip = vip;
720 struct udphdr *uh;
721 struct in_addr faddr;
722 struct inpcb *inp;
723
724 faddr = ((struct sockaddr_in *)sa)->sin_addr;
725 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
726 return;
727
728 /*
729 * Redirects don't need to be handled up here.
730 */
731 if (PRC_IS_REDIRECT(cmd))
732 return;
733
734 /*
735 * Hostdead is ugly because it goes linearly through all PCBs.
736 *
737 * XXX: We never get this from ICMP, otherwise it makes an excellent
738 * DoS attack on machines with many connections.
739 */
740 if (cmd == PRC_HOSTDEAD)
741 ip = NULL;
742 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
743 return;
744 if (ip != NULL) {
745 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
746 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
747 ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
748 if (inp != NULL) {
749 INP_RLOCK_ASSERT(inp);
750 if (inp->inp_socket != NULL) {
751 udp_notify(inp, inetctlerrmap[cmd]);
752 }
753 INP_RUNLOCK(inp);
754 }
755 } else
756 in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
757 udp_notify);
758}
759void
760udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
761{
762
763 return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
764}
765
766void
767udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
768{
769
770 return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
771}
772#endif /* INET */
773
774static int
775udp_pcblist(SYSCTL_HANDLER_ARGS)
776{
777 int error, i, n;
778 struct inpcb *inp, **inp_list;
779 inp_gen_t gencnt;
780 struct xinpgen xig;
781
782 /*
783 * The process of preparing the PCB list is too time-consuming and
784 * resource-intensive to repeat twice on every request.
785 */
786 if (req->oldptr == 0) {
787 n = V_udbinfo.ipi_count;
788 n += imax(n / 8, 10);
789 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
790 return (0);
791 }
792
793 if (req->newptr != 0)
794 return (EPERM);
795
796 /*
797 * OK, now we're committed to doing something.
798 */
799 INP_INFO_RLOCK(&V_udbinfo);
800 gencnt = V_udbinfo.ipi_gencnt;
801 n = V_udbinfo.ipi_count;
802 INP_INFO_RUNLOCK(&V_udbinfo);
803
804 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
805 + n * sizeof(struct xinpcb));
806 if (error != 0)
807 return (error);
808
809 xig.xig_len = sizeof xig;
810 xig.xig_count = n;
811 xig.xig_gen = gencnt;
812 xig.xig_sogen = so_gencnt;
813 error = SYSCTL_OUT(req, &xig, sizeof xig);
814 if (error)
815 return (error);
816
817 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
818 if (inp_list == 0)
819 return (ENOMEM);
820
821 INP_INFO_RLOCK(&V_udbinfo);
822 for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
823 inp = LIST_NEXT(inp, inp_list)) {
824 INP_WLOCK(inp);
825 if (inp->inp_gencnt <= gencnt &&
826 cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
827 in_pcbref(inp);
828 inp_list[i++] = inp;
829 }
830 INP_WUNLOCK(inp);
831 }
832 INP_INFO_RUNLOCK(&V_udbinfo);
833 n = i;
834
835 error = 0;
836 for (i = 0; i < n; i++) {
837 inp = inp_list[i];
838 INP_RLOCK(inp);
839 if (inp->inp_gencnt <= gencnt) {
840 struct xinpcb xi;
841
842 bzero(&xi, sizeof(xi));
843 xi.xi_len = sizeof xi;
844 /* XXX should avoid extra copy */
845 bcopy(inp, &xi.xi_inp, sizeof *inp);
846 if (inp->inp_socket)
847 sotoxsocket(inp->inp_socket, &xi.xi_socket);
848 xi.xi_inp.inp_gencnt = inp->inp_gencnt;
849 INP_RUNLOCK(inp);
850 error = SYSCTL_OUT(req, &xi, sizeof xi);
851 } else
852 INP_RUNLOCK(inp);
853 }
854 INP_INFO_WLOCK(&V_udbinfo);
855 for (i = 0; i < n; i++) {
856 inp = inp_list[i];
857 INP_RLOCK(inp);
858 if (!in_pcbrele_rlocked(inp))
859 INP_RUNLOCK(inp);
860 }
861 INP_INFO_WUNLOCK(&V_udbinfo);
862
863 if (!error) {
864 /*
865 * Give the user an updated idea of our state. If the
866 * generation differs from what we told her before, she knows
867 * that something happened while we were processing this
868 * request, and it might be necessary to retry.
869 */
870 INP_INFO_RLOCK(&V_udbinfo);
871 xig.xig_gen = V_udbinfo.ipi_gencnt;
872 xig.xig_sogen = so_gencnt;
873 xig.xig_count = V_udbinfo.ipi_count;
874 INP_INFO_RUNLOCK(&V_udbinfo);
875 error = SYSCTL_OUT(req, &xig, sizeof xig);
876 }
877 free(inp_list, M_TEMP);
878 return (error);
879}
880
881SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
882 CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
883 udp_pcblist, "S,xinpcb", "List of active UDP sockets");
884
885#ifdef INET
886static int
887udp_getcred(SYSCTL_HANDLER_ARGS)
888{
889 struct xucred xuc;
890 struct sockaddr_in addrs[2];
891 struct inpcb *inp;
892 int error;
893
894 error = priv_check(req->td, PRIV_NETINET_GETCRED);
895 if (error)
896 return (error);
897 error = SYSCTL_IN(req, addrs, sizeof(addrs));
898 if (error)
899 return (error);
900 inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
901 addrs[0].sin_addr, addrs[0].sin_port,
902 INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
903 if (inp != NULL) {
904 INP_RLOCK_ASSERT(inp);
905 if (inp->inp_socket == NULL)
906 error = ENOENT;
907 if (error == 0)
908 error = cr_canseeinpcb(req->td->td_ucred, inp);
909 if (error == 0)
910 cru2x(inp->inp_cred, &xuc);
911 INP_RUNLOCK(inp);
912 } else
913 error = ENOENT;
914 if (error == 0)
915 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
916 return (error);
917}
918
919SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
920 CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
921 udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
922#endif /* INET */
923
924int
925udp_ctloutput(struct socket *so, struct sockopt *sopt)
926{
927 struct inpcb *inp;
928 struct udpcb *up;
929 int isudplite, error, optval;
930
931 error = 0;
932 isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
933 inp = sotoinpcb(so);
934 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
935 INP_WLOCK(inp);
936 if (sopt->sopt_level != so->so_proto->pr_protocol) {
937#ifdef INET6
938 if (INP_CHECK_SOCKAF(so, AF_INET6)) {
939 INP_WUNLOCK(inp);
940 error = ip6_ctloutput(so, sopt);
941 }
942#endif
943#if defined(INET) && defined(INET6)
944 else
945#endif
946#ifdef INET
947 {
948 INP_WUNLOCK(inp);
949 error = ip_ctloutput(so, sopt);
950 }
951#endif
952 return (error);
953 }
954
955 switch (sopt->sopt_dir) {
956 case SOPT_SET:
957 switch (sopt->sopt_name) {
958 case UDP_ENCAP:
959 INP_WUNLOCK(inp);
960 error = sooptcopyin(sopt, &optval, sizeof optval,
961 sizeof optval);
962 if (error)
963 break;
964 inp = sotoinpcb(so);
965 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
966 INP_WLOCK(inp);
967#ifdef IPSEC_NAT_T
968 up = intoudpcb(inp);
969 KASSERT(up != NULL, ("%s: up == NULL", __func__));
970#endif
971 switch (optval) {
972 case 0:
973 /* Clear all UDP encap. */
974#ifdef IPSEC_NAT_T
975 up->u_flags &= ~UF_ESPINUDP_ALL;
976#endif
977 break;
978#ifdef IPSEC_NAT_T
979 case UDP_ENCAP_ESPINUDP:
980 case UDP_ENCAP_ESPINUDP_NON_IKE:
981 up->u_flags &= ~UF_ESPINUDP_ALL;
982 if (optval == UDP_ENCAP_ESPINUDP)
983 up->u_flags |= UF_ESPINUDP;
984 else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
985 up->u_flags |= UF_ESPINUDP_NON_IKE;
986 break;
987#endif
988 default:
989 error = EINVAL;
990 break;
991 }
992 INP_WUNLOCK(inp);
993 break;
994 case UDPLITE_SEND_CSCOV:
995 case UDPLITE_RECV_CSCOV:
996 if (!isudplite) {
997 INP_WUNLOCK(inp);
998 error = ENOPROTOOPT;
999 break;
1000 }
1001 INP_WUNLOCK(inp);
1002 error = sooptcopyin(sopt, &optval, sizeof(optval),
1003 sizeof(optval));
1004 if (error != 0)
1005 break;
1006 inp = sotoinpcb(so);
1007 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1008 INP_WLOCK(inp);
1009 up = intoudpcb(inp);
1010 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1011 if (optval != 0 && optval < 8) {
1012 INP_WUNLOCK(inp);
1013 error = EINVAL;
1014 break;
1015 }
1016 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1017 up->u_txcslen = optval;
1018 else
1019 up->u_rxcslen = optval;
1020 INP_WUNLOCK(inp);
1021 break;
1022 default:
1023 INP_WUNLOCK(inp);
1024 error = ENOPROTOOPT;
1025 break;
1026 }
1027 break;
1028 case SOPT_GET:
1029 switch (sopt->sopt_name) {
1030#ifdef IPSEC_NAT_T
1031 case UDP_ENCAP:
1032 up = intoudpcb(inp);
1033 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1034 optval = up->u_flags & UF_ESPINUDP_ALL;
1035 INP_WUNLOCK(inp);
1036 error = sooptcopyout(sopt, &optval, sizeof optval);
1037 break;
1038#endif
1039 case UDPLITE_SEND_CSCOV:
1040 case UDPLITE_RECV_CSCOV:
1041 if (!isudplite) {
1042 INP_WUNLOCK(inp);
1043 error = ENOPROTOOPT;
1044 break;
1045 }
1046 up = intoudpcb(inp);
1047 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1048 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1049 optval = up->u_txcslen;
1050 else
1051 optval = up->u_rxcslen;
1052 INP_WUNLOCK(inp);
1053 error = sooptcopyout(sopt, &optval, sizeof(optval));
1054 break;
1055 default:
1056 INP_WUNLOCK(inp);
1057 error = ENOPROTOOPT;
1058 break;
1059 }
1060 break;
1061 }
1062 return (error);
1063}
1064
1065#ifdef INET
1066#define UH_WLOCKED 2
1067#define UH_RLOCKED 1
1068#define UH_UNLOCKED 0
1069static int
1070udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
1071 struct mbuf *control, struct thread *td)
1072{
1073 struct udpiphdr *ui;
1074 int len = m->m_pkthdr.len;
1075 struct in_addr faddr, laddr;
1076 struct cmsghdr *cm;
1077 struct inpcbinfo *pcbinfo;
1078 struct sockaddr_in *sin, src;
1079 int cscov_partial = 0;
1080 int error = 0;
1081 int ipflags;
1082 u_short fport, lport;
1083 int unlock_udbinfo;
1084 u_char tos;
1085 uint8_t pr;
1086 uint16_t cscov = 0;
1087
1088 /*
1089 * udp_output() may need to temporarily bind or connect the current
1090 * inpcb. As such, we don't know up front whether we will need the
1091 * pcbinfo lock or not. Do any work to decide what is needed up
1092 * front before acquiring any locks.
1093 */
1094 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
1095 if (control)
1096 m_freem(control);
1097 m_freem(m);
1098 return (EMSGSIZE);
1099 }
1100
1101 src.sin_family = 0;
1102 INP_RLOCK(inp);
1103 tos = inp->inp_ip_tos;
1104 if (control != NULL) {
1105 /*
1106 * XXX: Currently, we assume all the optional information is
1107 * stored in a single mbuf.
1108 */
1109 if (control->m_next) {
1110 INP_RUNLOCK(inp);
1111 m_freem(control);
1112 m_freem(m);
1113 return (EINVAL);
1114 }
1115 for (; control->m_len > 0;
1116 control->m_data += CMSG_ALIGN(cm->cmsg_len),
1117 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
1118 cm = mtod(control, struct cmsghdr *);
1119 if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
1120 || cm->cmsg_len > control->m_len) {
1121 error = EINVAL;
1122 break;
1123 }
1124 if (cm->cmsg_level != IPPROTO_IP)
1125 continue;
1126
1127 switch (cm->cmsg_type) {
1128 case IP_SENDSRCADDR:
1129 if (cm->cmsg_len !=
1130 CMSG_LEN(sizeof(struct in_addr))) {
1131 error = EINVAL;
1132 break;
1133 }
1134 bzero(&src, sizeof(src));
1135 src.sin_family = AF_INET;
1136 src.sin_len = sizeof(src);
1137 src.sin_port = inp->inp_lport;
1138 src.sin_addr =
1139 *(struct in_addr *)CMSG_DATA(cm);
1140 break;
1141
1142 case IP_TOS:
1143 if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
1144 error = EINVAL;
1145 break;
1146 }
1147 tos = *(u_char *)CMSG_DATA(cm);
1148 break;
1149
1150 default:
1151 error = ENOPROTOOPT;
1152 break;
1153 }
1154 if (error)
1155 break;
1156 }
1157 m_freem(control);
1158 }
1159 if (error) {
1160 INP_RUNLOCK(inp);
1161 m_freem(m);
1162 return (error);
1163 }
1164
1165 /*
1166 * Depending on whether or not the application has bound or connected
1167 * the socket, we may have to do varying levels of work. The optimal
1168 * case is for a connected UDP socket, as a global lock isn't
1169 * required at all.
1170 *
1171 * In order to decide which we need, we require stability of the
1172 * inpcb binding, which we ensure by acquiring a read lock on the
1173 * inpcb. This doesn't strictly follow the lock order, so we play
1174 * the trylock and retry game; note that we may end up with more
1175 * conservative locks than required the second time around, so later
1176 * assertions have to accept that. Further analysis of the number of
1177 * misses under contention is required.
1178 *
1179 * XXXRW: Check that hash locking update here is correct.
1180 */
1181 pr = inp->inp_socket->so_proto->pr_protocol;
1182 pcbinfo = get_inpcbinfo(pr);
1183 sin = (struct sockaddr_in *)addr;
1184 if (sin != NULL &&
1185 (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
1186 INP_RUNLOCK(inp);
1187 INP_WLOCK(inp);
1188 INP_HASH_WLOCK(pcbinfo);
1189 unlock_udbinfo = UH_WLOCKED;
1190 } else if ((sin != NULL && (
1191 (sin->sin_addr.s_addr == INADDR_ANY) ||
1192 (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
1193 (inp->inp_laddr.s_addr == INADDR_ANY) ||
1194 (inp->inp_lport == 0))) ||
1195 (src.sin_family == AF_INET)) {
1196 INP_HASH_RLOCK(pcbinfo);
1197 unlock_udbinfo = UH_RLOCKED;
1198 } else
1199 unlock_udbinfo = UH_UNLOCKED;
1200
1201 /*
1202 * If the IP_SENDSRCADDR control message was specified, override the
1203 * source address for this datagram. Its use is invalidated if the
1204 * address thus specified is incomplete or clobbers other inpcbs.
1205 */
1206 laddr = inp->inp_laddr;
1207 lport = inp->inp_lport;
1208 if (src.sin_family == AF_INET) {
1209 INP_HASH_LOCK_ASSERT(pcbinfo);
1210 if ((lport == 0) ||
1211 (laddr.s_addr == INADDR_ANY &&
1212 src.sin_addr.s_addr == INADDR_ANY)) {
1213 error = EINVAL;
1214 goto release;
1215 }
1216 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
1217 &laddr.s_addr, &lport, td->td_ucred);
1218 if (error)
1219 goto release;
1220 }
1221
1222 /*
1223 * If a UDP socket has been connected, then a local address/port will
1224 * have been selected and bound.
1225 *
1226 * If a UDP socket has not been connected to, then an explicit
1227 * destination address must be used, in which case a local
1228 * address/port may not have been selected and bound.
1229 */
1230 if (sin != NULL) {
1231 INP_LOCK_ASSERT(inp);
1232 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1233 error = EISCONN;
1234 goto release;
1235 }
1236
1237 /*
1238 * Jail may rewrite the destination address, so let it do
1239 * that before we use it.
1240 */
1241 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1242 if (error)
1243 goto release;
1244
1245 /*
1246 * If a local address or port hasn't yet been selected, or if
1247 * the destination address needs to be rewritten due to using
1248 * a special INADDR_ constant, invoke in_pcbconnect_setup()
1249 * to do the heavy lifting. Once a port is selected, we
1250 * commit the binding back to the socket; we also commit the
1251 * binding of the address if in jail.
1252 *
1253 * If we already have a valid binding and we're not
1254 * requesting a destination address rewrite, use a fast path.
1255 */
1256 if (inp->inp_laddr.s_addr == INADDR_ANY ||
1257 inp->inp_lport == 0 ||
1258 sin->sin_addr.s_addr == INADDR_ANY ||
1259 sin->sin_addr.s_addr == INADDR_BROADCAST) {
1260 INP_HASH_LOCK_ASSERT(pcbinfo);
1261 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
1262 &lport, &faddr.s_addr, &fport, NULL,
1263 td->td_ucred);
1264 if (error)
1265 goto release;
1266
1267 /*
1268 * XXXRW: Why not commit the port if the address is
1269 * !INADDR_ANY?
1270 */
1271 /* Commit the local port if newly assigned. */
1272 if (inp->inp_laddr.s_addr == INADDR_ANY &&
1273 inp->inp_lport == 0) {
1274 INP_WLOCK_ASSERT(inp);
1275 INP_HASH_WLOCK_ASSERT(pcbinfo);
1276 /*
1277 * Remember addr if jailed, to prevent
1278 * rebinding.
1279 */
1280 if (prison_flag(td->td_ucred, PR_IP4))
1281 inp->inp_laddr = laddr;
1282 inp->inp_lport = lport;
1283 if (in_pcbinshash(inp) != 0) {
1284 inp->inp_lport = 0;
1285 error = EAGAIN;
1286 goto release;
1287 }
1288 inp->inp_flags |= INP_ANONPORT;
1289 }
1290 } else {
1291 faddr = sin->sin_addr;
1292 fport = sin->sin_port;
1293 }
1294 } else {
1295 INP_LOCK_ASSERT(inp);
1296 faddr = inp->inp_faddr;
1297 fport = inp->inp_fport;
1298 if (faddr.s_addr == INADDR_ANY) {
1299 error = ENOTCONN;
1300 goto release;
1301 }
1302 }
1303
1304 /*
1305 * Calculate data length and get a mbuf for UDP, IP, and possible
1306 * link-layer headers. Immediate slide the data pointer back forward
1307 * since we won't use that space at this layer.
1308 */
1309 M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
1310 if (m == NULL) {
1311 error = ENOBUFS;
1312 goto release;
1313 }
1314 m->m_data += max_linkhdr;
1315 m->m_len -= max_linkhdr;
1316 m->m_pkthdr.len -= max_linkhdr;
1317
1318 /*
1319 * Fill in mbuf with extended UDP header and addresses and length put
1320 * into network format.
1321 */
1322 ui = mtod(m, struct udpiphdr *);
1323 bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
1324 ui->ui_pr = pr;
1325 ui->ui_src = laddr;
1326 ui->ui_dst = faddr;
1327 ui->ui_sport = lport;
1328 ui->ui_dport = fport;
1329 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1330 if (pr == IPPROTO_UDPLITE) {
1331 struct udpcb *up;
1332 uint16_t plen;
1333
1334 up = intoudpcb(inp);
1335 cscov = up->u_txcslen;
1336 plen = (u_short)len + sizeof(struct udphdr);
1337 if (cscov >= plen)
1338 cscov = 0;
1339 ui->ui_len = htons(plen);
1340 ui->ui_ulen = htons(cscov);
1341 /*
1342 * For UDP-Lite, checksum coverage length of zero means
1343 * the entire UDPLite packet is covered by the checksum.
1344 */
1345 cscov_partial = (cscov == 0) ? 0 : 1;
1346 } else
1347 ui->ui_v = IPVERSION << 4;
1348
1349 /*
1350 * Set the Don't Fragment bit in the IP header.
1351 */
1352 if (inp->inp_flags & INP_DONTFRAG) {
1353 struct ip *ip;
1354
1355 ip = (struct ip *)&ui->ui_i;
1356 ip->ip_off |= htons(IP_DF);
1357 }
1358
1359 ipflags = 0;
1360 if (inp->inp_socket->so_options & SO_DONTROUTE)
1361 ipflags |= IP_ROUTETOIF;
1362 if (inp->inp_socket->so_options & SO_BROADCAST)
1363 ipflags |= IP_ALLOWBROADCAST;
1364 if (inp->inp_flags & INP_ONESBCAST)
1365 ipflags |= IP_SENDONES;
1366
1367#ifdef MAC
1368 mac_inpcb_create_mbuf(inp, m);
1369#endif
1370
1371 /*
1372 * Set up checksum and output datagram.
1373 */
1374 ui->ui_sum = 0;
1375 if (pr == IPPROTO_UDPLITE) {
1376 if (inp->inp_flags & INP_ONESBCAST)
1377 faddr.s_addr = INADDR_BROADCAST;
1378 if (cscov_partial) {
1379 if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
1380 ui->ui_sum = 0xffff;
1381 } else {
1382 if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
1383 ui->ui_sum = 0xffff;
1384 }
1385 } else if (V_udp_cksum) {
1386 if (inp->inp_flags & INP_ONESBCAST)
1387 faddr.s_addr = INADDR_BROADCAST;
1388 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
1389 htons((u_short)len + sizeof(struct udphdr) + pr));
1390 m->m_pkthdr.csum_flags = CSUM_UDP;
1391 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1392 }
1393 ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
1394 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
1395 ((struct ip *)ui)->ip_tos = tos; /* XXX */
1396 UDPSTAT_INC(udps_opackets);
1397
1398 if (unlock_udbinfo == UH_WLOCKED)
1399 INP_HASH_WUNLOCK(pcbinfo);
1400 else if (unlock_udbinfo == UH_RLOCKED)
1401 INP_HASH_RUNLOCK(pcbinfo);
1402 UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
1403 error = ip_output(m, inp->inp_options, NULL, ipflags,
1404 inp->inp_moptions, inp);
1405 if (unlock_udbinfo == UH_WLOCKED)
1406 INP_WUNLOCK(inp);
1407 else
1408 INP_RUNLOCK(inp);
1409 return (error);
1410
1411release:
1412 if (unlock_udbinfo == UH_WLOCKED) {
1413 INP_HASH_WUNLOCK(pcbinfo);
1414 INP_WUNLOCK(inp);
1415 } else if (unlock_udbinfo == UH_RLOCKED) {
1416 INP_HASH_RUNLOCK(pcbinfo);
1417 INP_RUNLOCK(inp);
1418 } else
1419 INP_RUNLOCK(inp);
1420 m_freem(m);
1421 return (error);
1422}
1423
1424
1425#if defined(IPSEC) && defined(IPSEC_NAT_T)
1426/*
1427 * Potentially decap ESP in UDP frame. Check for an ESP header
1428 * and optional marker; if present, strip the UDP header and
1429 * push the result through IPSec.
1430 *
1431 * Returns mbuf to be processed (potentially re-allocated) or
1432 * NULL if consumed and/or processed.
1433 */
1434static struct mbuf *
1435udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
1436{
1437 size_t minlen, payload, skip, iphlen;
1438 caddr_t data;
1439 struct udpcb *up;
1440 struct m_tag *tag;
1441 struct udphdr *udphdr;
1442 struct ip *ip;
1443
1444 INP_RLOCK_ASSERT(inp);
1445
1446 /*
1447 * Pull up data so the longest case is contiguous:
1448 * IP/UDP hdr + non ESP marker + ESP hdr.
1449 */
1450 minlen = off + sizeof(uint64_t) + sizeof(struct esp);
1451 if (minlen > m->m_pkthdr.len)
1452 minlen = m->m_pkthdr.len;
1453 if ((m = m_pullup(m, minlen)) == NULL) {
1454 IPSECSTAT_INC(ips_in_inval);
1455 return (NULL); /* Bypass caller processing. */
1456 }
1457 data = mtod(m, caddr_t); /* Points to ip header. */
1458 payload = m->m_len - off; /* Size of payload. */
1459
1460 if (payload == 1 && data[off] == '\xff')
1461 return (m); /* NB: keepalive packet, no decap. */
1462
1463 up = intoudpcb(inp);
1464 KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
1465 KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
1466 ("u_flags 0x%x", up->u_flags));
1467
1468 /*
1469 * Check that the payload is large enough to hold an
1470 * ESP header and compute the amount of data to remove.
1471 *
1472 * NB: the caller has already done a pullup for us.
1473 * XXX can we assume alignment and eliminate bcopys?
1474 */
1475 if (up->u_flags & UF_ESPINUDP_NON_IKE) {
1476 /*
1477 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
1478 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
1479 * possible AH mode non-IKE marker+non-ESP marker
1480 * from draft-ietf-ipsec-udp-encaps-00.txt.
1481 */
1482 uint64_t marker;
1483
1484 if (payload <= sizeof(uint64_t) + sizeof(struct esp))
1485 return (m); /* NB: no decap. */
1486 bcopy(data + off, &marker, sizeof(uint64_t));
1487 if (marker != 0) /* Non-IKE marker. */
1488 return (m); /* NB: no decap. */
1489 skip = sizeof(uint64_t) + sizeof(struct udphdr);
1490 } else {
1491 uint32_t spi;
1492
1493 if (payload <= sizeof(struct esp)) {
1494 IPSECSTAT_INC(ips_in_inval);
1495 m_freem(m);
1496 return (NULL); /* Discard. */
1497 }
1498 bcopy(data + off, &spi, sizeof(uint32_t));
1499 if (spi == 0) /* Non-ESP marker. */
1500 return (m); /* NB: no decap. */
1501 skip = sizeof(struct udphdr);
1502 }
1503
1504 /*
1505 * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
1506 * the UDP ports. This is required if we want to select
1507 * the right SPD for multiple hosts behind same NAT.
1508 *
1509 * NB: ports are maintained in network byte order everywhere
1510 * in the NAT-T code.
1511 */
1512 tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
1513 2 * sizeof(uint16_t), M_NOWAIT);
1514 if (tag == NULL) {
1515 IPSECSTAT_INC(ips_in_nomem);
1516 m_freem(m);
1517 return (NULL); /* Discard. */
1518 }
1519 iphlen = off - sizeof(struct udphdr);
1520 udphdr = (struct udphdr *)(data + iphlen);
1521 ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
1522 ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
1523 m_tag_prepend(m, tag);
1524
1525 /*
1526 * Remove the UDP header (and possibly the non ESP marker)
1527 * IP header length is iphlen
1528 * Before:
1529 * <--- off --->
1530 * +----+------+-----+
1531 * | IP | UDP | ESP |
1532 * +----+------+-----+
1533 * <-skip->
1534 * After:
1535 * +----+-----+
1536 * | IP | ESP |
1537 * +----+-----+
1538 * <-skip->
1539 */
1540 ovbcopy(data, data + skip, iphlen);
1541 m_adj(m, skip);
1542
1543 ip = mtod(m, struct ip *);
1544 ip->ip_len = htons(ntohs(ip->ip_len) - skip);
1545 ip->ip_p = IPPROTO_ESP;
1546
1547 /*
1548 * We cannot yet update the cksums so clear any
1549 * h/w cksum flags as they are no longer valid.
1550 */
1551 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
1552 m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
1553
1554 (void) ipsec4_common_input(m, iphlen, ip->ip_p);
1555 return (NULL); /* NB: consumed, bypass processing. */
1556}
1557#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
1558
1559static void
1560udp_abort(struct socket *so)
1561{
1562 struct inpcb *inp;
1563 struct inpcbinfo *pcbinfo;
1564
1565 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1566 inp = sotoinpcb(so);
1567 KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
1568 INP_WLOCK(inp);
1569 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1570 INP_HASH_WLOCK(pcbinfo);
1571 in_pcbdisconnect(inp);
1572 inp->inp_laddr.s_addr = INADDR_ANY;
1573 INP_HASH_WUNLOCK(pcbinfo);
1574 soisdisconnected(so);
1575 }
1576 INP_WUNLOCK(inp);
1577}
1578
1579static int
1580udp_attach(struct socket *so, int proto, struct thread *td)
1581{
1582 struct inpcb *inp;
1583 struct inpcbinfo *pcbinfo;
1584 int error;
1585
1586 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1587 inp = sotoinpcb(so);
1588 KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
1589 error = soreserve(so, udp_sendspace, udp_recvspace);
1590 if (error)
1591 return (error);
1592 INP_INFO_WLOCK(pcbinfo);
1593 error = in_pcballoc(so, pcbinfo);
1594 if (error) {
1595 INP_INFO_WUNLOCK(pcbinfo);
1596 return (error);
1597 }
1598
1599 inp = sotoinpcb(so);
1600 inp->inp_vflag |= INP_IPV4;
1601 inp->inp_ip_ttl = V_ip_defttl;
1602
1603 error = udp_newudpcb(inp);
1604 if (error) {
1605 in_pcbdetach(inp);
1606 in_pcbfree(inp);
1607 INP_INFO_WUNLOCK(pcbinfo);
1608 return (error);
1609 }
1610
1611 INP_WUNLOCK(inp);
1612 INP_INFO_WUNLOCK(pcbinfo);
1613 return (0);
1614}
1615#endif /* INET */
1616
1617int
1618udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
1619{
1620 struct inpcb *inp;
1621 struct udpcb *up;
1622
1623 KASSERT(so->so_type == SOCK_DGRAM,
1624 ("udp_set_kernel_tunneling: !dgram"));
1625 inp = sotoinpcb(so);
1626 KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
1627 INP_WLOCK(inp);
1628 up = intoudpcb(inp);
1629 if (up->u_tun_func != NULL) {
1630 INP_WUNLOCK(inp);
1631 return (EBUSY);
1632 }
1633 up->u_tun_func = f;
1634 INP_WUNLOCK(inp);
1635 return (0);
1636}
1637
1638#ifdef INET
1639static int
1640udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1641{
1642 struct inpcb *inp;
1643 struct inpcbinfo *pcbinfo;
1644 int error;
1645
1646 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1647 inp = sotoinpcb(so);
1648 KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1649 INP_WLOCK(inp);
1650 INP_HASH_WLOCK(pcbinfo);
1651 error = in_pcbbind(inp, nam, td->td_ucred);
1652 INP_HASH_WUNLOCK(pcbinfo);
1653 INP_WUNLOCK(inp);
1654 return (error);
1655}
1656
1657static void
1658udp_close(struct socket *so)
1659{
1660 struct inpcb *inp;
1661 struct inpcbinfo *pcbinfo;
1662
1663 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1664 inp = sotoinpcb(so);
1665 KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1666 INP_WLOCK(inp);
1667 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1668 INP_HASH_WLOCK(pcbinfo);
1669 in_pcbdisconnect(inp);
1670 inp->inp_laddr.s_addr = INADDR_ANY;
1671 INP_HASH_WUNLOCK(pcbinfo);
1672 soisdisconnected(so);
1673 }
1674 INP_WUNLOCK(inp);
1675}
1676
1677static int
1678udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1679{
1680 struct inpcb *inp;
1681 struct inpcbinfo *pcbinfo;
1682 struct sockaddr_in *sin;
1683 int error;
1684
1685 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1686 inp = sotoinpcb(so);
1687 KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1688 INP_WLOCK(inp);
1689 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1690 INP_WUNLOCK(inp);
1691 return (EISCONN);
1692 }
1693 sin = (struct sockaddr_in *)nam;
1694 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1695 if (error != 0) {
1696 INP_WUNLOCK(inp);
1697 return (error);
1698 }
1699 INP_HASH_WLOCK(pcbinfo);
1700 error = in_pcbconnect(inp, nam, td->td_ucred);
1701 INP_HASH_WUNLOCK(pcbinfo);
1702 if (error == 0)
1703 soisconnected(so);
1704 INP_WUNLOCK(inp);
1705 return (error);
1706}
1707
1708static void
1709udp_detach(struct socket *so)
1710{
1711 struct inpcb *inp;
1712 struct inpcbinfo *pcbinfo;
1713 struct udpcb *up;
1714
1715 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1716 inp = sotoinpcb(so);
1717 KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1718 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1719 ("udp_detach: not disconnected"));
1720 INP_INFO_WLOCK(pcbinfo);
1721 INP_WLOCK(inp);
1722 up = intoudpcb(inp);
1723 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1724 inp->inp_ppcb = NULL;
1725 in_pcbdetach(inp);
1726 in_pcbfree(inp);
1727 INP_INFO_WUNLOCK(pcbinfo);
1728 udp_discardcb(up);
1729}
1730
1731static int
1732udp_disconnect(struct socket *so)
1733{
1734 struct inpcb *inp;
1735 struct inpcbinfo *pcbinfo;
1736
1737 pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
1738 inp = sotoinpcb(so);
1739 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1740 INP_WLOCK(inp);
1741 if (inp->inp_faddr.s_addr == INADDR_ANY) {
1742 INP_WUNLOCK(inp);
1743 return (ENOTCONN);
1744 }
1745 INP_HASH_WLOCK(pcbinfo);
1746 in_pcbdisconnect(inp);
1747 inp->inp_laddr.s_addr = INADDR_ANY;
1748 INP_HASH_WUNLOCK(pcbinfo);
1749 SOCK_LOCK(so);
1750 so->so_state &= ~SS_ISCONNECTED; /* XXX */
1751 SOCK_UNLOCK(so);
1752 INP_WUNLOCK(inp);
1753 return (0);
1754}
1755
1756static int
1757udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1758 struct mbuf *control, struct thread *td)
1759{
1760 struct inpcb *inp;
1761
1762 inp = sotoinpcb(so);
1763 KASSERT(inp != NULL, ("udp_send: inp == NULL"));
1764 return (udp_output(inp, m, addr, control, td));
1765}
1766#endif /* INET */
1767
1768int
1769udp_shutdown(struct socket *so)
1770{
1771 struct inpcb *inp;
1772
1773 inp = sotoinpcb(so);
1774 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1775 INP_WLOCK(inp);
1776 socantsendmore(so);
1777 INP_WUNLOCK(inp);
1778 return (0);
1779}
1780
1781#ifdef INET
1782struct pr_usrreqs udp_usrreqs = {
1783 .pru_abort = udp_abort,
1784 .pru_attach = udp_attach,
1785 .pru_bind = udp_bind,
1786 .pru_connect = udp_connect,
1787 .pru_control = in_control,
1788 .pru_detach = udp_detach,
1789 .pru_disconnect = udp_disconnect,
1790 .pru_peeraddr = in_getpeeraddr,
1791 .pru_send = udp_send,
1792 .pru_soreceive = soreceive_dgram,
1793 .pru_sosend = sosend_dgram,
1794 .pru_shutdown = udp_shutdown,
1795 .pru_sockaddr = in_getsockaddr,
1796 .pru_sosetlabel = in_pcbsosetlabel,
1797 .pru_close = udp_close,
1798};
1799#endif /* INET */