1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 *    must display the following acknowledgement:
42 *	This product includes software developed by the University of
43 *	California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
61 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.13 2001/08/08 18:59:54 ghelmer Exp $
62 */
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/kernel.h>
67#include <sys/malloc.h>
68#include <sys/mbuf.h>
69#include <sys/domain.h>
70#include <sys/protosw.h>
71#include <sys/socket.h>
72#include <sys/socketvar.h>
73#include <sys/sysctl.h>
74#include <sys/syslog.h>
75#include <sys/mcache.h>
76#include <net/ntstat.h>
77
78#include <kern/zalloc.h>
79#include <mach/boolean.h>
80
81#include <net/if.h>
82#include <net/if_types.h>
83#include <net/route.h>
84#include <net/dlil.h>
85
86#include <netinet/in.h>
87#include <netinet/in_systm.h>
88#include <netinet/ip.h>
89#if INET6
90#include <netinet/ip6.h>
91#endif /* INET6 */
92#include <netinet/in_pcb.h>
93#include <netinet/in_var.h>
94#include <netinet/ip_var.h>
95#if INET6
96#include <netinet6/in6_pcb.h>
97#include <netinet6/ip6_var.h>
98#include <netinet6/udp6_var.h>
99#endif /* INET6 */
100#include <netinet/ip_icmp.h>
101#include <netinet/icmp_var.h>
102#include <netinet/udp.h>
103#include <netinet/udp_var.h>
104#include <sys/kdebug.h>
105
106#if IPSEC
107#include <netinet6/ipsec.h>
108#include <netinet6/esp.h>
109extern int ipsec_bypass;
110extern int esp_udp_encap_port;
111#endif /* IPSEC */
112
113#if NECP
114#include <net/necp.h>
115#endif /* NECP */
116
117#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETUDP, 0)
118#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETUDP, 2)
119#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETUDP, 1)
120#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETUDP, 3)
121#define	DBG_FNC_UDP_INPUT	NETDBG_CODE(DBG_NETUDP, (5 << 8))
122#define	DBG_FNC_UDP_OUTPUT	NETDBG_CODE(DBG_NETUDP, (6 << 8) | 1)
123
124/*
125 * UDP protocol implementation.
126 * Per RFC 768, August, 1980.
127 */
/*
 * Backing store for the net.inet.udp.checksum sysctl.  Defaults to 1
 * unless COMPAT_42 is defined, in which case it defaults to 0.
 */
#ifndef	COMPAT_42
static int udpcksum = 1;
#else
static int udpcksum = 0;		/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum,
    CTLFLAG_RW | CTLFLAG_LOCKED, &udpcksum, 0, "");

/*
 * net.inet.udp.log_in_vain: when non-zero, udp_input() logs datagrams for
 * which no PCB was found.  Values below 3 use log() directly; other values
 * go through log_in_vain_log() and only for datagrams that are neither
 * broadcast/multicast nor have identical source and destination addresses.
 */
int udp_log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED,
    &udp_log_in_vain, 0, "Log all incoming UDP packets");

/*
 * net.inet.udp.blackhole: when non-zero, udp_input() silently drops a
 * datagram with no matching PCB that arrived on a non-loopback interface
 * instead of answering with an ICMP port unreachable.
 */
static int blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/* List of all UDP PCBs and the associated per-protocol PCB bookkeeping. */
struct inpcbhead udb;		/* from udp_var.h */
#define	udb6	udb  /* for KAME src sync over BSD*'s */
struct inpcbinfo udbinfo;

/* Size argument handed to hashinit() for both PCB hash tables in udp_init() */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

/* Garbage collection performed during most recent udp_gc() run */
static boolean_t udp_gc_done = FALSE;
154
/*
 * log_in_vain_log(a): emit an "in vain" log record.
 *
 * `a' is a complete, parenthesized argument list (level, format, args...)
 * that is pasted directly after the name of the logging function.  The
 * macro expands to a braced compound statement, so the caller in
 * udp_input() invokes it without a trailing semicolon.
 *
 * With IPFIREWALL:
 *   - udp_log_in_vain == 3 and fw_verbose == 2: log through ipfwsyslog()
 *   - udp_log_in_vain == 4 and fw_verbose == 2: only bump the stealth
 *     statistics counter, nothing is logged
 *   - anything else: log through log()
 * Without IPFIREWALL the record always goes to log().
 */
#if IPFIREWALL
extern int fw_verbose;
extern void ipfwsyslog( int level, const char *format,...);
extern void ipfw_stealth_stats_incr_udp(void);

/* Apple logging, log to ipfw.log */
#define log_in_vain_log(a) {						\
	if ((udp_log_in_vain == 3) && (fw_verbose == 2)) {		\
		ipfwsyslog a;						\
	} else if ((udp_log_in_vain == 4) && (fw_verbose == 2)) {       \
	        ipfw_stealth_stats_incr_udp();				\
	} else {							\
		log a;							\
	}								\
}
#else /* !IPFIREWALL */
#define log_in_vain_log( a ) { log a; }
#endif /* !IPFIREWALL */
173
/*
 * net.inet.udp.stats: read-only sysctl served by the udp_getstat() handler
 * (defined elsewhere in this file).  udpstat holds the protocol-wide
 * counters that the input path increments.
 */
static int udp_getstat SYSCTL_HANDLER_ARGS;
struct	udpstat udpstat;	/* from udp_var.h */
SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, udp_getstat, "S,udpstat",
    "UDP statistics (struct udpstat, netinet/udp_var.h)");

/* net.inet.udp.pcbcount: read-only view of udbinfo.ipi_count */
SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount,
    CTLFLAG_RD | CTLFLAG_LOCKED, &udbinfo.ipi_count, 0,
    "Number of active PCBs");

/* net.inet.udp.randomize_ports: read/write flag, enabled (1) by default */
__private_extern__ int udp_use_randomport = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports,
    CTLFLAG_RW | CTLFLAG_LOCKED, &udp_use_randomport, 0,
    "Randomize UDP port numbers");
189
190#if INET6
191struct udp_in6 {
192	struct sockaddr_in6	uin6_sin;
193	u_char			uin6_init_done : 1;
194};
195struct udp_ip6 {
196	struct ip6_hdr		uip6_ip6;
197	u_char			uip6_init_done : 1;
198};
199
200static int udp_abort(struct socket *);
201static int udp_attach(struct socket *, int, struct proc *);
202static int udp_bind(struct socket *, struct sockaddr *, struct proc *);
203static int udp_connect(struct socket *, struct sockaddr *, struct proc *);
204static int udp_connectx(struct socket *, struct sockaddr_list **,
205    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
206    uint32_t, void *, uint32_t);
207static int udp_detach(struct socket *);
208static int udp_disconnect(struct socket *);
209static int udp_disconnectx(struct socket *, associd_t, connid_t);
210static int udp_send(struct socket *, int, struct mbuf *, struct sockaddr *,
211    struct mbuf *, struct proc *);
212static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int,
213    struct sockaddr_in *, struct udp_in6 *, struct udp_ip6 *, struct ifnet *);
214#else /* !INET6 */
215static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int,
216    struct sockaddr_in *, struct ifnet *);
217#endif /* !INET6 */
218static int udp_input_checksum(struct mbuf *, struct udphdr *, int, int);
219static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
220    struct mbuf *, struct proc *);
221static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip);
222static void udp_gc(struct inpcbinfo *);
223
224struct pr_usrreqs udp_usrreqs = {
225	.pru_abort =		udp_abort,
226	.pru_attach =		udp_attach,
227	.pru_bind =		udp_bind,
228	.pru_connect =		udp_connect,
229	.pru_connectx =		udp_connectx,
230	.pru_control =		in_control,
231	.pru_detach =		udp_detach,
232	.pru_disconnect =	udp_disconnect,
233	.pru_disconnectx =	udp_disconnectx,
234	.pru_peeraddr =		in_getpeeraddr,
235	.pru_send =		udp_send,
236	.pru_shutdown =		udp_shutdown,
237	.pru_sockaddr =		in_getsockaddr,
238	.pru_sosend =		sosend,
239	.pru_soreceive =	soreceive,
240};
241
242void
243udp_init(struct protosw *pp, struct domain *dp)
244{
245#pragma unused(dp)
246	static int udp_initialized = 0;
247	vm_size_t		str_size;
248	struct inpcbinfo	*pcbinfo;
249
250	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
251
252	if (udp_initialized)
253		return;
254	udp_initialized = 1;
255
256	LIST_INIT(&udb);
257	udbinfo.ipi_listhead = &udb;
258	udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
259	    &udbinfo.ipi_hashmask);
260	udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB,
261	    &udbinfo.ipi_porthashmask);
262	str_size = (vm_size_t) sizeof (struct inpcb);
263	udbinfo.ipi_zone = zinit(str_size, 80000*str_size, 8192, "udpcb");
264
265	pcbinfo = &udbinfo;
266	/*
267	 * allocate lock group attribute and group for udp pcb mutexes
268	 */
269	pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init();
270	pcbinfo->ipi_lock_grp = lck_grp_alloc_init("udppcb",
271	    pcbinfo->ipi_lock_grp_attr);
272	pcbinfo->ipi_lock_attr = lck_attr_alloc_init();
273	if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp,
274	    pcbinfo->ipi_lock_attr)) == NULL) {
275		panic("%s: unable to allocate PCB lock\n", __func__);
276		/* NOTREACHED */
277	}
278
279	udbinfo.ipi_gc = udp_gc;
280	in_pcbinfo_attach(&udbinfo);
281}
282
283void
284udp_input(struct mbuf *m, int iphlen)
285{
286	struct ip *ip;
287	struct udphdr *uh;
288	struct inpcb *inp;
289	struct mbuf *opts = NULL;
290	int len, isbroadcast;
291	struct ip save_ip;
292	struct sockaddr *append_sa;
293	struct inpcbinfo *pcbinfo = &udbinfo;
294	struct sockaddr_in udp_in;
295	struct ip_moptions *imo = NULL;
296	int foundmembership = 0, ret = 0;
297#if INET6
298	struct udp_in6 udp_in6;
299	struct udp_ip6 udp_ip6;
300#endif /* INET6 */
301	struct ifnet *ifp = m->m_pkthdr.rcvif;
302	boolean_t cell = IFNET_IS_CELLULAR(ifp);
303	boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp));
304	boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp));
305
306	bzero(&udp_in, sizeof (udp_in));
307	udp_in.sin_len = sizeof (struct sockaddr_in);
308	udp_in.sin_family = AF_INET;
309#if INET6
310	bzero(&udp_in6, sizeof (udp_in6));
311	udp_in6.uin6_sin.sin6_len = sizeof (struct sockaddr_in6);
312	udp_in6.uin6_sin.sin6_family = AF_INET6;
313#endif /* INET6 */
314
315	udpstat.udps_ipackets++;
316
317	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_START, 0,0,0,0,0);
318
319	/* Expect 32-bit aligned data pointer on strict-align platforms */
320	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
321
322	/*
323	 * Strip IP options, if any; should skip this,
324	 * make available to user, and use on returned packets,
325	 * but we don't yet have a way to check the checksum
326	 * with options still present.
327	 */
328	if (iphlen > sizeof (struct ip)) {
329		ip_stripoptions(m, (struct mbuf *)0);
330		iphlen = sizeof (struct ip);
331	}
332
333	/*
334	 * Get IP and UDP header together in first mbuf.
335	 */
336	ip = mtod(m, struct ip *);
337	if (m->m_len < iphlen + sizeof (struct udphdr)) {
338		m = m_pullup(m, iphlen + sizeof (struct udphdr));
339		if (m == NULL) {
340			udpstat.udps_hdrops++;
341			KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
342			    0,0,0,0,0);
343			return;
344		}
345		ip = mtod(m, struct ip *);
346	}
347	uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
348
349	/* destination port of 0 is illegal, based on RFC768. */
350	if (uh->uh_dport == 0) {
351		IF_UDP_STATINC(ifp, port0);
352		goto bad;
353	}
354
355	KERNEL_DEBUG(DBG_LAYER_IN_BEG, uh->uh_dport, uh->uh_sport,
356	    ip->ip_src.s_addr, ip->ip_dst.s_addr, uh->uh_ulen);
357
358	/*
359	 * Make mbuf data length reflect UDP length.
360	 * If not enough data to reflect UDP length, drop.
361	 */
362	len = ntohs((u_short)uh->uh_ulen);
363	if (ip->ip_len != len) {
364		if (len > ip->ip_len || len < sizeof (struct udphdr)) {
365			udpstat.udps_badlen++;
366			IF_UDP_STATINC(ifp, badlength);
367			goto bad;
368		}
369		m_adj(m, len - ip->ip_len);
370		/* ip->ip_len = len; */
371	}
372	/*
373	 * Save a copy of the IP header in case we want restore it
374	 * for sending an ICMP error message in response.
375	 */
376	save_ip = *ip;
377
378	/*
379	 * Checksum extended UDP header and data.
380	 */
381	if (udp_input_checksum(m, uh, iphlen, len))
382		goto bad;
383
384	isbroadcast = in_broadcast(ip->ip_dst, ifp);
385
386	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || isbroadcast) {
387		int reuse_sock = 0, mcast_delivered = 0;
388
389		lck_rw_lock_shared(pcbinfo->ipi_lock);
390		/*
391		 * Deliver a multicast or broadcast datagram to *all* sockets
392		 * for which the local and remote addresses and ports match
393		 * those of the incoming datagram.  This allows more than
394		 * one process to receive multi/broadcasts on the same port.
395		 * (This really ought to be done for unicast datagrams as
396		 * well, but that would cause problems with existing
397		 * applications that open both address-specific sockets and
398		 * a wildcard socket listening to the same port -- they would
399		 * end up receiving duplicates of every unicast datagram.
400		 * Those applications open the multiple sockets to overcome an
401		 * inadequacy of the UDP socket interface, but for backwards
402		 * compatibility we avoid the problem here rather than
403		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
404		 */
405
406		/*
407		 * Construct sockaddr format source address.
408		 */
409		udp_in.sin_port = uh->uh_sport;
410		udp_in.sin_addr = ip->ip_src;
411		/*
412		 * Locate pcb(s) for datagram.
413		 * (Algorithm copied from raw_intr().)
414		 */
415#if INET6
416		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
417#endif /* INET6 */
418		LIST_FOREACH(inp, &udb, inp_list) {
419#if IPSEC
420			int skipit;
421#endif /* IPSEC */
422
423			if (inp->inp_socket == NULL)
424				continue;
425			if (inp != sotoinpcb(inp->inp_socket)) {
426				panic("%s: bad so back ptr inp=%p\n",
427				    __func__, inp);
428				/* NOTREACHED */
429			}
430#if INET6
431                        if ((inp->inp_vflag & INP_IPV4) == 0)
432                                continue;
433#endif /* INET6 */
434			if (inp_restricted_recv(inp, ifp))
435				continue;
436
437			if ((inp->inp_moptions == NULL) &&
438			    (ntohl(ip->ip_dst.s_addr) !=
439			    INADDR_ALLHOSTS_GROUP) && (isbroadcast == 0))
440				continue;
441
442			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) ==
443			    WNT_STOPUSING)
444				continue;
445
446			udp_lock(inp->inp_socket, 1, 0);
447
448			if (in_pcb_checkstate(inp, WNT_RELEASE, 1) ==
449			    WNT_STOPUSING) {
450				udp_unlock(inp->inp_socket, 1, 0);
451				continue;
452			}
453
454			if (inp->inp_lport != uh->uh_dport) {
455				udp_unlock(inp->inp_socket, 1, 0);
456				continue;
457			}
458			if (inp->inp_laddr.s_addr != INADDR_ANY) {
459				if (inp->inp_laddr.s_addr !=
460				    ip->ip_dst.s_addr) {
461					udp_unlock(inp->inp_socket, 1, 0);
462					continue;
463				}
464			}
465			if (inp->inp_faddr.s_addr != INADDR_ANY) {
466				if (inp->inp_faddr.s_addr !=
467				    ip->ip_src.s_addr ||
468				    inp->inp_fport != uh->uh_sport) {
469					udp_unlock(inp->inp_socket, 1, 0);
470					continue;
471				}
472			}
473
474			if (isbroadcast == 0 && (ntohl(ip->ip_dst.s_addr) !=
475			    INADDR_ALLHOSTS_GROUP)) {
476				struct sockaddr_in group;
477				int blocked;
478
479				if ((imo = inp->inp_moptions) == NULL) {
480					udp_unlock(inp->inp_socket, 1, 0);
481					continue;
482				}
483				IMO_LOCK(imo);
484
485				bzero(&group, sizeof (struct sockaddr_in));
486				group.sin_len = sizeof (struct sockaddr_in);
487				group.sin_family = AF_INET;
488				group.sin_addr = ip->ip_dst;
489
490				blocked = imo_multi_filter(imo, ifp,
491				    (struct sockaddr *)&group,
492				    (struct sockaddr *)&udp_in);
493				if (blocked == MCAST_PASS)
494					foundmembership = 1;
495
496				IMO_UNLOCK(imo);
497				if (!foundmembership) {
498					udp_unlock(inp->inp_socket, 1, 0);
499					if (blocked == MCAST_NOTSMEMBER ||
500					    blocked == MCAST_MUTED)
501						udpstat.udps_filtermcast++;
502					continue;
503				}
504				foundmembership = 0;
505			}
506
507			reuse_sock = (inp->inp_socket->so_options &
508			    (SO_REUSEPORT|SO_REUSEADDR));
509
510#if NECP
511			skipit = 0;
512			if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
513				/* do not inject data to pcb */
514				skipit = 1;
515			}
516			if (skipit == 0)
517#endif /* NECP */
518			{
519				struct mbuf *n = NULL;
520
521				if (reuse_sock)
522					n = m_copy(m, 0, M_COPYALL);
523#if INET6
524				udp_append(inp, ip, m,
525				    iphlen + sizeof (struct udphdr),
526				    &udp_in, &udp_in6, &udp_ip6, ifp);
527#else /* !INET6 */
528				udp_append(inp, ip, m,
529				    iphlen + sizeof (struct udphdr),
530				    &udp_in, ifp);
531#endif /* !INET6 */
532				mcast_delivered++;
533
534				m = n;
535			}
536			udp_unlock(inp->inp_socket, 1, 0);
537
538			/*
539			 * Don't look for additional matches if this one does
540			 * not have either the SO_REUSEPORT or SO_REUSEADDR
541			 * socket options set.  This heuristic avoids searching
542			 * through all pcbs in the common case of a non-shared
543			 * port.  It assumes that an application will never
544			 * clear these options after setting them.
545			 */
546			if (reuse_sock == 0 || m == NULL)
547				break;
548
549			/*
550			 * Expect 32-bit aligned data pointer on strict-align
551			 * platforms.
552			 */
553			MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
554			/*
555			 * Recompute IP and UDP header pointers for new mbuf
556			 */
557			ip = mtod(m, struct ip *);
558			uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
559		}
560		lck_rw_done(pcbinfo->ipi_lock);
561
562		if (mcast_delivered == 0) {
563			/*
564			 * No matching pcb found; discard datagram.
565			 * (No need to send an ICMP Port Unreachable
566			 * for a broadcast or multicast datgram.)
567			 */
568			udpstat.udps_noportbcast++;
569			IF_UDP_STATINC(ifp, port_unreach);
570			goto bad;
571		}
572
573		/* free the extra copy of mbuf or skipped by IPSec */
574		if (m != NULL)
575			m_freem(m);
576		KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
577		return;
578	}
579
580#if IPSEC
581	/*
582	 * UDP to port 4500 with a payload where the first four bytes are
583	 * not zero is a UDP encapsulated IPSec packet. Packets where
584	 * the payload is one byte and that byte is 0xFF are NAT keepalive
585	 * packets. Decapsulate the ESP packet and carry on with IPSec input
586	 * or discard the NAT keep-alive.
587	 */
588	if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
589	    uh->uh_dport == ntohs((u_short)esp_udp_encap_port)) {
590		int payload_len = len - sizeof (struct udphdr) > 4 ? 4 :
591		    len - sizeof (struct udphdr);
592
593		if (m->m_len < iphlen + sizeof (struct udphdr) + payload_len) {
594			if ((m = m_pullup(m, iphlen + sizeof (struct udphdr) +
595			    payload_len)) == NULL) {
596				udpstat.udps_hdrops++;
597				KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
598				    0,0,0,0,0);
599				return;
600			}
601			/*
602			 * Expect 32-bit aligned data pointer on strict-align
603			 * platforms.
604			 */
605			MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
606
607			ip = mtod(m, struct ip *);
608			uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
609		}
610		/* Check for NAT keepalive packet */
611		if (payload_len == 1 && *(u_int8_t*)
612		    ((caddr_t)uh + sizeof (struct udphdr)) == 0xFF) {
613			m_freem(m);
614			KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
615			    0,0,0,0,0);
616			return;
617		} else if (payload_len == 4 && *(u_int32_t*)(void *)
618		    ((caddr_t)uh + sizeof (struct udphdr)) != 0) {
619			/* UDP encapsulated IPSec packet to pass through NAT */
620			KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
621			    0,0,0,0,0);
622			/* preserve the udp header */
623			esp4_input(m, iphlen + sizeof (struct udphdr));
624			return;
625		}
626	}
627#endif /* IPSEC */
628
629	/*
630	 * Locate pcb for datagram.
631	 */
632	inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
633	    ip->ip_dst, uh->uh_dport, 1, ifp);
634	if (inp == NULL) {
635		IF_UDP_STATINC(ifp, port_unreach);
636
637		if (udp_log_in_vain) {
638			char buf[MAX_IPv4_STR_LEN];
639			char buf2[MAX_IPv4_STR_LEN];
640
641			/* check src and dst address */
642			if (udp_log_in_vain < 3) {
643				log(LOG_INFO, "Connection attempt to "
644				    "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET,
645				        &ip->ip_dst, buf, sizeof (buf)),
646					ntohs(uh->uh_dport), inet_ntop(AF_INET,
647					&ip->ip_src, buf2, sizeof (buf2)),
648					ntohs(uh->uh_sport));
649			} else if (!(m->m_flags & (M_BCAST | M_MCAST)) &&
650			    ip->ip_dst.s_addr != ip->ip_src.s_addr) {
651				log_in_vain_log((LOG_INFO,
652				    "Stealth Mode connection attempt to "
653				    "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET,
654				    &ip->ip_dst, buf, sizeof (buf)),
655				    ntohs(uh->uh_dport), inet_ntop(AF_INET,
656				    &ip->ip_src, buf2, sizeof (buf2)),
657				    ntohs(uh->uh_sport)))
658			}
659		}
660		udpstat.udps_noport++;
661		if (m->m_flags & (M_BCAST | M_MCAST)) {
662			udpstat.udps_noportbcast++;
663			goto bad;
664		}
665#if ICMP_BANDLIM
666		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
667			goto bad;
668#endif /* ICMP_BANDLIM */
669		if (blackhole)
670			if (ifp && ifp->if_type != IFT_LOOP)
671				goto bad;
672		*ip = save_ip;
673		ip->ip_len += iphlen;
674		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
675		KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
676		return;
677	}
678	udp_lock(inp->inp_socket, 1, 0);
679
680	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
681		udp_unlock(inp->inp_socket, 1, 0);
682		IF_UDP_STATINC(ifp, cleanup);
683		goto bad;
684	}
685#if NECP
686	if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
687		udp_unlock(inp->inp_socket, 1, 0);
688		IF_UDP_STATINC(ifp, badipsec);
689		goto bad;
690	}
691#endif /* NECP */
692
693	/*
694	 * Construct sockaddr format source address.
695	 * Stuff source address and datagram in user buffer.
696	 */
697	udp_in.sin_port = uh->uh_sport;
698	udp_in.sin_addr = ip->ip_src;
699	if ((inp->inp_flags & INP_CONTROLOPTS) != 0 ||
700	    (inp->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
701	    (inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
702#if INET6
703		if (inp->inp_vflag & INP_IPV6) {
704			int savedflags;
705
706			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
707			savedflags = inp->inp_flags;
708			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
709			ret = ip6_savecontrol(inp, m, &opts);
710			inp->inp_flags = savedflags;
711		} else
712#endif /* INET6 */
713		{
714			ret = ip_savecontrol(inp, &opts, ip, m);
715		}
716		if (ret != 0) {
717			udp_unlock(inp->inp_socket, 1, 0);
718			goto bad;
719		}
720	}
721	m_adj(m, iphlen + sizeof (struct udphdr));
722
723	KERNEL_DEBUG(DBG_LAYER_IN_END, uh->uh_dport, uh->uh_sport,
724	    save_ip.ip_src.s_addr, save_ip.ip_dst.s_addr, uh->uh_ulen);
725
726#if INET6
727	if (inp->inp_vflag & INP_IPV6) {
728		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
729		append_sa = (struct sockaddr *)&udp_in6.uin6_sin;
730	} else
731#endif /* INET6 */
732	{
733		append_sa = (struct sockaddr *)&udp_in;
734	}
735	if (nstat_collect) {
736		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
737		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, m->m_pkthdr.len);
738	}
739	so_recv_data_stat(inp->inp_socket, m, 0);
740	if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa,
741	    m, opts, NULL) == 0) {
742		udpstat.udps_fullsock++;
743	} else {
744		sorwakeup(inp->inp_socket);
745	}
746	udp_unlock(inp->inp_socket, 1, 0);
747	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
748	return;
749bad:
750	m_freem(m);
751	if (opts)
752		m_freem(opts);
753	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
754}
755
#if INET6
/*
 * Fill *ip6 with an IPv6 header derived from the IPv4 header *ip.
 *
 * The destination is zeroed first; version, payload length, next header
 * and hop limit are then copied from the IPv4 fields.  Each non-zero IPv4
 * address is stored in v4-mapped form (words 2 and 3 of the IPv6 address);
 * a zero IPv4 address leaves the IPv6 address all-zero.
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	u_int32_t *src6 = ip6->ip6_src.s6_addr32;
	u_int32_t *dst6 = ip6->ip6_dst.s6_addr32;

	bzero(ip6, sizeof (*ip6));

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;

	if (ip->ip_src.s_addr != 0) {
		src6[2] = IPV6_ADDR_INT32_SMP;
		src6[3] = ip->ip_src.s_addr;
	}

	if (ip->ip_dst.s_addr != 0) {
		dst6[2] = IPV6_ADDR_INT32_SMP;
		dst6[3] = ip->ip_dst.s_addr;
	}
}
#endif /* INET6 */
776
777/*
778 * subroutine of udp_input(), mainly for source code readability.
779 */
780static void
781#if INET6
782udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
783    struct sockaddr_in *pudp_in, struct udp_in6 *pudp_in6,
784    struct udp_ip6 *pudp_ip6, struct ifnet *ifp)
785#else /* !INET6 */
786udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
787    struct sockaddr_in *pudp_in, struct ifnet *ifp)
788#endif /* !INET6 */
789{
790	struct sockaddr *append_sa;
791	struct mbuf *opts = 0;
792	boolean_t cell = IFNET_IS_CELLULAR(ifp);
793	boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp));
794	boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp));
795	int ret = 0;
796
797#if CONFIG_MACF_NET
798	if (mac_inpcb_check_deliver(last, n, AF_INET, SOCK_DGRAM) != 0) {
799		m_freem(n);
800		return;
801	}
802#endif /* CONFIG_MACF_NET */
803	if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
804	    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
805	    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
806#if INET6
807		if (last->inp_vflag & INP_IPV6) {
808			int savedflags;
809
810			if (pudp_ip6->uip6_init_done == 0) {
811				ip_2_ip6_hdr(&pudp_ip6->uip6_ip6, ip);
812				pudp_ip6->uip6_init_done = 1;
813			}
814			savedflags = last->inp_flags;
815			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
816			ret = ip6_savecontrol(last, n, &opts);
817			if (ret != 0) {
818				last->inp_flags = savedflags;
819				goto error;
820			}
821			last->inp_flags = savedflags;
822		} else
823#endif /* INET6 */
824		{
825			ret = ip_savecontrol(last, &opts, ip, n);
826			if (ret != 0) {
827				goto error;
828			}
829		}
830	}
831#if INET6
832	if (last->inp_vflag & INP_IPV6) {
833		if (pudp_in6->uin6_init_done == 0) {
834			in6_sin_2_v4mapsin6(pudp_in, &pudp_in6->uin6_sin);
835			pudp_in6->uin6_init_done = 1;
836		}
837		append_sa = (struct sockaddr *)&pudp_in6->uin6_sin;
838	} else
839#endif /* INET6 */
840	append_sa = (struct sockaddr *)pudp_in;
841	if (nstat_collect) {
842		INP_ADD_STAT(last, cell, wifi, wired, rxpackets, 1);
843		INP_ADD_STAT(last, cell, wifi, wired, rxbytes,
844		    n->m_pkthdr.len);
845	}
846	so_recv_data_stat(last->inp_socket, n, 0);
847	m_adj(n, off);
848	if (sbappendaddr(&last->inp_socket->so_rcv, append_sa,
849	    n, opts, NULL) == 0) {
850		udpstat.udps_fullsock++;
851	} else {
852		sorwakeup(last->inp_socket);
853	}
854	return;
855error:
856	m_freem(n);
857	m_freem(opts);
858	return;
859}
860
861/*
862 * Notify a udp user of an asynchronous error;
863 * just wake up so that he can collect error status.
864 */
865void
866udp_notify(struct inpcb *inp, int errno)
867{
868	inp->inp_socket->so_error = errno;
869	sorwakeup(inp->inp_socket);
870	sowwakeup(inp->inp_socket);
871}
872
873void
874udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
875{
876	struct ip *ip = vip;
877	void (*notify)(struct inpcb *, int) = udp_notify;
878        struct in_addr faddr;
879	struct inpcb *inp;
880
881	faddr = ((struct sockaddr_in *)(void *)sa)->sin_addr;
882	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
883		return;
884
885	if (PRC_IS_REDIRECT(cmd)) {
886		ip = 0;
887		notify = in_rtchange;
888	} else if (cmd == PRC_HOSTDEAD) {
889		ip = 0;
890	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
891		return;
892	}
893	if (ip) {
894		struct udphdr uh;
895
896		bcopy(((caddr_t)ip + (ip->ip_hl << 2)), &uh, sizeof (uh));
897		inp = in_pcblookup_hash(&udbinfo, faddr, uh.uh_dport,
898                    ip->ip_src, uh.uh_sport, 0, NULL);
899		if (inp != NULL && inp->inp_socket != NULL) {
900			udp_lock(inp->inp_socket, 1, 0);
901			if (in_pcb_checkstate(inp, WNT_RELEASE, 1) ==
902			    WNT_STOPUSING)  {
903				udp_unlock(inp->inp_socket, 1, 0);
904				return;
905			}
906			(*notify)(inp, inetctlerrmap[cmd]);
907			udp_unlock(inp->inp_socket, 1, 0);
908		}
909	} else {
910		in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
911	}
912}
913
914int
915udp_ctloutput(struct socket *so, struct sockopt *sopt)
916{
917	int	error, optval;
918	struct	inpcb *inp;
919
920	/* Allow <SOL_SOCKET,SO_FLUSH> at this level */
921	if (sopt->sopt_level != IPPROTO_UDP &&
922	    !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
923		return (ip_ctloutput(so, sopt));
924
925	error = 0;
926	inp = sotoinpcb(so);
927
928	switch (sopt->sopt_dir) {
929	case SOPT_SET:
930		switch (sopt->sopt_name) {
931		case UDP_NOCKSUM:
932			/* This option is settable only for UDP over IPv4 */
933			if (!(inp->inp_vflag & INP_IPV4)) {
934				error = EINVAL;
935				break;
936			}
937
938			if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
939			    sizeof (optval))) != 0)
940				break;
941
942			if (optval != 0)
943				inp->inp_flags |= INP_UDP_NOCKSUM;
944			else
945				inp->inp_flags &= ~INP_UDP_NOCKSUM;
946			break;
947
948		case SO_FLUSH:
949			if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
950			    sizeof (optval))) != 0)
951				break;
952
953			error = inp_flush(inp, optval);
954			break;
955
956		default:
957			error = ENOPROTOOPT;
958			break;
959		}
960		break;
961
962	case SOPT_GET:
963		switch (sopt->sopt_name) {
964		case UDP_NOCKSUM:
965			optval = inp->inp_flags & INP_UDP_NOCKSUM;
966			break;
967
968		default:
969			error = ENOPROTOOPT;
970			break;
971		}
972		if (error == 0)
973			error = sooptcopyout(sopt, &optval, sizeof (optval));
974		break;
975	}
976	return (error);
977}
978
979static int
980udp_pcblist SYSCTL_HANDLER_ARGS
981{
982#pragma unused(oidp, arg1, arg2)
983	int error, i, n;
984	struct inpcb *inp, **inp_list;
985	inp_gen_t gencnt;
986	struct xinpgen xig;
987
988	/*
989	 * The process of preparing the TCB list is too time-consuming and
990	 * resource-intensive to repeat twice on every request.
991	 */
992	lck_rw_lock_exclusive(udbinfo.ipi_lock);
993	if (req->oldptr == USER_ADDR_NULL) {
994		n = udbinfo.ipi_count;
995		req->oldidx = 2 * (sizeof (xig))
996			+ (n + n/8) * sizeof (struct xinpcb);
997		lck_rw_done(udbinfo.ipi_lock);
998		return (0);
999	}
1000
1001	if (req->newptr != USER_ADDR_NULL) {
1002		lck_rw_done(udbinfo.ipi_lock);
1003		return (EPERM);
1004	}
1005
1006	/*
1007	 * OK, now we're committed to doing something.
1008	 */
1009	gencnt = udbinfo.ipi_gencnt;
1010	n = udbinfo.ipi_count;
1011
1012	bzero(&xig, sizeof (xig));
1013	xig.xig_len = sizeof (xig);
1014	xig.xig_count = n;
1015	xig.xig_gen = gencnt;
1016	xig.xig_sogen = so_gencnt;
1017	error = SYSCTL_OUT(req, &xig, sizeof (xig));
1018	if (error) {
1019		lck_rw_done(udbinfo.ipi_lock);
1020		return (error);
1021	}
1022	/*
1023	 * We are done if there is no pcb
1024	 */
1025	if (n == 0) {
1026		lck_rw_done(udbinfo.ipi_lock);
1027		return (0);
1028	}
1029
1030	inp_list = _MALLOC(n * sizeof (*inp_list), M_TEMP, M_WAITOK);
1031	if (inp_list == 0) {
1032		lck_rw_done(udbinfo.ipi_lock);
1033		return (ENOMEM);
1034	}
1035
1036	for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
1037	     inp = LIST_NEXT(inp, inp_list)) {
1038		if (inp->inp_gencnt <= gencnt &&
1039		    inp->inp_state != INPCB_STATE_DEAD)
1040			inp_list[i++] = inp;
1041	}
1042	n = i;
1043
1044	error = 0;
1045	for (i = 0; i < n; i++) {
1046		inp = inp_list[i];
1047		if (inp->inp_gencnt <= gencnt &&
1048		    inp->inp_state != INPCB_STATE_DEAD) {
1049			struct xinpcb xi;
1050
1051			bzero(&xi, sizeof (xi));
1052			xi.xi_len = sizeof (xi);
1053			/* XXX should avoid extra copy */
1054			inpcb_to_compat(inp, &xi.xi_inp);
1055			if (inp->inp_socket)
1056				sotoxsocket(inp->inp_socket, &xi.xi_socket);
1057			error = SYSCTL_OUT(req, &xi, sizeof (xi));
1058		}
1059	}
1060	if (!error) {
1061		/*
1062		 * Give the user an updated idea of our state.
1063		 * If the generation differs from what we told
1064		 * her before, she knows that something happened
1065		 * while we were processing this request, and it
1066		 * might be necessary to retry.
1067		 */
1068		bzero(&xig, sizeof (xig));
1069		xig.xig_len = sizeof (xig);
1070		xig.xig_gen = udbinfo.ipi_gencnt;
1071		xig.xig_sogen = so_gencnt;
1072		xig.xig_count = udbinfo.ipi_count;
1073		error = SYSCTL_OUT(req, &xig, sizeof (xig));
1074	}
1075	FREE(inp_list, M_TEMP);
1076	lck_rw_done(udbinfo.ipi_lock);
1077	return (error);
1078}
1079
1080SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
1081    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist,
1082    "S,xinpcb", "List of active UDP sockets");
1083
1084
1085static int
1086udp_pcblist64 SYSCTL_HANDLER_ARGS
1087{
1088#pragma unused(oidp, arg1, arg2)
1089        int error, i, n;
1090        struct inpcb *inp, **inp_list;
1091        inp_gen_t gencnt;
1092        struct xinpgen xig;
1093
1094        /*
1095         * The process of preparing the TCB list is too time-consuming and
1096         * resource-intensive to repeat twice on every request.
1097         */
1098        lck_rw_lock_shared(udbinfo.ipi_lock);
1099        if (req->oldptr == USER_ADDR_NULL) {
1100                n = udbinfo.ipi_count;
1101                req->oldidx =
1102		    2 * (sizeof (xig)) + (n + n/8) * sizeof (struct xinpcb64);
1103                lck_rw_done(udbinfo.ipi_lock);
1104                return (0);
1105        }
1106
1107        if (req->newptr != USER_ADDR_NULL) {
1108                lck_rw_done(udbinfo.ipi_lock);
1109                return (EPERM);
1110        }
1111
1112        /*
1113         * OK, now we're committed to doing something.
1114         */
1115        gencnt = udbinfo.ipi_gencnt;
1116        n = udbinfo.ipi_count;
1117
1118        bzero(&xig, sizeof (xig));
1119        xig.xig_len = sizeof (xig);
1120        xig.xig_count = n;
1121        xig.xig_gen = gencnt;
1122        xig.xig_sogen = so_gencnt;
1123        error = SYSCTL_OUT(req, &xig, sizeof (xig));
1124        if (error) {
1125                lck_rw_done(udbinfo.ipi_lock);
1126                return (error);
1127        }
1128	/*
1129	 * We are done if there is no pcb
1130	 */
1131	if (n == 0) {
1132		lck_rw_done(udbinfo.ipi_lock);
1133		return (0);
1134	}
1135
1136        inp_list = _MALLOC(n * sizeof (*inp_list), M_TEMP, M_WAITOK);
1137        if (inp_list == 0) {
1138                lck_rw_done(udbinfo.ipi_lock);
1139                return (ENOMEM);
1140        }
1141
1142        for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
1143             inp = LIST_NEXT(inp, inp_list)) {
1144                if (inp->inp_gencnt <= gencnt &&
1145		    inp->inp_state != INPCB_STATE_DEAD)
1146                        inp_list[i++] = inp;
1147        }
1148        n = i;
1149
1150        error = 0;
1151        for (i = 0; i < n; i++) {
1152                inp = inp_list[i];
1153                if (inp->inp_gencnt <= gencnt &&
1154		    inp->inp_state != INPCB_STATE_DEAD) {
1155                        struct xinpcb64 xi;
1156
1157                        bzero(&xi, sizeof (xi));
1158                        xi.xi_len = sizeof (xi);
1159                        inpcb_to_xinpcb64(inp, &xi);
1160                        if (inp->inp_socket)
1161                                sotoxsocket64(inp->inp_socket, &xi.xi_socket);
1162                        error = SYSCTL_OUT(req, &xi, sizeof (xi));
1163                }
1164        }
1165        if (!error) {
1166                /*
1167                 * Give the user an updated idea of our state.
1168                 * If the generation differs from what we told
1169                 * her before, she knows that something happened
1170                 * while we were processing this request, and it
1171                 * might be necessary to retry.
1172                 */
1173                bzero(&xig, sizeof (xig));
1174                xig.xig_len = sizeof (xig);
1175                xig.xig_gen = udbinfo.ipi_gencnt;
1176                xig.xig_sogen = so_gencnt;
1177                xig.xig_count = udbinfo.ipi_count;
1178                error = SYSCTL_OUT(req, &xig, sizeof (xig));
1179        }
1180        FREE(inp_list, M_TEMP);
1181        lck_rw_done(udbinfo.ipi_lock);
1182        return (error);
1183}
1184
1185SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64,
1186    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist64,
1187    "S,xinpcb64", "List of active UDP sockets");
1188
1189
1190static int
1191udp_pcblist_n SYSCTL_HANDLER_ARGS
1192{
1193#pragma unused(oidp, arg1, arg2)
1194	return (get_pcblist_n(IPPROTO_UDP, req, &udbinfo));
1195}
1196
1197SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n,
1198    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist_n,
1199    "S,xinpcb_n", "List of active UDP sockets");
1200
1201__private_extern__ void
1202udp_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
1203    bitstr_t *bitfield)
1204{
1205	inpcb_get_ports_used(ifindex, protocol, flags, bitfield, &udbinfo);
1206}
1207
1208__private_extern__ uint32_t
1209udp_count_opportunistic(unsigned int ifindex, u_int32_t flags)
1210{
1211	return (inpcb_count_opportunistic(ifindex, &udbinfo, flags));
1212}
1213
1214__private_extern__ uint32_t
1215udp_find_anypcb_byaddr(struct ifaddr *ifa)
1216{
1217	return (inpcb_find_anypcb_byaddr(ifa, &udbinfo));
1218}
1219
1220static int
1221udp_check_pktinfo(struct mbuf *control, struct ifnet **outif,
1222    struct in_addr *laddr)
1223{
1224	struct cmsghdr *cm = 0;
1225	struct in_pktinfo *pktinfo;
1226	struct ifnet *ifp;
1227
1228	if (outif != NULL)
1229		*outif = NULL;
1230
1231	/*
1232	 * XXX: Currently, we assume all the optional information is stored
1233	 * in a single mbuf.
1234	 */
1235	if (control->m_next)
1236		return (EINVAL);
1237
1238	if (control->m_len < CMSG_LEN(0))
1239		return (EINVAL);
1240
1241	for (cm = M_FIRST_CMSGHDR(control); cm;
1242	    cm = M_NXT_CMSGHDR(control, cm)) {
1243		if (cm->cmsg_len < sizeof (struct cmsghdr) ||
1244		    cm->cmsg_len > control->m_len)
1245			return (EINVAL);
1246
1247		if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_PKTINFO)
1248			continue;
1249
1250		if (cm->cmsg_len != CMSG_LEN(sizeof (struct in_pktinfo)))
1251			return (EINVAL);
1252
1253		pktinfo =  (struct in_pktinfo *)(void *)CMSG_DATA(cm);
1254
1255		/* Check for a valid ifindex in pktinfo */
1256		ifnet_head_lock_shared();
1257
1258		if (pktinfo->ipi_ifindex > if_index) {
1259			ifnet_head_done();
1260			return (ENXIO);
1261		}
1262
1263		/*
1264		 * If ipi_ifindex is specified it takes precedence
1265		 * over ipi_spec_dst.
1266		 */
1267		if (pktinfo->ipi_ifindex) {
1268			ifp = ifindex2ifnet[pktinfo->ipi_ifindex];
1269			if (ifp == NULL) {
1270				ifnet_head_done();
1271				return (ENXIO);
1272			}
1273			if (outif != NULL) {
1274				ifnet_reference(ifp);
1275				*outif = ifp;
1276			}
1277			ifnet_head_done();
1278			laddr->s_addr = INADDR_ANY;
1279			break;
1280		}
1281
1282		ifnet_head_done();
1283
1284		/*
1285		 * Use the provided ipi_spec_dst address for temp
1286		 * source address.
1287		 */
1288		*laddr = pktinfo->ipi_spec_dst;
1289		break;
1290	}
1291	return (0);
1292}
1293
/*
 * Build one UDP/IPv4 datagram for `inp' and hand it to ip_output().
 *
 * inp      PCB of the sending socket; its inpcb_mtx must be held on
 *          entry (asserted below).
 * m        packet-header mbuf chain holding the payload.  It is either
 *          passed on to ip_output() or freed on the error paths here.
 * addr     optional destination, interpreted as a struct sockaddr_in.
 *          When NULL the PCB's foreign address is used.
 * control  optional ancillary data.  It is always freed here, after the
 *          service class and any IP_PKTINFO information are extracted.
 * p        process handed to in_pcbconnect() for the implicit connect.
 *
 * The socket lock is dropped around the ip_output() call and re-taken
 * afterwards.
 *
 * Returns 0 or an errno value.  Errors produced directly here are
 * EMSGSIZE, ENOBUFS, EADDRNOTAVAIL, EISCONN, ENOTCONN and (with NECP)
 * EHOSTUNREACH; errors from udp_check_pktinfo(), in_pcbconnect(),
 * in_pcbladdr() and ip_output() are passed through.
 */
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct proc *p)
{
	struct udpiphdr *ui;
	int len = m->m_pkthdr.len;
	struct sockaddr_in *sin;
	struct in_addr origladdr, laddr, faddr, pi_laddr;
	u_short lport, fport;
	int error = 0, udp_dodisconnect = 0, pktinfo = 0;
	struct socket *so = inp->inp_socket;
	int soopts = 0;
	struct mbuf *inpopts;
	struct ip_moptions *mopts;
	struct route ro;
	struct ip_out_args ipoa =
	    { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 };
	struct ifnet *outif = NULL;
	struct flowadv *adv = &ipoa.ipoa_flowadv;
	mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
	struct ifnet *origoutifp = NULL;
	int flowadv = 0;

	/* Enable flow advisory only when connected */
	flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0;
	pi_laddr.s_addr = INADDR_ANY;

	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);

	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
	if (control != NULL) {
		/*
		 * Pull the service class and any IP_PKTINFO data out of the
		 * control mbuf, then free it; nothing below looks at it.
		 */
		msc = mbuf_service_class_from_control(control);
		VERIFY(outif == NULL);
		error = udp_check_pktinfo(control, &outif, &pi_laddr);
		m_freem(control);
		control = NULL;
		if (error)
			goto release;
		/*
		 * Note: this is set whenever control data was supplied,
		 * whether or not it carried an IP_PKTINFO message.
		 */
		pktinfo++;
		if (outif != NULL)
			ipoa.ipoa_boundif = outif->if_index;
	}

	KERNEL_DEBUG(DBG_LAYER_OUT_BEG, inp->inp_fport, inp->inp_lport,
	    inp->inp_laddr.s_addr, inp->inp_faddr.s_addr,
	    (htons((u_short)len + sizeof (struct udphdr))));

	/* Payload plus UDP/IP headers must fit in a single IP packet. */
	if (len + sizeof (struct udpiphdr) > IP_MAXPACKET) {
		error = EMSGSIZE;
		goto release;
	}

	if (flowadv && INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		/*
		 * The socket is flow-controlled, drop the packets
		 * until the inp is not flow controlled
		 */
		error = ENOBUFS;
		goto release;
	}
	/*
	 * If socket was bound to an ifindex, tell ip_output about it.
	 * If the ancillary IP_PKTINFO option contains an interface index,
	 * it takes precedence over the one specified by IP_BOUND_IF.
	 */
	if (ipoa.ipoa_boundif == IFSCOPE_NONE &&
	    (inp->inp_flags & INP_BOUND_IF)) {
		VERIFY(inp->inp_boundifp != NULL);
		ifnet_reference(inp->inp_boundifp);	/* for this routine */
		if (outif != NULL)
			ifnet_release(outif);
		outif = inp->inp_boundifp;
		ipoa.ipoa_boundif = outif->if_index;
	}
	/* Translate per-socket interface restrictions into ip_output flags. */
	if (INP_NO_CELLULAR(inp))
		ipoa.ipoa_flags |=  IPOAF_NO_CELLULAR;
	if (INP_NO_EXPENSIVE(inp))
		ipoa.ipoa_flags |=  IPOAF_NO_EXPENSIVE;
	if (INP_AWDL_UNRESTRICTED(inp))
		ipoa.ipoa_flags |=  IPOAF_AWDL_UNRESTRICTED;
	soopts |= IP_OUTARGS;

	/*
	 * If there was a routing change, discard cached route and check
	 * that we have a valid source address.  Reacquire a new source
	 * address if INADDR_ANY was specified.
	 */
	if (ROUTE_UNUSABLE(&inp->inp_route)) {
		struct in_ifaddr *ia = NULL;

		ROUTE_RELEASE(&inp->inp_route);

		/* src address is gone? */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
			if (!(inp->inp_flags & INP_INADDR_ANY) ||
			    (so->so_state & SS_ISCONNECTED)) {
				/*
				 * Rdar://5448998
				 * If the source address is gone, return an
				 * error if:
				 * - the source was specified
				 * - the socket was already connected
				 */
				soevent(so, (SO_FILT_HINT_LOCKED |
				    SO_FILT_HINT_NOSRCADDR));
				error = EADDRNOTAVAIL;
				goto release;
			} else {
				/* new src will be set later */
				inp->inp_laddr.s_addr = INADDR_ANY;
				inp->inp_last_outifp = NULL;
			}
		}
		/* Drop the reference obtained from ifa_foraddr(), if any. */
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
	}

	/*
	 * IP_PKTINFO option check.  If a temporary scope or src address
	 * is provided, use it for this packet only and make sure we forget
	 * it after sending this datagram.
	 */
	if (pi_laddr.s_addr != INADDR_ANY ||
	    (ipoa.ipoa_boundif != IFSCOPE_NONE && pktinfo)) {
		/* temp src address for this datagram only */
		laddr = pi_laddr;
		origladdr.s_addr = INADDR_ANY;
		/* we don't want to keep the laddr or route */
		udp_dodisconnect = 1;
		/* remember we don't care about src addr. */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		origladdr = laddr = inp->inp_laddr;
	}

	/* Snapshot the PCB's addressing state; restored/used further down. */
	origoutifp = inp->inp_last_outifp;
	faddr = inp->inp_faddr;
	lport = inp->inp_lport;
	fport = inp->inp_fport;

	if (addr) {
		sin = (struct sockaddr_in *)(void *)addr;
		/* An explicit destination is not allowed on a connected PCB. */
		if (faddr.s_addr != INADDR_ANY) {
			error = EISCONN;
			goto release;
		}
		if (lport == 0) {
			/*
			 * In case we don't have a local port set, go through
			 * the full connect.  We don't have a local port yet
			 * (i.e., we can't be looked up), so it's not an issue
			 * if the input runs at the same time we do this.
			 */
			/* if we have a source address specified, use that */
			if (pi_laddr.s_addr != INADDR_ANY)
				inp->inp_laddr = pi_laddr;
			/*
			 * If a scope is specified, use it.  Scope from
			 * IP_PKTINFO takes precedence over the scope
			 * set via INP_BOUND_IF.
			 */
			error = in_pcbconnect(inp, addr, p, ipoa.ipoa_boundif,
			    &outif);
			if (error)
				goto release;

			laddr = inp->inp_laddr;
			lport = inp->inp_lport;
			faddr = inp->inp_faddr;
			fport = inp->inp_fport;
			/* The connect was temporary; undo it after sending. */
			udp_dodisconnect = 1;

			/* synch up in case in_pcbladdr() overrides */
			if (outif != NULL && ipoa.ipoa_boundif != IFSCOPE_NONE)
				ipoa.ipoa_boundif = outif->if_index;
		}
		else {
			/*
			 * Fast path case
			 *
			 * We have a full address and a local port; use those
			 * info to build the packet without changing the pcb
			 * and interfering with the input path. See 3851370.
			 *
			 * Scope from IP_PKTINFO takes precedence over the
			 * scope set via INP_BOUND_IF.
			 */
			if (laddr.s_addr == INADDR_ANY) {
				if ((error = in_pcbladdr(inp, addr, &laddr,
				    ipoa.ipoa_boundif, &outif)) != 0)
					goto release;
				/*
				 * from pcbconnect: remember we don't
				 * care about src addr.
				 */
				inp->inp_flags |= INP_INADDR_ANY;

				/* synch up in case in_pcbladdr() overrides */
				if (outif != NULL &&
				    ipoa.ipoa_boundif != IFSCOPE_NONE)
					ipoa.ipoa_boundif = outif->if_index;
			}

			faddr = sin->sin_addr;
			fport = sin->sin_port;
		}
	} else {
		/* No destination given: the PCB must already be connected. */
		if (faddr.s_addr == INADDR_ANY) {
			error = ENOTCONN;
			goto release;
		}
	}

#if CONFIG_MACF_NET
	mac_mbuf_label_associate_inpcb(inp, m);
#endif /* CONFIG_MACF_NET */

	/* Compute the flow hash lazily, the first time it is needed. */
	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	/*
	 * Calculate data length and get a mbuf
	 * for UDP and IP headers.
	 */
	M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT);
	if (m == 0) {
		error = ENOBUFS;
		goto abort;
	}

	/*
	 * Fill in mbuf with extended UDP header
	 * and addresses and length put into network format.
	 */
	ui = mtod(m, struct udpiphdr *);
	bzero(ui->ui_x1, sizeof (ui->ui_x1));	/* XXX still needed? */
	ui->ui_pr = IPPROTO_UDP;
	ui->ui_src = laddr;
	ui->ui_dst = faddr;
	ui->ui_sport = lport;
	ui->ui_dport = fport;
	ui->ui_ulen = htons((u_short)len + sizeof (struct udphdr));

	/*
	 * Set up checksum and output datagram.
	 */
	if (udpcksum && !(inp->inp_flags & INP_UDP_NOCKSUM)) {
		/*
		 * Store only the pseudo-header sum here and flag the packet
		 * with CSUM_UDP; csum_data records the offset of the
		 * checksum field within the UDP header.
		 */
		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
		    htons((u_short)len + sizeof (struct udphdr) + IPPROTO_UDP));
		m->m_pkthdr.csum_flags = CSUM_UDP;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	} else {
		ui->ui_sum = 0;
	}
	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
	udpstat.udps_opackets++;

	KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport,
		     ui->ui_src.s_addr, ui->ui_dst.s_addr, ui->ui_ulen);

#if NECP
	{
		necp_kernel_policy_id policy_id;
		if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport, &laddr, &faddr, NULL, &policy_id)) {
			error = EHOSTUNREACH;
			goto abort;
		}

		necp_mark_packet_from_socket(m, inp, policy_id);
	}
#endif /* NECP */

#if IPSEC
	if (inp->inp_sp != NULL && ipsec_setsocket(m, inp->inp_socket) != 0) {
		error = ENOBUFS;
		goto abort;
	}
#endif /* IPSEC */

	inpopts = inp->inp_options;
	soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
	mopts = inp->inp_moptions;
	if (mopts != NULL) {
		/*
		 * Hold a reference on the multicast options for the
		 * ip_output() call below; it is dropped afterwards.
		 */
		IMO_LOCK(mopts);
		IMO_ADDREF_LOCKED(mopts);
		if (IN_MULTICAST(ntohl(ui->ui_dst.s_addr)) &&
		    mopts->imo_multicast_ifp != NULL) {
			/* no reference needed */
			inp->inp_last_outifp = mopts->imo_multicast_ifp;
		}
		IMO_UNLOCK(mopts);
	}

	/* Copy the cached route and take an extra reference */
	inp_route_copyout(inp, &ro);

	/* Tag the packet with its service class and flow information. */
	set_packet_service_class(m, so, msc, 0);
	m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB;
	m->m_pkthdr.pkt_flowid = inp->inp_flowhash;
	m->m_pkthdr.pkt_proto = IPPROTO_UDP;
	m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC);
	if (flowadv)
		m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV;

	if (ipoa.ipoa_boundif != IFSCOPE_NONE)
		ipoa.ipoa_flags |= IPOAF_BOUND_IF;

	if (laddr.s_addr != INADDR_ANY)
		ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;

	/* Count this send as in progress while the socket lock is dropped. */
	inp->inp_sndinprog_cnt++;

	socket_unlock(so, 0);
	error = ip_output(m, inpopts, &ro, soopts, mopts, &ipoa);
	/* m was handed to ip_output(); make sure release does not free it. */
	m = NULL;
	socket_lock(so, 0);
	if (mopts != NULL)
		IMO_REMREF(mopts);

	if (error == 0 && nstat_collect) {
		boolean_t cell, wifi, wired;

		/* Attribute the traffic to the interface type of the route. */
		if (ro.ro_rt != NULL) {
			cell = IFNET_IS_CELLULAR(ro.ro_rt->rt_ifp);
			wifi = (!cell && IFNET_IS_WIFI(ro.ro_rt->rt_ifp));
			wired = (!wifi && IFNET_IS_WIRED(ro.ro_rt->rt_ifp));
		} else {
			cell = wifi = wired = FALSE;
		}
		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, len);
	}

	if (flowadv && (adv->code == FADV_FLOW_CONTROLLED ||
	    adv->code == FADV_SUSPENDED)) {
		/* return a hint to the application that
		 * the packet has been dropped
		 */
		error = ENOBUFS;
		inp_set_fc_state(inp, adv->code);
	}

	VERIFY(inp->inp_sndinprog_cnt > 0);
	/* Last in-flight send finished: clear the feedback flag. */
	if ( --inp->inp_sndinprog_cnt == 0)
		inp->inp_flags &= ~(INP_FC_FEEDBACK);

	/* Synchronize PCB cached route */
	inp_route_copyin(inp, &ro);

abort:
	if (udp_dodisconnect) {
		/* Always discard the cached route for unconnected socket */
		ROUTE_RELEASE(&inp->inp_route);
		in_pcbdisconnect(inp);
		inp->inp_laddr = origladdr;	/* XXX rehash? */
		/* no reference needed */
		inp->inp_last_outifp = origoutifp;
	} else if (inp->inp_route.ro_rt != NULL) {
		struct rtentry *rt = inp->inp_route.ro_rt;
		struct ifnet *outifp;

		if (rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST))
			rt = NULL;	/* unusable */
		/*
		 * Always discard if it is a multicast or broadcast route.
		 */
		if (rt == NULL)
			ROUTE_RELEASE(&inp->inp_route);

		/*
		 * If the destination route is unicast, update outifp with
		 * that of the route interface used by IP.
		 */
		if (rt != NULL && (outifp = rt->rt_ifp) != inp->inp_last_outifp)
			inp->inp_last_outifp = outifp;	/* no reference needed */
	} else {
		ROUTE_RELEASE(&inp->inp_route);
	}

	/*
	 * If output interface was cellular/expensive, and this socket is
	 * denied access to it, generate an event.
	 */
	if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) &&
	    (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp)))
		soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED));

release:
	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0);

	/* Free the packet if it was never handed to ip_output(). */
	if (m != NULL)
		m_freem(m);

	/* Drop the interface reference taken for this routine, if any. */
	if (outif != NULL)
		ifnet_release(outif);

	return (error);
}
1695
/*
 * Default socket buffer reservations for UDP sockets.  udp_attach()
 * passes both values to soreserve(); sysctl_udp_sospace() lets them be
 * changed at run time within the limit derived from sb_max.
 */
u_int32_t	udp_sendspace = 9216;		/* really max datagram size */
/*
 * 187 1K datagrams (approx 192 KB): each slot is sized as 1024 bytes
 * plus one socket address structure.
 */
u_int32_t	udp_recvspace = 187 * (1024 +
#if INET6
		    sizeof (struct sockaddr_in6)
#else /* !INET6 */
		    sizeof (struct sockaddr_in)
#endif /* !INET6 */
		);
1705
1706/* Check that the values of udp send and recv space do not exceed sb_max */
1707static int
1708sysctl_udp_sospace(struct sysctl_oid *oidp, void *arg1, int arg2,
1709    struct sysctl_req *req)
1710{
1711#pragma unused(arg1, arg2)
1712	u_int32_t new_value = 0, *space_p = NULL;
1713	int changed = 0, error = 0;
1714	u_quad_t sb_effective_max = (sb_max/(MSIZE+MCLBYTES)) * MCLBYTES;
1715
1716	switch (oidp->oid_number) {
1717	case UDPCTL_RECVSPACE:
1718		space_p = &udp_recvspace;
1719		break;
1720	case UDPCTL_MAXDGRAM:
1721		space_p = &udp_sendspace;
1722		break;
1723	default:
1724		return EINVAL;
1725	}
1726        error = sysctl_io_number(req, *space_p, sizeof (u_int32_t),
1727	    &new_value, &changed);
1728        if (changed) {
1729                if (new_value > 0 && new_value <= sb_effective_max)
1730                        *space_p = new_value;
1731                else
1732                        error = ERANGE;
1733        }
1734        return (error);
1735}
1736
1737SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace,
1738    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &udp_recvspace, 0,
1739    &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size");
1740
1741SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram,
1742    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &udp_sendspace, 0,
1743    &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size");
1744
1745static int
1746udp_abort(struct socket *so)
1747{
1748	struct inpcb *inp;
1749
1750	inp = sotoinpcb(so);
1751	if (inp == NULL) {
1752		panic("%s: so=%p null inp\n", __func__, so);
1753		/* NOTREACHED */
1754	}
1755	soisdisconnected(so);
1756	in_pcbdetach(inp);
1757	return (0);
1758}
1759
1760static int
1761udp_attach(struct socket *so, int proto, struct proc *p)
1762{
1763#pragma unused(proto)
1764	struct inpcb *inp;
1765	int error;
1766
1767	inp = sotoinpcb(so);
1768	if (inp != NULL) {
1769		panic ("%s so=%p inp=%p\n", __func__, so, inp);
1770		/* NOTREACHED */
1771	}
1772	error = in_pcballoc(so, &udbinfo, p);
1773	if (error != 0)
1774		return (error);
1775	error = soreserve(so, udp_sendspace, udp_recvspace);
1776	if (error != 0)
1777		return (error);
1778	inp = (struct inpcb *)so->so_pcb;
1779	inp->inp_vflag |= INP_IPV4;
1780	inp->inp_ip_ttl = ip_defttl;
1781	if (nstat_collect)
1782		nstat_udp_new_pcb(inp);
1783	return (0);
1784}
1785
1786static int
1787udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
1788{
1789	struct inpcb *inp;
1790	int error;
1791
1792	if (nam->sa_family != 0 && nam->sa_family != AF_INET &&
1793	    nam->sa_family != AF_INET6)
1794		return (EAFNOSUPPORT);
1795
1796	inp = sotoinpcb(so);
1797	if (inp == NULL
1798#if NECP
1799		|| (necp_socket_should_use_flow_divert(inp))
1800#endif /* NECP */
1801		)
1802		return (inp == NULL ? EINVAL : EPROTOTYPE);
1803	error = in_pcbbind(inp, nam, p);
1804	return (error);
1805}
1806
1807static int
1808udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
1809{
1810	struct inpcb *inp;
1811	int error;
1812
1813	inp = sotoinpcb(so);
1814	if (inp == NULL
1815#if NECP
1816		|| (necp_socket_should_use_flow_divert(inp))
1817#endif /* NECP */
1818		)
1819		return (inp == NULL ? EINVAL : EPROTOTYPE);
1820	if (inp->inp_faddr.s_addr != INADDR_ANY)
1821		return (EISCONN);
1822	error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL);
1823	if (error == 0) {
1824		soisconnected(so);
1825		if (inp->inp_flowhash == 0)
1826			inp->inp_flowhash = inp_calc_flowhash(inp);
1827	}
1828	return (error);
1829}
1830
1831int
1832udp_connectx_common(struct socket *so, int af,
1833    struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl,
1834    struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid,
1835    uint32_t flags, void *arg, uint32_t arglen)
1836{
1837#pragma unused(aid, flags, arg, arglen)
1838	struct sockaddr_entry *src_se = NULL, *dst_se = NULL;
1839	struct inpcb *inp = sotoinpcb(so);
1840	int error;
1841
1842	if (inp == NULL)
1843		return (EINVAL);
1844
1845	VERIFY(dst_sl != NULL);
1846
1847	/* select source (if specified) and destination addresses */
1848	error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se);
1849	if (error != 0)
1850		return (error);
1851
1852	VERIFY(*dst_sl != NULL && dst_se != NULL);
1853	VERIFY(src_se == NULL || *src_sl != NULL);
1854	VERIFY(dst_se->se_addr->sa_family == af);
1855	VERIFY(src_se == NULL || src_se->se_addr->sa_family == af);
1856
1857#if NECP
1858	inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope);
1859#endif /* NECP */
1860
1861	/* bind socket to the specified interface, if requested */
1862	if (ifscope != IFSCOPE_NONE &&
1863	    (error = inp_bindif(inp, ifscope, NULL)) != 0)
1864		return (error);
1865
1866	/* if source address and/or port is specified, bind to it */
1867	if (src_se != NULL) {
1868		struct sockaddr *sa = src_se->se_addr;
1869		error = sobindlock(so, sa, 0);	/* already locked */
1870		if (error != 0)
1871			return (error);
1872	}
1873
1874	switch (af) {
1875	case AF_INET:
1876		error = udp_connect(so, dst_se->se_addr, p);
1877		break;
1878#if INET6
1879	case AF_INET6:
1880		error = udp6_connect(so, dst_se->se_addr, p);
1881		break;
1882#endif /* INET6 */
1883	default:
1884		VERIFY(0);
1885		/* NOTREACHED */
1886	}
1887
1888	if (error == 0 && pcid != NULL)
1889		*pcid = 1;	/* there is only 1 connection for a UDP */
1890
1891	return (error);
1892}
1893
1894static int
1895udp_connectx(struct socket *so, struct sockaddr_list **src_sl,
1896    struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
1897    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
1898    uint32_t arglen)
1899{
1900	return (udp_connectx_common(so, AF_INET, src_sl, dst_sl,
1901	    p, ifscope, aid, pcid, flags, arg, arglen));
1902}
1903
1904static int
1905udp_detach(struct socket *so)
1906{
1907	struct inpcb *inp;
1908
1909	inp = sotoinpcb(so);
1910	if (inp == NULL) {
1911		panic("%s: so=%p null inp\n", __func__, so);
1912		/* NOTREACHED */
1913	}
1914
1915	/*
1916	 * If this is a socket that does not want to wakeup the device
1917	 * for it's traffic, the application might be waiting for
1918	 * close to complete before going to sleep. Send a notification
1919	 * for this kind of sockets
1920	 */
1921	if (so->so_options & SO_NOWAKEFROMSLEEP)
1922		socket_post_kev_msg_closed(so);
1923
1924	in_pcbdetach(inp);
1925	inp->inp_state = INPCB_STATE_DEAD;
1926	return (0);
1927}
1928
1929static int
1930udp_disconnect(struct socket *so)
1931{
1932	struct inpcb *inp;
1933
1934	inp = sotoinpcb(so);
1935	if (inp == NULL
1936#if NECP
1937		|| (necp_socket_should_use_flow_divert(inp))
1938#endif /* NECP */
1939		)
1940		return (inp == NULL ? EINVAL : EPROTOTYPE);
1941	if (inp->inp_faddr.s_addr == INADDR_ANY)
1942		return (ENOTCONN);
1943
1944	in_pcbdisconnect(inp);
1945
1946	/* reset flow controlled state, just in case */
1947	inp_reset_fc_state(inp);
1948
1949	inp->inp_laddr.s_addr = INADDR_ANY;
1950	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
1951	inp->inp_last_outifp = NULL;
1952	return (0);
1953}
1954
1955static int
1956udp_disconnectx(struct socket *so, associd_t aid, connid_t cid)
1957{
1958#pragma unused(cid)
1959	if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
1960		return (EINVAL);
1961
1962	return (udp_disconnect(so));
1963}
1964
1965static int
1966udp_send(struct socket *so, int flags, struct mbuf *m,
1967    struct sockaddr *addr, struct mbuf *control, struct proc *p)
1968{
1969#pragma unused(flags)
1970	struct inpcb *inp;
1971
1972	inp = sotoinpcb(so);
1973	if (inp == NULL
1974#if NECP
1975		|| (necp_socket_should_use_flow_divert(inp))
1976#endif /* NECP */
1977		) {
1978		if (m != NULL)
1979			m_freem(m);
1980		if (control != NULL)
1981			m_freem(control);
1982		return (inp == NULL ? EINVAL : EPROTOTYPE);
1983	}
1984
1985	return (udp_output(inp, m, addr, control, p));
1986}
1987
1988int
1989udp_shutdown(struct socket *so)
1990{
1991	struct inpcb *inp;
1992
1993	inp = sotoinpcb(so);
1994	if (inp == NULL)
1995		return (EINVAL);
1996	socantsendmore(so);
1997	return (0);
1998}
1999
2000int
2001udp_lock(struct socket *so, int refcount, void *debug)
2002{
2003	void *lr_saved;
2004
2005	if (debug == NULL)
2006		lr_saved = __builtin_return_address(0);
2007	else
2008		lr_saved = debug;
2009
2010	if (so->so_pcb != NULL) {
2011		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
2012		    LCK_MTX_ASSERT_NOTOWNED);
2013		lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
2014	} else {
2015		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
2016		    so, lr_saved, solockhistory_nr(so));
2017		/* NOTREACHED */
2018	}
2019	if (refcount)
2020		so->so_usecount++;
2021
2022	so->lock_lr[so->next_lock_lr] = lr_saved;
2023	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2024	return (0);
2025}
2026
2027int
2028udp_unlock(struct socket *so, int refcount, void *debug)
2029{
2030	void *lr_saved;
2031
2032	if (debug == NULL)
2033		lr_saved = __builtin_return_address(0);
2034	else
2035		lr_saved = debug;
2036
2037	if (refcount)
2038		so->so_usecount--;
2039
2040	if (so->so_pcb == NULL) {
2041		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
2042		    so, lr_saved, solockhistory_nr(so));
2043		/* NOTREACHED */
2044	} else {
2045		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
2046		    LCK_MTX_ASSERT_OWNED);
2047		so->unlock_lr[so->next_unlock_lr] = lr_saved;
2048		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2049		lck_mtx_unlock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
2050	}
2051	return (0);
2052}
2053
2054lck_mtx_t *
2055udp_getlock(struct socket *so, int locktype)
2056{
2057#pragma unused(locktype)
2058	struct inpcb *inp = sotoinpcb(so);
2059
2060	if (so->so_pcb == NULL) {
2061		panic("%s: so=%p NULL so_pcb lrh= %s\n", __func__,
2062		    so, solockhistory_nr(so));
2063		/* NOTREACHED */
2064	}
2065	return (&inp->inpcb_mtx);
2066}
2067
2068/*
2069 * UDP garbage collector callback (inpcb_timer_func_t).
2070 *
2071 * Returns > 0 to keep timer active.
2072 */
2073static void
2074udp_gc(struct inpcbinfo *ipi)
2075{
2076	struct inpcb *inp, *inpnxt;
2077	struct socket *so;
2078
2079	if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) {
2080		if (udp_gc_done == TRUE) {
2081			udp_gc_done = FALSE;
2082			/* couldn't get the lock, must lock next time */
2083			atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
2084			return;
2085		}
2086		lck_rw_lock_exclusive(ipi->ipi_lock);
2087	}
2088
2089	udp_gc_done = TRUE;
2090
2091	for (inp = udb.lh_first; inp != NULL; inp = inpnxt) {
2092		inpnxt = inp->inp_list.le_next;
2093
2094		/*
2095		 * Skip unless it's STOPUSING; garbage collector will
2096		 * be triggered by in_pcb_checkstate() upon setting
2097		 * wantcnt to that value.  If the PCB is already dead,
2098		 * keep gc active to anticipate wantcnt changing.
2099		 */
2100		if (inp->inp_wantcnt != WNT_STOPUSING)
2101			continue;
2102
2103		/*
2104		 * Skip if busy, no hurry for cleanup.  Keep gc active
2105		 * and try the lock again during next round.
2106		 */
2107		if (!lck_mtx_try_lock(&inp->inpcb_mtx)) {
2108			atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
2109			continue;
2110		}
2111
2112		/*
2113		 * Keep gc active unless usecount is 0.
2114		 */
2115		so = inp->inp_socket;
2116		if (so->so_usecount == 0) {
2117			if (inp->inp_state != INPCB_STATE_DEAD) {
2118#if INET6
2119				if (SOCK_CHECK_DOM(so, PF_INET6))
2120					in6_pcbdetach(inp);
2121				else
2122#endif /* INET6 */
2123					in_pcbdetach(inp);
2124			}
2125			in_pcbdispose(inp);
2126		} else {
2127			lck_mtx_unlock(&inp->inpcb_mtx);
2128			atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
2129		}
2130	}
2131	lck_rw_done(ipi->ipi_lock);
2132
2133	return;
2134}
2135
2136static int
2137udp_getstat SYSCTL_HANDLER_ARGS
2138{
2139#pragma unused(oidp, arg1, arg2)
2140	if (req->oldptr == USER_ADDR_NULL)
2141		req->oldlen = (size_t)sizeof (struct udpstat);
2142
2143	return (SYSCTL_OUT(req, &udpstat, MIN(sizeof (udpstat), req->oldlen)));
2144}
2145
2146void
2147udp_in_cksum_stats(u_int32_t len)
2148{
2149	udpstat.udps_rcv_swcsum++;
2150	udpstat.udps_rcv_swcsum_bytes += len;
2151}
2152
2153void
2154udp_out_cksum_stats(u_int32_t len)
2155{
2156	udpstat.udps_snd_swcsum++;
2157	udpstat.udps_snd_swcsum_bytes += len;
2158}
2159
2160#if INET6
2161void
2162udp_in6_cksum_stats(u_int32_t len)
2163{
2164	udpstat.udps_rcv6_swcsum++;
2165	udpstat.udps_rcv6_swcsum_bytes += len;
2166}
2167
2168void
2169udp_out6_cksum_stats(u_int32_t len)
2170{
2171	udpstat.udps_snd6_swcsum++;
2172	udpstat.udps_snd6_swcsum_bytes += len;
2173}
2174#endif /* INET6 */
2175
2176/*
2177 * Checksum extended UDP header and data.
2178 */
2179static int
2180udp_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen)
2181{
2182	struct ifnet *ifp = m->m_pkthdr.rcvif;
2183	struct ip *ip = mtod(m, struct ip *);
2184	struct ipovly *ipov = (struct ipovly *)ip;
2185
2186	if (uh->uh_sum == 0) {
2187		udpstat.udps_nosum++;
2188		return (0);
2189	}
2190
2191	if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) ||
2192	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) &&
2193	    (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
2194		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
2195			uh->uh_sum = m->m_pkthdr.csum_rx_val;
2196		} else {
2197			uint16_t sum = m->m_pkthdr.csum_rx_val;
2198			uint16_t start = m->m_pkthdr.csum_rx_start;
2199
2200			/*
2201			 * Perform 1's complement adjustment of octets
2202			 * that got included/excluded in the hardware-
2203			 * calculated checksum value.  Ignore cases
2204			 * where the value includes or excludes the
2205			 * IP header span, as the sum for those octets
2206			 * would already be 0xffff and thus no-op.
2207			 */
2208			if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) &&
2209			    start != 0 && (off - start) != off) {
2210#if BYTE_ORDER != BIG_ENDIAN
2211				if (start < off) {
2212					HTONS(ip->ip_len);
2213					HTONS(ip->ip_off);
2214				}
2215#endif /* BYTE_ORDER != BIG_ENDIAN */
2216				/* callee folds in sum */
2217				sum = m_adj_sum16(m, start, off, sum);
2218#if BYTE_ORDER != BIG_ENDIAN
2219				if (start < off) {
2220					NTOHS(ip->ip_off);
2221					NTOHS(ip->ip_len);
2222				}
2223#endif /* BYTE_ORDER != BIG_ENDIAN */
2224			}
2225
2226			/* callee folds in sum */
2227			uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
2228			    ip->ip_dst.s_addr, sum + htonl(ulen + IPPROTO_UDP));
2229		}
2230		uh->uh_sum ^= 0xffff;
2231	} else {
2232		uint16_t ip_sum;
2233		char b[9];
2234
2235		bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
2236		bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
2237		ip_sum = ipov->ih_len;
2238		ipov->ih_len = uh->uh_ulen;
2239		uh->uh_sum = in_cksum(m, ulen + sizeof (struct ip));
2240		bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
2241		ipov->ih_len = ip_sum;
2242
2243		udp_in_cksum_stats(ulen);
2244	}
2245
2246	if (uh->uh_sum != 0) {
2247		udpstat.udps_badsum++;
2248		IF_UDP_STATINC(ifp, badchksum);
2249		return (-1);
2250	}
2251
2252	return (0);
2253}
2254