1/*
2 * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#define	__KPI__
30#include <sys/systm.h>
31#include <sys/kernel.h>
32#include <sys/types.h>
33#include <sys/socket.h>
34#include <sys/socketvar.h>
35#include <sys/param.h>
36#include <sys/proc.h>
37#include <sys/errno.h>
38#include <sys/malloc.h>
39#include <sys/protosw.h>
40#include <sys/domain.h>
41#include <sys/mbuf.h>
42#include <sys/fcntl.h>
43#include <sys/filio.h>
44#include <sys/uio_internal.h>
45#include <kern/lock.h>
46#include <netinet/in.h>
47#include <libkern/OSAtomic.h>
48
49static errno_t sock_send_internal(socket_t, const struct msghdr	*,
50    mbuf_t, int, size_t	*);
51static void sock_setupcalls_common(socket_t, sock_upcall, void *,
52    sock_upcall, void *);
53
54errno_t
55sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
56    sock_upcall callback, void *cookie, socket_t *new_sock)
57{
58	struct sockaddr *sa;
59	struct socket *new_so;
60	lck_mtx_t *mutex_held;
61	int dosocklock;
62	errno_t	error = 0;
63
64	if (sock == NULL || new_sock == NULL)
65		return (EINVAL);
66
67	socket_lock(sock, 1);
68	if ((sock->so_options & SO_ACCEPTCONN) == 0) {
69		socket_unlock(sock, 1);
70		return (EINVAL);
71	}
72	if ((flags & ~(MSG_DONTWAIT)) != 0) {
73		socket_unlock(sock, 1);
74		return (ENOTSUP);
75	}
76	if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
77	    sock->so_comp.tqh_first == NULL) {
78		socket_unlock(sock, 1);
79		return (EWOULDBLOCK);
80	}
81
82	if (sock->so_proto->pr_getlock != NULL)  {
83		mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
84		dosocklock = 1;
85	} else {
86		mutex_held = sock->so_proto->pr_domain->dom_mtx;
87		dosocklock = 0;
88	}
89
90	while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
91		if (sock->so_state & SS_CANTRCVMORE) {
92			sock->so_error = ECONNABORTED;
93			break;
94		}
95		error = msleep((caddr_t)&sock->so_timeo, mutex_held,
96		    PSOCK | PCATCH, "sock_accept", NULL);
97		if (error != 0) {
98			socket_unlock(sock, 1);
99			return (error);
100		}
101	}
102	if (sock->so_error != 0) {
103		error = sock->so_error;
104		sock->so_error = 0;
105		socket_unlock(sock, 1);
106		return (error);
107	}
108
109	new_so = TAILQ_FIRST(&sock->so_comp);
110	TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
111	sock->so_qlen--;
112
113	/*
114	 * Pass the pre-accepted socket to any interested socket filter(s).
115	 * Upon failure, the socket would have been closed by the callee.
116	 */
117	if (new_so->so_filt != NULL) {
118		/*
119		 * Temporarily drop the listening socket's lock before we
120		 * hand off control over to the socket filter(s), but keep
121		 * a reference so that it won't go away.  We'll grab it
122		 * again once we're done with the filter(s).
123		 */
124		socket_unlock(sock, 0);
125		if ((error = soacceptfilter(new_so)) != 0) {
126			/* Drop reference on listening socket */
127			sodereference(sock);
128			return (error);
129		}
130		socket_lock(sock, 0);
131	}
132
133	if (dosocklock)	{
134		lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0),
135		    LCK_MTX_ASSERT_NOTOWNED);
136		socket_lock(new_so, 1);
137	}
138
139	new_so->so_state &= ~SS_COMP;
140	new_so->so_head = NULL;
141	(void) soacceptlock(new_so, &sa, 0);
142
143	socket_unlock(sock, 1);	/* release the head */
144
145	/* see comments in sock_setupcall() */
146	if (callback != NULL) {
147		sock_setupcalls_common(new_so, callback, cookie, NULL, NULL);
148	}
149
150	if (sa != NULL && from != NULL) {
151		if (fromlen > sa->sa_len)
152			fromlen = sa->sa_len;
153		memcpy(from, sa, fromlen);
154	}
155	if (sa != NULL)
156		FREE(sa, M_SONAME);
157
158	/*
159	 * If the socket has been marked as inactive by sosetdefunct(),
160	 * disallow further operations on it.
161	 */
162	if (new_so->so_flags & SOF_DEFUNCT) {
163		(void) sodefunct(current_proc(), new_so,
164		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
165	}
166	*new_sock = new_so;
167	if (dosocklock)
168		socket_unlock(new_so, 1);
169	return (error);
170}
171
172errno_t
173sock_bind(socket_t sock, const struct sockaddr *to)
174{
175	int error = 0;
176	struct sockaddr *sa = NULL;
177	struct sockaddr_storage ss;
178	boolean_t want_free = TRUE;
179
180	if (sock == NULL || to == NULL)
181		return (EINVAL);
182
183	if (to->sa_len > sizeof (ss)) {
184		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
185		if (sa == NULL)
186			return (ENOBUFS);
187	} else {
188		sa = (struct sockaddr *)&ss;
189		want_free = FALSE;
190	}
191	memcpy(sa, to, to->sa_len);
192
193	error = sobindlock(sock, sa, 1);	/* will lock socket */
194
195	if (sa != NULL && want_free == TRUE)
196		FREE(sa, M_SONAME);
197
198	return (error);
199}
200
201errno_t
202sock_connect(socket_t sock, const struct sockaddr *to, int flags)
203{
204	int error = 0;
205	lck_mtx_t *mutex_held;
206	struct sockaddr *sa = NULL;
207	struct sockaddr_storage ss;
208	boolean_t want_free = TRUE;
209
210	if (sock == NULL || to == NULL)
211		return (EINVAL);
212
213	if (to->sa_len > sizeof (ss)) {
214		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
215		    (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
216		if (sa == NULL)
217			return (ENOBUFS);
218	} else {
219		sa = (struct sockaddr *)&ss;
220		want_free = FALSE;
221	}
222	memcpy(sa, to, to->sa_len);
223
224	socket_lock(sock, 1);
225
226	if ((sock->so_state & SS_ISCONNECTING) &&
227	    ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
228		error = EALREADY;
229		goto out;
230	}
231	error = soconnectlock(sock, sa, 0);
232	if (!error) {
233		if ((sock->so_state & SS_ISCONNECTING) &&
234		    ((sock->so_state & SS_NBIO) != 0 ||
235		    (flags & MSG_DONTWAIT) != 0)) {
236			error = EINPROGRESS;
237			goto out;
238		}
239
240		if (sock->so_proto->pr_getlock != NULL)
241			mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
242		else
243			mutex_held = sock->so_proto->pr_domain->dom_mtx;
244
245		while ((sock->so_state & SS_ISCONNECTING) &&
246		    sock->so_error == 0) {
247			error = msleep((caddr_t)&sock->so_timeo,
248			    mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
249			if (error != 0)
250				break;
251		}
252
253		if (error == 0) {
254			error = sock->so_error;
255			sock->so_error = 0;
256		}
257	} else {
258		sock->so_state &= ~SS_ISCONNECTING;
259	}
260out:
261	socket_unlock(sock, 1);
262
263	if (sa != NULL && want_free == TRUE)
264		FREE(sa, M_SONAME);
265
266	return (error);
267}
268
269errno_t
270sock_connectwait(socket_t sock, const struct timeval *tv)
271{
272	lck_mtx_t *mutex_held;
273	errno_t	retval = 0;
274	struct timespec ts;
275
276	socket_lock(sock, 1);
277
278	/* Check if we're already connected or if we've already errored out */
279	if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
280		if (sock->so_error != 0) {
281			retval = sock->so_error;
282			sock->so_error = 0;
283		} else {
284			if ((sock->so_state & SS_ISCONNECTED) != 0)
285				retval = 0;
286			else
287				retval = EINVAL;
288		}
289		goto done;
290	}
291
292	/* copied translation from timeval to hertz from SO_RCVTIMEO handling */
293	if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
294	    tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
295		retval = EDOM;
296		goto done;
297	}
298
299	ts.tv_sec = tv->tv_sec;
300	ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
301	if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100  >  SHRT_MAX)  {
302		retval = EDOM;
303		goto done;
304	}
305
306	if (sock->so_proto->pr_getlock != NULL)
307		mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
308	else
309		mutex_held = sock->so_proto->pr_domain->dom_mtx;
310
311	msleep((caddr_t)&sock->so_timeo, mutex_held,
312	    PSOCK, "sock_connectwait", &ts);
313
314	/* Check if we're still waiting to connect */
315	if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
316		retval = EINPROGRESS;
317		goto done;
318	}
319
320	if (sock->so_error != 0) {
321		retval = sock->so_error;
322		sock->so_error = 0;
323	}
324
325done:
326	socket_unlock(sock, 1);
327	return (retval);
328}
329
330errno_t
331sock_nointerrupt(socket_t sock, int on)
332{
333	socket_lock(sock, 1);
334
335	if (on) {
336		sock->so_rcv.sb_flags |= SB_NOINTR;	/* This isn't safe */
337		sock->so_snd.sb_flags |= SB_NOINTR;	/* This isn't safe */
338	} else {
339		sock->so_rcv.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
340		sock->so_snd.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
341	}
342
343	socket_unlock(sock, 1);
344
345	return (0);
346}
347
348errno_t
349sock_getpeername(socket_t sock, struct sockaddr	*peername, int peernamelen)
350{
351	int error;
352	struct sockaddr	*sa = NULL;
353
354	if (sock == NULL || peername == NULL || peernamelen < 0)
355		return (EINVAL);
356
357	socket_lock(sock, 1);
358	if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
359		socket_unlock(sock, 1);
360		return (ENOTCONN);
361	}
362	error = sogetaddr_locked(sock, &sa, 1);
363	socket_unlock(sock, 1);
364	if (error == 0) {
365		if (peernamelen > sa->sa_len)
366			peernamelen = sa->sa_len;
367		memcpy(peername, sa, peernamelen);
368		FREE(sa, M_SONAME);
369	}
370	return (error);
371}
372
373errno_t
374sock_getsockname(socket_t sock, struct sockaddr	*sockname, int socknamelen)
375{
376	int error;
377	struct sockaddr	*sa = NULL;
378
379	if (sock == NULL || sockname == NULL || socknamelen < 0)
380		return (EINVAL);
381
382	socket_lock(sock, 1);
383	error = sogetaddr_locked(sock, &sa, 0);
384	socket_unlock(sock, 1);
385	if (error == 0) {
386		if (socknamelen > sa->sa_len)
387			socknamelen = sa->sa_len;
388		memcpy(sockname, sa, socknamelen);
389		FREE(sa, M_SONAME);
390	}
391	return (error);
392}
393
394__private_extern__ int
395sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
396{
397	int error;
398
399	if (so == NULL || psa == NULL)
400		return (EINVAL);
401
402	*psa = NULL;
403	error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
404	    so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);
405
406	if (error == 0 && *psa == NULL) {
407		error = ENOMEM;
408	} else if (error != 0 && *psa != NULL) {
409		FREE(*psa, M_SONAME);
410		*psa = NULL;
411	}
412	return (error);
413}
414
415errno_t
416sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
417{
418	int error;
419
420	if (sock == NULL || psa == NULL)
421		return (EINVAL);
422
423	socket_lock(sock, 1);
424	error = sogetaddr_locked(sock, psa, peer);
425	socket_unlock(sock, 1);
426
427	return (error);
428}
429
430void
431sock_freeaddr(struct sockaddr *sa)
432{
433	if (sa != NULL)
434		FREE(sa, M_SONAME);
435}
436
437errno_t
438sock_getsockopt(socket_t sock, int level, int optname, void *optval,
439    int	*optlen)
440{
441	int error = 0;
442	struct sockopt	sopt;
443
444	if (sock == NULL || optval == NULL || optlen == NULL)
445		return (EINVAL);
446
447	sopt.sopt_dir = SOPT_GET;
448	sopt.sopt_level = level;
449	sopt.sopt_name = optname;
450	sopt.sopt_val = CAST_USER_ADDR_T(optval);
451	sopt.sopt_valsize = *optlen;
452	sopt.sopt_p = kernproc;
453	error = sogetoptlock(sock, &sopt, 1);	/* will lock socket */
454	if (error == 0)
455		*optlen = sopt.sopt_valsize;
456	return (error);
457}
458
459errno_t
460sock_ioctl(socket_t sock, unsigned long request, void *argp)
461{
462	return (soioctl(sock, request, argp, kernproc)); /* will lock socket */
463}
464
465errno_t
466sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
467    int	optlen)
468{
469	struct sockopt	sopt;
470
471	if (sock == NULL || optval == NULL)
472		return (EINVAL);
473
474	sopt.sopt_dir = SOPT_SET;
475	sopt.sopt_level = level;
476	sopt.sopt_name = optname;
477	sopt.sopt_val = CAST_USER_ADDR_T(optval);
478	sopt.sopt_valsize = optlen;
479	sopt.sopt_p = kernproc;
480	return (sosetoptlock(sock, &sopt, 1)); /* will lock socket */
481}
482
483/*
484 * This follows the recommended mappings between DSCP code points
485 * and WMM access classes.
486 */
487static u_int32_t so_tc_from_dscp(u_int8_t dscp);
488static u_int32_t
489so_tc_from_dscp(u_int8_t dscp)
490{
491	u_int32_t tc;
492
493	if (dscp >= 0x30 && dscp <= 0x3f)
494		tc = SO_TC_VO;
495	else if (dscp >= 0x20 && dscp <= 0x2f)
496		tc = SO_TC_VI;
497	else if (dscp >= 0x08 && dscp <= 0x17)
498		tc = SO_TC_BK;
499	else
500		tc = SO_TC_BE;
501
502	return (tc);
503}
504
505errno_t
506sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
507{
508	errno_t error = 0;
509	struct sockopt sopt;
510	int sotc;
511
512	if (sock == NULL || optval == NULL || optlen != sizeof (int))
513		return (EINVAL);
514
515	socket_lock(sock, 1);
516	if (!(sock->so_state & SS_ISCONNECTED)) {
517		/*
518		 * If the socket is not connected then we don't know
519		 * if the destination is on LAN  or not. Skip
520		 * setting traffic class in this case
521		 */
522		error = ENOTCONN;
523		goto out;
524	}
525
526	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
527	    sock->so_pcb == NULL) {
528		error = EINVAL;
529		goto out;
530	}
531
532	/*
533	 * Set the socket traffic class based on the passed DSCP code point
534	 * regardless of the scope of the destination
535	 */
536	sotc = so_tc_from_dscp((*(const int *)optval) >> 2);
537
538	sopt.sopt_dir = SOPT_SET;
539	sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
540	sopt.sopt_valsize = sizeof (sotc);
541	sopt.sopt_p = kernproc;
542	sopt.sopt_level = SOL_SOCKET;
543	sopt.sopt_name = SO_TRAFFIC_CLASS;
544
545	error = sosetoptlock(sock, &sopt, 0);	/* already locked */
546
547	if (error != 0) {
548		printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
549		    __func__, error);
550		goto out;
551	}
552
553	/*
554	 * Check if the destination address is LAN or link local address.
555	 * We do not want to set traffic class bits if the destination
556	 * is not local.
557	 */
558	if (!so_isdstlocal(sock))
559		goto out;
560
561	sopt.sopt_dir = SOPT_SET;
562	sopt.sopt_val = CAST_USER_ADDR_T(optval);
563	sopt.sopt_valsize = optlen;
564	sopt.sopt_p = kernproc;
565
566	switch (SOCK_DOM(sock)) {
567	case PF_INET:
568		sopt.sopt_level = IPPROTO_IP;
569		sopt.sopt_name = IP_TOS;
570		break;
571	case PF_INET6:
572		sopt.sopt_level = IPPROTO_IPV6;
573		sopt.sopt_name = IPV6_TCLASS;
574		break;
575	default:
576		error = EINVAL;
577		goto out;
578	}
579
580	error = sosetoptlock(sock, &sopt, 0);	/* already locked */
581	socket_unlock(sock, 1);
582	return (error);
583out:
584	socket_unlock(sock, 1);
585	return (error);
586}
587
588errno_t
589sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
590{
591	errno_t error = 0;
592	struct sockopt sopt;
593
594	if (sock == NULL || optval == NULL || optlen == NULL)
595		return (EINVAL);
596
597	sopt.sopt_dir = SOPT_GET;
598	sopt.sopt_val = CAST_USER_ADDR_T(optval);
599	sopt.sopt_valsize = *optlen;
600	sopt.sopt_p = kernproc;
601
602	socket_lock(sock, 1);
603	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
604		socket_unlock(sock, 1);
605		return (EINVAL);
606	}
607
608	switch (SOCK_DOM(sock)) {
609	case PF_INET:
610		sopt.sopt_level = IPPROTO_IP;
611		sopt.sopt_name = IP_TOS;
612		break;
613	case PF_INET6:
614		sopt.sopt_level = IPPROTO_IPV6;
615		sopt.sopt_name = IPV6_TCLASS;
616		break;
617	default:
618		socket_unlock(sock, 1);
619		return (EINVAL);
620
621	}
622	error = sogetoptlock(sock, &sopt, 0);	/* already locked */
623	socket_unlock(sock, 1);
624	if (error == 0)
625		*optlen = sopt.sopt_valsize;
626	return (error);
627}
628
629errno_t
630sock_listen(socket_t sock, int backlog)
631{
632	if (sock == NULL)
633		return (EINVAL);
634
635	return (solisten(sock, backlog)); /* will lock socket */
636}
637
638errno_t
639sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
640    int flags, size_t *recvdlen)
641{
642	uio_t auio;
643	struct mbuf *control = NULL;
644	int error = 0;
645	int length = 0;
646	struct sockaddr	*fromsa = NULL;
647	char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];
648
649	if (sock == NULL)
650		return (EINVAL);
651
652	auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
653	    0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf));
654	if (msg != NULL && data == NULL) {
655		int i;
656		struct iovec *tempp = msg->msg_iov;
657
658		for (i = 0; i < msg->msg_iovlen; i++) {
659			uio_addiov(auio,
660			    CAST_USER_ADDR_T((tempp + i)->iov_base),
661			    (tempp + i)->iov_len);
662		}
663		if (uio_resid(auio) < 0)
664			return (EINVAL);
665	} else if (recvdlen != NULL) {
666		uio_setresid(auio, (uio_resid(auio) + *recvdlen));
667	}
668	length = uio_resid(auio);
669
670	if (recvdlen != NULL)
671		*recvdlen = 0;
672
673	/* let pru_soreceive handle the socket locking */
674	error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
675	    data, (msg && msg->msg_control) ? &control : NULL, &flags);
676	if (error != 0)
677		goto cleanup;
678
679	if (recvdlen != NULL)
680		*recvdlen = length - uio_resid(auio);
681	if (msg != NULL) {
682		msg->msg_flags = flags;
683
684		if (msg->msg_name != NULL) {
685			int salen;
686			salen = msg->msg_namelen;
687			if (msg->msg_namelen > 0 && fromsa != NULL) {
688				salen = MIN(salen, fromsa->sa_len);
689				memcpy(msg->msg_name, fromsa,
690				    msg->msg_namelen > fromsa->sa_len ?
691				    fromsa->sa_len : msg->msg_namelen);
692			}
693		}
694
695		if (msg->msg_control != NULL) {
696			struct mbuf *m = control;
697			u_char *ctlbuf = msg->msg_control;
698			int clen = msg->msg_controllen;
699
700			msg->msg_controllen = 0;
701
702			while (m != NULL && clen > 0) {
703				unsigned int tocopy;
704
705				if (clen >= m->m_len) {
706					tocopy = m->m_len;
707				} else {
708					msg->msg_flags |= MSG_CTRUNC;
709					tocopy = clen;
710				}
711				memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
712				ctlbuf += tocopy;
713				clen -= tocopy;
714				m = m->m_next;
715			}
716			msg->msg_controllen =
717			    (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
718		}
719	}
720
721cleanup:
722	if (control != NULL)
723		m_freem(control);
724	if (fromsa != NULL)
725		FREE(fromsa, M_SONAME);
726	return (error);
727}
728
729errno_t
730sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
731{
732	if ((msg == NULL) || (msg->msg_iovlen < 1) ||
733	    (msg->msg_iov[0].iov_len == 0) ||
734	    (msg->msg_iov[0].iov_base == NULL))
735		return (EINVAL);
736
737	return (sock_receive_internal(sock, msg, NULL, flags, recvdlen));
738}
739
740errno_t
741sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
742    size_t *recvlen)
743{
744	if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL &&
745	    (msg->msg_iov != NULL || msg->msg_iovlen != 0)))
746		return (EINVAL);
747
748	return (sock_receive_internal(sock, msg, data, flags, recvlen));
749}
750
751errno_t
752sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
753    int flags, size_t *sentlen)
754{
755	uio_t auio = NULL;
756	struct mbuf *control = NULL;
757	int error = 0;
758	int datalen = 0;
759	char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];
760
761	if (sock == NULL) {
762		error = EINVAL;
763		goto errorout;
764	}
765
766	if (data == NULL && msg != NULL) {
767		struct iovec *tempp = msg->msg_iov;
768
769		auio = uio_createwithbuffer(msg->msg_iovlen, 0,
770		    UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf));
771		if (tempp != NULL) {
772			int i;
773
774			for (i = 0; i < msg->msg_iovlen; i++) {
775				uio_addiov(auio,
776				    CAST_USER_ADDR_T((tempp + i)->iov_base),
777				    (tempp + i)->iov_len);
778			}
779
780			if (uio_resid(auio) < 0) {
781				error = EINVAL;
782				goto errorout;
783			}
784		}
785	}
786
787	if (sentlen != NULL)
788		*sentlen = 0;
789
790	if (auio != NULL)
791		datalen = uio_resid(auio);
792	else
793		datalen = data->m_pkthdr.len;
794
795	if (msg != NULL && msg->msg_control) {
796		if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) {
797			error = EINVAL;
798			goto errorout;
799		}
800
801		if ((size_t)msg->msg_controllen > MLEN) {
802			error = EINVAL;
803			goto errorout;
804		}
805
806		control = m_get(M_NOWAIT, MT_CONTROL);
807		if (control == NULL) {
808			error = ENOMEM;
809			goto errorout;
810		}
811		memcpy(mtod(control, caddr_t), msg->msg_control,
812		    msg->msg_controllen);
813		control->m_len = msg->msg_controllen;
814	}
815
816	error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
817	    (struct sockaddr *)msg->msg_name : NULL, auio, data,
818	    control, flags);
819
820	/*
821	 * Residual data is possible in the case of IO vectors but not
822	 * in the mbuf case since the latter is treated as atomic send.
823	 * If pru_sosend() consumed a portion of the iovecs data and
824	 * the error returned is transient, treat it as success; this
825	 * is consistent with sendit() behavior.
826	 */
827	if (auio != NULL && uio_resid(auio) != datalen &&
828	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
829		error = 0;
830
831	if (error == 0 && sentlen != NULL) {
832		if (auio != NULL)
833			*sentlen = datalen - uio_resid(auio);
834		else
835			*sentlen = datalen;
836	}
837
838	return (error);
839
840/*
841 * In cases where we detect an error before returning, we need to
842 * free the mbuf chain if there is one. sosend (and pru_sosend) will
843 * free the mbuf chain if they encounter an error.
844 */
845errorout:
846	if (control)
847		m_freem(control);
848	if (data)
849		m_freem(data);
850	if (sentlen)
851		*sentlen = 0;
852	return (error);
853}
854
855errno_t
856sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
857{
858	if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
859		return (EINVAL);
860
861	return (sock_send_internal(sock, msg, NULL, flags, sentlen));
862}
863
864errno_t
865sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
866    int	flags, size_t *sentlen)
867{
868	if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
869	    msg->msg_iovlen != 0))) {
870		if (data != NULL)
871			m_freem(data);
872		return (EINVAL);
873	}
874	return (sock_send_internal(sock, msg, data, flags, sentlen));
875}
876
877errno_t
878sock_shutdown(socket_t sock, int how)
879{
880	if (sock == NULL)
881		return (EINVAL);
882
883	return (soshutdown(sock, how));
884}
885
886
887errno_t
888sock_socket(int	domain, int type, int protocol, sock_upcall callback,
889    void *context, socket_t *new_so)
890{
891	int error = 0;
892
893	if (new_so == NULL)
894		return (EINVAL);
895
896	/* socreate will create an initial so_count */
897	error = socreate(domain, new_so, type, protocol);
898	if (error == 0) {
899		/* see comments in sock_setupcall() */
900		if (callback != NULL) {
901			sock_setupcalls_common(*new_so, callback, context,
902			    NULL, NULL);
903		}
904		/*
905		 * last_pid and last_upid should be zero for sockets
906		 * created using sock_socket
907		 */
908		(*new_so)->last_pid = 0;
909		(*new_so)->last_upid = 0;
910	}
911	return (error);
912}
913
914void
915sock_close(socket_t sock)
916{
917	if (sock == NULL)
918		return;
919
920	soclose(sock);
921}
922
923/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
924void
925sock_retain(socket_t sock)
926{
927	if (sock == NULL)
928		return;
929
930	socket_lock(sock, 1);
931	sock->so_retaincnt++;
932	sock->so_usecount++;	/* add extra reference for holding the socket */
933	socket_unlock(sock, 1);
934}
935
936/* Do we want this to be APPLE_PRIVATE API? */
937void
938sock_release(socket_t sock)
939{
940	if (sock == NULL)
941		return;
942
943	socket_lock(sock, 1);
944	if (sock->so_upcallusecount > 0)
945		soclose_wait_locked(sock);
946
947	sock->so_retaincnt--;
948	if (sock->so_retaincnt < 0) {
949		panic("%s: negative retain count (%d) for sock=%p\n",
950		    __func__, sock->so_retaincnt, sock);
951		/* NOTREACHED */
952	}
953	if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2)) {
954		/* close socket only if the FD is not holding it */
955		soclose_locked(sock);
956	} else {
957		/* remove extra reference holding the socket */
958		sock->so_usecount--;
959	}
960	socket_unlock(sock, 1);
961}
962
963errno_t
964sock_setpriv(socket_t sock, int on)
965{
966	if (sock == NULL)
967		return (EINVAL);
968
969	socket_lock(sock, 1);
970	if (on)
971		sock->so_state |= SS_PRIV;
972	else
973		sock->so_state &= ~SS_PRIV;
974	socket_unlock(sock, 1);
975	return (0);
976}
977
978int
979sock_isconnected(socket_t sock)
980{
981	int retval;
982
983	socket_lock(sock, 1);
984	retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
985	socket_unlock(sock, 1);
986	return (retval);
987}
988
989int
990sock_isnonblocking(socket_t sock)
991{
992	int retval;
993
994	socket_lock(sock, 1);
995	retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
996	socket_unlock(sock, 1);
997	return (retval);
998}
999
1000errno_t
1001sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
1002{
1003	socket_lock(sock, 1);
1004	if (outDomain != NULL)
1005		*outDomain = SOCK_DOM(sock);
1006	if (outType != NULL)
1007		*outType = sock->so_type;
1008	if (outProtocol != NULL)
1009		*outProtocol = SOCK_PROTO(sock);
1010	socket_unlock(sock, 1);
1011	return (0);
1012}
1013
1014/*
1015 * Return the listening socket of a pre-accepted socket.  It returns the
1016 * listener (so_head) value of a given socket.  This is intended to be
1017 * called by a socket filter during a filter attach (sf_attach) callback.
1018 * The value returned by this routine is safe to be used only in the
1019 * context of that callback, because we hold the listener's lock across
1020 * the sflt_initsock() call.
1021 */
1022socket_t
1023sock_getlistener(socket_t sock)
1024{
1025	return (sock->so_head);
1026}
1027
1028static inline void
1029sock_set_tcp_stream_priority(socket_t sock)
1030{
1031	if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
1032	    SOCK_TYPE(sock) == SOCK_STREAM) {
1033		set_tcp_stream_priority(sock);
1034	}
1035}
1036
1037/*
1038 * Caller must have ensured socket is valid and won't be going away.
1039 */
1040void
1041socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
1042{
1043	(void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags);
1044	sock_set_tcp_stream_priority(sock);
1045}
1046
1047void
1048socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
1049{
1050	socket_lock(sock, 1);
1051	socket_set_traffic_mgt_flags_locked(sock, flags);
1052	socket_unlock(sock, 1);
1053}
1054
1055/*
1056 * Caller must have ensured socket is valid and won't be going away.
1057 */
1058void
1059socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
1060{
1061	(void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags);
1062	sock_set_tcp_stream_priority(sock);
1063}
1064
1065void
1066socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags)
1067{
1068	socket_lock(sock, 1);
1069	socket_clear_traffic_mgt_flags_locked(sock, flags);
1070	socket_unlock(sock, 1);
1071}
1072
1073
1074/*
1075 * Caller must have ensured socket is valid and won't be going away.
1076 */
1077errno_t
1078socket_defunct(struct proc *p, socket_t so, int level)
1079{
1080	errno_t retval;
1081
1082	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1083	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
1084		return (EINVAL);
1085
1086	socket_lock(so, 1);
1087	/*
1088	 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
1089	 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
1090	 * that this is an implementation artifact of mDNSResponder.  We do
1091	 * a quick test against the socket buffers for SB_UNIX, since that
1092	 * would have been set by unp_attach() at socket creation time.
1093	 */
1094	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1095	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
1096		socket_unlock(so, 1);
1097		return (EOPNOTSUPP);
1098	}
1099	retval = sosetdefunct(p, so, level, TRUE);
1100	if (retval == 0)
1101		retval = sodefunct(p, so, level);
1102	socket_unlock(so, 1);
1103	return (retval);
1104}
1105
1106static void
1107sock_setupcalls_common(socket_t sock, sock_upcall rcallback, void *rcontext,
1108    sock_upcall wcallback, void *wcontext)
1109{
1110	if (rcallback != NULL) {
1111		sock->so_rcv.sb_flags |= SB_UPCALL;
1112		sock->so_rcv.sb_upcall = rcallback;
1113		sock->so_rcv.sb_upcallarg = rcontext;
1114	} else {
1115		sock->so_rcv.sb_flags &= ~SB_UPCALL;
1116		sock->so_rcv.sb_upcall = NULL;
1117		sock->so_rcv.sb_upcallarg = NULL;
1118	}
1119
1120	if (wcallback != NULL) {
1121		sock->so_snd.sb_flags |= SB_UPCALL;
1122		sock->so_snd.sb_upcall = wcallback;
1123		sock->so_snd.sb_upcallarg = wcontext;
1124	} else {
1125		sock->so_snd.sb_flags &= ~SB_UPCALL;
1126		sock->so_snd.sb_upcall = NULL;
1127		sock->so_snd.sb_upcallarg = NULL;
1128	}
1129}
1130
1131errno_t
1132sock_setupcall(socket_t sock, sock_upcall callback, void *context)
1133{
1134	if (sock == NULL)
1135		return (EINVAL);
1136
1137	/*
1138	 * Note that we don't wait for any in progress upcall to complete.
1139	 * On embedded, sock_setupcall() causes both read and write
1140	 * callbacks to be set; on desktop, only read callback is set
1141	 * to maintain legacy KPI behavior.
1142	 *
1143	 * The newer sock_setupcalls() KPI should be used instead to set
1144	 * the read and write callbacks and their respective parameters.
1145	 */
1146	socket_lock(sock, 1);
1147	sock_setupcalls_common(sock, callback, context, NULL, NULL);
1148	socket_unlock(sock, 1);
1149
1150	return (0);
1151}
1152
1153errno_t
1154sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
1155    sock_upcall wcallback, void *wcontext)
1156{
1157	if (sock == NULL)
1158		return (EINVAL);
1159
1160	/*
1161	 * Note that we don't wait for any in progress upcall to complete.
1162	 */
1163	socket_lock(sock, 1);
1164	sock_setupcalls_common(sock, rcallback, rcontext, wcallback, wcontext);
1165	socket_unlock(sock, 1);
1166
1167	return (0);
1168}
1169
1170errno_t
1171sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
1172    u_int32_t emask)
1173{
1174	if (sock == NULL)
1175		return (EINVAL);
1176
1177	/*
1178	 * Note that we don't wait for any in progress upcall to complete.
1179	 */
1180	socket_lock(sock, 1);
1181	if (ecallback != NULL) {
1182		sock->so_event = ecallback;
1183		sock->so_eventarg = econtext;
1184		sock->so_eventmask = emask;
1185	} else {
1186		sock->so_event = NULL;
1187		sock->so_eventarg = NULL;
1188		sock->so_eventmask = 0;
1189	}
1190	socket_unlock(sock, 1);
1191
1192	return (0);
1193}
1194