/*
 * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define __KPI__
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/uio_internal.h>
#include <kern/lock.h>
#include <netinet/in.h>
#include <libkern/OSAtomic.h>

extern int soclose_locked(struct socket *so);
extern void soclose_wait_locked(struct socket *so);
extern int so_isdstlocal(struct socket *so);

errno_t sock_send_internal(
	socket_t			sock,
	const struct msghdr	*msg,
	mbuf_t				data,
	int					flags,
	size_t				*sentlen);

typedef	void	(*so_upcall)(struct socket *, caddr_t, int);


errno_t
sock_accept(
	socket_t		sock,
	struct sockaddr	*from,
	int				fromlen,
	int				flags,
	sock_upcall		callback,
	void*			cookie,
	socket_t		*new_sock)
{
	struct sockaddr *sa;
	struct socket *new_so;
	lck_mtx_t *mutex_held;
	int	dosocklock;
	errno_t	error = 0;

	if (sock == NULL || new_sock == NULL) return EINVAL;
	socket_lock(sock, 1);
	if ((sock->so_options & SO_ACCEPTCONN) == 0) {
		socket_unlock(sock, 1);
		return EINVAL;
	}
	if ((flags & ~(MSG_DONTWAIT)) != 0) {
		socket_unlock(sock, 1);
		return ENOTSUP;
	}
	if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
		sock->so_comp.tqh_first == NULL) {
		socket_unlock(sock, 1);
		return EWOULDBLOCK;
	}

	if (sock->so_proto->pr_getlock != NULL) {
		mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
		dosocklock = 1;
	}
	else {
		mutex_held = sock->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
		if (sock->so_state & SS_CANTRCVMORE) {
			sock->so_error = ECONNABORTED;
			break;
		}
		error = msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH, "sock_accept", NULL);
		if (error) {
			socket_unlock(sock, 1);
			return (error);
		}
	}
	if (sock->so_error) {
		error = sock->so_error;
		sock->so_error = 0;
		socket_unlock(sock, 1);
		return (error);
	}

	new_so = TAILQ_FIRST(&sock->so_comp);
	TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
	sock->so_qlen--;

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (new_so->so_filt != NULL) {
		/*
		 * Temporarily drop the listening socket's lock before we
		 * hand off control over to the socket filter(s), but keep
		 * a reference so that it won't go away.  We'll grab it
		 * again once we're done with the filter(s).
		 */
		socket_unlock(sock, 0);
		if ((error = soacceptfilter(new_so)) != 0) {
			/* Drop reference on listening socket */
			sodereference(sock);
			return (error);
		}
		socket_lock(sock, 0);
	}

	if (dosocklock)	{
		lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0),
		    LCK_MTX_ASSERT_NOTOWNED);
		socket_lock(new_so, 1);
	}

	new_so->so_state &= ~SS_COMP;
	new_so->so_head = NULL;
	(void) soacceptlock(new_so, &sa, 0);

	socket_unlock(sock, 1);	/* release the head */

	if (callback) {
		new_so->so_upcall = (so_upcall) callback;
		new_so->so_upcallarg = cookie;
		new_so->so_rcv.sb_flags |= SB_UPCALL;
#if CONFIG_SOWUPCALL
		new_so->so_snd.sb_flags |= SB_UPCALL;
#endif
	}

	if (sa && from)
	{
		if (fromlen > sa->sa_len) fromlen = sa->sa_len;
		memcpy(from, sa, fromlen);
	}
	if (sa) FREE(sa, M_SONAME);

	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (new_so->so_flags & SOF_DEFUNCT) {
		(void) sodefunct(current_proc(), new_so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
	*new_sock = new_so;
	if (dosocklock)
		socket_unlock(new_so, 1);
	return error;
}
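
/*
 * Illustrative sketch only (not compiled): one way a kext might pull a single
 * pre-accepted connection off a listening kernel socket without blocking.
 * The helper name kn_accept_one, and the assumption that 'listener' was
 * created with sock_socket(), bound with sock_bind() and marked listening
 * with sock_listen(), are made up for this example.
 */
#if 0
static errno_t
kn_accept_one(socket_t listener, socket_t *out)
{
	struct sockaddr_in peer;
	errno_t err;

	/* MSG_DONTWAIT: fail with EWOULDBLOCK instead of sleeping in msleep() */
	err = sock_accept(listener, (struct sockaddr *)&peer, sizeof (peer),
	    MSG_DONTWAIT, NULL, NULL, out);
	return (err);
}
#endif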

errno_t
sock_bind(
	socket_t				sock,
	const struct sockaddr	*to)
{
	int	error = 0;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL)
		return EINVAL;

	if (to->sa_len > sizeof(ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
		if (sa == NULL)
			return ENOBUFS;
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	error = sobind(sock, sa);

	if (sa != NULL && want_free == TRUE)
		FREE(sa, M_SONAME);

	return error;
}

errno_t
sock_connect(
	socket_t				sock,
	const struct sockaddr	*to,
	int						flags)
{
	int	error = 0;
	lck_mtx_t *mutex_held;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL) return EINVAL;

	if (to->sa_len > sizeof(ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
			(flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
		if (sa == NULL)
			return ENOBUFS;
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	socket_lock(sock, 1);

	if ((sock->so_state & SS_ISCONNECTING) &&
		((sock->so_state & SS_NBIO) != 0 ||
		 (flags & MSG_DONTWAIT) != 0)) {
		error = EALREADY;
		goto out;
	}
	error = soconnectlock(sock, sa, 0);
	if (!error) {
		if ((sock->so_state & SS_ISCONNECTING) &&
			((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
			error = EINPROGRESS;
			goto out;
		}

		if (sock->so_proto->pr_getlock != NULL)
			mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
		else
			mutex_held = sock->so_proto->pr_domain->dom_mtx;

		while ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
			error = msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH,
				"sock_connect", NULL);
			if (error)
				break;
		}

		if (error == 0) {
			error = sock->so_error;
			sock->so_error = 0;
		}
	}
	else {
		sock->so_state &= ~SS_ISCONNECTING;
	}
out:
	socket_unlock(sock, 1);

	if (sa != NULL && want_free == TRUE)
		FREE(sa, M_SONAME);

	return error;
}

errno_t
sock_connectwait(
	socket_t				sock,
	const struct timeval	*tv)
{
	lck_mtx_t * mutex_held;
	errno_t	retval = 0;
	struct timespec ts;

	socket_lock(sock, 1);

	// Check if we're already connected or if we've already errored out
	if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error) {
		if (sock->so_error) {
			retval = sock->so_error;
			sock->so_error = 0;
		}
		else {
			if ((sock->so_state & SS_ISCONNECTED) != 0)
				retval = 0;
			else
				retval = EINVAL;
		}
		goto done;
	}

	// Translation from timeval to hertz, copied from the SO_RCVTIMEO handling
	if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
		tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
		retval = EDOM;
		goto done;
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = (tv->tv_usec * NSEC_PER_USEC);
	if ((ts.tv_sec + (ts.tv_nsec / NSEC_PER_SEC)) / 100 > SHRT_MAX) {
		retval = EDOM;
		goto done;
	}

	if (sock->so_proto->pr_getlock != NULL)
		mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
	else
		mutex_held = sock->so_proto->pr_domain->dom_mtx;

	msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK, "sock_connectwait", &ts);

	// Check if we're still waiting to connect
	if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
		retval = EINPROGRESS;
		goto done;
	}

	if (sock->so_error) {
		retval = sock->so_error;
		sock->so_error = 0;
	}

done:
	socket_unlock(sock, 1);
	return retval;
}
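
/*
 * Illustrative sketch only (not compiled): starting a connect without
 * blocking and then bounding the wait with sock_connectwait().  The helper
 * name kn_connect_with_timeout and its parameters are assumptions made for
 * the example.
 */
#if 0
static errno_t
kn_connect_with_timeout(socket_t so, struct sockaddr *to, int seconds)
{
	struct timeval tv;
	errno_t err;

	/* MSG_DONTWAIT makes sock_connect() return EINPROGRESS immediately */
	err = sock_connect(so, to, MSG_DONTWAIT);
	if (err == EINPROGRESS) {
		tv.tv_sec = seconds;
		tv.tv_usec = 0;
		/* 0 on success, EINPROGRESS if still connecting at the deadline */
		err = sock_connectwait(so, &tv);
	}
	return (err);
}
#endif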

errno_t
sock_nointerrupt(
	socket_t	sock,
	int			on)
{
	socket_lock(sock, 1);

	if (on) {
		sock->so_rcv.sb_flags |= SB_NOINTR; // This isn't safe
		sock->so_snd.sb_flags |= SB_NOINTR; // This isn't safe
	}
	else {
		sock->so_rcv.sb_flags &= ~SB_NOINTR; // This isn't safe
		sock->so_snd.sb_flags &= ~SB_NOINTR; // This isn't safe
	}

	socket_unlock(sock, 1);

	return 0;
}

errno_t
sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
{
	int error;
	struct sockaddr	*sa = NULL;

	if (sock == NULL || peername == NULL || peernamelen < 0)
		return (EINVAL);

	socket_lock(sock, 1);
	if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
		socket_unlock(sock, 1);
		return (ENOTCONN);
	}
	error = sogetaddr_locked(sock, &sa, 1);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (peernamelen > sa->sa_len)
			peernamelen = sa->sa_len;
		memcpy(peername, sa, peernamelen);
		FREE(sa, M_SONAME);
	}
	return (error);
}

errno_t
sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
{
	int error;
	struct sockaddr	*sa = NULL;

	if (sock == NULL || sockname == NULL || socknamelen < 0)
		return (EINVAL);

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, &sa, 0);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (socknamelen > sa->sa_len)
			socknamelen = sa->sa_len;
		memcpy(sockname, sa, socknamelen);
		FREE(sa, M_SONAME);
	}
	return (error);
}

__private_extern__ int
sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
{
	int error;

	if (so == NULL || psa == NULL)
		return (EINVAL);

	*psa = NULL;
	error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
	    so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);

	if (error == 0 && *psa == NULL) {
		error = ENOMEM;
	} else if (error != 0 && *psa != NULL) {
		FREE(*psa, M_SONAME);
		*psa = NULL;
	}
	return (error);
}

errno_t
sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
{
	int error;

	if (sock == NULL || psa == NULL)
		return (EINVAL);

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, psa, peer);
	socket_unlock(sock, 1);

	return (error);
}

void
sock_freeaddr(struct sockaddr *sa)
{
	if (sa != NULL)
		FREE(sa, M_SONAME);
}

errno_t
sock_getsockopt(
	socket_t	sock,
	int			level,
	int			optname,
	void		*optval,
	int			*optlen)
{
	int				error = 0;
	struct sockopt	sopt;

	if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL;
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;
	error = sogetopt(sock, &sopt); /* will lock socket */
	if (error == 0) *optlen = sopt.sopt_valsize;
	return error;
}

errno_t
sock_ioctl(
	socket_t	sock,
	unsigned long request,
	void *argp)
{
	return soioctl(sock, request, argp, kernproc); /* will lock socket */
}

errno_t
sock_setsockopt(
	socket_t	sock,
	int			level,
	int			optname,
	const void	*optval,
	int			optlen)
{
	struct sockopt	sopt;

	if (sock == NULL || optval == NULL) return EINVAL;
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;
	return sosetopt(sock, &sopt); /* will lock socket */
}
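
/*
 * Illustrative sketch only (not compiled): setting and reading back a socket
 * option from kernel space.  kn_set_rcvbuf is a hypothetical helper; note
 * that both KPIs take kernel pointers and lock the socket internally.
 */
#if 0
static errno_t
kn_set_rcvbuf(socket_t so, int bytes)
{
	int actual = 0;
	int len = sizeof (actual);
	errno_t err;

	err = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &bytes, sizeof (bytes));
	if (err == 0)
		err = sock_getsockopt(so, SOL_SOCKET, SO_RCVBUF, &actual, &len);
	return (err);
}
#endif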

/*
 * This follows the recommended mappings between DSCP code points and
 * WMM access classes.
 */
static u_int32_t so_tc_from_dscp(u_int8_t dscp);
static u_int32_t
so_tc_from_dscp(u_int8_t dscp)
{
	u_int32_t tc;

	if (dscp >= 0x30 && dscp <= 0x3f)
		tc = SO_TC_VO;
	else if (dscp >= 0x20 && dscp <= 0x2f)
		tc = SO_TC_VI;
	else if (dscp >= 0x08 && dscp <= 0x17)
		tc = SO_TC_BK;
	else
		tc = SO_TC_BE;

	return (tc);
}
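
/*
 * Worked example of the table above: an IP_TOS byte of 0xB8 carries DSCP
 * 0xB8 >> 2 == 0x2E (Expedited Forwarding), which falls in the 0x20-0x2F
 * range and therefore maps to SO_TC_VI; DSCP 0x00 (best effort) falls
 * through to SO_TC_BE.
 */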

errno_t
sock_settclassopt(
	socket_t	sock,
	const void	*optval,
	size_t		optlen) {

	errno_t error = 0;
	struct sockopt sopt;
	int sotc;

	if (sock == NULL || optval == NULL || optlen != sizeof(int)) return EINVAL;

	socket_lock(sock, 1);
	if (!(sock->so_state & SS_ISCONNECTED)) {
		/*
		 * If the socket is not connected then we don't know
		 * whether the destination is on the LAN or not. Skip
		 * setting the traffic class in this case.
		 */
		error = ENOTCONN;
		goto out;
	}

	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL || sock->so_pcb == NULL) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Set the socket traffic class based on the passed DSCP code point
	 * regardless of the scope of the destination
	 */
	sotc = so_tc_from_dscp((*(const int *)optval) >> 2);

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
	sopt.sopt_valsize = sizeof(sotc);
	sopt.sopt_p = kernproc;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_TRAFFIC_CLASS;

	socket_unlock(sock, 0);
	error = sosetopt(sock, &sopt);
	socket_lock(sock, 0);

	if (error != 0) {
		printf("sock_settclassopt: sosetopt SO_TRAFFIC_CLASS failed %d\n", error);
		goto out;
	}

	/*
	 * Check if the destination address is a LAN or link-local address.
	 * We do not want to set the traffic class bits if the destination
	 * is not local.
	 */
	if (!so_isdstlocal(sock)) {
		goto out;
	}

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;

	switch (sock->so_proto->pr_domain->dom_family) {
	case AF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case AF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	socket_unlock(sock, 1);
	return sosetopt(sock, &sopt);
out:
	socket_unlock(sock, 1);
	return error;
}

errno_t
sock_gettclassopt(
	socket_t	sock,
	void		*optval,
	size_t		*optlen) {

	errno_t		error = 0;
	struct sockopt	sopt;

	if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL;

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;

	socket_lock(sock, 1);
	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
		socket_unlock(sock, 1);
		return EINVAL;
	}

	switch (sock->so_proto->pr_domain->dom_family) {
	case AF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case AF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		socket_unlock(sock, 1);
		return EINVAL;
	}
	socket_unlock(sock, 1);
	error = sogetopt(sock, &sopt); /* will lock socket */
	if (error == 0) *optlen = sopt.sopt_valsize;
	return error;
}

errno_t
sock_listen(
	socket_t	sock,
	int			backlog)
{
	if (sock == NULL) return EINVAL;
	return solisten(sock, backlog); /* will lock socket */
}

static errno_t
sock_receive_internal(
	socket_t		sock,
	struct msghdr		*msg,
	mbuf_t			*data,
	int			flags,
	size_t			*recvdlen)
{
	uio_t		auio;
	struct mbuf	*control = NULL;
	int			error = 0;
	int			length = 0;
	struct sockaddr	*fromsa = NULL;
	char		uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];

	if (sock == NULL) return EINVAL;

	auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
	    0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	if (msg && data == NULL) {
		int i;
		struct iovec *tempp = msg->msg_iov;

		for (i = 0; i < msg->msg_iovlen; i++) {
			uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len);
		}
		if (uio_resid(auio) < 0) return EINVAL;
	}
	else {
		uio_setresid(auio, (uio_resid(auio) + *recvdlen));
	}
	length = uio_resid(auio);

	if (recvdlen)
		*recvdlen = 0;

	/* let pru_soreceive handle the socket locking */
	error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
	    data, (msg && msg->msg_control) ? &control : NULL, &flags);
	if (error) goto cleanup;

	if (recvdlen)
		*recvdlen = length - uio_resid(auio);
	if (msg) {
		msg->msg_flags = flags;

		if (msg->msg_name)
		{
			int salen;
			salen = msg->msg_namelen;
			if (msg->msg_namelen > 0 && fromsa != NULL)
			{
				salen = MIN(salen, fromsa->sa_len);
				memcpy(msg->msg_name, fromsa, salen);
			}
		}

		if (msg->msg_control)
		{
			struct mbuf*	m = control;
			u_char*			ctlbuf = msg->msg_control;
			int				clen = msg->msg_controllen;
			msg->msg_controllen = 0;

			while (m && clen > 0)
			{
				unsigned int tocopy;
				if (clen >= m->m_len)
				{
					tocopy = m->m_len;
				}
				else
				{
					msg->msg_flags |= MSG_CTRUNC;
					tocopy = clen;
				}
				memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
				ctlbuf += tocopy;
				clen -= tocopy;
				m = m->m_next;
			}
			msg->msg_controllen = (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
		}
	}

cleanup:
	if (control) m_freem(control);
	if (fromsa) FREE(fromsa, M_SONAME);
	return error;
}

errno_t
sock_receive(
	socket_t		sock,
	struct msghdr	*msg,
	int				flags,
	size_t			*recvdlen)
{
	if ((msg == NULL) ||
		(msg->msg_iovlen < 1) ||
		(msg->msg_iov[0].iov_len == 0) ||
		(msg->msg_iov[0].iov_base == NULL))
		return EINVAL;
	return sock_receive_internal(sock, msg, NULL, flags, recvdlen);
}

errno_t
sock_receivembuf(
	socket_t		sock,
	struct msghdr	*msg,
	mbuf_t			*data,
	int				flags,
	size_t			*recvlen)
{
	if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg &&
		(msg->msg_iov != NULL || msg->msg_iovlen != 0)))
		return EINVAL;
	return sock_receive_internal(sock, msg, data, flags, recvlen);
}
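
/*
 * Illustrative sketch only (not compiled): receiving into a caller-supplied
 * kernel buffer through the iovec/msghdr path.  kn_recv_some is a
 * hypothetical helper name.
 */
#if 0
static errno_t
kn_recv_some(socket_t so, void *buf, size_t buflen, size_t *gotlen)
{
	struct iovec iov;
	struct msghdr msg;

	bzero(&msg, sizeof (msg));
	iov.iov_base = buf;
	iov.iov_len = buflen;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* flags such as MSG_DONTWAIT or MSG_WAITALL may be passed as usual */
	return (sock_receive(so, &msg, 0, gotlen));
}
#endif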

errno_t
sock_send_internal(
	socket_t			sock,
	const struct msghdr	*msg,
	mbuf_t				data,
	int					flags,
	size_t				*sentlen)
{
	uio_t			auio = NULL;
	struct mbuf		*control = NULL;
	int				error = 0;
	int				datalen = 0;
	char			uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];

	if (sock == NULL) {
		error = EINVAL;
		goto errorout;
	}

	if (data == NULL && msg != NULL) {
		struct iovec *tempp = msg->msg_iov;

		auio = uio_createwithbuffer(msg->msg_iovlen, 0, UIO_SYSSPACE, UIO_WRITE,
		    &uio_buf[0], sizeof(uio_buf));
		if (tempp != NULL)
		{
			int i;

			for (i = 0; i < msg->msg_iovlen; i++) {
				uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len);
			}

			if (uio_resid(auio) < 0) {
				error = EINVAL;
				goto errorout;
			}
		}
	}

	if (sentlen)
		*sentlen = 0;

	if (auio)
		datalen = uio_resid(auio);
	else
		datalen = data->m_pkthdr.len;

	if (msg && msg->msg_control)
	{
		if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) {
			error = EINVAL;
			goto errorout;
		}
		if ((size_t)msg->msg_controllen > MLEN) {
			error = EINVAL;
			goto errorout;
		}
		control = m_get(M_NOWAIT, MT_CONTROL);
		if (control == NULL) {
			error = ENOMEM;
			goto errorout;
		}
		memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen);
		control->m_len = msg->msg_controllen;
	}

	error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
	    (struct sockaddr*)msg->msg_name : NULL, auio, data, control, flags);

	/*
	 * Residual data is possible in the case of IO vectors but not
	 * in the mbuf case since the latter is treated as atomic send.
	 * If pru_sosend() consumed a portion of the iovecs data and
	 * the error returned is transient, treat it as success; this
	 * is consistent with sendit() behavior.
	 */
	if (auio != NULL && uio_resid(auio) != datalen &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;

	if (error == 0 && sentlen != NULL) {
		if (auio != NULL)
			*sentlen = datalen - uio_resid(auio);
		else
			*sentlen = datalen;
	}

	return error;

/*
 * In cases where we detect an error before handing off to pru_sosend(),
 * we need to free the mbuf chain if there is one.  sosend (and pru_sosend)
 * will free the mbuf chain themselves if they encounter an error.
 */
errorout:
	if (control)
		m_freem(control);
	if (data)
		m_freem(data);
	if (sentlen)
		*sentlen = 0;
	return error;
}

errno_t
sock_send(
	socket_t			sock,
	const struct msghdr	*msg,
	int					flags,
	size_t				*sentlen)
{
	if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
		return EINVAL;
	return sock_send_internal(sock, msg, NULL, flags, sentlen);
}

errno_t
sock_sendmbuf(
	socket_t			sock,
	const struct msghdr	*msg,
	mbuf_t				data,
	int					flags,
	size_t				*sentlen)
{
	if (data == NULL || (msg &&
		(msg->msg_iov != NULL || msg->msg_iovlen != 0))) {
		if (data)
			m_freem(data);
		return EINVAL;
	}
	return sock_send_internal(sock, msg, data, flags, sentlen);
}
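
/*
 * Illustrative sketch only (not compiled): sending a kernel buffer through
 * the iovec/msghdr path; sock_sendmbuf() would be used instead when the data
 * is already an mbuf chain.  kn_send_buf is a hypothetical helper name.
 */
#if 0
static errno_t
kn_send_buf(socket_t so, void *buf, size_t len, size_t *sentlen)
{
	struct iovec iov;
	struct msghdr msg;

	bzero(&msg, sizeof (msg));
	iov.iov_base = buf;
	iov.iov_len = len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	return (sock_send(so, &msg, 0, sentlen));
}
#endif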

errno_t
sock_shutdown(
	socket_t	sock,
	int			how)
{
	if (sock == NULL) return EINVAL;
	return soshutdown(sock, how);
}


errno_t
sock_socket(
	int				domain,
	int				type,
	int				protocol,
	sock_upcall		callback,
	void*			context,
	socket_t		*new_so)
{
	int	error = 0;
	if (new_so == NULL) return EINVAL;
	/* socreate will create an initial so_count */
	error = socreate(domain, new_so, type, protocol);
	if (error == 0 && callback)
	{
		(*new_so)->so_rcv.sb_flags |= SB_UPCALL;
#if CONFIG_SOWUPCALL
		(*new_so)->so_snd.sb_flags |= SB_UPCALL;
#endif
		(*new_so)->so_upcall = (so_upcall)callback;
		(*new_so)->so_upcallarg = context;
		(*new_so)->last_pid = 0;
		(*new_so)->last_upid = 0;
	}
	return error;
}
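
/*
 * Illustrative sketch only (not compiled): creating a UDP socket with a read
 * upcall registered at creation time.  my_udp_upcall and my_cookie are
 * hypothetical names; the upcall fires on activity on the receive buffer
 * (and, when CONFIG_SOWUPCALL is set, the send buffer).
 */
#if 0
static void my_udp_upcall(socket_t so, void *cookie, int waitf);

static errno_t
kn_make_udp_socket(void *my_cookie, socket_t *out)
{
	return (sock_socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP,
	    my_udp_upcall, my_cookie, out));
}
#endif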

void
sock_close(
	socket_t	sock)
{
	if (sock == NULL) return;
	soclose(sock);
}

/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
void
sock_retain(
	socket_t	sock)
{
	if (sock == NULL) return;
	socket_lock(sock, 1);
	sock->so_retaincnt++;
	sock->so_usecount++;	/* add extra reference for holding the socket */
	socket_unlock(sock, 1);
}

/* Do we want this to be APPLE_PRIVATE API? */
void
sock_release(socket_t sock)
{
	if (sock == NULL)
		return;
	socket_lock(sock, 1);

	if (sock->so_upcallusecount)
		soclose_wait_locked(sock);

	sock->so_retaincnt--;
	if (sock->so_retaincnt < 0)
		panic("sock_release: negative retain count for sock=%p "
		    "cnt=%x\n", sock, sock->so_retaincnt);
	if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2)) {
		/* close socket only if the FD is not holding it */
		soclose_locked(sock);
	} else {
		/* remove extra reference holding the socket */
		sock->so_usecount--;
	}
	socket_unlock(sock, 1);
}

errno_t
sock_setpriv(
	socket_t	sock,
	int			on)
{
	if (sock == NULL) return EINVAL;
	socket_lock(sock, 1);
	if (on)
	{
		sock->so_state |= SS_PRIV;
	}
	else
	{
		sock->so_state &= ~SS_PRIV;
	}
	socket_unlock(sock, 1);
	return 0;
}

int
sock_isconnected(
	socket_t sock)
{
	int retval;
	socket_lock(sock, 1);
	retval = (sock->so_state & SS_ISCONNECTED) != 0;
	socket_unlock(sock, 1);
	return (retval);
}

int
sock_isnonblocking(
	socket_t sock)
{
	int retval;
	socket_lock(sock, 1);
	retval = (sock->so_state & SS_NBIO) != 0;
	socket_unlock(sock, 1);
	return (retval);
}

errno_t
sock_gettype(
	socket_t	sock,
	int			*outDomain,
	int			*outType,
	int			*outProtocol)
{
	socket_lock(sock, 1);
	if (outDomain)
		*outDomain = sock->so_proto->pr_domain->dom_family;
	if (outType)
		*outType = sock->so_type;
	if (outProtocol)
		*outProtocol = sock->so_proto->pr_protocol;
	socket_unlock(sock, 1);
	return 0;
}

/*
 * Return the listening socket of a pre-accepted socket.  It returns the
 * listener (so_head) value of a given socket.  This is intended to be
 * called by a socket filter during a filter attach (sf_attach) callback.
 * The value returned by this routine is safe to be used only in the
 * context of that callback, because we hold the listener's lock across
 * the sflt_initsock() call.
 */
socket_t
sock_getlistener(socket_t sock)
{
	return (sock->so_head);
}

static inline void
sock_set_tcp_stream_priority(socket_t sock)
{
	if ((sock->so_proto->pr_domain->dom_family == AF_INET ||
	    sock->so_proto->pr_domain->dom_family == AF_INET6) &&
	    sock->so_proto->pr_type == SOCK_STREAM) {
		set_tcp_stream_priority(sock);
	}
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
{
	(void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags);
	sock_set_tcp_stream_priority(sock);
}

void
socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
{
	socket_lock(sock, 1);
	socket_set_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
{
	(void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags);
	sock_set_tcp_stream_priority(sock);
}

void
socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags)
{
	socket_lock(sock, 1);
	socket_clear_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}


/*
 * Caller must have ensured socket is valid and won't be going away.
 */
errno_t
socket_defunct(struct proc *p, socket_t so, int level)
{
	errno_t retval;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
		return (EINVAL);

	socket_lock(so, 1);
	/*
	 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
	 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
	 * that this is an implementation artifact of mDNSResponder.  We do
	 * a quick test against the socket buffers for SB_UNIX, since that
	 * would have been set by unp_attach() at socket creation time.
	 */
	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
		socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	retval = sosetdefunct(p, so, level, TRUE);
	if (retval == 0)
		retval = sodefunct(p, so, level);
	socket_unlock(so, 1);
	return (retval);
}

errno_t
sock_setupcall(socket_t sock, sock_upcall callback, void* context)
{
	if (sock == NULL)
		return EINVAL;

	/*
	 * Note that we don't wait for any in progress upcall to complete.
	 */
	socket_lock(sock, 1);

	sock->so_upcall = (so_upcall) callback;
	sock->so_upcallarg = context;
	if (callback) {
		sock->so_rcv.sb_flags |= SB_UPCALL;
#if CONFIG_SOWUPCALL
		sock->so_snd.sb_flags |= SB_UPCALL;
#endif /* CONFIG_SOWUPCALL */
	} else {
		sock->so_rcv.sb_flags &= ~SB_UPCALL;
#if CONFIG_SOWUPCALL
		sock->so_snd.sb_flags &= ~SB_UPCALL;
#endif /* CONFIG_SOWUPCALL */
	}

	socket_unlock(sock, 1);

	return 0;
}