/*
 * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Kernel Control domain - allows control connections to kernel
 * controllers and to read/write data.
 *
 * Vincent Lubet, 040506
 * Christophe Allie, 010928
 * Justin C. Walker, 990319
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sys_domain.h>
#include <sys/kern_event.h>
#include <sys/kern_control.h>
#include <sys/kauth.h>
#include <sys/sysctl.h>
#include <net/if_var.h>

#include <mach/vm_types.h>

#include <kern/thread.h>

#ifndef ROUNDUP64
#define	ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
#endif

#ifndef ADVANCE64
#define	ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
#endif
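
/*
 * For example, ROUNDUP64(12) is P2ROUNDUP(12, 8) == 16, so ADVANCE64(p, 12)
 * advances p by 16 bytes; the sysctl handlers below rely on this to keep
 * each variable-length record 8-byte aligned in the output buffer.
 */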

/*
 * Definitions and vars for the protocols we support
 */

#define	CTL_SENDSIZE	(2 * 1024)	/* default buffer size */
#define	CTL_RECVSIZE 	(8 * 1024)	/* default buffer size */

/*
 * Definitions and vars for the controls
 */

static u_int32_t	ctl_maxunit = 65536;
static lck_grp_attr_t	*ctl_lck_grp_attr = 0;
static lck_attr_t	*ctl_lck_attr = 0;
static lck_grp_t	*ctl_lck_grp = 0;
static lck_mtx_t 	*ctl_mtx;

/* all the controllers are chained */
TAILQ_HEAD(kctl_list, kctl) 	ctl_head;


static int ctl_attach(struct socket *, int, struct proc *);
static int ctl_detach(struct socket *);
static int ctl_sofreelastref(struct socket *so);
static int ctl_connect(struct socket *, struct sockaddr *, struct proc *);
static int ctl_disconnect(struct socket *);
static int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data,
			struct ifnet *ifp, struct proc *p);
static int ctl_send(struct socket *, int, struct mbuf *,
	    struct sockaddr *, struct mbuf *, struct proc *);
static int ctl_send_list(struct socket *, int, struct mbuf *,
	    struct sockaddr *, struct mbuf *, struct proc *);
static int ctl_ctloutput(struct socket *, struct sockopt *);
static int ctl_peeraddr(struct socket *so, struct sockaddr **nam);
static int ctl_usr_rcvd(struct socket *so, int flags);

static struct kctl *ctl_find_by_name(const char *);
static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit);

static struct socket *kcb_find_socket(struct kctl *, u_int32_t unit);
static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit);
static void ctl_post_msg(u_int32_t event_code, u_int32_t id);

static int ctl_lock(struct socket *, int, void *);
static int ctl_unlock(struct socket *, int, void *);
static lck_mtx_t * ctl_getlock(struct socket *, int);

static struct pr_usrreqs ctl_usrreqs = {
	.pru_attach =		ctl_attach,
	.pru_connect =		ctl_connect,
	.pru_control =		ctl_ioctl,
	.pru_detach =		ctl_detach,
	.pru_disconnect =	ctl_disconnect,
	.pru_peeraddr =		ctl_peeraddr,
	.pru_rcvd =		ctl_usr_rcvd,
	.pru_send =		ctl_send,
	.pru_send_list =	ctl_send_list,
	.pru_sosend =		sosend,
	.pru_sosend_list =	sosend_list,
	.pru_soreceive =	soreceive,
	.pru_soreceive_list =	soreceive_list,
};

static struct protosw kctlsw[] = {
{
	.pr_type =	SOCK_DGRAM,
	.pr_protocol =	SYSPROTO_CONTROL,
	.pr_flags =	PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD,
	.pr_ctloutput =	ctl_ctloutput,
	.pr_usrreqs =	&ctl_usrreqs,
	.pr_lock =	ctl_lock,
	.pr_unlock =	ctl_unlock,
	.pr_getlock =	ctl_getlock,
},
{
	.pr_type =	SOCK_STREAM,
	.pr_protocol =	SYSPROTO_CONTROL,
	.pr_flags =	PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD,
	.pr_ctloutput =	ctl_ctloutput,
	.pr_usrreqs =	&ctl_usrreqs,
	.pr_lock =	ctl_lock,
	.pr_unlock =	ctl_unlock,
	.pr_getlock =	ctl_getlock,
}
};

__private_extern__ int kctl_reg_list SYSCTL_HANDLER_ARGS;
__private_extern__ int kctl_pcblist SYSCTL_HANDLER_ARGS;
__private_extern__ int kctl_getstat SYSCTL_HANDLER_ARGS;

static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw));

SYSCTL_NODE(_net_systm, OID_AUTO, kctl,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel control family");

struct kctlstat kctlstat;
SYSCTL_PROC(_net_systm_kctl, OID_AUTO, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    kctl_getstat, "S,kctlstat", "");

SYSCTL_PROC(_net_systm_kctl, OID_AUTO, reg_list,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	kctl_reg_list, "S,xkctl_reg", "");

SYSCTL_PROC(_net_systm_kctl, OID_AUTO, pcblist,
	CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
	kctl_pcblist, "S,xkctlpcb", "");

u_int32_t ctl_autorcvbuf_max = 256 * 1024;
SYSCTL_INT(_net_systm_kctl, OID_AUTO, autorcvbufmax,
	CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_autorcvbuf_max, 0, "");

u_int32_t ctl_autorcvbuf_high = 0;
SYSCTL_INT(_net_systm_kctl, OID_AUTO, autorcvbufhigh,
	CTLFLAG_RD | CTLFLAG_LOCKED, &ctl_autorcvbuf_high, 0, "");

u_int32_t ctl_debug = 0;
SYSCTL_INT(_net_systm_kctl, OID_AUTO, debug,
	CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_debug, 0, "");
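
/*
 * These knobs appear under "net.systm.kctl". A userland sketch for
 * reading the stats (illustrative only; error handling omitted):
 *
 *	struct kctlstat st;
 *	size_t len = sizeof (st);
 *	sysctlbyname("net.systm.kctl.stats", &st, &len, NULL, 0);
 */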

/*
 * Install the protosw's for the Kernel Control manager.
 */
__private_extern__ void
kern_control_init(struct domain *dp)
{
	struct protosw *pr;
	int i;

	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
	VERIFY(dp == systemdomain);

	ctl_lck_grp_attr = lck_grp_attr_alloc_init();
	if (ctl_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol",
	    ctl_lck_grp_attr);
	if (ctl_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	ctl_lck_attr = lck_attr_alloc_init();
	if (ctl_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}

	ctl_mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
	if (ctl_mtx == NULL) {
		panic("%s: lck_mtx_alloc_init failed\n", __func__);
		/* NOTREACHED */
	}
	TAILQ_INIT(&ctl_head);

	for (i = 0, pr = &kctlsw[0]; i < kctl_proto_count; i++, pr++)
		net_add_proto(pr, dp, 1);
}

static void
kcb_delete(struct ctl_cb *kcb)
{
	if (kcb != 0) {
		if (kcb->mtx != 0)
			lck_mtx_free(kcb->mtx, ctl_lck_grp);
		FREE(kcb, M_TEMP);
	}
}

/*
 * Kernel Controller user-request functions
 * - the attach function must exist and succeed
 * - detach is not necessary
 * - we need a pcb for the per-socket mutex
 */
static int
ctl_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(proto, p)
	int error = 0;
	struct ctl_cb			*kcb = 0;

	MALLOC(kcb, struct ctl_cb *, sizeof(struct ctl_cb), M_TEMP, M_WAITOK);
	if (kcb == NULL) {
		error = ENOMEM;
		goto quit;
	}
	bzero(kcb, sizeof(struct ctl_cb));

	kcb->mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
	if (kcb->mtx == NULL) {
		error = ENOMEM;
		goto quit;
	}
	kcb->so = so;
	so->so_pcb = (caddr_t)kcb;

quit:
	if (error != 0) {
		kcb_delete(kcb);
		kcb = 0;
	}
	return (error);
}

static int
ctl_sofreelastref(struct socket *so)
{
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;

	so->so_pcb = 0;

	if (kcb != 0) {
		struct kctl		*kctl;
		if ((kctl = kcb->kctl) != 0) {
			lck_mtx_lock(ctl_mtx);
			TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
			kctlstat.kcs_pcbcount--;
			kctlstat.kcs_gencnt++;
			lck_mtx_unlock(ctl_mtx);
		}
		kcb_delete(kcb);
	}
	sofreelastref(so, 1);
	return (0);
}

static int
ctl_detach(struct socket *so)
{
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;

	if (kcb == 0)
		return (0);

	soisdisconnected(so);
	so->so_flags |= SOF_PCBCLEARING;
	return (0);
}


static int
ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	struct kctl		*kctl;
	int			error = 0;
	struct sockaddr_ctl	sa;
	struct ctl_cb		*kcb = (struct ctl_cb *)so->so_pcb;
	struct ctl_cb		*kcb_next = NULL;

	if (kcb == 0)
		panic("ctl_connect so_pcb null\n");

	if (nam->sa_len != sizeof(struct sockaddr_ctl))
		return (EINVAL);

	bcopy(nam, &sa, sizeof(struct sockaddr_ctl));

	lck_mtx_lock(ctl_mtx);
	kctl = ctl_find_by_id_unit(sa.sc_id, sa.sc_unit);
	if (kctl == NULL) {
		lck_mtx_unlock(ctl_mtx);
		return (ENOENT);
	}

	if (((kctl->flags & CTL_FLAG_REG_SOCK_STREAM) &&
			(so->so_type != SOCK_STREAM)) ||
		(!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) &&
			(so->so_type != SOCK_DGRAM))) {
		lck_mtx_unlock(ctl_mtx);
		return (EPROTOTYPE);
	}

	if (kctl->flags & CTL_FLAG_PRIVILEGED) {
		if (p == 0) {
			lck_mtx_unlock(ctl_mtx);
			return (EINVAL);
		}
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
			lck_mtx_unlock(ctl_mtx);
			return (EPERM);
		}
	}

	if ((kctl->flags & CTL_FLAG_REG_ID_UNIT) || sa.sc_unit != 0) {
		if (kcb_find(kctl, sa.sc_unit) != NULL) {
			lck_mtx_unlock(ctl_mtx);
			return (EBUSY);
		}
	} else {
		/* Find an unused unit number, assuming units are in order */
		u_int32_t	unit = 1;

		TAILQ_FOREACH(kcb_next, &kctl->kcb_head, next) {
			if (kcb_next->unit > unit) {
				/* Found a gap, let's fill it in */
				break;
			}
			unit = kcb_next->unit + 1;
			if (unit == ctl_maxunit)
				break;
		}

		if (unit == ctl_maxunit) {
			lck_mtx_unlock(ctl_mtx);
			return (EBUSY);
		}

		sa.sc_unit = unit;
	}

	kcb->unit = sa.sc_unit;
	kcb->kctl = kctl;
	if (kcb_next != NULL) {
		TAILQ_INSERT_BEFORE(kcb_next, kcb, next);
	} else {
		TAILQ_INSERT_TAIL(&kctl->kcb_head, kcb, next);
	}
	kctlstat.kcs_pcbcount++;
	kctlstat.kcs_gencnt++;
	kctlstat.kcs_connections++;
	lck_mtx_unlock(ctl_mtx);

	error = soreserve(so, kctl->sendbufsize, kctl->recvbufsize);
	if (error) {
		printf("%s - soreserve(%llx, %u, %u) error %d\n", __func__,
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			kctl->sendbufsize, kctl->recvbufsize, error);
		goto done;
	}
	soisconnecting(so);

	socket_unlock(so, 0);
	error = (*kctl->connect)(kctl, &sa, &kcb->userdata);
	socket_lock(so, 0);
	if (error)
		goto end;

	soisconnected(so);

end:
	if (error && kctl->disconnect) {
		socket_unlock(so, 0);
		(*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
		socket_lock(so, 0);
	}
done:
	if (error) {
		soisdisconnected(so);
		lck_mtx_lock(ctl_mtx);
		kcb->kctl = 0;
		kcb->unit = 0;
		TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
		kctlstat.kcs_pcbcount--;
		kctlstat.kcs_gencnt++;
		kctlstat.kcs_conn_fail++;
		lck_mtx_unlock(ctl_mtx);
	}
	return (error);
}

static int
ctl_disconnect(struct socket *so)
{
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;

	if (kcb != NULL) {
		struct kctl		*kctl = kcb->kctl;

		if (kctl && kctl->disconnect) {
			socket_unlock(so, 0);
			(*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
			socket_lock(so, 0);
		}

		soisdisconnected(so);

		socket_unlock(so, 0);
		lck_mtx_lock(ctl_mtx);
		kcb->kctl = 0;
		kcb->unit = 0;
		while (kcb->usecount != 0) {
			msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0);
		}
		TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
		kctlstat.kcs_pcbcount--;
		kctlstat.kcs_gencnt++;
		lck_mtx_unlock(ctl_mtx);
		socket_lock(so, 0);
	}
	return (0);
}

static int
ctl_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct ctl_cb 		*kcb = (struct ctl_cb *)so->so_pcb;
	struct kctl		*kctl;
	struct sockaddr_ctl	sc;

	if (kcb == NULL)	/* sanity check */
		return (ENOTCONN);

	if ((kctl = kcb->kctl) == NULL)
		return (EINVAL);

	bzero(&sc, sizeof(struct sockaddr_ctl));
	sc.sc_len = sizeof(struct sockaddr_ctl);
	sc.sc_family = AF_SYSTEM;
	sc.ss_sysaddr = AF_SYS_CONTROL;
	sc.sc_id = kctl->id;
	sc.sc_unit = kcb->unit;

	*nam = dup_sockaddr((struct sockaddr *)&sc, 1);

	return (0);
}

static void
ctl_sbrcv_trim(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;

	if (sb->sb_hiwat > sb->sb_idealsize) {
		u_int32_t diff;
		int32_t trim;

		/*
		 * The difference between the ideal size and the
		 * current size is the upper bound on how much to trim
		 */
		diff = sb->sb_hiwat - sb->sb_idealsize;
		/*
		 * We cannot trim below the outstanding data
		 */
		trim = sb->sb_hiwat - sb->sb_cc;

		trim = imin(trim, (int32_t)diff);

		if (trim > 0) {
			sbreserve(sb, (sb->sb_hiwat - trim));

			if (ctl_debug)
				printf("%s - shrunk to %d\n",
				    __func__, sb->sb_hiwat);
		}
	}
}

static int
ctl_usr_rcvd(struct socket *so, int flags)
{
	struct ctl_cb		*kcb = (struct ctl_cb *)so->so_pcb;
	struct kctl		*kctl;

	if ((kctl = kcb->kctl) == NULL) {
		return (EINVAL);
	}

	if (kctl->rcvd) {
		socket_unlock(so, 0);
		(*kctl->rcvd)(kctl, kcb->unit, kcb->userdata, flags);
		socket_lock(so, 0);
	}

	ctl_sbrcv_trim(so);

	return (0);
}

static int
ctl_send(struct socket *so, int flags, struct mbuf *m,
	struct sockaddr *addr, struct mbuf *control,
	struct proc *p)
{
#pragma unused(addr, p)
	int		error = 0;
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;
	struct kctl	*kctl;

	if (control)
		m_freem(control);

	if (kcb == NULL)	/* sanity check */
		error = ENOTCONN;

	if (error == 0 && (kctl = kcb->kctl) == NULL)
		error = EINVAL;

	if (error == 0 && kctl->send) {
		so_tc_update_stats(m, so, m_get_service_class(m));
		socket_unlock(so, 0);
		error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags);
		socket_lock(so, 0);
	} else {
		m_freem(m);
		if (error == 0)
			error = ENOTSUP;
	}
	if (error != 0)
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail);
	return (error);
}

static int
ctl_send_list(struct socket *so, int flags, struct mbuf *m,
	    __unused struct sockaddr *addr, struct mbuf *control,
	    __unused struct proc *p)
{
	int		error = 0;
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;
	struct kctl	*kctl;

	if (control)
		m_freem_list(control);

	if (kcb == NULL)	/* sanity check */
		error = ENOTCONN;

	if (error == 0 && (kctl = kcb->kctl) == NULL)
		error = EINVAL;

	if (error == 0 && kctl->send_list) {
		struct mbuf *nxt;

		for (nxt = m; nxt != NULL; nxt = nxt->m_nextpkt)
			so_tc_update_stats(nxt, so, m_get_service_class(nxt));

		socket_unlock(so, 0);
		error = (*kctl->send_list)(kctl, kcb->unit, kcb->userdata, m,
			flags);
		socket_lock(so, 0);
	} else if (error == 0 && kctl->send) {
		while (m != NULL && error == 0) {
			struct mbuf *nextpkt = m->m_nextpkt;

			m->m_nextpkt = NULL;
			so_tc_update_stats(m, so, m_get_service_class(m));
			socket_unlock(so, 0);
			error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m,
				flags);
			socket_lock(so, 0);
			m = nextpkt;
		}
		if (m != NULL)
			m_freem_list(m);
	} else {
		m_freem_list(m);
		if (error == 0)
			error = ENOTSUP;
	}
	if (error != 0)
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail);
	return (error);
}

static errno_t
ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize,
	u_int32_t flags)
{
	struct sockbuf *sb = &so->so_rcv;
	u_int32_t space = sbspace(sb);
	errno_t error;

	if ((kctl->flags & CTL_FLAG_REG_CRIT) == 0) {
		if ((u_int32_t) space >= datasize)
			error = 0;
		else
			error = ENOBUFS;
	} else if ((flags & CTL_DATA_CRIT) == 0) {
		/*
		 * Reserve 25% for critical messages
		 */
		if (space < (sb->sb_hiwat >> 2) ||
		    space < datasize)
			error = ENOBUFS;
		else
			error = 0;
	} else {
		u_int32_t autorcvbuf_max;

		/*
		 * Allow overcommit of 25%
		 */
		autorcvbuf_max = min(sb->sb_idealsize + (sb->sb_idealsize >> 2),
			ctl_autorcvbuf_max);

		if ((u_int32_t) space >= datasize) {
			error = 0;
		} else if (tcp_cansbgrow(sb) &&
		    sb->sb_hiwat < autorcvbuf_max) {
			/*
			 * Grow with a little bit of leeway
			 */
			u_int32_t grow = datasize - space + MSIZE;

			if (sbreserve(sb,
			    min((sb->sb_hiwat + grow), autorcvbuf_max)) == 1) {

				if (sb->sb_hiwat > ctl_autorcvbuf_high)
					ctl_autorcvbuf_high = sb->sb_hiwat;

				if (ctl_debug)
					printf("%s - grown to %d\n",
					    __func__, sb->sb_hiwat);
				error = 0;
			} else {
				error = ENOBUFS;
			}
		} else {
			error = ENOBUFS;
		}
	}
	return (error);
}

errno_t
ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags)
{
	struct socket 	*so;
	errno_t 	error = 0;
	struct kctl	*kctl = (struct kctl *)kctlref;
	int		len = m->m_pkthdr.len;

	if (kctl == NULL)
		return (EINVAL);

	so = kcb_find_socket(kctl, unit);

	if (so == NULL)
		return (EINVAL);

	if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
		error = ENOBUFS;
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
		goto bye;
	}
	if ((flags & CTL_DATA_EOR))
		m->m_flags |= M_EOR;

	so_recv_data_stat(so, m, 0);
	if (sbappend(&so->so_rcv, m) != 0) {
		if ((flags & CTL_DATA_NOWAKEUP) == 0)
			sorwakeup(so);
	} else {
		error = ENOBUFS;
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
	}
bye:
	if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT))
		printf("%s - crit data err %d len %d hiwat %d cc: %d\n",
			__func__, error, len,
			so->so_rcv.sb_hiwat, so->so_rcv.sb_cc);

	socket_unlock(so, 1);
	if (error != 0)
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail);

	return (error);
}
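
/*
 * A typical producer path in a kernel controller (illustrative sketch
 * only; ctlref, unit, data and datalen are placeholders supplied by the
 * caller, and error handling is omitted):
 *
 *	mbuf_t m = NULL;
 *	if (mbuf_allocpacket(MBUF_WAITOK, datalen, NULL, &m) == 0) {
 *		mbuf_copyback(m, 0, datalen, data, MBUF_WAITOK);
 *		errno_t err = ctl_enqueuembuf(ctlref, unit, m, CTL_DATA_EOR);
 *		...
 *	}
 */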

/*
 * Compute space occupied by mbuf like sbappendrecord
 */
static int
m_space(struct mbuf *m)
{
	int space = 0;
	struct mbuf *nxt;

	for (nxt = m; nxt != NULL; nxt = nxt->m_next)
		space += nxt->m_len;

	return (space);
}

errno_t
ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list,
	u_int32_t flags, struct mbuf **m_remain)
{
	struct socket *so = NULL;
	errno_t error = 0;
	struct kctl *kctl = (struct kctl *)kctlref;
	struct mbuf *m, *nextpkt;
	int needwakeup = 0;
	int len;

	/*
	 * Need to point at the beginning of the list in case of early exit
	 */
	m = m_list;

	if (kctl == NULL) {
		error = EINVAL;
		goto done;
	}
	if (kctl->flags & CTL_FLAG_REG_SOCK_STREAM) {
		error = EOPNOTSUPP;
		goto done;
	}
	if (flags & CTL_DATA_EOR) {
		error = EINVAL;
		goto done;
	}
	/*
	 * kcb_find_socket takes the socket lock with a reference
	 */
	so = kcb_find_socket(kctl, unit);
	if (so == NULL) {
		error = EINVAL;
		goto done;
	}

	for (m = m_list; m != NULL; m = nextpkt) {
		nextpkt = m->m_nextpkt;

		if (m->m_pkthdr.len == 0)
			printf("%s: %llx m_pkthdr.len is 0\n",
				__func__, (uint64_t)VM_KERNEL_ADDRPERM(m));

		/*
		 * The mbuf is either appended or freed by sbappendrecord()
		 * so it's not reliable from a data standpoint
		 */
		len = m_space(m);
		if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
			error = ENOBUFS;
			OSIncrementAtomic64(
			    (SInt64 *)&kctlstat.kcs_enqueue_fullsock);
			break;
		} else {
			/*
			 * Unlink from the list, m is on its own
			 */
			m->m_nextpkt = NULL;
			so_recv_data_stat(so, m, 0);
			if (sbappendrecord(&so->so_rcv, m) != 0) {
				needwakeup = 1;
			} else {
				/*
				 * We free or return the remaining
				 * mbufs in the list
				 */
				m = nextpkt;
				error = ENOBUFS;
				OSIncrementAtomic64(
				    (SInt64 *)&kctlstat.kcs_enqueue_fullsock);
				break;
			}
		}
	}
	if (needwakeup && (flags & CTL_DATA_NOWAKEUP) == 0)
		sorwakeup(so);

done:
	if (so != NULL) {
		if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT))
			printf("%s - crit data err %d len %d hiwat %d cc: %d\n",
				__func__, error, len,
				so->so_rcv.sb_hiwat, so->so_rcv.sb_cc);

		socket_unlock(so, 1);
	}
	if (m_remain) {
		*m_remain = m;

		if (m != NULL && socket_debug && so != NULL &&
		    (so->so_options & SO_DEBUG)) {
			struct mbuf *n;

			printf("%s m_list %llx\n", __func__,
			    (uint64_t) VM_KERNEL_ADDRPERM(m_list));
			for (n = m; n != NULL; n = n->m_nextpkt)
				printf(" remain %llx m_next %llx\n",
				    (uint64_t) VM_KERNEL_ADDRPERM(n),
				    (uint64_t) VM_KERNEL_ADDRPERM(n->m_next));
		}
	} else {
		if (m != NULL)
			m_freem_list(m);
	}
	if (error != 0)
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail);
	return (error);
}

errno_t
ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len,
    u_int32_t flags)
{
	struct socket 	*so;
	struct mbuf 	*m;
	errno_t		error = 0;
	struct kctl	*kctl = (struct kctl *)kctlref;
	unsigned int 	num_needed;
	struct mbuf 	*n;
	size_t		curlen = 0;

	if (kctlref == NULL)
		return (EINVAL);

	so = kcb_find_socket(kctl, unit);
	if (so == NULL)
		return (EINVAL);

	if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
		error = ENOBUFS;
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
		goto bye;
	}

	num_needed = 1;
	m = m_allocpacket_internal(&num_needed, len, NULL, M_NOWAIT, 1, 0);
	if (m == NULL) {
		printf("ctl_enqueuedata: m_allocpacket_internal(%lu) failed\n",
			len);
		error = ENOMEM;
		goto bye;
	}

	for (n = m; n != NULL; n = n->m_next) {
		size_t mlen = mbuf_maxlen(n);

		if (mlen + curlen > len)
			mlen = len - curlen;
		n->m_len = mlen;
		bcopy((char *)data + curlen, n->m_data, mlen);
		curlen += mlen;
	}
	mbuf_pkthdr_setlen(m, curlen);

	if ((flags & CTL_DATA_EOR))
		m->m_flags |= M_EOR;
	so_recv_data_stat(so, m, 0);
	if (sbappend(&so->so_rcv, m) != 0) {
		if ((flags & CTL_DATA_NOWAKEUP) == 0)
			sorwakeup(so);
	} else {
		error = ENOBUFS;
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
	}

bye:
	if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT))
		printf("%s - crit data err %d len %d hiwat %d cc: %d\n",
			__func__, error, (int)len,
			so->so_rcv.sb_hiwat, so->so_rcv.sb_cc);

	socket_unlock(so, 1);
	if (error != 0)
		OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail);
	return (error);
}
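
/*
 * For small fixed-size payloads ctl_enqueuedata() above is the simplest
 * producer path. A hypothetical event notification (sketch only; ctlref,
 * unit, struct my_event and handle_backpressure() are placeholders):
 *
 *	struct my_event ev = { ... };
 *	errno_t err = ctl_enqueuedata(ctlref, unit, &ev, sizeof (ev),
 *	    CTL_DATA_EOR);
 *	if (err == ENOBUFS)
 *		handle_backpressure();	(client is not reading fast enough)
 */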


errno_t
ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space)
{
	struct kctl		*kctl = (struct kctl *)kctlref;
	struct socket 	*so;
	long avail;

	if (kctlref == NULL || space == NULL)
		return (EINVAL);

	so = kcb_find_socket(kctl, unit);
	if (so == NULL)
		return (EINVAL);

	avail = sbspace(&so->so_rcv);
	*space = (avail < 0) ? 0 : avail;
	socket_unlock(so, 1);

	return (0);
}

errno_t
ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit,
    u_int32_t *difference)
{
	struct kctl		*kctl = (struct kctl *)kctlref;
	struct socket 	*so;

	if (kctlref == NULL || difference == NULL)
		return (EINVAL);

	so = kcb_find_socket(kctl, unit);
	if (so == NULL)
		return (EINVAL);

	if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) {
		*difference = 0;
	} else {
		*difference = (so->so_rcv.sb_lowat - so->so_rcv.sb_cc);
	}
	socket_unlock(so, 1);

	return (0);
}

static int
ctl_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;
	struct kctl	*kctl;
	int 	error = 0;
	void 	*data;
	size_t	len;

	if (sopt->sopt_level != SYSPROTO_CONTROL) {
		return (EINVAL);
	}

	if (kcb == NULL)	/* sanity check */
		return (ENOTCONN);

	if ((kctl = kcb->kctl) == NULL)
		return (EINVAL);

	switch (sopt->sopt_dir) {
		case SOPT_SET:
			if (kctl->setopt == NULL)
				return (ENOTSUP);
			if (sopt->sopt_valsize == 0) {
				data = NULL;
			} else {
				MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
					M_WAITOK);
				if (data == NULL)
					return (ENOMEM);
				error = sooptcopyin(sopt, data,
						sopt->sopt_valsize,
						sopt->sopt_valsize);
			}
			if (error == 0) {
				socket_unlock(so, 0);
				error = (*kctl->setopt)(kcb->kctl, kcb->unit,
							kcb->userdata,
							sopt->sopt_name,
							data,
							sopt->sopt_valsize);
				socket_lock(so, 0);
			}
			FREE(data, M_TEMP);
			break;

		case SOPT_GET:
			if (kctl->getopt == NULL)
				return (ENOTSUP);
			data = NULL;
			if (sopt->sopt_valsize && sopt->sopt_val) {
				MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
					M_WAITOK);
				if (data == NULL)
					return (ENOMEM);
				/*
				 * 4108337 - copy user data in case the
				 * kernel control needs it
				 */
				error = sooptcopyin(sopt, data,
					sopt->sopt_valsize, sopt->sopt_valsize);
			}
			len = sopt->sopt_valsize;
			socket_unlock(so, 0);
			error = (*kctl->getopt)(kcb->kctl, kcb->unit,
					kcb->userdata, sopt->sopt_name,
						data, &len);
			if (data != NULL && len > sopt->sopt_valsize)
				panic_plain("ctl_ctloutput: ctl %s returned "
					"len (%lu) > sopt_valsize (%lu)\n",
						kcb->kctl->name, len,
						sopt->sopt_valsize);
			socket_lock(so, 0);
			if (error == 0) {
				if (data != NULL)
					error = sooptcopyout(sopt, data, len);
				else
					sopt->sopt_valsize = len;
			}
			if (data != NULL)
				FREE(data, M_TEMP);
			break;
	}
	return (error);
}

static int
ctl_ioctl(struct socket *so, u_long cmd, caddr_t data,
	struct ifnet *ifp, struct proc *p)
{
#pragma unused(so, ifp, p)
	int 	error = ENOTSUP;

	switch (cmd) {
		/* get the number of controllers */
		case CTLIOCGCOUNT: {
			struct kctl	*kctl;
			u_int32_t n = 0;

			lck_mtx_lock(ctl_mtx);
			TAILQ_FOREACH(kctl, &ctl_head, next)
				n++;
			lck_mtx_unlock(ctl_mtx);

			bcopy(&n, data, sizeof (n));
			error = 0;
			break;
		}
		case CTLIOCGINFO: {
			struct ctl_info ctl_info;
			struct kctl 	*kctl = 0;
			size_t name_len;

			bcopy(data, &ctl_info, sizeof (ctl_info));
			name_len = strnlen(ctl_info.ctl_name, MAX_KCTL_NAME);

			if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME) {
				error = EINVAL;
				break;
			}
			lck_mtx_lock(ctl_mtx);
			kctl = ctl_find_by_name(ctl_info.ctl_name);
			lck_mtx_unlock(ctl_mtx);
			if (kctl == 0) {
				error = ENOENT;
				break;
			}
			ctl_info.ctl_id = kctl->id;
			bcopy(&ctl_info, data, sizeof (ctl_info));
			error = 0;
			break;
		}

		/* add controls to get list of NKEs */

	}

	return (error);
}
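
/*
 * CTLIOCGINFO above is how userland clients map a control name to its
 * dynamically assigned id before connecting. A minimal client sketch
 * (illustrative only; "com.example.ctl" is a placeholder name and error
 * handling is omitted):
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	bzero(&info, sizeof (info));
 *	strlcpy(info.ctl_name, "com.example.ctl", sizeof (info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);
 *
 *	bzero(&addr, sizeof (addr));
 *	addr.sc_len = sizeof (addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;	(0 lets ctl_connect() pick a free unit)
 *	connect(fd, (struct sockaddr *)&addr, sizeof (addr));
 */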

/*
 * Register/unregister a NKE
 */
errno_t
ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
{
	struct kctl 	*kctl = NULL;
	struct kctl 	*kctl_next = NULL;
	u_int32_t		id = 1;
	size_t			name_len;
	int				is_extended = 0;
	u_quad_t	sbmaxsize;

	if (userkctl == NULL)	/* sanity check */
		return (EINVAL);
	if (userkctl->ctl_connect == NULL)
		return (EINVAL);
	name_len = strlen(userkctl->ctl_name);
	if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME)
		return (EINVAL);

	MALLOC(kctl, struct kctl *, sizeof(*kctl), M_TEMP, M_WAITOK);
	if (kctl == NULL)
		return (ENOMEM);
	bzero((char *)kctl, sizeof(*kctl));

	lck_mtx_lock(ctl_mtx);

	/*
	 * Kernel Control IDs
	 *
	 * CTL_FLAG_REG_ID_UNIT indicates the control ID and unit number are
	 * static. If they do not exist, add them to the list in order. If the
	 * flag is not set, we must find a new unique value. We assume the
	 * list is in order. We find the last item in the list and add one. If
	 * this leads to wrapping the id around, we start at the front of the
	 * list and look for a gap.
	 */

	if ((userkctl->ctl_flags & CTL_FLAG_REG_ID_UNIT) == 0) {
		/* Must dynamically assign an unused ID */

		/* Verify the same name isn't already registered */
		if (ctl_find_by_name(userkctl->ctl_name) != NULL) {
			lck_mtx_unlock(ctl_mtx);
			FREE(kctl, M_TEMP);
			return (EEXIST);
		}

		/* Start with 1 in case the list is empty */
		id = 1;
		kctl_next = TAILQ_LAST(&ctl_head, kctl_list);

		if (kctl_next != NULL) {
			/* List was not empty, add one to the last item */
			id = kctl_next->id + 1;
			kctl_next = NULL;

			/*
			 * If this wrapped the id number, start looking at
			 * the front of the list for an unused id.
			 */
			if (id == 0) {
				/* Find the next unused ID */
				id = 1;

				TAILQ_FOREACH(kctl_next, &ctl_head, next) {
					if (kctl_next->id > id) {
						/* We found a gap */
						break;
					}

					id = kctl_next->id + 1;
				}
			}
		}

		userkctl->ctl_id = id;
		kctl->id = id;
		kctl->reg_unit = -1;
	} else {
		TAILQ_FOREACH(kctl_next, &ctl_head, next) {
			if (kctl_next->id > userkctl->ctl_id)
				break;
		}

		if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) {
			lck_mtx_unlock(ctl_mtx);
			FREE(kctl, M_TEMP);
			return (EEXIST);
		}
		kctl->id = userkctl->ctl_id;
		kctl->reg_unit = userkctl->ctl_unit;
	}

	is_extended = (userkctl->ctl_flags & CTL_FLAG_REG_EXTENDED);

	strlcpy(kctl->name, userkctl->ctl_name, MAX_KCTL_NAME);
	kctl->flags = userkctl->ctl_flags;

	/*
	 * Let the caller know the default send and receive sizes
	 *
	 * rdar://15526688: Limit the send and receive sizes to sb_max
	 * by using the same scaling as sbreserve()
	 */
	sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);

	if (userkctl->ctl_sendsize == 0)
		kctl->sendbufsize = CTL_SENDSIZE;
	else if (userkctl->ctl_sendsize > sbmaxsize)
		kctl->sendbufsize = sbmaxsize;
	else
		kctl->sendbufsize = userkctl->ctl_sendsize;
	userkctl->ctl_sendsize = kctl->sendbufsize;

	if (userkctl->ctl_recvsize == 0)
		kctl->recvbufsize = CTL_RECVSIZE;
	else if (userkctl->ctl_recvsize > sbmaxsize)
		kctl->recvbufsize = sbmaxsize;
	else
		kctl->recvbufsize = userkctl->ctl_recvsize;
	userkctl->ctl_recvsize = kctl->recvbufsize;

	kctl->connect = userkctl->ctl_connect;
	kctl->disconnect = userkctl->ctl_disconnect;
	kctl->send = userkctl->ctl_send;
	kctl->setopt = userkctl->ctl_setopt;
	kctl->getopt = userkctl->ctl_getopt;
	if (is_extended) {
		kctl->rcvd = userkctl->ctl_rcvd;
		kctl->send_list = userkctl->ctl_send_list;
	}

	TAILQ_INIT(&kctl->kcb_head);

	if (kctl_next)
		TAILQ_INSERT_BEFORE(kctl_next, kctl, next);
	else
		TAILQ_INSERT_TAIL(&ctl_head, kctl, next);

	kctlstat.kcs_reg_count++;
	kctlstat.kcs_gencnt++;

	lck_mtx_unlock(ctl_mtx);

	*kctlref = kctl;

	ctl_post_msg(KEV_CTL_REGISTERED, kctl->id);
	return (0);
}
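
/*
 * A registration from a kext typically looks like this (illustrative
 * sketch only; the example_* callbacks and the name are placeholders and
 * error handling is omitted):
 *
 *	struct kern_ctl_reg reg;
 *	kern_ctl_ref ref = NULL;
 *
 *	bzero(&reg, sizeof (reg));
 *	strlcpy(reg.ctl_name, "com.example.ctl", sizeof (reg.ctl_name));
 *	reg.ctl_connect = example_connect;	(required, see check above)
 *	reg.ctl_disconnect = example_disconnect;
 *	reg.ctl_send = example_send;
 *	errno_t err = ctl_register(&reg, &ref);
 */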

errno_t
ctl_deregister(void *kctlref)
{
	struct kctl		*kctl;

	if (kctlref == NULL)	/* sanity check */
		return (EINVAL);

	lck_mtx_lock(ctl_mtx);
	TAILQ_FOREACH(kctl, &ctl_head, next) {
		if (kctl == (struct kctl *)kctlref)
			break;
	}
	if (kctl != (struct kctl *)kctlref) {
		lck_mtx_unlock(ctl_mtx);
		return (EINVAL);
	}
	if (!TAILQ_EMPTY(&kctl->kcb_head)) {
		lck_mtx_unlock(ctl_mtx);
		return (EBUSY);
	}

	TAILQ_REMOVE(&ctl_head, kctl, next);

	kctlstat.kcs_reg_count--;
	kctlstat.kcs_gencnt++;

	lck_mtx_unlock(ctl_mtx);

	ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id);
	FREE(kctl, M_TEMP);
	return (0);
}

/*
 * Must be called with global ctl_mtx lock taken
 */
static struct kctl *
ctl_find_by_name(const char *name)
{
	struct kctl 	*kctl;

	lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH(kctl, &ctl_head, next)
		if (strncmp(kctl->name, name, sizeof(kctl->name)) == 0)
			return (kctl);

	return (NULL);
}

u_int32_t
ctl_id_by_name(const char *name)
{
	u_int32_t	ctl_id = 0;
	struct kctl	*kctl;

	lck_mtx_lock(ctl_mtx);
	kctl = ctl_find_by_name(name);
	if (kctl)
		ctl_id = kctl->id;
	lck_mtx_unlock(ctl_mtx);

	return (ctl_id);
}

errno_t
ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize)
{
	int 		found = 0;
	struct kctl *kctl;

	lck_mtx_lock(ctl_mtx);
	TAILQ_FOREACH(kctl, &ctl_head, next) {
		if (kctl->id == id)
			break;
	}

	if (kctl && kctl->name) {
		if (maxsize > MAX_KCTL_NAME)
			maxsize = MAX_KCTL_NAME;
		strlcpy(out_name, kctl->name, maxsize);
		found = 1;
	}
	lck_mtx_unlock(ctl_mtx);

	return (found ? 0 : ENOENT);
}

/*
 * Must be called with global ctl_mtx lock taken
 */
static struct kctl *
ctl_find_by_id_unit(u_int32_t id, u_int32_t unit)
{
	struct kctl 	*kctl;

	lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH(kctl, &ctl_head, next) {
		if (kctl->id == id && (kctl->flags & CTL_FLAG_REG_ID_UNIT) == 0)
			return (kctl);
		else if (kctl->id == id && kctl->reg_unit == unit)
			return (kctl);
	}
	return (NULL);
}

/*
 * Must be called with kernel controller lock taken
 */
static struct ctl_cb *
kcb_find(struct kctl *kctl, u_int32_t unit)
{
	struct ctl_cb 	*kcb;

	lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH(kcb, &kctl->kcb_head, next)
		if (kcb->unit == unit)
			return (kcb);

	return (NULL);
}

static struct socket *
kcb_find_socket(struct kctl *kctl, u_int32_t unit)
{
	struct socket *so = NULL;
	struct ctl_cb	*kcb;
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_mtx_lock(ctl_mtx);
	kcb = kcb_find(kctl, unit);
	if (kcb && kcb->kctl == kctl) {
		so = kcb->so;
		if (so) {
			kcb->usecount++;
		}
	}
	lck_mtx_unlock(ctl_mtx);

	if (so == NULL) {
		return (NULL);
	}

	socket_lock(so, 1);

	lck_mtx_lock(ctl_mtx);
	if (kcb->kctl == NULL) {
		lck_mtx_unlock(ctl_mtx);
		socket_unlock(so, 1);
		so = NULL;
		lck_mtx_lock(ctl_mtx);
	} else {
		/*
		 * The socket lock history is more useful if we store
		 * the address of the caller.
		 */
		int i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX;

		so->lock_lr[i] = lr_saved;
	}
	kcb->usecount--;
	if (kcb->usecount == 0)
		wakeup((event_t)&kcb->usecount);
	lck_mtx_unlock(ctl_mtx);

	return (so);
}

static void
ctl_post_msg(u_int32_t event_code, u_int32_t id)
{
	struct ctl_event_data  	ctl_ev_data;
	struct kev_msg  		ev_msg;

	lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_NOTOWNED);

	bzero(&ev_msg, sizeof(struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;

	ev_msg.kev_class = KEV_SYSTEM_CLASS;
	ev_msg.kev_subclass = KEV_CTL_SUBCLASS;
	ev_msg.event_code = event_code;

	/* common nke subclass data */
	bzero(&ctl_ev_data, sizeof(ctl_ev_data));
	ctl_ev_data.ctl_id = id;
	ev_msg.dv[0].data_ptr = &ctl_ev_data;
	ev_msg.dv[0].data_length = sizeof(ctl_ev_data);

	ev_msg.dv[1].data_length = 0;

	kev_post_msg(&ev_msg);
}

static int
ctl_lock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (so->so_pcb != NULL) {
		lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx);
	} else {
		panic("ctl_lock: so=%p NO PCB! lr=%p lrh= %s\n",
		    so, lr_saved, solockhistory_nr(so));
		/* NOTREACHED */
	}

	if (so->so_usecount < 0) {
		panic("ctl_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n",
			so, so->so_pcb, lr_saved, so->so_usecount,
			solockhistory_nr(so));
		/* NOTREACHED */
	}

	if (refcount)
		so->so_usecount++;

	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
	return (0);
}

static int
ctl_unlock(struct socket *so, int refcount, void *lr)
{
	void *lr_saved;
	lck_mtx_t *mutex_held;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

#ifdef MORE_KCTLLOCK_DEBUG
	printf("ctl_unlock: so=%llx sopcb=%llx lock=%llx ref=%u lr=%llx\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(so),
	    (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb),
	    (uint64_t)VM_KERNEL_ADDRPERM(((struct ctl_cb *)so->so_pcb)->mtx),
	    so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved));
#endif
	if (refcount)
		so->so_usecount--;

	if (so->so_usecount < 0) {
		panic("ctl_unlock: so=%p usecount=%x lrh= %s\n",
		    so, so->so_usecount, solockhistory_nr(so));
		/* NOTREACHED */
	}
	if (so->so_pcb == NULL) {
		panic("ctl_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n",
			so, so->so_usecount, (void *)lr_saved,
			solockhistory_nr(so));
		/* NOTREACHED */
	}
	mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(mutex_held);

	if (so->so_usecount == 0)
		ctl_sofreelastref(so);

	return (0);
}

static lck_mtx_t *
ctl_getlock(struct socket *so, int locktype)
{
#pragma unused(locktype)
	struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;

	if (so->so_pcb) {
		if (so->so_usecount < 0)
			panic("ctl_getlock: so=%p usecount=%x lrh= %s\n",
			    so, so->so_usecount, solockhistory_nr(so));
		return (kcb->mtx);
	} else {
		panic("ctl_getlock: so=%p NULL NO so_pcb %s\n",
		    so, solockhistory_nr(so));
		return (so->so_proto->pr_domain->dom_mtx);
	}
}

__private_extern__ int
kctl_reg_list SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	struct kctl *kctl;
	size_t item_size = ROUNDUP64(sizeof (struct xkctl_reg));

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	lck_mtx_lock(ctl_mtx);

	n = kctlstat.kcs_reg_count;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * sizeof(struct xkctl_reg);
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kctlstat.kcs_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}
	/*
	 * We are done if there is no registered control
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, kctl = TAILQ_FIRST(&ctl_head);
	    i < n && kctl != NULL;
	    i++, kctl = TAILQ_NEXT(kctl, next)) {
		struct xkctl_reg *xkr = (struct xkctl_reg *)buf;
		struct ctl_cb *kcb;
		u_int32_t pcbcount = 0;

		TAILQ_FOREACH(kcb, &kctl->kcb_head, next)
			pcbcount++;

		bzero(buf, item_size);

		xkr->xkr_len = sizeof(struct xkctl_reg);
		xkr->xkr_kind = XSO_KCREG;
		xkr->xkr_id = kctl->id;
		xkr->xkr_reg_unit = kctl->reg_unit;
		xkr->xkr_flags = kctl->flags;
		xkr->xkr_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl);
		xkr->xkr_recvbufsize = kctl->recvbufsize;
		xkr->xkr_sendbufsize = kctl->sendbufsize;
		xkr->xkr_lastunit = kctl->lastunit;
		xkr->xkr_pcbcount = pcbcount;
		xkr->xkr_connect = (uint64_t)VM_KERNEL_ADDRPERM(kctl->connect);
		xkr->xkr_disconnect =
		    (uint64_t)VM_KERNEL_ADDRPERM(kctl->disconnect);
		xkr->xkr_send = (uint64_t)VM_KERNEL_ADDRPERM(kctl->send);
		xkr->xkr_send_list =
		    (uint64_t)VM_KERNEL_ADDRPERM(kctl->send_list);
		xkr->xkr_setopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->setopt);
		xkr->xkr_getopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->getopt);
		xkr->xkr_rcvd = (uint64_t)VM_KERNEL_ADDRPERM(kctl->rcvd);
		strlcpy(xkr->xkr_name, kctl->name, sizeof(xkr->xkr_name));

		error = SYSCTL_OUT(req, buf, item_size);
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kctlstat.kcs_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
		if (error) {
			goto done;
		}
	}

done:
	lck_mtx_unlock(ctl_mtx);

	if (buf != NULL)
		FREE(buf, M_TEMP);

	return (error);
}

__private_extern__ int
kctl_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	int n, i;
	struct xsystmgen xsg;
	void *buf = NULL;
	struct kctl *kctl;
	size_t item_size = ROUNDUP64(sizeof (struct xkctlpcb)) +
		ROUNDUP64(sizeof (struct xsocket_n)) +
		2 * ROUNDUP64(sizeof (struct xsockbuf_n)) +
		ROUNDUP64(sizeof (struct xsockstat_n));

	buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	lck_mtx_lock(ctl_mtx);

	n = kctlstat.kcs_pcbcount;

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = (n + n/8) * item_size;
		goto done;
	}
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	bzero(&xsg, sizeof (xsg));
	xsg.xg_len = sizeof (xsg);
	xsg.xg_count = n;
	xsg.xg_gen = kctlstat.kcs_gencnt;
	xsg.xg_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
	if (error) {
		goto done;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	for (i = 0, kctl = TAILQ_FIRST(&ctl_head);
	    i < n && kctl != NULL;
	    kctl = TAILQ_NEXT(kctl, next)) {
		struct ctl_cb *kcb;

		for (kcb = TAILQ_FIRST(&kctl->kcb_head);
		    i < n && kcb != NULL;
		    i++, kcb = TAILQ_NEXT(kcb, next)) {
			struct xkctlpcb *xk = (struct xkctlpcb *)buf;
			struct xsocket_n *xso = (struct xsocket_n *)
				ADVANCE64(xk, sizeof (*xk));
			struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
				ADVANCE64(xso, sizeof (*xso));
			struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
				ADVANCE64(xsbrcv, sizeof (*xsbrcv));
			struct xsockstat_n *xsostats = (struct xsockstat_n *)
				ADVANCE64(xsbsnd, sizeof (*xsbsnd));

			bzero(buf, item_size);

			xk->xkp_len = sizeof(struct xkctlpcb);
			xk->xkp_kind = XSO_KCB;
			xk->xkp_unit = kcb->unit;
			xk->xkp_kctpcb = (uint64_t)VM_KERNEL_ADDRPERM(kcb);
			xk->xkp_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl);
			xk->xkp_kctlid = kctl->id;
			strlcpy(xk->xkp_kctlname, kctl->name,
			    sizeof(xk->xkp_kctlname));

			sotoxsocket_n(kcb->so, xso);
			sbtoxsockbuf_n(kcb->so ?
				&kcb->so->so_rcv : NULL, xsbrcv);
			sbtoxsockbuf_n(kcb->so ?
				&kcb->so->so_snd : NULL, xsbsnd);
			sbtoxsockstat_n(kcb->so, xsostats);

			error = SYSCTL_OUT(req, buf, item_size);
		}
	}

	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xsg, sizeof (xsg));
		xsg.xg_len = sizeof (xsg);
		xsg.xg_count = n;
		xsg.xg_gen = kctlstat.kcs_gencnt;
		xsg.xg_sogen = so_gencnt;
		error = SYSCTL_OUT(req, &xsg, sizeof (xsg));
		if (error) {
			goto done;
		}
	}

done:
	lck_mtx_unlock(ctl_mtx);

	return (error);
}

__private_extern__ int
kctl_getstat SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	lck_mtx_lock(ctl_mtx);

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct kctlstat);
		goto done;
	}

	error = SYSCTL_OUT(req, &kctlstat,
	    MIN(sizeof(struct kctlstat), req->oldlen));
done:
	lck_mtx_unlock(ctl_mtx);
	return (error);
}