1/*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29/*-
30 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54/*
55 *	@(#)kern_event.c       1.0 (3/31/2000)
56 */
57#include <stdint.h>
58
59#include <sys/param.h>
60#include <sys/systm.h>
61#include <sys/filedesc.h>
62#include <sys/kernel.h>
63#include <sys/proc_internal.h>
64#include <sys/kauth.h>
65#include <sys/malloc.h>
66#include <sys/unistd.h>
67#include <sys/file_internal.h>
68#include <sys/fcntl.h>
69#include <sys/select.h>
70#include <sys/queue.h>
71#include <sys/event.h>
72#include <sys/eventvar.h>
73#include <sys/protosw.h>
74#include <sys/socket.h>
75#include <sys/socketvar.h>
76#include <sys/stat.h>
77#include <sys/sysctl.h>
78#include <sys/uio.h>
79#include <sys/sysproto.h>
80#include <sys/user.h>
81#include <sys/vnode_internal.h>
82#include <string.h>
83#include <sys/proc_info.h>
84
85#include <kern/lock.h>
86#include <kern/clock.h>
87#include <kern/thread_call.h>
88#include <kern/sched_prim.h>
89#include <kern/zalloc.h>
90#include <kern/assert.h>
91
92#include <libkern/libkern.h>
93#include "net/net_str_id.h"
94
95#include <mach/task.h>
96
97#if VM_PRESSURE_EVENTS
98#include <kern/vm_pressure.h>
99#endif
100
101MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
102
103#define KQ_EVENT NULL
104
105static inline void kqlock(struct kqueue *kq);
106static inline void kqunlock(struct kqueue *kq);
107
108static int	kqlock2knoteuse(struct kqueue *kq, struct knote *kn);
109static int	kqlock2knoteusewait(struct kqueue *kq, struct knote *kn);
110static int	kqlock2knotedrop(struct kqueue *kq, struct knote *kn);
111static int	knoteuse2kqlock(struct kqueue *kq, struct knote *kn);
112
113static void 	kqueue_wakeup(struct kqueue *kq, int closed);
114static int 	kqueue_read(struct fileproc *fp, struct uio *uio,
115		    int flags, vfs_context_t ctx);
116static int	kqueue_write(struct fileproc *fp, struct uio *uio,
117		    int flags, vfs_context_t ctx);
118static int	kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
119		    vfs_context_t ctx);
120static int 	kqueue_select(struct fileproc *fp, int which, void *wql,
121		    vfs_context_t ctx);
122static int 	kqueue_close(struct fileglob *fg, vfs_context_t ctx);
123static int 	kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx);
124static int 	kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
125extern int	kqueue_stat(struct fileproc *fp, void  *ub, int isstat64, vfs_context_t ctx);
126
127static struct fileops kqueueops = {
128 	.fo_read = kqueue_read,
129 	.fo_write = kqueue_write,
130 	.fo_ioctl = kqueue_ioctl,
131 	.fo_select = kqueue_select,
132 	.fo_close = kqueue_close,
133 	.fo_kqfilter = kqueue_kqfilter,
134	.fo_drain = kqueue_drain,
135};
136
137static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
138		int nchanges, user_addr_t eventlist, int nevents, int fd,
139		user_addr_t utimeout, unsigned int flags, int32_t *retval);
140static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64);
141static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64);
142char * kevent_description(struct kevent64_s *kevp, char *s, size_t n);
143
144static int	kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, void *data);
145static void	kevent_continue(struct kqueue *kq, void *data, int error);
146static void	kqueue_scan_continue(void *contp, wait_result_t wait_result);
147static int	kqueue_process(struct kqueue *kq, kevent_callback_t callback,
148			       void *data, int *countp, struct proc *p);
149static int	kqueue_begin_processing(struct kqueue *kq);
150static void	kqueue_end_processing(struct kqueue *kq);
151static int	knote_process(struct knote *kn, kevent_callback_t callback,
152			      void *data, struct kqtailq *inprocessp, struct proc *p);
153static void	knote_put(struct knote *kn);
154static int 	knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p);
155static void 	knote_drop(struct knote *kn, struct proc *p);
156static void	knote_activate(struct knote *kn, int);
157static void	knote_deactivate(struct knote *kn);
158static void 	knote_enqueue(struct knote *kn);
159static void 	knote_dequeue(struct knote *kn);
160static struct 	knote *knote_alloc(void);
161static void 	knote_free(struct knote *kn);
162
163static int	filt_fileattach(struct knote *kn);
164static struct filterops file_filtops = {
165        .f_isfd = 1,
166        .f_attach = filt_fileattach,
167};
168
169static void	filt_kqdetach(struct knote *kn);
170static int	filt_kqueue(struct knote *kn, long hint);
171static struct filterops kqread_filtops = {
172        .f_isfd = 1,
173        .f_detach = filt_kqdetach,
174        .f_event = filt_kqueue,
175};
176
177/*
178 * placeholder for not-yet-implemented filters
179 */
180static int	filt_badattach(struct knote *kn);
181static struct filterops bad_filtops = {
182        .f_attach = filt_badattach,
183};
184
185static int	filt_procattach(struct knote *kn);
186static void	filt_procdetach(struct knote *kn);
187static int	filt_proc(struct knote *kn, long hint);
188static struct filterops proc_filtops = {
189        .f_attach = filt_procattach,
190        .f_detach = filt_procdetach,
191        .f_event = filt_proc,
192};
193
194#if VM_PRESSURE_EVENTS
195static int filt_vmattach(struct knote *kn);
196static void filt_vmdetach(struct knote *kn);
197static int filt_vm(struct knote *kn, long hint);
198static struct filterops vm_filtops = {
199	.f_attach = filt_vmattach,
200	.f_detach = filt_vmdetach,
201	.f_event = filt_vm,
202};
203#endif /* VM_PRESSURE_EVENTS */
204
205extern struct filterops fs_filtops;
206
207extern struct filterops sig_filtops;
208
209/* Timer filter */
210static int	filt_timerattach(struct knote *kn);
211static void	filt_timerdetach(struct knote *kn);
212static int	filt_timer(struct knote *kn, long hint);
213static void     filt_timertouch(struct knote *kn, struct kevent64_s *kev,
214		long type);
215static struct filterops timer_filtops = {
216        .f_attach = filt_timerattach,
217        .f_detach = filt_timerdetach,
218        .f_event = filt_timer,
219        .f_touch = filt_timertouch,
220};
221
222/* Helpers */
223
224static void	filt_timerexpire(void *knx, void *param1);
225static int	filt_timervalidate(struct knote *kn);
226static void	filt_timerupdate(struct knote *kn);
227static void	filt_timercancel(struct knote *kn);
228
229#define TIMER_RUNNING		0x1
230#define TIMER_CANCELWAIT	0x2
231
232static lck_mtx_t _filt_timerlock;
233static void	filt_timerlock(void);
234static void	filt_timerunlock(void);
235
236static zone_t	knote_zone;
237
238#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
239
240#if 0
241extern struct filterops aio_filtops;
242#endif
243
244/* Mach portset filter */
245extern struct filterops machport_filtops;
246
247/* User filter */
248static int      filt_userattach(struct knote *kn);
249static void	filt_userdetach(struct knote *kn);
250static int	filt_user(struct knote *kn, long hint);
251static void     filt_usertouch(struct knote *kn, struct kevent64_s *kev,
252		long type);
253static struct filterops user_filtops = {
254        .f_attach = filt_userattach,
255        .f_detach = filt_userdetach,
256        .f_event = filt_user,
257        .f_touch = filt_usertouch,
258};
259
260/*
261 * Table for all system-defined filters.
262 */
263static struct filterops *sysfilt_ops[] = {
264	&file_filtops,			/* EVFILT_READ */
265	&file_filtops,			/* EVFILT_WRITE */
266#if 0
267	&aio_filtops,			/* EVFILT_AIO */
268#else
269	&bad_filtops,			/* EVFILT_AIO */
270#endif
271	&file_filtops,			/* EVFILT_VNODE */
272	&proc_filtops,			/* EVFILT_PROC */
273	&sig_filtops,			/* EVFILT_SIGNAL */
274	&timer_filtops,			/* EVFILT_TIMER */
275	&machport_filtops,		/* EVFILT_MACHPORT */
276	&fs_filtops,			/* EVFILT_FS */
277	&user_filtops,			/* EVFILT_USER */
278	&bad_filtops,			/* unused */
279#if VM_PRESSURE_EVENTS
280	&vm_filtops,			/* EVFILT_VM */
281#else
282	&bad_filtops,			/* EVFILT_VM */
283#endif
284	&file_filtops,			/* EVFILT_SOCK */
285};
286
287/*
288 * kqueue/knote lock attributes and implementations
289 *
290 *	kqueues have locks, while knotes have use counts.
291 *	Most of the knote state is guarded by the object lock;
292 *	the knote "inuse" count and status use the kqueue lock.
293 */
294lck_grp_attr_t * kq_lck_grp_attr;
295lck_grp_t * kq_lck_grp;
296lck_attr_t * kq_lck_attr;
297
298static inline void
299kqlock(struct kqueue *kq)
300{
301	lck_spin_lock(&kq->kq_lock);
302}
303
304static inline void
305kqunlock(struct kqueue *kq)
306{
307	lck_spin_unlock(&kq->kq_lock);
308}
309
310/*
311 * Convert a kq lock to a knote use reference.
312 *
313 *	If the knote is being dropped, we can't get
314 *	a use reference, so just return with it
315 *	still locked.
316 *
317 *	- kq locked at entry
318 *	- unlock on exit if we get the use reference
319 */
320static int
321kqlock2knoteuse(struct kqueue *kq, struct knote *kn)
322{
323	if (kn->kn_status & KN_DROPPING)
324		return 0;
325	kn->kn_inuse++;
326	kqunlock(kq);
327	return 1;
328}
329
330/*
331 * Convert a kq lock to a knote use reference,
332 * but wait for attach and drop events to complete.
333 *
334 *	If the knote is being dropped, we can't get
335 *	a use reference, so just return with it
336 *	still locked.
337 *
338 *	- kq locked at entry
339 *	- kq always unlocked on exit
340 */
341static int
342kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
343{
344	if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) {
345		kn->kn_status |= KN_USEWAIT;
346		wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0);
347		kqunlock(kq);
348		thread_block(THREAD_CONTINUE_NULL);
349		return 0;
350	}
351	kn->kn_inuse++;
352	kqunlock(kq);
353	return 1;
354}
355
356
357/*
358 * Convert from a knote use reference back to kq lock.
359 *
360 *	Drop a use reference and wake any waiters if
361 *	this is the last one.
362 *
363 *	The exit return indicates if the knote is
364 *	still alive - but the kqueue lock is taken
365 *	unconditionally.
366 */
367static int
368knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
369{
370	kqlock(kq);
371	if (--kn->kn_inuse == 0) {
372		if ((kn->kn_status & KN_ATTACHING) != 0) {
373			kn->kn_status &= ~KN_ATTACHING;
374		}
375		if ((kn->kn_status & KN_USEWAIT) != 0) {
376			kn->kn_status &= ~KN_USEWAIT;
377			wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED);
378		}
379	}
380	return ((kn->kn_status & KN_DROPPING) == 0);
381}
382
383/*
384 * Convert a kq lock to a knote drop reference.
385 *
386 *	If the knote is in use, wait for the use count
387 *	to subside.  We first mark our intention to drop
388 *	it - keeping other users from "piling on."
389 *	If we are too late, we have to wait for the
390 *	other drop to complete.
391 *
392 *	- kq locked at entry
393 *	- always unlocked on exit.
394 *	- caller can't hold any locks that would prevent
395 *	  the other dropper from completing.
396 */
397static int
398kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
399{
400	int oktodrop;
401
402	oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0);
403	kn->kn_status |= KN_DROPPING;
404	if (oktodrop) {
405		if (kn->kn_inuse == 0) {
406			kqunlock(kq);
407			return oktodrop;
408		}
409	}
410	kn->kn_status |= KN_USEWAIT;
411	wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0);
412	kqunlock(kq);
413	thread_block(THREAD_CONTINUE_NULL);
414	return oktodrop;
415}
416
417/*
418 * Release a knote use count reference.
419 */
420static void
421knote_put(struct knote *kn)
422{
423	struct kqueue *kq = kn->kn_kq;
424
425	kqlock(kq);
426	if (--kn->kn_inuse == 0) {
427		if ((kn->kn_status & KN_USEWAIT) != 0) {
428			kn->kn_status &= ~KN_USEWAIT;
429			wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED);
430		}
431	}
432	kqunlock(kq);
433}
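
/*
 * Illustrative sketch (not compiled): the typical way the conversion
 * helpers above are combined when a caller needs to invoke a filter
 * routine without holding the kqueue spinlock.  This mirrors the
 * pattern used by kevent_register() and knote_process() below; "kq"
 * and "kn" stand for a kqueue and one of its knotes.
 */
#if 0
	kqlock(kq);
	if (kqlock2knoteuse(kq, kn)) {
		/* kq unlocked; the use count pins kn while we call out */
		int activate = kn->kn_fop->f_event(kn, 0);

		/* reacquire the kq lock; the result says if kn survived */
		if (knoteuse2kqlock(kq, kn) && activate)
			knote_activate(kn, 0);
	}
	/* in both branches the kq lock is held again at this point */
	kqunlock(kq);
#endif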
434
435static int
436filt_fileattach(struct knote *kn)
437{
438
439	return (fo_kqfilter(kn->kn_fp, kn, vfs_context_current()));
440}
441
442#define f_flag f_fglob->fg_flag
443#define f_type f_fglob->fg_type
444#define f_msgcount f_fglob->fg_msgcount
445#define f_cred f_fglob->fg_cred
446#define f_ops f_fglob->fg_ops
447#define f_offset f_fglob->fg_offset
448#define f_data f_fglob->fg_data
449
450static void
451filt_kqdetach(struct knote *kn)
452{
453	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
454
455	kqlock(kq);
456	KNOTE_DETACH(&kq->kq_sel.si_note, kn);
457	kqunlock(kq);
458}
459
460/*ARGSUSED*/
461static int
462filt_kqueue(struct knote *kn, __unused long hint)
463{
464	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
465
466	kn->kn_data = kq->kq_count;
467	return (kn->kn_data > 0);
468}
469
470static int
471filt_procattach(struct knote *kn)
472{
473	struct proc *p;
474
475	assert(PID_MAX < NOTE_PDATAMASK);
476
477	if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0)
478		return(ENOTSUP);
479
480	p = proc_find(kn->kn_id);
481	if (p == NULL) {
482		return (ESRCH);
483	}
484
485	const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS;
486
487	if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits)
488		do {
489			pid_t selfpid = proc_selfpid();
490
491			if (p->p_ppid == selfpid)
492				break;	/* parent => ok */
493
494			if ((p->p_lflag & P_LTRACED) != 0 &&
495			    (p->p_oppid == selfpid))
496				break;	/* parent-in-waiting => ok */
497
498			proc_rele(p);
499			return (EACCES);
500		} while (0);
501
502	proc_klist_lock();
503
504	kn->kn_flags |= EV_CLEAR;	/* automatically set */
505	kn->kn_ptr.p_proc = p;		/* store the proc handle */
506
507	KNOTE_ATTACH(&p->p_klist, kn);
508
509	proc_klist_unlock();
510
511	proc_rele(p);
512
513	return (0);
514}
515
516/*
517 * The knote may be attached to a different process, which may exit,
518 * leaving nothing for the knote to be attached to.  In that case,
519 * the pointer to the process will have already been nulled out.
520 */
521static void
522filt_procdetach(struct knote *kn)
523{
524	struct proc *p;
525
526	proc_klist_lock();
527
528	p = kn->kn_ptr.p_proc;
529	if (p != PROC_NULL) {
530		kn->kn_ptr.p_proc = PROC_NULL;
531		KNOTE_DETACH(&p->p_klist, kn);
532	}
533
534	proc_klist_unlock();
535}
536
537static int
538filt_proc(struct knote *kn, long hint)
539{
540	/* hint is 0 when called from above */
541	if (hint != 0) {
542		u_int event;
543
544		/* ALWAYS CALLED WITH proc_klist_lock when (hint != 0) */
545
546		/*
547		 * mask off extra data
548		 */
549		event = (u_int)hint & NOTE_PCTRLMASK;
550
551		/*
552		 * termination lifecycle events can happen while a debugger
553		 * has reparented a process, in which case notifications
554		 * should be quashed except to the tracing parent. When
555		 * the debugger reaps the child (either via wait4(2) or
556		 * process exit), the child will be reparented to the original
557		 * parent and these knotes re-fired.
558		 */
559		if (event & NOTE_EXIT) {
560			if ((kn->kn_ptr.p_proc->p_oppid != 0)
561				&& (kn->kn_kq->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) {
562				/*
563				 * This knote is not for the current ptrace(2) parent, ignore.
564				 */
565				return 0;
566			}
567		}
568
569		/*
570		 * if the user is interested in this event, record it.
571		 */
572		if (kn->kn_sfflags & event)
573			kn->kn_fflags |= event;
574
575		if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
576			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
577		}
578		if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) {
579			kn->kn_fflags |= NOTE_EXITSTATUS;
580			kn->kn_data = (hint & NOTE_PDATAMASK);
581		}
582		if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) {
583			kn->kn_fflags |= NOTE_RESOURCEEND;
584			kn->kn_data = (hint & NOTE_PDATAMASK);
585		}
586#if CONFIG_EMBEDDED
587		/* If the event is one of the APPSTATE events, remove the rest */
588		if (((event & NOTE_APPALLSTATES) != 0) && ((kn->kn_sfflags & NOTE_APPALLSTATES) != 0)) {
589			/* only one state at a time */
590			kn->kn_fflags &= ~NOTE_APPALLSTATES;
591			kn->kn_fflags |= event;
592		}
593#endif /* CONFIG_EMBEDDED */
594	}
595
596	/* atomic check, no locking needed when called from above */
597	return (kn->kn_fflags != 0);
598}
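
/*
 * Illustrative user-space sketch (not compiled here): watching a child
 * process for exit with EVFILT_PROC.  Assumes <sys/event.h> and a
 * hypothetical "child_pid"; note that NOTE_EXITSTATUS is only permitted
 * for the (possibly tracing) parent, per filt_procattach() above.
 */
#if 0
	struct kevent kev;
	int kq = kqueue();

	EV_SET(&kev, child_pid, EVFILT_PROC, EV_ADD | EV_ONESHOT,
	    NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);	/* register */

	kevent(kq, NULL, 0, &kev, 1, NULL);	/* block until it exits */
	/* kev.fflags now includes NOTE_EXIT; kev.data carries the status */
#endif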
599
600#if VM_PRESSURE_EVENTS
601/*
602 * Virtual memory kevents
603 *
604 * author: Matt Jacobson [matthew_jacobson@apple.com]
605 */
606
607static int
608filt_vmattach(struct knote *kn)
609{
610	/*
611	 * The note will be cleared once the information has been flushed to the client.
612	 * If there is still pressure, we will be re-alerted.
613	 */
614	kn->kn_flags |= EV_CLEAR;
615
616	return vm_knote_register(kn);
617}
618
619static void
620filt_vmdetach(struct knote *kn)
621{
622	vm_knote_unregister(kn);
623}
624
625static int
626filt_vm(struct knote *kn, long hint)
627{
628	/* hint == 0 means this is just an 'is it alive?' check (always true) */
629	if (hint != 0) {
630		const pid_t pid = (pid_t)hint;
631		if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) {
632			kn->kn_fflags |= NOTE_VM_PRESSURE;
633		}
634	}
635
636	return (kn->kn_fflags != 0);
637}
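
/*
 * Illustrative user-space sketch (not compiled here, and only meaningful
 * when the kernel is built with VM_PRESSURE_EVENTS): subscribing to
 * memory-pressure notifications.  The ident is not interpreted by this
 * filter, so 0 is used here by convention (an assumption).
 */
#if 0
	struct kevent kev;
	int kq = kqueue();

	EV_SET(&kev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);

	/* later: kevent() returns with NOTE_VM_PRESSURE set in kev.fflags */
#endif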
638#endif /* VM_PRESSURE_EVENTS */
639
640/*
641 * filt_timervalidate - process data from user
642 *
643 * 	Converts to either interval or deadline format.
644 *
645 *	The saved-data field in the knote contains the
646 *	time value.  The saved filter-flags indicates
647 *	the unit of measurement.
648 *
649 *	After validation, either the saved-data field
650 *	contains the interval in absolute time, or ext[0]
651 *	contains the expected deadline. If that deadline
652 *	is in the past, ext[0] is 0.
653 *
654 *	Returns EINVAL for unrecognized units of time.
655 *
656 *	Timer filter lock is held.
657 *
658 */
659static int
660filt_timervalidate(struct knote *kn)
661{
662	uint64_t multiplier;
663	uint64_t raw;
664
665	switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) {
666	case NOTE_SECONDS:
667		multiplier = NSEC_PER_SEC;
668		break;
669	case NOTE_USECONDS:
670		multiplier = NSEC_PER_USEC;
671		break;
672	case NOTE_NSECONDS:
673		multiplier = 1;
674		break;
675	case 0: /* milliseconds (default) */
676		multiplier = NSEC_PER_SEC / 1000;
677		break;
678	default:
679		return EINVAL;
680	}
681
682	nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw);
683
684	kn->kn_ext[0] = 0;
685	kn->kn_sdata = 0;
686
687	if (kn->kn_sfflags & NOTE_ABSOLUTE) {
688		clock_sec_t seconds;
689		clock_nsec_t nanoseconds;
690		uint64_t now;
691
692		clock_get_calendar_nanotime(&seconds, &nanoseconds);
693		nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC +
694				nanoseconds, &now);
695
696		if (raw < now) {
697			/* time has already passed */
698			kn->kn_ext[0] = 0;
699		} else {
700			raw -= now;
701			clock_absolutetime_interval_to_deadline(raw,
702					&kn->kn_ext[0]);
703		}
704	} else {
705		kn->kn_sdata = raw;
706	}
707
708	return 0;
709}
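
/*
 * Illustrative user-space sketch (not compiled here) of the unit
 * encodings accepted above: with no unit flag the data field is taken
 * as milliseconds; NOTE_SECONDS/NOTE_USECONDS/NOTE_NSECONDS select
 * other units, and NOTE_ABSOLUTE treats the value as a calendar
 * deadline instead of an interval.
 */
#if 0
	struct kevent kev[2];
	int kq = kqueue();

	/* ident 1: fire every 5 seconds */
	EV_SET(&kev[0], 1, EVFILT_TIMER, EV_ADD, NOTE_SECONDS, 5, NULL);

	/* ident 2: fire every 250 milliseconds (default unit) */
	EV_SET(&kev[1], 2, EVFILT_TIMER, EV_ADD, 0, 250, NULL);

	kevent(kq, kev, 2, NULL, 0, NULL);
#endif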
710
711/*
712 * filt_timerupdate - compute the next deadline
713 *
714 * 	Repeating timers store their interval in kn_sdata. Absolute
715 * 	timers have already calculated the deadline, stored in ext[0].
716 *
717 * 	On return, the next deadline (or zero if no deadline is needed)
718 * 	is stored in kn_ext[0].
719 *
720 * 	Timer filter lock is held.
721 */
722static void
723filt_timerupdate(struct knote *kn)
724{
725	/* if there's no interval, deadline is just in kn_ext[0] */
726	if (kn->kn_sdata == 0)
727		return;
728
729	/* if timer hasn't fired before, fire in interval nsecs */
730	if (kn->kn_ext[0] == 0) {
731		clock_absolutetime_interval_to_deadline(kn->kn_sdata,
732				&kn->kn_ext[0]);
733	} else {
734		/*
735		 * If timer has fired before, schedule the next pop
736		 * relative to the last intended deadline.
737		 *
738		 * We could check for whether the deadline has expired,
739		 * but the thread call layer can handle that.
740		 */
741		kn->kn_ext[0] += kn->kn_sdata;
742	}
743}
744
745/*
746 * filt_timerexpire - the timer callout routine
747 *
748 *	Just propagate the timer event into the knote
749 *	filter routine (by going through the knote
750 *	synchronization point).  Pass a hint to
751 *	indicate this is a real event, not just a
752 *	query from above.
753 */
754static void
755filt_timerexpire(void *knx, __unused void *spare)
756{
757	struct klist timer_list;
758	struct knote *kn = knx;
759
760	filt_timerlock();
761
762	kn->kn_hookid &= ~TIMER_RUNNING;
763
764	/* no "object" for timers, so fake a list */
765	SLIST_INIT(&timer_list);
766	SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext);
767	KNOTE(&timer_list, 1);
768
769	/* if someone is waiting for timer to pop */
770	if (kn->kn_hookid & TIMER_CANCELWAIT) {
771		struct kqueue *kq = kn->kn_kq;
772		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook,
773				THREAD_AWAKENED);
774	}
775
776	filt_timerunlock();
777}
778
779/*
780 * Cancel a running timer (or wait for the pop).
781 * Timer filter lock is held.
782 */
783static void
784filt_timercancel(struct knote *kn)
785{
786	struct kqueue *kq = kn->kn_kq;
787	thread_call_t callout = kn->kn_hook;
788	boolean_t cancelled;
789
790	if (kn->kn_hookid & TIMER_RUNNING) {
791		/* cancel the callout if we can */
792		cancelled = thread_call_cancel(callout);
793		if (cancelled) {
794			kn->kn_hookid &= ~TIMER_RUNNING;
795		} else {
796			/* we have to wait for the expire routine.  */
797			kn->kn_hookid |= TIMER_CANCELWAIT;
798			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
799					&kn->kn_hook, THREAD_UNINT, 0);
800			filt_timerunlock();
801			thread_block(THREAD_CONTINUE_NULL);
802			filt_timerlock();
803			assert((kn->kn_hookid & TIMER_RUNNING) == 0);
804		}
805	}
806}
807
808/*
809 * Allocate a thread call for the knote's lifetime, and kick off the timer.
810 */
811static int
812filt_timerattach(struct knote *kn)
813{
814	thread_call_t callout;
815	int error;
816
817	callout = thread_call_allocate(filt_timerexpire, kn);
818	if (NULL == callout)
819		return (ENOMEM);
820
821	filt_timerlock();
822	error = filt_timervalidate(kn);
823	if (error) {
824		filt_timerunlock();
825		return (error);
826	}
827
828	kn->kn_hook = (void*)callout;
829	kn->kn_hookid = 0;
830
831	/* absolute timers are implicitly EV_ONESHOT */
832	if (kn->kn_sfflags & NOTE_ABSOLUTE)
833		kn->kn_flags |= EV_ONESHOT;
834
835	filt_timerupdate(kn);
836	if (kn->kn_ext[0]) {
837		kn->kn_flags |= EV_CLEAR;
838		thread_call_enter_delayed(callout, kn->kn_ext[0]);
839		kn->kn_hookid |= TIMER_RUNNING;
840	} else {
841		/* fake immediate */
842		kn->kn_data = 1;
843	}
844
845	filt_timerunlock();
846	return (0);
847}
848
849/*
850 * Shut down the timer if it's running, and free the callout.
851 */
852static void
853filt_timerdetach(struct knote *kn)
854{
855	thread_call_t callout;
856
857	filt_timerlock();
858
859	callout = (thread_call_t)kn->kn_hook;
860	filt_timercancel(kn);
861
862	filt_timerunlock();
863
864	thread_call_free(callout);
865}
866
867
868
869static int
870filt_timer(struct knote *kn, long hint)
871{
872	int result;
873
874	if (hint) {
875		/* real timer pop -- timer lock held by filt_timerexpire */
876
877		kn->kn_data++;
878
879		if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) &&
880				((kn->kn_flags & EV_ONESHOT) == 0)) {
881
882			/* evaluate next time to fire */
883			filt_timerupdate(kn);
884
885			if (kn->kn_ext[0]) {
886				/* keep the callout and re-arm */
887				thread_call_enter_delayed(kn->kn_hook,
888						kn->kn_ext[0]);
889				kn->kn_hookid |= TIMER_RUNNING;
890			}
891		}
892
893		return 1;
894	}
895
896	/* user-query */
897	filt_timerlock();
898
899	result = (kn->kn_data != 0);
900
901	filt_timerunlock();
902	return result;
903}
904
905
906/*
907 * filt_timertouch - update knote with new user input
908 *
909 * 	Cancel and restart the timer based on new user data. When
910 * 	the user picks up a knote, clear the count of how many timer
911 * 	pops have gone off (in kn_data).
912 */
913static void
914filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type)
915{
916	int error;
917	filt_timerlock();
918
919	switch (type) {
920	case EVENT_REGISTER:
921		/* cancel current call */
922		filt_timercancel(kn);
923
924		/* recalculate deadline */
925		kn->kn_sdata = kev->data;
926		kn->kn_sfflags = kev->fflags;
927
928		error = filt_timervalidate(kn);
929		if (error) {
930			/* no way to report error, so mark it in the knote */
931			kn->kn_flags |= EV_ERROR;
932			kn->kn_data = error;
933			break;
934		}
935
936		/* start timer if necessary */
937		filt_timerupdate(kn);
938		if (kn->kn_ext[0]) {
939			thread_call_enter_delayed(kn->kn_hook, kn->kn_ext[0]);
940			kn->kn_hookid |= TIMER_RUNNING;
941		} else {
942			/* pretend the timer has fired */
943			kn->kn_data = 1;
944		}
945
946		break;
947
948	case EVENT_PROCESS:
949		/* reset the timer pop count in kn_data */
950		*kev = kn->kn_kevent;
951		kev->ext[0] = 0;
952		kn->kn_data = 0;
953		if (kn->kn_flags & EV_CLEAR)
954			kn->kn_fflags = 0;
955		break;
956	default:
957		panic("filt_timertouch() - invalid type (%ld)", type);
958		break;
959	}
960
961	filt_timerunlock();
962}
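
/*
 * Illustrative user-space sketch (not compiled here, continuing the
 * timer sketch after filt_timervalidate()): re-adding an existing
 * EVFILT_TIMER ident takes the EVENT_REGISTER path above, which cancels
 * any pending pop and restarts the timer with the new period; each
 * delivery (EVENT_PROCESS) reports the accumulated pop count in data
 * and then resets it.
 */
#if 0
	/* change the period of timer ident 1 to 30 seconds */
	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, NOTE_SECONDS, 30, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);
#endif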
963
964static void
965filt_timerlock(void)
966{
967	lck_mtx_lock(&_filt_timerlock);
968}
969
970static void
971filt_timerunlock(void)
972{
973	lck_mtx_unlock(&_filt_timerlock);
974}
975
976static int
977filt_userattach(struct knote *kn)
978{
979        /* EVFILT_USER knotes are not attached to anything in the kernel */
980        kn->kn_hook = NULL;
981	if (kn->kn_fflags & NOTE_TRIGGER) {
982		kn->kn_hookid = 1;
983	} else {
984		kn->kn_hookid = 0;
985	}
986        return 0;
987}
988
989static void
990filt_userdetach(__unused struct knote *kn)
991{
992        /* EVFILT_USER knotes are not attached to anything in the kernel */
993}
994
995static int
996filt_user(struct knote *kn, __unused long hint)
997{
998        return kn->kn_hookid;
999}
1000
1001static void
1002filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
1003{
1004        uint32_t ffctrl;
1005        switch (type) {
1006        case EVENT_REGISTER:
1007                if (kev->fflags & NOTE_TRIGGER) {
1008                        kn->kn_hookid = 1;
1009                }
1010
1011                ffctrl = kev->fflags & NOTE_FFCTRLMASK;
1012                kev->fflags &= NOTE_FFLAGSMASK;
1013                switch (ffctrl) {
1014                case NOTE_FFNOP:
1015                        break;
1016                case NOTE_FFAND:
1017                        OSBitAndAtomic(kev->fflags, &kn->kn_sfflags);
1018                        break;
1019                case NOTE_FFOR:
1020                        OSBitOrAtomic(kev->fflags, &kn->kn_sfflags);
1021                        break;
1022                case NOTE_FFCOPY:
1023                        kn->kn_sfflags = kev->fflags;
1024                        break;
1025                }
1026                kn->kn_sdata = kev->data;
1027                break;
1028        case EVENT_PROCESS:
1029                *kev = kn->kn_kevent;
1030                kev->fflags = (volatile UInt32)kn->kn_sfflags;
1031                kev->data = kn->kn_sdata;
1032                if (kn->kn_flags & EV_CLEAR) {
1033			kn->kn_hookid = 0;
1034			kn->kn_data = 0;
1035			kn->kn_fflags = 0;
1036		}
1037                break;
1038        default:
1039                panic("filt_usertouch() - invalid type (%ld)", type);
1040                break;
1041        }
1042}
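
/*
 * Illustrative user-space sketch (not compiled here): a user event armed
 * with EV_ADD|EV_CLEAR and later fired from another thread by posting
 * NOTE_TRIGGER, which is what sets kn_hookid above.
 */
#if 0
	struct kevent kev;
	int kq = kqueue();

	/* arm ident 42 */
	EV_SET(&kev, 42, EVFILT_USER, EV_ADD | EV_CLEAR, NOTE_FFNOP, 0, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);

	/* ... later, from another thread: fire it */
	EV_SET(&kev, 42, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);

	/* the waiting thread now receives ident 42 */
	kevent(kq, NULL, 0, &kev, 1, NULL);
#endif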
1043
1044/*
1045 * JMM - placeholder for not-yet-implemented filters
1046 */
1047static int
1048filt_badattach(__unused struct knote *kn)
1049{
1050	return(ENOTSUP);
1051}
1052
1053
1054struct kqueue *
1055kqueue_alloc(struct proc *p)
1056{
1057	struct filedesc *fdp = p->p_fd;
1058	struct kqueue *kq;
1059
1060	MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK);
1061	if (kq != NULL) {
1062		wait_queue_set_t wqs;
1063
1064		wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST);
1065		if (wqs != NULL) {
1066			bzero(kq, sizeof(struct kqueue));
1067			lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
1068			TAILQ_INIT(&kq->kq_head);
1069			kq->kq_wqs = wqs;
1070			kq->kq_p = p;
1071		} else {
1072			FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
1073		}
1074	}
1075
1076	if (fdp->fd_knlistsize < 0) {
1077		proc_fdlock(p);
1078		if (fdp->fd_knlistsize < 0)
1079			fdp->fd_knlistsize = 0;		/* this process has had a kq */
1080		proc_fdunlock(p);
1081	}
1082
1083	return kq;
1084}
1085
1086
1087/*
1088 * kqueue_dealloc - detach all knotes from a kqueue and free it
1089 *
1090 * 	We walk each list looking for knotes referencing
1091 *	this kqueue.  If we find one, we try to drop it.  But
1092 *	if we fail to get a drop reference, the attempt blocks
1093 *	until the knote is dropped.  So, we can just restart,
1094 *	safe in the assumption that the list will eventually
1095 *	not contain any more references to this kqueue (either
1096 *	we dropped them all, or someone else did).
1097 *
1098 *	Assumes no new events are being added to the kqueue.
1099 *	Nothing locked on entry or exit.
1100 */
1101void
1102kqueue_dealloc(struct kqueue *kq)
1103{
1104	struct proc *p = kq->kq_p;
1105	struct filedesc *fdp = p->p_fd;
1106	struct knote *kn;
1107	int i;
1108
1109	proc_fdlock(p);
1110	for (i = 0; i < fdp->fd_knlistsize; i++) {
1111		kn = SLIST_FIRST(&fdp->fd_knlist[i]);
1112		while (kn != NULL) {
1113			if (kq == kn->kn_kq) {
1114				kqlock(kq);
1115				proc_fdunlock(p);
1116				/* drop it ourselves or wait */
1117				if (kqlock2knotedrop(kq, kn)) {
1118					kn->kn_fop->f_detach(kn);
1119					knote_drop(kn, p);
1120				}
1121				proc_fdlock(p);
1122				/* start over at beginning of list */
1123				kn = SLIST_FIRST(&fdp->fd_knlist[i]);
1124				continue;
1125			}
1126			kn = SLIST_NEXT(kn, kn_link);
1127		}
1128	}
1129	if (fdp->fd_knhashmask != 0) {
1130		for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
1131			kn = SLIST_FIRST(&fdp->fd_knhash[i]);
1132			while (kn != NULL) {
1133				if (kq == kn->kn_kq) {
1134					kqlock(kq);
1135					proc_fdunlock(p);
1136					/* drop it ourselves or wait */
1137					if (kqlock2knotedrop(kq, kn)) {
1138						kn->kn_fop->f_detach(kn);
1139						knote_drop(kn, p);
1140					}
1141					proc_fdlock(p);
1142					/* start over at beginning of list */
1143					kn = SLIST_FIRST(&fdp->fd_knhash[i]);
1144					continue;
1145				}
1146				kn = SLIST_NEXT(kn, kn_link);
1147			}
1148		}
1149	}
1150	proc_fdunlock(p);
1151
1152	/*
1153	 * before freeing the wait queue set for this kqueue,
1154	 * make sure it is unlinked from all its containing (select) sets.
1155	 */
1156	wait_queue_unlink_all((wait_queue_t)kq->kq_wqs);
1157	wait_queue_set_free(kq->kq_wqs);
1158	lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
1159	FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE);
1160}
1161
1162int
1163kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
1164{
1165	struct kqueue *kq;
1166	struct fileproc *fp;
1167	int fd, error;
1168
1169	error = falloc(p, &fp, &fd, vfs_context_current());
1170	if (error) {
1171		return (error);
1172	}
1173
1174	kq = kqueue_alloc(p);
1175	if (kq == NULL) {
1176		fp_free(p, fd, fp);
1177		return (ENOMEM);
1178	}
1179
1180	fp->f_flag = FREAD | FWRITE;
1181	fp->f_type = DTYPE_KQUEUE;
1182	fp->f_ops = &kqueueops;
1183	fp->f_data = (caddr_t)kq;
1184
1185	proc_fdlock(p);
1186	procfdtbl_releasefd(p, fd, NULL);
1187	fp_drop(p, fd, fp, 1);
1188	proc_fdunlock(p);
1189
1190	*retval = fd;
1191	return (error);
1192}
1193
1194static int
1195kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64)
1196{
1197	int advance;
1198	int error;
1199
1200	if (iskev64) {
1201		advance = sizeof(struct kevent64_s);
1202		error = copyin(*addrp, (caddr_t)kevp, advance);
1203	} else if (IS_64BIT_PROCESS(p)) {
1204		struct user64_kevent kev64;
1205		bzero(kevp, sizeof(struct kevent64_s));
1206
1207		advance = sizeof(kev64);
1208		error = copyin(*addrp, (caddr_t)&kev64, advance);
1209		if (error)
1210			return error;
1211		kevp->ident = kev64.ident;
1212		kevp->filter = kev64.filter;
1213		kevp->flags = kev64.flags;
1214		kevp->fflags = kev64.fflags;
1215		kevp->data = kev64.data;
1216		kevp->udata = kev64.udata;
1217	} else {
1218		struct user32_kevent kev32;
1219		bzero(kevp, sizeof(struct kevent64_s));
1220
1221		advance = sizeof(kev32);
1222		error = copyin(*addrp, (caddr_t)&kev32, advance);
1223		if (error)
1224			return error;
1225		kevp->ident = (uintptr_t)kev32.ident;
1226		kevp->filter = kev32.filter;
1227		kevp->flags = kev32.flags;
1228		kevp->fflags = kev32.fflags;
1229		kevp->data = (intptr_t)kev32.data;
1230		kevp->udata = CAST_USER_ADDR_T(kev32.udata);
1231	}
1232	if (!error)
1233		*addrp += advance;
1234	return error;
1235}
1236
1237static int
1238kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64)
1239{
1240	int advance;
1241	int error;
1242
1243	if (iskev64) {
1244		advance = sizeof(struct kevent64_s);
1245		error = copyout((caddr_t)kevp, *addrp, advance);
1246	} else if (IS_64BIT_PROCESS(p)) {
1247		struct user64_kevent kev64;
1248
1249		/*
1250		 * deal with the special case of a user-supplied
1251		 * value of (uintptr_t)-1.
1252		 */
1253		kev64.ident = (kevp->ident == (uintptr_t)-1) ?
1254			   (uint64_t)-1LL : (uint64_t)kevp->ident;
1255
1256		kev64.filter = kevp->filter;
1257		kev64.flags = kevp->flags;
1258		kev64.fflags = kevp->fflags;
1259		kev64.data = (int64_t) kevp->data;
1260		kev64.udata = kevp->udata;
1261		advance = sizeof(kev64);
1262		error = copyout((caddr_t)&kev64, *addrp, advance);
1263	} else {
1264		struct user32_kevent kev32;
1265
1266		kev32.ident = (uint32_t)kevp->ident;
1267		kev32.filter = kevp->filter;
1268		kev32.flags = kevp->flags;
1269		kev32.fflags = kevp->fflags;
1270		kev32.data = (int32_t)kevp->data;
1271		kev32.udata = kevp->udata;
1272		advance = sizeof(kev32);
1273		error = copyout((caddr_t)&kev32, *addrp, advance);
1274	}
1275	if (!error)
1276		*addrp += advance;
1277	return error;
1278}
1279
1280/*
1281 * kevent_continue - continue a kevent syscall after blocking
1282 *
1283 *	assume we inherit a use count on the kq fileglob.
1284 */
1285
1286static void
1287kevent_continue(__unused struct kqueue *kq, void *data, int error)
1288{
1289	struct _kevent *cont_args;
1290	struct fileproc *fp;
1291	int32_t *retval;
1292	int noutputs;
1293	int fd;
1294	struct proc *p = current_proc();
1295
1296	cont_args = (struct _kevent *)data;
1297	noutputs = cont_args->eventout;
1298	retval = cont_args->retval;
1299	fd = cont_args->fd;
1300	fp = cont_args->fp;
1301
1302	fp_drop(p, fd, fp, 0);
1303
1304	/* don't restart after signals... */
1305	if (error == ERESTART)
1306		error = EINTR;
1307	else if (error == EWOULDBLOCK)
1308		error = 0;
1309	if (error == 0)
1310		*retval = noutputs;
1311	unix_syscall_return(error);
1312}
1313
1314/*
1315 * kevent - [syscall] register and wait for kernel events
1316 *
1317 */
1318int
1319kevent(struct proc *p, struct kevent_args *uap, int32_t *retval)
1320{
1321	return kevent_internal(p,
1322			0,
1323			uap->changelist,
1324			uap->nchanges,
1325			uap->eventlist,
1326			uap->nevents,
1327			uap->fd,
1328			uap->timeout,
1329			0, /* no flags from old kevent() call */
1330			retval);
1331}
1332
1333int
1334kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval)
1335{
1336	return kevent_internal(p,
1337			1,
1338			uap->changelist,
1339			uap->nchanges,
1340			uap->eventlist,
1341			uap->nevents,
1342			uap->fd,
1343			uap->timeout,
1344			uap->flags,
1345			retval);
1346}
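
/*
 * Illustrative user-space sketch (not compiled here) of the wide
 * variant: kevent64() uses struct kevent64_s (64-bit ident/data/udata
 * plus ext[]) and takes an extra flags argument, and - as enforced
 * below - a given kqueue must be used with either kevent() or
 * kevent64(), not both.  "fd" is an assumed descriptor; a zero timespec
 * makes the second call a non-blocking poll.
 */
#if 0
	struct kevent64_s kev;
	struct timespec ts = { 0, 0 };
	int kq = kqueue();
	int n;

	EV_SET64(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, 0, 0, 0);
	kevent64(kq, &kev, 1, NULL, 0, 0, NULL);	/* register */

	n = kevent64(kq, NULL, 0, &kev, 1, 0, &ts);	/* poll once */
#endif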
1347
1348static int
1349kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
1350		int nchanges, user_addr_t ueventlist, int nevents, int fd,
1351		user_addr_t utimeout, __unused unsigned int flags,
1352		int32_t *retval)
1353{
1354	struct _kevent *cont_args;
1355	uthread_t ut;
1356	struct kqueue *kq;
1357	struct fileproc *fp;
1358	struct kevent64_s kev;
1359	int error, noutputs;
1360	struct timeval atv;
1361
1362	/* convert timeout to absolute - if we have one */
1363	if (utimeout != USER_ADDR_NULL) {
1364		struct timeval rtv;
1365		if (IS_64BIT_PROCESS(p)) {
1366			struct user64_timespec ts;
1367			error = copyin(utimeout, &ts, sizeof(ts));
1368			if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0)
1369				error = EINVAL;
1370			else
1371				TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1372		} else {
1373			struct user32_timespec ts;
1374			error = copyin(utimeout, &ts, sizeof(ts));
1375			TIMESPEC_TO_TIMEVAL(&rtv, &ts);
1376		}
1377		if (error)
1378			return error;
1379		if (itimerfix(&rtv))
1380			return EINVAL;
1381		getmicrouptime(&atv);
1382		timevaladd(&atv, &rtv);
1383	} else {
1384		atv.tv_sec = 0;
1385		atv.tv_usec = 0;
1386	}
1387
1388	/* get a usecount for the kq itself */
1389	if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
1390		return(error);
1391
1392	/* each kq should only be used for events of one type */
1393	kqlock(kq);
1394	if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) {
1395		if (((iskev64 && (kq->kq_state & KQ_KEV32)) ||
1396			(!iskev64 && (kq->kq_state & KQ_KEV64)))) {
1397			error = EINVAL;
1398			kqunlock(kq);
1399			goto errorout;
1400		}
1401	} else {
1402		kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32);
1403	}
1404	kqunlock(kq);
1405
1406	/* register all the change requests the user provided... */
1407	noutputs = 0;
1408	while (nchanges > 0 && error == 0) {
1409		error = kevent_copyin(&changelist, &kev, p, iskev64);
1410		if (error)
1411			break;
1412
1413		kev.flags &= ~EV_SYSFLAGS;
1414		error = kevent_register(kq, &kev, p);
1415		if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) {
1416			kev.flags = EV_ERROR;
1417			kev.data = error;
1418			error = kevent_copyout(&kev, &ueventlist, p, iskev64);
1419			if (error == 0) {
1420				nevents--;
1421				noutputs++;
1422			}
1423		}
1424		nchanges--;
1425	}
1426
1427	/* store the continuation/completion data in the uthread */
1428	ut = (uthread_t)get_bsdthread_info(current_thread());
1429	cont_args = &ut->uu_kevent.ss_kevent;
1430	cont_args->fp = fp;
1431	cont_args->fd = fd;
1432	cont_args->retval = retval;
1433	cont_args->eventlist = ueventlist;
1434	cont_args->eventcount = nevents;
1435	cont_args->eventout = noutputs;
1436	cont_args->eventsize = iskev64;
1437
1438	if (nevents > 0 && noutputs == 0 && error == 0)
1439		error = kqueue_scan(kq, kevent_callback,
1440				    kevent_continue, cont_args,
1441				    &atv, p);
1442	kevent_continue(kq, cont_args, error);
1443
1444errorout:
1445	fp_drop(p, fd, fp, 0);
1446	return error;
1447}
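
/*
 * Illustrative user-space sketch (not compiled here): how the loop above
 * surfaces per-change results.  With EV_RECEIPT (or on a registration
 * error), the change is echoed back in the eventlist with EV_ERROR set
 * and the errno in data, and the call does not go on to drain pending
 * events.  "fd" and "kq" are assumed.
 */
#if 0
	struct kevent changes[2], results[2];
	int i, n;

	EV_SET(&changes[0], fd, EVFILT_READ, EV_ADD | EV_RECEIPT, 0, 0, NULL);
	EV_SET(&changes[1], fd, EVFILT_WRITE, EV_ADD | EV_RECEIPT, 0, 0, NULL);

	n = kevent(kq, changes, 2, results, 2, NULL);
	for (i = 0; i < n; i++) {
		if ((results[i].flags & EV_ERROR) && results[i].data != 0) {
			/* registration i failed; results[i].data is the errno */
		}
	}
#endif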
1448
1449
1450/*
1451 * kevent_callback - callback for each individual event
1452 *
1453 *	called with nothing locked
1454 *	caller holds a reference on the kqueue
1455 */
1456
1457static int
1458kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp,
1459		void *data)
1460{
1461	struct _kevent *cont_args;
1462	int error;
1463	int iskev64;
1464
1465	cont_args = (struct _kevent *)data;
1466	assert(cont_args->eventout < cont_args->eventcount);
1467
1468	iskev64 = cont_args->eventsize;
1469
1470	/*
1471	 * Copy out the appropriate amount of event data for this user.
1472	 */
1473	error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), iskev64);
1474
1475	/*
1476	 * If there isn't space for additional events, return
1477	 * a harmless error to stop the processing here
1478	 */
1479	if (error == 0 && ++cont_args->eventout == cont_args->eventcount)
1480		error = EWOULDBLOCK;
1481	return error;
1482}
1483
1484/*
1485 * kevent_description - format a description of a kevent for diagnostic output
1486 *
1487 *      called with a 128-byte string buffer
1488 */
1489
1490char *
1491kevent_description(struct kevent64_s *kevp, char *s, size_t n)
1492{
1493        snprintf(s, n,
1494                 "kevent="
1495                 "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
1496                 kevp->ident,
1497                 kevp->filter,
1498                 kevp->flags,
1499                 kevp->fflags,
1500                 kevp->data,
1501                 kevp->udata,
1502		 kevp->ext[0],
1503		 kevp->ext[1]);
1504        return s;
1505}
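
/*
 * Illustrative sketch (not compiled): formatting a kevent into a
 * caller-supplied 128-byte buffer for a diagnostic printf; "kev" is an
 * assumed struct kevent64_s.
 */
#if 0
	char buf[128];

	printf("%s\n", kevent_description(&kev, buf, sizeof(buf)));
#endif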
1506
1507/*
1508 * kevent_register - add a new event to a kqueue
1509 *
1510 *	Creates a mapping between the event source and
1511 *	the kqueue via a knote data structure.
1512 *
1513 *	Because many/most of the event sources are file
1514 *	descriptor related, the knote is linked off
1515 *	the file descriptor table for quick access.
1516 *
1517 *	called with nothing locked
1518 *	caller holds a reference on the kqueue
1519 */
1520
1521int
1522kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc *ctxp)
1523{
1524	struct proc *p = kq->kq_p;
1525	struct filedesc *fdp = p->p_fd;
1526	struct filterops *fops;
1527	struct fileproc *fp = NULL;
1528	struct knote *kn = NULL;
1529	int error = 0;
1530
1531	if (kev->filter < 0) {
1532		if (kev->filter + EVFILT_SYSCOUNT < 0)
1533			return (EINVAL);
1534		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
1535	} else {
1536		/*
1537		 * XXX
1538		 * filter attach routine is responsible for ensuring that
1539		 * the identifier can be attached to it.
1540		 */
1541		printf("unknown filter: %d\n", kev->filter);
1542		return (EINVAL);
1543	}
1544
1545 restart:
1546	/* this iocount needs to be dropped if it is not registered */
1547	proc_fdlock(p);
1548	if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) {
1549		proc_fdunlock(p);
1550		return(error);
1551	}
1552
1553	if (fops->f_isfd) {
1554		/* fd-based knotes are linked off the fd table */
1555		if (kev->ident < (u_int)fdp->fd_knlistsize) {
1556			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
1557				if (kq == kn->kn_kq &&
1558				    kev->filter == kn->kn_filter)
1559					break;
1560		}
1561	} else {
1562		/* hash non-fd knotes here too */
1563		if (fdp->fd_knhashmask != 0) {
1564			struct klist *list;
1565
1566			list = &fdp->fd_knhash[
1567			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
1568			SLIST_FOREACH(kn, list, kn_link)
1569				if (kev->ident == kn->kn_id &&
1570				    kq == kn->kn_kq &&
1571				    kev->filter == kn->kn_filter)
1572					break;
1573		}
1574	}
1575
1576	/*
1577	 * kn now contains the matching knote, or NULL if no match
1578	 */
1579	if (kn == NULL) {
1580		if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) {
1581			kn = knote_alloc();
1582			if (kn == NULL) {
1583				proc_fdunlock(p);
1584				error = ENOMEM;
1585				goto done;
1586			}
1587			kn->kn_fp = fp;
1588			kn->kn_kq = kq;
1589			kn->kn_tq = &kq->kq_head;
1590			kn->kn_fop = fops;
1591			kn->kn_sfflags = kev->fflags;
1592			kn->kn_sdata = kev->data;
1593			kev->fflags = 0;
1594			kev->data = 0;
1595			kn->kn_kevent = *kev;
1596			kn->kn_inuse = 1;  /* for f_attach() */
1597			kn->kn_status = KN_ATTACHING;
1598
1599			/* before anyone can find it */
1600			if (kev->flags & EV_DISABLE)
1601				kn->kn_status |= KN_DISABLED;
1602
1603			error = knote_fdpattach(kn, fdp, p);
1604			proc_fdunlock(p);
1605
1606			if (error) {
1607				knote_free(kn);
1608				goto done;
1609			}
1610
1611			/*
1612			 * apply reference count to knote structure, and
1613			 * do not release it at the end of this routine.
1614			 */
1615			fp = NULL;
1616
1617			error = fops->f_attach(kn);
1618
1619			kqlock(kq);
1620
1621			if (error != 0) {
1622				/*
1623				 * Failed to attach correctly, so drop.
1624				 * All other possible users/droppers
1625				 * have deferred to us.
1626				 */
1627				kn->kn_status |= KN_DROPPING;
1628				kqunlock(kq);
1629				knote_drop(kn, p);
1630				goto done;
1631			} else if (kn->kn_status & KN_DROPPING) {
1632				/*
1633				 * Attach succeeded, but someone else
1634				 * deferred their drop - now we have
1635				 * to do it for them (after detaching).
1636				 */
1637				kqunlock(kq);
1638				kn->kn_fop->f_detach(kn);
1639				knote_drop(kn, p);
1640				goto done;
1641			}
1642			kn->kn_status &= ~KN_ATTACHING;
1643			kqunlock(kq);
1644		} else {
1645			proc_fdunlock(p);
1646			error = ENOENT;
1647			goto done;
1648		}
1649	} else {
1650		/* existing knote - get kqueue lock */
1651		kqlock(kq);
1652		proc_fdunlock(p);
1653
1654		if (kev->flags & EV_DELETE) {
1655			knote_dequeue(kn);
1656			kn->kn_status |= KN_DISABLED;
1657			if (kqlock2knotedrop(kq, kn)) {
1658				kn->kn_fop->f_detach(kn);
1659				knote_drop(kn, p);
1660			}
1661			goto done;
1662		}
1663
1664		/* update status flags for existing knote */
1665		if (kev->flags & EV_DISABLE) {
1666			knote_dequeue(kn);
1667			kn->kn_status |= KN_DISABLED;
1668		} else if (kev->flags & EV_ENABLE) {
1669			kn->kn_status &= ~KN_DISABLED;
1670			if (kn->kn_status & KN_ACTIVE)
1671				knote_enqueue(kn);
1672		}
1673
1674		/*
1675		 * The user may change some filter values after the
1676		 * initial EV_ADD, but doing so will not reset any
1677		 * filters which have already been triggered.
1678		 */
1679		kn->kn_kevent.udata = kev->udata;
1680		if (fops->f_isfd || fops->f_touch == NULL) {
1681			kn->kn_sfflags = kev->fflags;
1682			kn->kn_sdata = kev->data;
1683		}
1684
1685		/*
1686		 * If somebody is in the middle of dropping this
1687		 * knote - go find/insert a new one.  But we have
1688		 * to wait for this one to go away first. Attaches
1689		 * running in parallel may also drop/modify the
1690		 * knote.  Wait for those to complete as well and
1691		 * then start over if we encounter one.
1692		 */
1693		if (!kqlock2knoteusewait(kq, kn)) {
1694			/* kqueue, proc_fdlock both unlocked */
1695			goto restart;
1696		}
1697
1698		/*
1699		 * Call touch routine to notify filter of changes
1700		 * in filter values.
1701		 */
1702		if (!fops->f_isfd && fops->f_touch != NULL)
1703		        fops->f_touch(kn, kev, EVENT_REGISTER);
1704	}
1705	/* still have use ref on knote */
1706
1707	/*
1708	 * If the knote is not marked to always stay enqueued,
1709	 * invoke the filter routine to see if it should be
1710	 * enqueued now.
1711	 */
1712	if ((kn->kn_status & KN_STAYQUEUED) == 0 && kn->kn_fop->f_event(kn, 0)) {
1713		if (knoteuse2kqlock(kq, kn))
1714			knote_activate(kn, 1);
1715		kqunlock(kq);
1716	} else {
1717		knote_put(kn);
1718	}
1719
1720done:
1721	if (fp != NULL)
1722		fp_drop(p, kev->ident, fp, 0);
1723	return (error);
1724}
1725
1726
1727/*
1728 * knote_process - process a triggered event
1729 *
1730 *	Validate that it is really still a triggered event
1731 *	by calling the filter routines (if necessary).  Hold
1732 *	a use reference on the knote to avoid it being detached.
1733 *	If it is still considered triggered, invoke the callback
1734 *	routine provided and move it to the provided inprocess
1735 *	queue.
1736 *
1737 *	caller holds a reference on the kqueue.
1738 *	kqueue locked on entry and exit - but may be dropped
1739 */
1740static int
1741knote_process(struct knote 	*kn,
1742	      kevent_callback_t callback,
1743	      void		*data,
1744	      struct kqtailq	*inprocessp,
1745	      struct proc 	*p)
1746{
1747	struct kqueue *kq = kn->kn_kq;
1748	struct kevent64_s kev;
1749	int touch;
1750	int result;
1751	int error;
1752
1753	/*
1754	 * Determine the kevent state we want to return.
1755	 *
1756	 * Some event states need to be revalidated before returning
1757	 * them, others we take the snapshot at the time the event
1758	 * was enqueued.
1759	 *
1760	 * Events with non-NULL f_touch operations must be touched.
1761	 * Triggered events must fill in kev for the callback.
1762	 *
1763	 * Convert our lock to a use-count and call the event's
1764	 * filter routine(s) to update.
1765	 */
1766	if ((kn->kn_status & KN_DISABLED) != 0) {
1767		result = 0;
1768		touch = 0;
1769	} else {
1770		int revalidate;
1771
1772		result = 1;
1773		revalidate = ((kn->kn_status & KN_STAYQUEUED) != 0 ||
1774			      (kn->kn_flags & EV_ONESHOT) == 0);
1775		touch =	(!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL);
1776
1777		if (revalidate || touch) {
1778			if (revalidate)
1779				knote_deactivate(kn);
1780
1781			/* call the filter/touch routines with just a ref */
1782			if (kqlock2knoteuse(kq, kn)) {
1783
1784				/* if we have to revalidate, call the filter */
1785				if (revalidate) {
1786					result = kn->kn_fop->f_event(kn, 0);
1787				}
1788
1789				/* capture the kevent data - using touch if specified */
1790				if (result && touch) {
1791					kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS);
1792				}
1793
1794				/* convert back to a kqlock - bail if the knote went away */
1795				if (!knoteuse2kqlock(kq, kn)) {
1796					return EJUSTRETURN;
1797				} else if (result) {
1798					/* if revalidated as alive, make sure it's active */
1799					if (!(kn->kn_status & KN_ACTIVE)) {
1800						knote_activate(kn, 0);
1801					}
1802
1803					/* capture all events that occurred during filter */
1804					if (!touch) {
1805						kev = kn->kn_kevent;
1806					}
1807
1808				} else if ((kn->kn_status & KN_STAYQUEUED) == 0) {
1809					/* was already dequeued, so just bail on this one */
1810					return EJUSTRETURN;
1811				}
1812			} else {
1813				return EJUSTRETURN;
1814			}
1815		} else {
1816			kev = kn->kn_kevent;
1817		}
1818	}
1819
1820	/* move knote onto inprocess queue */
1821	assert(kn->kn_tq == &kq->kq_head);
1822	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1823	kn->kn_tq = inprocessp;
1824	TAILQ_INSERT_TAIL(inprocessp, kn, kn_tqe);
1825
1826	/*
1827	 * Determine how to dispatch the knote for future event handling.
1828	 * Not fired: just return (do not invoke the callback).
1829	 * One-shot: deactivate it.
1830	 * Clear: deactivate and clear the state.
1831	 * Dispatch: don't clear state, just deactivate it and mark it disabled.
1832	 * All others: just leave where they are.
1833	 */
1834
1835	if (result == 0) {
1836		return EJUSTRETURN;
1837	} else if ((kn->kn_flags & EV_ONESHOT) != 0) {
1838		knote_deactivate(kn);
1839		if (kqlock2knotedrop(kq, kn)) {
1840			kn->kn_fop->f_detach(kn);
1841			knote_drop(kn, p);
1842		}
1843	} else if ((kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) != 0) {
1844		if ((kn->kn_flags & EV_DISPATCH) != 0) {
1845			/* deactivate and disable all dispatch knotes */
1846			knote_deactivate(kn);
1847			kn->kn_status |= KN_DISABLED;
1848		} else if (!touch || kn->kn_fflags == 0) {
1849			/* only deactivate if nothing since the touch */
1850			knote_deactivate(kn);
1851		}
1852		if (!touch && (kn->kn_flags & EV_CLEAR) != 0) {
1853			/* manually clear non-touch knotes */
1854			kn->kn_data = 0;
1855			kn->kn_fflags = 0;
1856		}
1857		kqunlock(kq);
1858	} else {
1859		/*
1860		 * leave on inprocess queue.  We'll
1861		 * move all the remaining ones back
1862		 * to the kq queue and wake up any
1863		 * waiters when we are done.
1864		 */
1865		kqunlock(kq);
1866	}
1867
1868	/* callback to handle each event as we find it */
1869	error = (callback)(kq, &kev, data);
1870
1871	kqlock(kq);
1872	return error;
1873}
1874
1875/*
1876 * Return 0 to indicate that processing should proceed,
1877 * -1 if there is nothing to process.
1878 *
1879 * Called with kqueue locked and returns the same way,
1880 * but may drop lock temporarily.
1881 */
1882static int
1883kqueue_begin_processing(struct kqueue *kq)
1884{
1885	for (;;) {
1886		if (kq->kq_count == 0) {
1887			return -1;
1888		}
1889
1890		/* if someone else is processing the queue, wait */
1891		if (kq->kq_nprocess != 0) {
1892			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
1893			kq->kq_state |= KQ_PROCWAIT;
1894			kqunlock(kq);
1895			thread_block(THREAD_CONTINUE_NULL);
1896			kqlock(kq);
1897		} else {
1898			kq->kq_nprocess = 1;
1899			return 0;
1900		}
1901	}
1902}
1903
1904/*
1905 * Called with kqueue lock held.
1906 */
1907static void
1908kqueue_end_processing(struct kqueue *kq)
1909{
1910	kq->kq_nprocess = 0;
1911	if (kq->kq_state & KQ_PROCWAIT) {
1912		kq->kq_state &= ~KQ_PROCWAIT;
1913		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
1914	}
1915}
1916
1917/*
1918 * kqueue_process - process the triggered events in a kqueue
1919 *
1920 *	Walk the queued knotes and validate that they are
1921 *	really still triggered events by calling the filter
1922 *	routines (if necessary).  Hold a use reference on
1923 *	the knote to avoid it being detached. For each event
1924 *	that is still considered triggered, invoke the
1925 *	callback routine provided.
1926 *
1927 *	caller holds a reference on the kqueue.
1928 *	kqueue locked on entry and exit - but may be dropped
1929 *	kqueue list locked (held for duration of call)
1930 */
1931
1932static int
1933kqueue_process(struct kqueue *kq,
1934	       kevent_callback_t callback,
1935	       void *data,
1936	       int *countp,
1937	       struct proc *p)
1938{
1939        struct kqtailq inprocess;
1940	struct knote *kn;
1941	int nevents;
1942	int error;
1943
1944        TAILQ_INIT(&inprocess);
1945
1946	if (kqueue_begin_processing(kq) == -1) {
1947		*countp = 0;
1948		/* Nothing to process */
1949		return 0;
1950	}
1951
1952	/*
1953	 * Clear any pre-posted status from previous runs, so we only
1954	 * detect events that occur during this run.
1955	 */
1956	wait_queue_sub_clearrefs(kq->kq_wqs);
1957
1958	/*
1959	 * loop through the enqueued knotes, processing each one and
1960	 * revalidating those that need it. As they are processed,
1961	 * they get moved to the inprocess queue (so the loop can end).
1962	 */
1963	error = 0;
1964	nevents = 0;
1965
1966	while (error == 0 &&
1967	       (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
1968		error = knote_process(kn, callback, data, &inprocess, p);
1969		if (error == EJUSTRETURN)
1970			error = 0;
1971		else
1972			nevents++;
1973	}
1974
1975	/*
1976	 * With the kqueue still locked, move any knotes
1977	 * remaining on the inprocess queue back to the
1978	 * kq's queue and wake up any waiters.
1979	 */
1980	while ((kn = TAILQ_FIRST(&inprocess)) != NULL) {
1981		assert(kn->kn_tq == &inprocess);
1982		TAILQ_REMOVE(&inprocess, kn, kn_tqe);
1983		kn->kn_tq = &kq->kq_head;
1984		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1985	}
1986
1987	kqueue_end_processing(kq);
1988
1989	*countp = nevents;
1990	return error;
1991}
1992
1993
1994static void
1995kqueue_scan_continue(void *data, wait_result_t wait_result)
1996{
1997	thread_t self = current_thread();
1998	uthread_t ut = (uthread_t)get_bsdthread_info(self);
1999	struct _kqueue_scan * cont_args = &ut->uu_kevent.ss_kqueue_scan;
2000	struct kqueue *kq = (struct kqueue *)data;
2001	int error;
2002	int count;
2003
2004	/* convert the (previous) wait_result to a proper error */
2005	switch (wait_result) {
2006	case THREAD_AWAKENED:
2007		kqlock(kq);
2008		error = kqueue_process(kq, cont_args->call, cont_args, &count, current_proc());
2009		if (error == 0 && count == 0) {
2010			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT,
2011					       THREAD_ABORTSAFE, cont_args->deadline);
2012			kq->kq_state |= KQ_SLEEP;
2013			kqunlock(kq);
2014			thread_block_parameter(kqueue_scan_continue, kq);
2015			/* NOTREACHED */
2016		}
2017		kqunlock(kq);
2018		break;
2019	case THREAD_TIMED_OUT:
2020		error = EWOULDBLOCK;
2021		break;
2022	case THREAD_INTERRUPTED:
2023		error = EINTR;
2024		break;
2025	default:
2026		panic("kqueue_scan_continue() - invalid wait_result (%d)", wait_result);
2027		error = 0;
2028	}
2029
2030	/* call the continuation with the results */
2031	assert(cont_args->cont != NULL);
2032	(cont_args->cont)(kq, cont_args->data, error);
2033}
2034
2035
2036/*
2037 * kqueue_scan - scan and wait for events in a kqueue
2038 *
2039 *	Process the triggered events in a kqueue.
2040 *
2041 *	If there are no events triggered, arrange to
2042 *	wait for them.  If the caller provided a
2043 *	continuation routine, the wait (and eventual
2044 *	return) happens through that continuation.
2045 *
2046 *	The callback routine must be valid.
2047 *	The caller must hold a use-count reference on the kq.
2048 */
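/*
 * Illustrative calling sketch only: the callback and continuation names
 * below are hypothetical stand-ins for whatever the kevent entry points
 * actually pass in, and a zero timeval means "block forever":
 *
 *	struct timeval atv = { 0, 0 };
 *	error = kqueue_scan(kq, my_copyout_callback,
 *			    my_unwind_continuation, &args, &atv, p);
 */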
2049
2050int
2051kqueue_scan(struct kqueue *kq,
2052	    kevent_callback_t callback,
2053	    kqueue_continue_t continuation,
2054	    void *data,
2055	    struct timeval *atvp,
2056	    struct proc *p)
2057{
2058	thread_continue_t cont = THREAD_CONTINUE_NULL;
2059	uint64_t deadline;
2060	int error;
2061	int first;
2062
2063	assert(callback != NULL);
2064
2065	first = 1;
2066	for (;;) {
2067		wait_result_t wait_result;
2068		int count;
2069
2070		/*
2071		 * Make a pass through the kq to find events already
2072		 * triggered.
2073		 */
2074		kqlock(kq);
2075		error = kqueue_process(kq, callback, data, &count, p);
2076		if (error || count)
2077			break; /* lock still held */
2078
2079		/* looks like we have to consider blocking */
2080		if (first) {
2081			first = 0;
2082			/* convert the timeout to a deadline once */
2083			if (atvp->tv_sec || atvp->tv_usec) {
2084				uint64_t now;
2085
2086				clock_get_uptime(&now);
2087				nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC +
2088							    atvp->tv_usec * NSEC_PER_USEC,
2089							    &deadline);
2090				if (now >= deadline) {
2091					/* non-blocking call */
2092					error = EWOULDBLOCK;
2093					break; /* lock still held */
2094				}
2095				deadline -= now;
2096				clock_absolutetime_interval_to_deadline(deadline, &deadline);
2097			} else {
2098				deadline = 0; 	/* block forever */
2099			}
2100
2101			if (continuation) {
2102				uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
2103				struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan;
2104
2105				cont_args->call = callback;
2106				cont_args->cont = continuation;
2107				cont_args->deadline = deadline;
2108				cont_args->data = data;
2109				cont = kqueue_scan_continue;
2110			}
2111		}
2112
2113		/* go ahead and wait */
2114		wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT, THREAD_ABORTSAFE, deadline);
2115		kq->kq_state |= KQ_SLEEP;
2116		kqunlock(kq);
2117		wait_result = thread_block_parameter(cont, kq);
2118		/* NOTREACHED if (continuation != NULL) */
2119
2120		switch (wait_result) {
2121		case THREAD_AWAKENED:
2122			continue;
2123		case THREAD_TIMED_OUT:
2124			return EWOULDBLOCK;
2125		case THREAD_INTERRUPTED:
2126			return EINTR;
2127		default:
2128			panic("kqueue_scan - bad wait_result (%d)",
2129			      wait_result);
2130			error = 0;
2131		}
2132	}
2133	kqunlock(kq);
2134	return error;
2135}
2136
2137
2138/*
2139 * XXX
2140 * This could be expanded to call kqueue_scan, if desired.
2141 */
2142/*ARGSUSED*/
2143static int
2144kqueue_read(__unused struct fileproc *fp,
2145			__unused struct uio *uio,
2146			__unused int flags,
2147			__unused vfs_context_t ctx)
2148{
2149	return (ENXIO);
2150}
2151
2152/*ARGSUSED*/
2153static int
2154kqueue_write(__unused struct fileproc *fp,
2155			 __unused struct uio *uio,
2156			 __unused int flags,
2157			 __unused vfs_context_t ctx)
2158{
2159	return (ENXIO);
2160}
2161
2162/*ARGSUSED*/
2163static int
2164kqueue_ioctl(__unused struct fileproc *fp,
2165			 __unused u_long com,
2166			 __unused caddr_t data,
2167			 __unused vfs_context_t ctx)
2168{
2169	return (ENOTTY);
2170}
2171
2172/*ARGSUSED*/
2173static int
2174kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
2175{
2176	struct kqueue *kq = (struct kqueue *)fp->f_data;
2177	struct knote *kn;
2178	struct kqtailq inprocessq;
2179	int retnum = 0;
2180
2181	if (which != FREAD)
2182		return 0;
2183
2184	TAILQ_INIT(&inprocessq);
2185
2186	kqlock(kq);
2187	/*
2188	 * If this is the first pass, link the wait queue associated with
2189	 * the kqueue onto the wait queue set for the select().  Normally we
2190	 * use selrecord() for this, but it uses the wait queue within the
2191	 * selinfo structure and we need to use the main one for the kqueue to
2192	 * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
2193	 * (The select() call will unlink them when it ends).
2194	 */
2195	if (wql != NULL) {
2196		thread_t	cur_act = current_thread();
2197		struct uthread * ut = get_bsdthread_info(cur_act);
2198
2199		kq->kq_state |= KQ_SEL;
2200		wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset,
2201					(wait_queue_link_t)wql);
2202	}
2203
2204	if (kqueue_begin_processing(kq) == -1) {
2205		kqunlock(kq);
2206		return 0;
2207	}
2208
2209	if (kq->kq_count != 0) {
2210		/*
2211		 * there is something queued - but it might be a
2212		 * KN_STAYQUEUED knote, which may or may not have
2213		 * any events pending.  So, we have to walk the
2214		 * list of knotes to see, and peek at the stay-
2215		 * queued ones to be really sure.
2216		 */
2217		while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) {
2218			if ((kn->kn_status & KN_STAYQUEUED) == 0) {
2219				retnum = 1;
2220				goto out;
2221			}
2222
2223			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
2224			TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe);
2225
2226			if (kqlock2knoteuse(kq, kn)) {
2227				unsigned peek;
2228
2229				peek = kn->kn_fop->f_peek(kn);
2230				if (knoteuse2kqlock(kq, kn)) {
2231					if (peek > 0) {
2232						retnum = 1;
2233						goto out;
2234					}
2235				} else {
2236					retnum = 0;
2237				}
2238			}
2239		}
2240	}
2241
2242out:
2243	/* Return knotes to active queue */
2244	while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) {
2245		TAILQ_REMOVE(&inprocessq, kn, kn_tqe);
2246		kn->kn_tq = &kq->kq_head;
2247		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2248	}
2249
2250	kqueue_end_processing(kq);
2251	kqunlock(kq);
2252	return retnum;
2253}
2254
2255/*
2256 * kqueue_close -
2257 */
2258/*ARGSUSED*/
2259static int
2260kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx)
2261{
2262	struct kqueue *kq = (struct kqueue *)fg->fg_data;
2263
2264	kqueue_dealloc(kq);
2265	fg->fg_data = NULL;
2266	return (0);
2267}
2268
2269/*ARGSUSED*/
2270/*
2271 * The caller has taken a use-count reference on this kqueue and will donate it
2272 * to the kqueue we are being added to.  This keeps the kqueue from closing until
2273 * that relationship is torn down.
2274 */
2275static int
2276kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
2277{
2278	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
2279	struct kqueue *parentkq = kn->kn_kq;
2280
2281	if (parentkq == kq ||
2282	    kn->kn_filter != EVFILT_READ)
2283		return (1);
2284
2285	/*
2286	 * We have to avoid creating a cycle when nesting kqueues
2287	 * inside another.  Rather than trying to walk the whole
2288	 * potential DAG of nested kqueues, we just use a simple
2289	 * ceiling protocol.  When a kqueue is inserted into another,
2290	 * we check that the (future) parent is not already nested
2291 * into another kqueue at a lower level than the potential
2292	 * child (because it could indicate a cycle).  If that test
2293	 * passes, we just mark the nesting levels accordingly.
2294	 */
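
	/*
	 * Worked example (illustrative numbers): inserting a child kq at
	 * level 3 into a parent at level 0 bumps the parent to level 4;
	 * inserting the same child into a parent already at level 2 is
	 * refused, since 0 < 2 < 3 could conceal a cycle.
	 */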
2295
2296	kqlock(parentkq);
2297	if (parentkq->kq_level > 0 &&
2298	    parentkq->kq_level < kq->kq_level)
2299	{
2300		kqunlock(parentkq);
2301		return (1);
2302	} else {
2303		/* set parent level appropriately */
2304		if (parentkq->kq_level == 0)
2305			parentkq->kq_level = 2;
2306		if (parentkq->kq_level < kq->kq_level + 1)
2307			parentkq->kq_level = kq->kq_level + 1;
2308		kqunlock(parentkq);
2309
2310		kn->kn_fop = &kqread_filtops;
2311		kqlock(kq);
2312		KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
2313		/* indicate nesting in child, if needed */
2314		if (kq->kq_level == 0)
2315			kq->kq_level = 1;
2316		kqunlock(kq);
2317		return (0);
2318	}
2319}
2320
2321/*
2322 * kqueue_drain - called when kq is closed
2323 */
2324/*ARGSUSED*/
2325static int
2326kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2327{
2328	struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data;
2329	kqlock(kq);
2330	kqueue_wakeup(kq, 1);
2331	kqunlock(kq);
2332	return 0;
2333}
2334
2335/*ARGSUSED*/
2336int
2337kqueue_stat(struct fileproc *fp, void *ub, int isstat64,  __unused vfs_context_t ctx)
2338{
2339
2340	struct kqueue *kq = (struct kqueue *)fp->f_data;
2341	if (isstat64 != 0) {
2342		struct stat64 *sb64 = (struct stat64 *)ub;
2343
2344		bzero((void *)sb64, sizeof(*sb64));
2345		sb64->st_size = kq->kq_count;
2346		if (kq->kq_state & KQ_KEV64)
2347			sb64->st_blksize = sizeof(struct kevent64_s);
2348		else
2349			sb64->st_blksize = sizeof(struct kevent);
2350		sb64->st_mode = S_IFIFO;
2351	} else {
2352		struct stat *sb = (struct stat *)ub;
2353
2354		bzero((void *)sb, sizeof(*sb));
2355		sb->st_size = kq->kq_count;
2356		if (kq->kq_state & KQ_KEV64)
2357			sb->st_blksize = sizeof(struct kevent64_s);
2358		else
2359			sb->st_blksize = sizeof(struct kevent);
2360		sb->st_mode = S_IFIFO;
2361	}
2362
2363	return (0);
2364}
2365
2366/*
2367 * Called with the kqueue locked
2368 */
2369static void
2370kqueue_wakeup(struct kqueue *kq, int closed)
2371{
2372	if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0 || kq->kq_nprocess > 0) {
2373		kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
2374		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT,
2375				      (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED);
2376	}
2377}
2378
2379void
2380klist_init(struct klist *list)
2381{
2382	SLIST_INIT(list);
2383}
2384
2385
2386/*
2387 * Query/Post each knote in the object's list
2388 *
2389 *	The object lock protects the list. It is assumed
2390 *	that the filter/event routine for the object can
2391 *	determine that the object is already locked (via
2392 *	the hint) and not deadlock itself.
2393 *
2394 *	The object lock should also hold off pending
2395 *	detach/drop operations.  But we'll prevent it here
2396 *	too - just in case.
2397 */
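/*
 * Illustrative (hypothetical) event source, assuming the usual pattern of
 * a klist embedded in a selinfo; "sc", its lock, and its fields are
 * made-up names:
 *
 *	lck_mtx_lock(sc->sc_lock);
 *	sc->sc_ready += n;
 *	KNOTE(&sc->sc_sel.si_note, 0);	-- macro wrapper around knote()
 *	lck_mtx_unlock(sc->sc_lock);
 */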
2398void
2399knote(struct klist *list, long hint)
2400{
2401	struct knote *kn;
2402
2403	SLIST_FOREACH(kn, list, kn_selnext) {
2404		struct kqueue *kq = kn->kn_kq;
2405
2406		kqlock(kq);
2407		if (kqlock2knoteuse(kq, kn)) {
2408			int result;
2409
2410			/* call the event with only a use count */
2411			result = kn->kn_fop->f_event(kn, hint);
2412
2413			/* if it's not going away and is triggered */
2414			if (knoteuse2kqlock(kq, kn) && result)
2415				knote_activate(kn, 1);
2416			/* lock held again */
2417		}
2418		kqunlock(kq);
2419	}
2420}
2421
2422/*
2423 * attach a knote to the specified list.  Return true if this is the first entry.
2424 * The list is protected by whatever lock the object it is associated with uses.
2425 */
2426int
2427knote_attach(struct klist *list, struct knote *kn)
2428{
2429	int ret = SLIST_EMPTY(list);
2430	SLIST_INSERT_HEAD(list, kn, kn_selnext);
2431	return ret;
2432}
2433
2434/*
2435 * detach a knote from the specified list.  Return true if that was the last entry.
2436 * The list is protected by whatever lock the object it is associated with uses.
2437 */
2438int
2439knote_detach(struct klist *list, struct knote *kn)
2440{
2441	SLIST_REMOVE(list, kn, knote, kn_selnext);
2442	return SLIST_EMPTY(list);
2443}
2444
2445/*
2446 * For a given knote, link a provided wait queue directly with the kqueue.
2447 * Wakeups will happen via recursive wait queue support.  But nothing will move
2448 * the knote to the active list at wakeup (nothing calls knote()).  Instead,
2449 * we permanently enqueue them here.
2450 *
2451 * kqueue and knote references are held by caller.
2452 *
2453 * caller provides the wait queue link structure.
2454 */
2455int
2456knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
2457{
2458	struct kqueue *kq = kn->kn_kq;
2459	kern_return_t kr;
2460
2461	kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
2462	if (kr == KERN_SUCCESS) {
2463		knote_markstayqueued(kn);
2464		return 0;
2465	} else {
2466		return EINVAL;
2467	}
2468}
2469
2470/*
2471 * Unlink the provided wait queue from the kqueue associated with a knote.
2472 * Also remove it from the magic list of directly attached knotes.
2473 *
2474 * Note that the unlink may have already happened from the other side, so
2475 * ignore any failures to unlink and just remove it from the kqueue list.
2476 *
2477 * On success, caller is responsible for the link structure
2478 */
2479int
2480knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
2481{
2482	struct kqueue *kq = kn->kn_kq;
2483	kern_return_t kr;
2484
2485	kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
2486	kqlock(kq);
2487	kn->kn_status &= ~KN_STAYQUEUED;
2488	knote_dequeue(kn);
2489	kqunlock(kq);
2490	return (kr != KERN_SUCCESS) ? EINVAL : 0;
2491}
2492
2493/*
2494 * remove all knotes referencing a specified fd
2495 *
2496 * Essentially an inlined knote_remove & knote_drop
2497 * when we know for sure that the descriptor refers to a file
2498 *
2499 * Entered with the proc_fd lock already held.
2500 * It returns the same way, but may drop it temporarily.
2501 */
2502void
2503knote_fdclose(struct proc *p, int fd)
2504{
2505	struct filedesc *fdp = p->p_fd;
2506	struct klist *list;
2507	struct knote *kn;
2508
2509	list = &fdp->fd_knlist[fd];
2510	while ((kn = SLIST_FIRST(list)) != NULL) {
2511		struct kqueue *kq = kn->kn_kq;
2512
2513		if (kq->kq_p != p)
2514			panic("knote_fdclose: proc mismatch (kq->kq_p=%p != p=%p)", kq->kq_p, p);
2515
2516		kqlock(kq);
2517		proc_fdunlock(p);
2518
2519		/*
2520		 * Convert the lock to a drop ref.
2521		 * If we get it, go ahead and drop it.
2522		 * Otherwise, we waited for it to
2523		 * be dropped by the other thread, so
2524		 * it is safe to move on in the list.
2525		 */
2526		if (kqlock2knotedrop(kq, kn)) {
2527			kn->kn_fop->f_detach(kn);
2528			knote_drop(kn, p);
2529		}
2530
2531		proc_fdlock(p);
2532
2533		/* the fd tables may have changed - start over */
2534		list = &fdp->fd_knlist[fd];
2535	}
2536}
2537
2538/* proc_fdlock held on entry (and exit) */
2539static int
2540knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p)
2541{
2542	struct klist *list = NULL;
2543
2544	if (! kn->kn_fop->f_isfd) {
2545		if (fdp->fd_knhashmask == 0)
2546			fdp->fd_knhash = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE,
2547			    &fdp->fd_knhashmask);
2548		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
2549	} else {
2550		if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
2551			u_int size = 0;
2552
2553			if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur
2554			    || kn->kn_id >= (uint64_t)maxfiles)
2555				return (EINVAL);
2556
2557			/* have to grow the fd_knlist */
2558			size = fdp->fd_knlistsize;
2559			while (size <= kn->kn_id)
2560				size += KQEXTENT;
2561
2562			if (size >= (UINT_MAX/sizeof(struct klist *)))
2563				return (EINVAL);
2564
2565			MALLOC(list, struct klist *,
2566			       size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
2567			if (list == NULL)
2568				return (ENOMEM);
2569
2570			bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
2571			      fdp->fd_knlistsize * sizeof(struct klist *));
2572			bzero((caddr_t)list +
2573			      fdp->fd_knlistsize * sizeof(struct klist *),
2574			      (size - fdp->fd_knlistsize) * sizeof(struct klist *));
2575			FREE(fdp->fd_knlist, M_KQUEUE);
2576			fdp->fd_knlist = list;
2577			fdp->fd_knlistsize = size;
2578		}
2579		list = &fdp->fd_knlist[kn->kn_id];
2580	}
2581	SLIST_INSERT_HEAD(list, kn, kn_link);
2582	return (0);
2583}
2584
2585
2586
2587/*
2588 * should be called at spl == 0, since we don't want to hold spl
2589 * while calling fdrop and free.
2590 */
2591static void
2592knote_drop(struct knote *kn, __unused struct proc *ctxp)
2593{
2594	struct kqueue *kq = kn->kn_kq;
2595	struct proc *p = kq->kq_p;
2596	struct filedesc *fdp = p->p_fd;
2597	struct klist *list;
2598	int needswakeup;
2599
2600	proc_fdlock(p);
2601	if (kn->kn_fop->f_isfd)
2602		list = &fdp->fd_knlist[kn->kn_id];
2603	else
2604		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
2605
2606	SLIST_REMOVE(list, kn, knote, kn_link);
2607	kqlock(kq);
2608	knote_dequeue(kn);
2609	needswakeup = (kn->kn_status & KN_USEWAIT);
2610	kqunlock(kq);
2611	proc_fdunlock(p);
2612
2613	if (needswakeup)
2614		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED);
2615
2616	if (kn->kn_fop->f_isfd)
2617		fp_drop(p, kn->kn_id, kn->kn_fp, 0);
2618
2619	knote_free(kn);
2620}
2621
2622/* called with kqueue lock held */
2623static void
2624knote_activate(struct knote *kn, int propagate)
2625{
2626	struct kqueue *kq = kn->kn_kq;
2627
2628	kn->kn_status |= KN_ACTIVE;
2629	knote_enqueue(kn);
2630	kqueue_wakeup(kq, 0);
2631
2632	/* this is a real event: wake up the parent kq, too */
2633	if (propagate)
2634		KNOTE(&kq->kq_sel.si_note, 0);
2635}
2636
2637/* called with kqueue lock held */
2638static void
2639knote_deactivate(struct knote *kn)
2640{
2641	kn->kn_status &= ~KN_ACTIVE;
2642	knote_dequeue(kn);
2643}
2644
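/*
 * A knote is (re)queued only if it is not already queued and is either a
 * stay-queued source (KN_STAYQUEUED sources remain eligible even while
 * disabled) or an ordinary source that is not disabled.
 */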
2645/* called with kqueue lock held */
2646static void
2647knote_enqueue(struct knote *kn)
2648{
2649	if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_STAYQUEUED ||
2650	    (kn->kn_status & (KN_QUEUED | KN_STAYQUEUED | KN_DISABLED)) == 0) {
2651		struct kqtailq *tq = kn->kn_tq;
2652		struct kqueue *kq = kn->kn_kq;
2653
2654		TAILQ_INSERT_TAIL(tq, kn, kn_tqe);
2655		kn->kn_status |= KN_QUEUED;
2656		kq->kq_count++;
2657	}
2658}
2659
2660/* called with kqueue lock held */
2661static void
2662knote_dequeue(struct knote *kn)
2663{
2664	struct kqueue *kq = kn->kn_kq;
2665
2666	if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_QUEUED) {
2667		struct kqtailq *tq = kn->kn_tq;
2668
2669		TAILQ_REMOVE(tq, kn, kn_tqe);
2670		kn->kn_tq = &kq->kq_head;
2671		kn->kn_status &= ~KN_QUEUED;
2672		kq->kq_count--;
2673	}
2674}
2675
2676void
2677knote_init(void)
2678{
2679	knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone");
2680
2681	/* allocate kq lock group attribute and group */
2682	kq_lck_grp_attr= lck_grp_attr_alloc_init();
2683
2684	kq_lck_grp = lck_grp_alloc_init("kqueue",  kq_lck_grp_attr);
2685
2686	/* Allocate kq lock attribute */
2687	kq_lck_attr = lck_attr_alloc_init();
2688
2689	/* Initialize the timer filter lock */
2690	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
2691
2692#if VM_PRESSURE_EVENTS
2693	/* Initialize the vm pressure list lock */
2694	vm_pressure_init(kq_lck_grp, kq_lck_attr);
2695#endif
2696}
2697SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
2698
2699static struct knote *
2700knote_alloc(void)
2701{
2702	return ((struct knote *)zalloc(knote_zone));
2703}
2704
2705static void
2706knote_free(struct knote *kn)
2707{
2708	zfree(knote_zone, kn);
2709}
2710
2711#if SOCKETS
2712#include <sys/param.h>
2713#include <sys/socket.h>
2714#include <sys/protosw.h>
2715#include <sys/domain.h>
2716#include <sys/mbuf.h>
2717#include <sys/kern_event.h>
2718#include <sys/malloc.h>
2719#include <sys/sys_domain.h>
2720#include <sys/syslog.h>
2721
2722
2723static int kev_attach(struct socket *so, int proto, struct proc *p);
2724static int kev_detach(struct socket *so);
2725static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p);
2726
2727struct pr_usrreqs event_usrreqs = {
2728     pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp,
2729     pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp,
2730     pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp,
2731     pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp,
2732     pru_sosend_notsupp, soreceive, pru_sopoll_notsupp
2733};
2734
2735struct protosw eventsw[] = {
2736     {
2737	  .pr_type = SOCK_RAW,
2738	  .pr_domain = &systemdomain,
2739	  .pr_protocol = SYSPROTO_EVENT,
2740	  .pr_flags = PR_ATOMIC,
2741	  .pr_usrreqs = &event_usrreqs,
2742     }
2743};
2744
2745static
2746struct kern_event_head kern_event_head;
2747
2748static u_int32_t static_event_id = 0;
2749struct domain *sysdom = &systemdomain;
2750static lck_mtx_t *sys_mtx;
2751
2752/*
2753 * Install the protosw's for the NKE manager.  Invoked at
2754 *  extension load time
2755 */
2756int
2757kern_event_init(void)
2758{
2759    int retval;
2760
2761    if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) {
2762    	    log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval);
2763            return(retval);
2764    }
2765
2766    /*
2767     * Use the domain mutex for all system event sockets
2768     */
2769    sys_mtx = sysdom->dom_mtx;
2770
2771    return(KERN_SUCCESS);
2772}
2773
2774static int
2775kev_attach(struct socket *so, __unused int proto, __unused struct proc *p)
2776{
2777     int error;
2778     struct kern_event_pcb  *ev_pcb;
2779
2780     error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE);
2781     if (error)
2782          return error;
2783
2784     MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK);
2785     if (ev_pcb == 0)
2786	  return ENOBUFS;
2787
2788     ev_pcb->ev_socket = so;
2789     ev_pcb->vendor_code_filter = 0xffffffff;
2790
2791     so->so_pcb = (caddr_t) ev_pcb;
2792     lck_mtx_lock(sys_mtx);
2793     LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link);
2794     lck_mtx_unlock(sys_mtx);
2795
2796     return 0;
2797}
2798
2799
2800static int
2801kev_detach(struct socket *so)
2802{
2803     struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2804
2805     if (ev_pcb != 0) {
2806		LIST_REMOVE(ev_pcb, ev_link);
2807		FREE(ev_pcb, M_PCB);
2808		so->so_pcb = 0;
2809		so->so_flags |= SOF_PCBCLEARING;
2810     }
2811
2812     return 0;
2813}
2814
2815/*
2816 * For now, kev_vendor_code and mbuf_tags use the same
2817 * mechanism.
2818 */
2819
2820errno_t kev_vendor_code_find(
2821	const char	*string,
2822	u_int32_t 	*out_vendor_code)
2823{
2824	if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) {
2825		return EINVAL;
2826	}
2827	return net_str_id_find_internal(string, out_vendor_code, NSI_VENDOR_CODE, 1);
2828}
2829
2830errno_t  kev_msg_post(struct kev_msg *event_msg)
2831{
2832	mbuf_tag_id_t	min_vendor, max_vendor;
2833
2834	net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE);
2835
2836	if (event_msg == NULL)
2837		return EINVAL;
2838
2839	/* Limit third parties to posting events for registered vendor codes only */
2840	if (event_msg->vendor_code < min_vendor ||
2841		event_msg->vendor_code > max_vendor)
2842	{
2843		return EINVAL;
2844	}
2845
2846	return kev_post_msg(event_msg);
2847}
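
/*
 * Illustrative (hypothetical) posting sequence from a kext; the vendor
 * string, class/subclass/event codes, and payload are made-up
 * placeholders:
 *
 *	struct kev_msg ev;
 *	u_int32_t vendor;
 *
 *	bzero(&ev, sizeof(ev));
 *	if (kev_vendor_code_find("com.example.driver", &vendor) == 0) {
 *		ev.vendor_code       = vendor;
 *		ev.kev_class         = 1;
 *		ev.kev_subclass      = 1;
 *		ev.event_code        = 1;
 *		ev.dv[0].data_length = sizeof(payload);
 *		ev.dv[0].data_ptr    = &payload;
 *		(void) kev_msg_post(&ev);
 *	}
 */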
2848
2849
2850int  kev_post_msg(struct kev_msg *event_msg)
2851{
2852     struct mbuf *m, *m2;
2853     struct kern_event_pcb  *ev_pcb;
2854     struct kern_event_msg  *ev;
2855     char              *tmp;
2856     u_int32_t     total_size;
2857     int               i;
2858
2859	/* Verify the message is small enough to fit in one mbuf w/o cluster */
2860	total_size = KEV_MSG_HEADER_SIZE;
2861
2862	for (i = 0; i < 5; i++) {
2863		if (event_msg->dv[i].data_length == 0)
2864			break;
2865		total_size += event_msg->dv[i].data_length;
2866	}
2867
2868	if (total_size > MLEN) {
2869		return EMSGSIZE;
2870	}
2871
2872     m = m_get(M_DONTWAIT, MT_DATA);
2873     if (m == 0)
2874	  return ENOBUFS;
2875
2876     ev = mtod(m, struct kern_event_msg *);
2877     total_size = KEV_MSG_HEADER_SIZE;
2878
2879     tmp = (char *) &ev->event_data[0];
2880     for (i = 0; i < 5; i++) {
2881	  if (event_msg->dv[i].data_length == 0)
2882	       break;
2883
2884	  total_size += event_msg->dv[i].data_length;
2885	  bcopy(event_msg->dv[i].data_ptr, tmp,
2886		event_msg->dv[i].data_length);
2887	  tmp += event_msg->dv[i].data_length;
2888     }
2889
2890     ev->id = ++static_event_id;
2891     ev->total_size   = total_size;
2892     ev->vendor_code  = event_msg->vendor_code;
2893     ev->kev_class    = event_msg->kev_class;
2894     ev->kev_subclass = event_msg->kev_subclass;
2895     ev->event_code   = event_msg->event_code;
2896
2897     m->m_len = total_size;
2898     lck_mtx_lock(sys_mtx);
2899     for (ev_pcb = LIST_FIRST(&kern_event_head);
2900	  ev_pcb;
2901	  ev_pcb = LIST_NEXT(ev_pcb, ev_link)) {
2902
2903	  if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) {
2904	       if (ev_pcb->vendor_code_filter != ev->vendor_code)
2905		    continue;
2906
2907	       if (ev_pcb->class_filter != KEV_ANY_CLASS) {
2908		    if (ev_pcb->class_filter != ev->kev_class)
2909			 continue;
2910
2911		    if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) &&
2912			(ev_pcb->subclass_filter != ev->kev_subclass))
2913			 continue;
2914	       }
2915	  }
2916
2917	  m2 = m_copym(m, 0, m->m_len, M_NOWAIT);
2918	  if (m2 == 0) {
2919	       m_free(m);
2920	       lck_mtx_unlock(sys_mtx);
2921	       return ENOBUFS;
2922	  }
2923	  /* the socket is already locked because we hold the sys_mtx here */
2924	  if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2))
2925		  sorwakeup(ev_pcb->ev_socket);
2926     }
2927
2928     m_free(m);
2929     lck_mtx_unlock(sys_mtx);
2930     return 0;
2931}
2932
2933static int
2934kev_control(struct socket *so,
2935			u_long cmd,
2936			caddr_t data,
2937			__unused struct ifnet *ifp,
2938			__unused struct proc *p)
2939{
2940	struct kev_request *kev_req = (struct kev_request *) data;
2941	struct kern_event_pcb  *ev_pcb;
2942	struct kev_vendor_code *kev_vendor;
2943	u_int32_t  *id_value = (u_int32_t *) data;
2944
2945
2946	switch (cmd) {
2947
2948		case SIOCGKEVID:
2949			*id_value = static_event_id;
2950			break;
2951
2952		case SIOCSKEVFILT:
2953			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2954			ev_pcb->vendor_code_filter = kev_req->vendor_code;
2955			ev_pcb->class_filter     = kev_req->kev_class;
2956			ev_pcb->subclass_filter  = kev_req->kev_subclass;
2957			break;
2958
2959		case SIOCGKEVFILT:
2960			ev_pcb = (struct kern_event_pcb *) so->so_pcb;
2961			kev_req->vendor_code = ev_pcb->vendor_code_filter;
2962			kev_req->kev_class   = ev_pcb->class_filter;
2963			kev_req->kev_subclass = ev_pcb->subclass_filter;
2964			break;
2965
2966		case SIOCGKEVVENDOR:
2967			kev_vendor = (struct kev_vendor_code*)data;
2968
2969			/* Make sure string is NULL terminated */
2970			kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0;
2971
2972			return net_str_id_find_internal(kev_vendor->vendor_string,
2973					&kev_vendor->vendor_code, NSI_VENDOR_CODE, 0);
2974
2975		default:
2976			return ENOTSUP;
2977	}
2978
2979	return 0;
2980}
2981
2982#endif /* SOCKETS */
2983
2984
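/*
 * fill_kqueueinfo - report the state of a kqueue (queued-event count,
 * kevent record size, and select/sleep state) in the kqueue_info
 * structure used by the proc_info interfaces.
 */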
2985int
2986fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
2987{
2988	struct vinfo_stat * st;
2989
2990	/* No need for the funnel as fd is kept alive */
2991
2992	st = &kinfo->kq_stat;
2993
2994	st->vst_size = kq->kq_count;
2995	if (kq->kq_state & KQ_KEV64)
2996		st->vst_blksize = sizeof(struct kevent64_s);
2997	else
2998		st->vst_blksize = sizeof(struct kevent);
2999	st->vst_mode = S_IFIFO;
3000	if (kq->kq_state & KQ_SEL)
3001		kinfo->kq_state |=  PROC_KQUEUE_SELECT;
3002	if (kq->kq_state & KQ_SLEEP)
3003		kinfo->kq_state |= PROC_KQUEUE_SLEEP;
3004
3005	return(0);
3006}
3007
3008
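/*
 * knote_markstayqueued - flag a knote as stay-queued and put it on its
 * kqueue's queue; takes and drops the kqueue lock around the update.
 */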
3009void
3010knote_markstayqueued(struct knote *kn)
3011{
3012	kqlock(kn->kn_kq);
3013	kn->kn_status |= KN_STAYQUEUED;
3014	knote_enqueue(kn);
3015	kqunlock(kn->kn_kq);
3016}
3017