kern_event.c revision 197243
159290Sjlemon/*-
272969Sjlemon * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
3133741Sjmg * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
4197240Ssson * Copyright (c) 2009 Apple, Inc.
559290Sjlemon * All rights reserved.
659290Sjlemon *
759290Sjlemon * Redistribution and use in source and binary forms, with or without
859290Sjlemon * modification, are permitted provided that the following conditions
959290Sjlemon * are met:
1059290Sjlemon * 1. Redistributions of source code must retain the above copyright
1159290Sjlemon *    notice, this list of conditions and the following disclaimer.
1259290Sjlemon * 2. Redistributions in binary form must reproduce the above copyright
1359290Sjlemon *    notice, this list of conditions and the following disclaimer in the
1459290Sjlemon *    documentation and/or other materials provided with the distribution.
1559290Sjlemon *
1659290Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1759290Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1859290Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1959290Sjlemon * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2059290Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2159290Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2259290Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2359290Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2459290Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2559290Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2659290Sjlemon * SUCH DAMAGE.
2759290Sjlemon */
2859290Sjlemon
29116182Sobrien#include <sys/cdefs.h>
30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 197243 2009-09-16 03:49:54Z sson $");
31116182Sobrien
32162592Sjmg#include "opt_ktrace.h"
33162592Sjmg
3459290Sjlemon#include <sys/param.h>
3559290Sjlemon#include <sys/systm.h>
3659290Sjlemon#include <sys/kernel.h>
3776166Smarkm#include <sys/lock.h>
3876166Smarkm#include <sys/mutex.h>
3959290Sjlemon#include <sys/proc.h>
40132138Salfred#include <sys/malloc.h>
4159290Sjlemon#include <sys/unistd.h>
4259290Sjlemon#include <sys/file.h>
43108524Salfred#include <sys/filedesc.h>
44132138Salfred#include <sys/filio.h>
4559290Sjlemon#include <sys/fcntl.h>
46133741Sjmg#include <sys/kthread.h>
4770834Swollman#include <sys/selinfo.h>
4859290Sjlemon#include <sys/queue.h>
4959290Sjlemon#include <sys/event.h>
5059290Sjlemon#include <sys/eventvar.h>
5159290Sjlemon#include <sys/poll.h>
5259290Sjlemon#include <sys/protosw.h>
53132138Salfred#include <sys/sigio.h>
54132138Salfred#include <sys/signalvar.h>
5559290Sjlemon#include <sys/socket.h>
5659290Sjlemon#include <sys/socketvar.h>
5759290Sjlemon#include <sys/stat.h>
5884138Sjlemon#include <sys/sysctl.h>
5959290Sjlemon#include <sys/sysproto.h>
60142934Sps#include <sys/syscallsubr.h>
61133741Sjmg#include <sys/taskqueue.h>
6259290Sjlemon#include <sys/uio.h>
63162592Sjmg#ifdef KTRACE
64162592Sjmg#include <sys/ktrace.h>
65162592Sjmg#endif
6659290Sjlemon
6792751Sjeff#include <vm/uma.h>
6859290Sjlemon
/* Malloc type used for all kqueue-subsystem allocations. */
static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

/*
 * This lock is used if multiple kq locks are required.  This possibly
 * should be made into a per proc lock.
 */
static struct mtx	kq_global;
MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
/*
 * Take/drop the global ordering lock at most once; 'haslck' is a caller
 * supplied latch recording whether the lock is already held.
 */
#define KQ_GLOBAL_LOCK(lck, haslck)	do {	\
	if (!haslck)				\
		mtx_lock(lck);			\
	haslck = 1;				\
} while (0)
#define KQ_GLOBAL_UNLOCK(lck, haslck)	do {	\
	if (haslck)				\
		mtx_unlock(lck);			\
	haslck = 0;				\
} while (0)

/* Dedicated taskqueue thread for kqueue work (see kqueue_task). */
TASKQUEUE_DEFINE_THREAD(kqueue);
89133741Sjmg
/* Forward declarations of file-local helpers. */
static int	kevent_copyout(void *arg, struct kevent *kevp, int count);
static int	kevent_copyin(void *arg, struct kevent *kevp, int count);
static int	kqueue_register(struct kqueue *kq, struct kevent *kev,
		    struct thread *td, int waitok);
static int	kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void	kqueue_release(struct kqueue *kq, int locked);
static int	kqueue_expand(struct kqueue *kq, struct filterops *fops,
		    uintptr_t ident, int waitok);
static void	kqueue_task(void *arg, int pending);
static int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent_copyops *k_ops,
		    const struct timespec *timeout,
		    struct kevent *keva, struct thread *td);
static void 	kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void	kqueue_fo_release(int filt);

/* fileops methods backing kqueue descriptors. */
static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_truncate_t	kqueue_truncate;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_truncate = kqueue_truncate,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};

/* knote lifecycle helpers. */
static int 	knote_attach(struct knote *kn, struct kqueue *kq);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(int waitok);
static void 	knote_free(struct knote *kn);

/* Built-in filter implementations. */
static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);
static int	filt_userattach(struct knote *kn);
static void	filt_userdetach(struct knote *kn);
static int	filt_user(struct knote *kn, long hint);
static void	filt_usertouch(struct knote *kn, struct kevent *kev, long type);
14972521Sjlemon
static struct filterops file_filtops = {
	.f_isfd = 1,
	.f_attach = filt_fileattach,
};
static struct filterops kqread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
};
/* XXX - move to kern_proc.c?  */
static struct filterops proc_filtops = {
	.f_isfd = 0,
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
};
static struct filterops timer_filtops = {
	.f_isfd = 0,
	.f_attach = filt_timerattach,
	.f_detach = filt_timerdetach,
	.f_event = filt_timer,
};
/* EVFILT_USER: events triggered explicitly from userland (NOTE_TRIGGER). */
static struct filterops user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
};

static uma_zone_t	knote_zone;
/* Count of allocated timer callouts and its sysctl-tunable cap. */
static int 		kq_ncallouts = 0;
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
18459290Sjlemon
/* XXX - ensure not KN_INFLUX?? */
/*
 * Mark a knote active and queue it for delivery unless it is already
 * queued or disabled.  'islock' indicates whether the kq lock is held.
 */
#define KNOTE_ACTIVATE(kn, islock) do { 				\
	if ((islock))							\
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
	else								\
		KQ_LOCK((kn)->kn_kq);					\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue((kn));					\
	if (!(islock))							\
		KQ_UNLOCK((kn)->kn_kq);					\
} while(0)
#define KQ_LOCK(kq) do {						\
	mtx_lock(&(kq)->kq_lock);					\
} while (0)
/* Wake any thread sleeping for an in-flux knote to settle. */
#define KQ_FLUX_WAKEUP(kq) do {						\
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {		\
		(kq)->kq_state &= ~KQ_FLUXWAIT;				\
		wakeup((kq));						\
	}								\
} while (0)
#define KQ_UNLOCK_FLUX(kq) do {						\
	KQ_FLUX_WAKEUP(kq);						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_UNLOCK(kq) do {						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_OWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_OWNED);				\
} while (0)
#define KQ_NOTOWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
} while (0)
/* Lock/unlock the knlist this knote is attached to, if any. */
#define KN_LIST_LOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define KN_LIST_UNLOCK(kn) do {						\
	if (kn->kn_knlist != NULL) 					\
		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
} while (0)
/* Assert the knlist lock state; compiled away without INVARIANTS. */
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);				\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	knl->kl_assert_locked((knl)->kl_lockarg);			\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	knl->kl_assert_unlocked((knl)->kl_lockarg);			\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while(0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
24759290Sjlemon
/*
 * Attach routine for filter slots that cannot be attached to; always
 * fails with ENXIO.
 */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}
25488633Salfred
/* Filterops whose attach always fails; used for unimplemented filters. */
struct filterops null_filtops = {
	.f_isfd = 0,
	.f_attach = filt_nullattach,
};

/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters, indexed by ~EVFILT_* value,
 * with a per-entry refcount guarded by filterops_lock.
 */
static struct mtx	filterops_lock;
MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
	MTX_DEF);
static struct {
	struct filterops *for_fop;
	int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
	{ &file_filtops },			/* EVFILT_READ */
	{ &file_filtops },			/* EVFILT_WRITE */
	{ &null_filtops },			/* EVFILT_AIO */
	{ &file_filtops },			/* EVFILT_VNODE */
	{ &proc_filtops },			/* EVFILT_PROC */
	{ &sig_filtops },			/* EVFILT_SIGNAL */
	{ &timer_filtops },			/* EVFILT_TIMER */
	{ &file_filtops },			/* EVFILT_NETDEV */
	{ &fs_filtops },			/* EVFILT_FS */
	{ &null_filtops },			/* EVFILT_LIO */
	{ &user_filtops },			/* EVFILT_USER */
};
28659290Sjlemon
287133741Sjmg/*
288133741Sjmg * Simple redirection for all cdevsw style objects to call their fo_kqfilter
289133741Sjmg * method.
290133741Sjmg */
static int
filt_fileattach(struct knote *kn)
{

	/* Delegate attachment to the file's own kqfilter method. */
	return (fo_kqfilter(kn->kn_fp, kn));
}
29759290Sjlemon
29872521Sjlemon/*ARGSUSED*/
/*
 * kqfilter method for kqueue descriptors themselves: a kqueue can only
 * be monitored for readability, i.e. for having pending events.
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_status |= KN_KQUEUE;
	kn->kn_fop = &kqread_filtops;
	knlist_add(&kq->kq_sel.si_note, kn, 0);

	return (0);
}
31359290Sjlemon
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	/* Undo the knlist_add() performed in kqueue_kqfilter(). */
	knlist_remove(&kq->kq_sel.si_note, kn, 0);
}
32159290Sjlemon
32259290Sjlemon/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	/* Readable when the monitored kqueue has queued events. */
	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
33159290Sjlemon
332133741Sjmg/* XXX - move to kern_proc.c?  */
33359290Sjlemonstatic int
33459290Sjlemonfilt_procattach(struct knote *kn)
33559290Sjlemon{
33659290Sjlemon	struct proc *p;
337113377Skbyanc	int immediate;
33875451Srwatson	int error;
33959290Sjlemon
340113377Skbyanc	immediate = 0;
34159290Sjlemon	p = pfind(kn->kn_id);
342113377Skbyanc	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
343113377Skbyanc		p = zpfind(kn->kn_id);
344113377Skbyanc		immediate = 1;
345133741Sjmg	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
346133741Sjmg		immediate = 1;
347113377Skbyanc	}
348133741Sjmg
349122019Scognet	if (p == NULL)
350122019Scognet		return (ESRCH);
351133741Sjmg	if ((error = p_cansee(curthread, p)))
35275451Srwatson		return (error);
35359290Sjlemon
35459290Sjlemon	kn->kn_ptr.p_proc = p;
35559290Sjlemon	kn->kn_flags |= EV_CLEAR;		/* automatically set */
35659290Sjlemon
35759290Sjlemon	/*
35859290Sjlemon	 * internal flag indicating registration done by kernel
35959290Sjlemon	 */
36059290Sjlemon	if (kn->kn_flags & EV_FLAG1) {
36159290Sjlemon		kn->kn_data = kn->kn_sdata;		/* ppid */
36259290Sjlemon		kn->kn_fflags = NOTE_CHILD;
36359290Sjlemon		kn->kn_flags &= ~EV_FLAG1;
36459290Sjlemon	}
36559290Sjlemon
366122686Scognet	if (immediate == 0)
367133741Sjmg		knlist_add(&p->p_klist, kn, 1);
368113377Skbyanc
369113377Skbyanc	/*
370113377Skbyanc	 * Immediately activate any exit notes if the target process is a
371113377Skbyanc	 * zombie.  This is necessary to handle the case where the target
372113377Skbyanc	 * process, e.g. a child, dies before the kevent is registered.
373113377Skbyanc	 */
374113377Skbyanc	if (immediate && filt_proc(kn, NOTE_EXIT))
375133741Sjmg		KNOTE_ACTIVATE(kn, 0);
376113377Skbyanc
37771500Sjhb	PROC_UNLOCK(p);
37859290Sjlemon
37959290Sjlemon	return (0);
38059290Sjlemon}
38159290Sjlemon
38259290Sjlemon/*
38359290Sjlemon * The knote may be attached to a different process, which may exit,
38459290Sjlemon * leaving nothing for the knote to be attached to.  So when the process
38559290Sjlemon * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
38659290Sjlemon * it will be deleted when read out.  However, as part of the knote deletion,
38759290Sjlemon * this routine is called, so a check is needed to avoid actually performing
38859290Sjlemon * a detach, because the original process does not exist any more.
38959290Sjlemon */
390133741Sjmg/* XXX - move to kern_proc.c?  */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	knlist_remove(&p->p_klist, kn, 0);
	/* Clear the pointer so a stale proc is never dereferenced later. */
	kn->kn_ptr.p_proc = NULL;
}
40059290Sjlemon
401133741Sjmg/* XXX - move to kern_proc.c?  */
/*
 * Filter event routine for EVFILT_PROC; 'hint' carries the NOTE_* event
 * plus extra data in the bits outside NOTE_PCTRLMASK.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	struct proc *p = kn->kn_ptr.p_proc;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		if (!(kn->kn_status & KN_DETACHED))
			knlist_remove_inevent(&p->p_klist, kn);
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = p->p_xstat;	/* exit status for the caller */
		kn->kn_ptr.p_proc = NULL;
		return (1);
	}

	return (kn->kn_fflags != 0);
}
43359290Sjlemon
434180340Skib/*
435180340Skib * Called when the process forked. It mostly does the same as the
436180340Skib * knote(), activating all knotes registered to be activated when the
437180340Skib * process forked. Additionally, for each knote attached to the
438180340Skib * parent, check whether user wants to track the new process. If so
439180340Skib * attach a new knote to it, and immediately report an event with the
440180340Skib * child's pid.
441180340Skib */
void
knote_fork(struct knlist *list, int pid)
{
	struct kqueue *kq;
	struct knote *kn;
	struct kevent kev;
	int error;

	if (list == NULL)
		return;
	list->kl_lock(list->kl_lockarg);

	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		/* Skip knotes some other thread is already processing. */
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
			continue;
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		/* Re-check under the kq lock; the flag may have been set. */
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The same as knote(), activate the event.
		 */
		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
			kn->kn_status |= KN_HASKQLOCK;
			if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
				KNOTE_ACTIVATE(kn, 1);
			kn->kn_status &= ~KN_HASKQLOCK;
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The NOTE_TRACK case. In addition to the activation
		 * of the event, we need to register new event to
		 * track the child. Drop the locks in preparation for
		 * the call to kqueue_register().
		 */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		list->kl_unlock(list->kl_lockarg);

		/*
		 * Activate existing knote and register a knote with
		 * new process.
		 */
		kev.ident = pid;
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;		/* parent */
		kev.udata = kn->kn_kevent.udata;/* preserve udata */
		error = kqueue_register(kq, &kev, NULL, 0);
		if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
			KNOTE_ACTIVATE(kn, 0);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
		KQ_LOCK(kq);
		kn->kn_status &= ~KN_INFLUX;
		KQ_UNLOCK_FLUX(kq);
		list->kl_lock(list->kl_lockarg);
	}
	list->kl_unlock(list->kl_lockarg);
}
50859290Sjlemon
509133741Sjmgstatic int
510133741Sjmgtimertoticks(intptr_t data)
511133741Sjmg{
512133741Sjmg	struct timeval tv;
513133741Sjmg	int tticks;
514133741Sjmg
515133741Sjmg	tv.tv_sec = data / 1000;
516133741Sjmg	tv.tv_usec = (data % 1000) * 1000;
517133741Sjmg	tticks = tvtohz(&tv);
518133741Sjmg
519133741Sjmg	return tticks;
520133741Sjmg}
521133741Sjmg
522133741Sjmg/* XXX - move to kern_timeout.c? */
/*
 * Callout handler for EVFILT_TIMER: records an expiration, activates
 * the knote, and re-arms itself unless the timer is one-shot.
 */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */

	if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
		    filt_timerexpire, kn);
	}
}
53879989Sjlemon
53979989Sjlemon/*
54079989Sjlemon * data contains amount of time to sleep, in milliseconds
541133590Srwatson */
542133741Sjmg/* XXX - move to kern_timeout.c? */
/*
 * Attach an EVFILT_TIMER knote: allocate a callout (subject to the
 * kq_calloutmax cap) and arm it with the period from kn_sdata (ms).
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;

	atomic_add_int(&kq_ncallouts, 1);

	/*
	 * NOTE(review): the limit check reads kq_ncallouts non-atomically
	 * after the increment, so concurrent attaches can briefly
	 * overshoot kq_calloutmax — confirm this is acceptable.
	 */
	if (kq_ncallouts >= kq_calloutmax) {
		atomic_add_int(&kq_ncallouts, -1);
		return (ENOMEM);
	}

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	kn->kn_status &= ~KN_DETACHED;		/* knlist_add usually sets it */
	calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK);
	callout_init(calloutp, CALLOUT_MPSAFE);
	kn->kn_hook = calloutp;
	callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
	    filt_timerexpire, kn);

	return (0);
}
56579989Sjlemon
566133741Sjmg/* XXX - move to kern_timeout.c? */
static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	/* callout_drain() waits for a running filt_timerexpire() to finish. */
	callout_drain(calloutp);
	free(calloutp, M_KQUEUE);
	atomic_add_int(&kq_ncallouts, -1);
	kn->kn_status |= KN_DETACHED;	/* knlist_remove usually clears it */
}
57879989Sjlemon
579133741Sjmg/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{

	/* Ready when at least one expiration has been recorded. */
	return (kn->kn_data != 0);
}
58679989Sjlemon
587197241Sssonstatic int
588197241Sssonfilt_userattach(struct knote *kn)
589197241Ssson{
590197241Ssson
591197241Ssson	/*
592197241Ssson	 * EVFILT_USER knotes are not attached to anything in the kernel.
593197241Ssson	 */
594197241Ssson	kn->kn_hook = NULL;
595197241Ssson	if (kn->kn_fflags & NOTE_TRIGGER)
596197241Ssson		kn->kn_hookid = 1;
597197241Ssson	else
598197241Ssson		kn->kn_hookid = 0;
599197241Ssson	return (0);
600197241Ssson}
601197241Ssson
static void
filt_userdetach(__unused struct knote *kn)
{

	/*
	 * EVFILT_USER knotes are not attached to anything in the kernel,
	 * so there is nothing to tear down.
	 */
}
610197241Ssson
static int
filt_user(struct knote *kn, __unused long hint)
{

	/* Ready iff NOTE_TRIGGER has been posted (tracked in kn_hookid). */
	return (kn->kn_hookid);
}
617197241Ssson
/*
 * Touch routine for EVFILT_USER: on EVENT_REGISTER, merge a changelist
 * entry into the knote (trigger state, fflags control op, data); on
 * EVENT_PROCESS, export the knote's saved state into *kev.
 */
static void
filt_usertouch(struct knote *kn, struct kevent *kev, long type)
{
	int ffctrl;

	switch (type) {
	case EVENT_REGISTER:
		if (kev->fflags & NOTE_TRIGGER)
			kn->kn_hookid = 1;

		/* Apply the fflags control operation (NOP/AND/OR/COPY). */
		ffctrl = kev->fflags & NOTE_FFCTRLMASK;
		kev->fflags &= NOTE_FFLAGSMASK;
		switch (ffctrl) {
		case NOTE_FFNOP:
			break;

		case NOTE_FFAND:
			kn->kn_sfflags &= kev->fflags;
			break;

		case NOTE_FFOR:
			kn->kn_sfflags |= kev->fflags;
			break;

		case NOTE_FFCOPY:
			kn->kn_sfflags = kev->fflags;
			break;

		default:
			/* XXX Return error? */
			break;
		}
		kn->kn_sdata = kev->data;
		/* EV_CLEAR resets the trigger so the event can fire again. */
		if (kev->flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

        case EVENT_PROCESS:
		*kev = kn->kn_kevent;
		kev->fflags = kn->kn_sfflags;
		kev->data = kn->kn_sdata;
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

	default:
		panic("filt_usertouch() - invalid type (%ld)", type);
		break;
	}
}
674197241Ssson
/*
 * kqueue(2) system call: allocate a new kqueue, wire it into the file
 * descriptor table, and return the new descriptor in td_retval[0].
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;

	/* An extra reference on `nfp' has been held for us by falloc(). */
	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
	TAILQ_INIT(&kq->kq_head);
	kq->kq_fdp = fdp;
	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

	/* Track the kqueue on its filedesc so it can be found later. */
	FILEDESC_XLOCK(fdp);
	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
	FILEDESC_XUNLOCK(fdp);

	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
	fdrop(fp, td);

	td->td_retval[0] = fd;
done2:
	return (error);
}
70759290Sjlemon
#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * kevent(2) system call: copy in the optional timeout, set up the
 * user-space copy callbacks, and hand off to kern_kevent().  With
 * KTRACE enabled, the change and event lists are recorded as genio
 * events as well.
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	struct kevent_copyops k_ops = { uap,
					kevent_copyout,
					kevent_copyin};
	int error;
#ifdef KTRACE
	struct uio ktruio;
	struct iovec ktriov;
	struct uio *ktruioin = NULL;
	struct uio *ktruioout = NULL;
#endif

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		ktriov.iov_base = uap->changelist;
		ktriov.iov_len = uap->nchanges * sizeof(struct kevent);
		ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1,
		    .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ,
		    .uio_td = td };
		ktruioin = cloneuio(&ktruio);
		ktriov.iov_base = uap->eventlist;
		ktriov.iov_len = uap->nevents * sizeof(struct kevent);
		ktruioout = cloneuio(&ktruio);
	}
#endif

	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
	    &k_ops, tsp);

#ifdef KTRACE
	if (ktruioin != NULL) {
		ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0);
		ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_READ, ktruioout, error);
	}
#endif

	return (error);
}
769142934Sps
770142934Sps/*
771146950Sps * Copy 'count' items into the destination list pointed to by uap->eventlist.
772142934Sps */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;	/* advance past what was copied */
	return (error);
}
787142934Sps
788146950Sps/*
789146950Sps * Copy 'count' items from the list pointed to by uap->changelist.
790146950Sps */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;	/* advance past what was read */
	return (error);
}
805146950Sps
/*
 * Common implementation of kevent(2): apply up to 'nchanges' changes via
 * k_ops->k_copyin()/kqueue_register(), then scan for up to 'nevents'
 * pending events.  Registration errors (and EV_RECEIPT entries) are
 * reported back through the event list while room remains; otherwise a
 * registration error aborts the call.
 */
int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kevent keva[KQ_NEVENTS];
	struct kevent *kevp, *changes;
	struct kqueue *kq;
	struct file *fp;
	int i, n, nerrors, error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto done_norel;

	nerrors = 0;

	/* Process the changelist in KQ_NEVENTS-sized batches. */
	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = k_ops->k_copyin(k_ops->arg, keva, n);
		if (error)
			goto done;
		changes = keva;
		for (i = 0; i < n; i++) {
			kevp = &changes[i];
			if (!kevp->filter)
				continue;
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td, 1);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) k_ops->k_copyout(k_ops->arg,
					    kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	/* Registration errors were reported inline; return their count. */
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
done:
	kqueue_release(kq, 0);
done_norel:
	fdrop(fp, td);
	return (error);
}
86359290Sjlemon
86459290Sjlemonint
86588633Salfredkqueue_add_filteropts(int filt, struct filterops *filtops)
86688633Salfred{
867133741Sjmg	int error;
86888633Salfred
869133741Sjmg	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) {
870133741Sjmg		printf(
871133741Sjmg"trying to add a filterop that is out of range: %d is beyond %d\n",
872133741Sjmg		    ~filt, EVFILT_SYSCOUNT);
873133741Sjmg		return EINVAL;
874133741Sjmg	}
875133741Sjmg	mtx_lock(&filterops_lock);
876133741Sjmg	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
877133741Sjmg	    sysfilt_ops[~filt].for_fop != NULL)
878133741Sjmg		error = EEXIST;
879133741Sjmg	else {
880133741Sjmg		sysfilt_ops[~filt].for_fop = filtops;
881133741Sjmg		sysfilt_ops[~filt].for_refcnt = 0;
882133741Sjmg	}
883133741Sjmg	mtx_unlock(&filterops_lock);
884133741Sjmg
88588633Salfred	return (0);
88688633Salfred}
88788633Salfred
88888633Salfredint
88988633Salfredkqueue_del_filteropts(int filt)
89088633Salfred{
891133741Sjmg	int error;
89288633Salfred
893133741Sjmg	error = 0;
894133741Sjmg	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
895133741Sjmg		return EINVAL;
896133741Sjmg
897133741Sjmg	mtx_lock(&filterops_lock);
898133741Sjmg	if (sysfilt_ops[~filt].for_fop == &null_filtops ||
899133741Sjmg	    sysfilt_ops[~filt].for_fop == NULL)
900133741Sjmg		error = EINVAL;
901133741Sjmg	else if (sysfilt_ops[~filt].for_refcnt != 0)
902133741Sjmg		error = EBUSY;
903133741Sjmg	else {
904133741Sjmg		sysfilt_ops[~filt].for_fop = &null_filtops;
905133741Sjmg		sysfilt_ops[~filt].for_refcnt = 0;
906133741Sjmg	}
907133741Sjmg	mtx_unlock(&filterops_lock);
908133741Sjmg
909133741Sjmg	return error;
91088633Salfred}
91188633Salfred
912133741Sjmgstatic struct filterops *
913133741Sjmgkqueue_fo_find(int filt)
914133741Sjmg{
915133741Sjmg
916133741Sjmg	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
917133741Sjmg		return NULL;
918133741Sjmg
919133741Sjmg	mtx_lock(&filterops_lock);
920133741Sjmg	sysfilt_ops[~filt].for_refcnt++;
921133741Sjmg	if (sysfilt_ops[~filt].for_fop == NULL)
922133741Sjmg		sysfilt_ops[~filt].for_fop = &null_filtops;
923133741Sjmg	mtx_unlock(&filterops_lock);
924133741Sjmg
925133741Sjmg	return sysfilt_ops[~filt].for_fop;
926133741Sjmg}
927133741Sjmg
928133741Sjmgstatic void
929133741Sjmgkqueue_fo_release(int filt)
930133741Sjmg{
931133741Sjmg
932133741Sjmg	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
933133741Sjmg		return;
934133741Sjmg
935133741Sjmg	mtx_lock(&filterops_lock);
936133741Sjmg	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
937133741Sjmg	    ("filter object refcount not valid on release"));
938133741Sjmg	sysfilt_ops[~filt].for_refcnt--;
939133741Sjmg	mtx_unlock(&filterops_lock);
940133741Sjmg}
941133741Sjmg
942133741Sjmg/*
943170029Srwatson * A ref to kq (obtained via kqueue_acquire) must be held.  waitok will
944133741Sjmg * influence if memory allocation should wait.  Make sure it is 0 if you
945133741Sjmg * hold any mutexes.
946133741Sjmg */
947162594Sjmgstatic int
948133741Sjmgkqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok)
94959290Sjlemon{
95059290Sjlemon	struct filterops *fops;
951133741Sjmg	struct file *fp;
952133741Sjmg	struct knote *kn, *tkn;
953133741Sjmg	int error, filt, event;
954133741Sjmg	int haskqglobal;
95559290Sjlemon
956133741Sjmg	fp = NULL;
957133741Sjmg	kn = NULL;
958133741Sjmg	error = 0;
959133741Sjmg	haskqglobal = 0;
96059290Sjlemon
961133741Sjmg	filt = kev->filter;
962133741Sjmg	fops = kqueue_fo_find(filt);
963133741Sjmg	if (fops == NULL)
964133741Sjmg		return EINVAL;
965133741Sjmg
966133741Sjmg	tkn = knote_alloc(waitok);		/* prevent waiting with locks */
967133741Sjmg
968133741Sjmgfindkn:
96959290Sjlemon	if (fops->f_isfd) {
970133741Sjmg		KASSERT(td != NULL, ("td is NULL"));
971159553Sjhb		error = fget(td, kev->ident, &fp);
972159553Sjhb		if (error)
973133741Sjmg			goto done;
97459290Sjlemon
975133741Sjmg		if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
976133741Sjmg		    kev->ident, 0) != 0) {
977159553Sjhb			/* try again */
978133741Sjmg			fdrop(fp, td);
979133741Sjmg			fp = NULL;
980133741Sjmg			error = kqueue_expand(kq, fops, kev->ident, waitok);
981133741Sjmg			if (error)
982133741Sjmg				goto done;
983133741Sjmg			goto findkn;
984133741Sjmg		}
985133741Sjmg
986133741Sjmg		if (fp->f_type == DTYPE_KQUEUE) {
987133741Sjmg			/*
988133741Sjmg			 * if we add some inteligence about what we are doing,
989133741Sjmg			 * we should be able to support events on ourselves.
990133741Sjmg			 * We need to know when we are doing this to prevent
991133741Sjmg			 * getting both the knlist lock and the kq lock since
992133741Sjmg			 * they are the same thing.
993133741Sjmg			 */
994133741Sjmg			if (fp->f_data == kq) {
995133741Sjmg				error = EINVAL;
996159172Spjd				goto done;
997133741Sjmg			}
998133741Sjmg
999133741Sjmg			KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1000133741Sjmg		}
1001133741Sjmg
1002133741Sjmg		KQ_LOCK(kq);
1003133741Sjmg		if (kev->ident < kq->kq_knlistsize) {
1004133741Sjmg			SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
1005133741Sjmg				if (kev->filter == kn->kn_filter)
100659290Sjlemon					break;
100759290Sjlemon		}
100859290Sjlemon	} else {
1009133741Sjmg		if ((kev->flags & EV_ADD) == EV_ADD)
1010133741Sjmg			kqueue_expand(kq, fops, kev->ident, waitok);
1011133741Sjmg
1012133741Sjmg		KQ_LOCK(kq);
1013133741Sjmg		if (kq->kq_knhashmask != 0) {
101459290Sjlemon			struct klist *list;
1015133635Sjmg
1016133741Sjmg			list = &kq->kq_knhash[
1017133741Sjmg			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
101859290Sjlemon			SLIST_FOREACH(kn, list, kn_link)
101959290Sjlemon				if (kev->ident == kn->kn_id &&
102059290Sjlemon				    kev->filter == kn->kn_filter)
102159290Sjlemon					break;
102259290Sjlemon		}
102359290Sjlemon	}
102459290Sjlemon
1025133741Sjmg	/* knote is in the process of changing, wait for it to stablize. */
1026133741Sjmg	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
1027133741Sjmg		if (fp != NULL) {
1028133741Sjmg			fdrop(fp, td);
1029133741Sjmg			fp = NULL;
1030133741Sjmg		}
1031133741Sjmg		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1032133741Sjmg		kq->kq_state |= KQ_FLUXWAIT;
1033133741Sjmg		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
1034133741Sjmg		goto findkn;
1035133741Sjmg	}
1036133741Sjmg
103759290Sjlemon	/*
103859290Sjlemon	 * kn now contains the matching knote, or NULL if no match
103959290Sjlemon	 */
1040197240Ssson	if (kn == NULL) {
1041197240Ssson		if (kev->flags & EV_ADD) {
1042133741Sjmg			kn = tkn;
1043133741Sjmg			tkn = NULL;
104468883Sdillon			if (kn == NULL) {
1045159173Spjd				KQ_UNLOCK(kq);
104668883Sdillon				error = ENOMEM;
104768883Sdillon				goto done;
104868883Sdillon			}
104959290Sjlemon			kn->kn_fp = fp;
105059290Sjlemon			kn->kn_kq = kq;
105159290Sjlemon			kn->kn_fop = fops;
105268883Sdillon			/*
1053133741Sjmg			 * apply reference counts to knote structure, and
105468883Sdillon			 * do not release it at the end of this routine.
105568883Sdillon			 */
1056133741Sjmg			fops = NULL;
105768883Sdillon			fp = NULL;
105868883Sdillon
105961962Sjlemon			kn->kn_sfflags = kev->fflags;
106061962Sjlemon			kn->kn_sdata = kev->data;
106161962Sjlemon			kev->fflags = 0;
106261962Sjlemon			kev->data = 0;
106361962Sjlemon			kn->kn_kevent = *kev;
1064157383Sjmg			kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
1065157383Sjmg			    EV_ENABLE | EV_DISABLE);
1066133741Sjmg			kn->kn_status = KN_INFLUX|KN_DETACHED;
106761962Sjlemon
1068133741Sjmg			error = knote_attach(kn, kq);
1069133741Sjmg			KQ_UNLOCK(kq);
1070133741Sjmg			if (error != 0) {
1071133741Sjmg				tkn = kn;
1072133741Sjmg				goto done;
1073133741Sjmg			}
1074133741Sjmg
1075133741Sjmg			if ((error = kn->kn_fop->f_attach(kn)) != 0) {
107683366Sjulian				knote_drop(kn, td);
107759290Sjlemon				goto done;
107859290Sjlemon			}
1079133741Sjmg			KN_LIST_LOCK(kn);
1080197240Ssson			goto done_ev_add;
108161962Sjlemon		} else {
1082197240Ssson			/* No matching knote and the EV_ADD flag is not set. */
1083133741Sjmg			KQ_UNLOCK(kq);
1084197240Ssson			error = ENOENT;
1085197240Ssson			goto done;
108659290Sjlemon		}
1087197240Ssson	}
1088197240Ssson
1089197240Ssson	if (kev->flags & EV_DELETE) {
1090133741Sjmg		kn->kn_status |= KN_INFLUX;
1091133741Sjmg		KQ_UNLOCK(kq);
1092134859Sjmg		if (!(kn->kn_status & KN_DETACHED))
1093134859Sjmg			kn->kn_fop->f_detach(kn);
109483366Sjulian		knote_drop(kn, td);
109559290Sjlemon		goto done;
109659290Sjlemon	}
109759290Sjlemon
1098197240Ssson	/*
1099197240Ssson	 * The user may change some filter values after the initial EV_ADD,
1100197240Ssson	 * but doing so will not reset any filter which has already been
1101197240Ssson	 * triggered.
1102197240Ssson	 */
1103197240Ssson	kn->kn_status |= KN_INFLUX;
1104197240Ssson	KQ_UNLOCK(kq);
1105197240Ssson	KN_LIST_LOCK(kn);
1106197240Ssson	kn->kn_kevent.udata = kev->udata;
1107197240Ssson	if (!fops->f_isfd && fops->f_touch != NULL) {
1108197240Ssson		fops->f_touch(kn, kev, EVENT_REGISTER);
1109197240Ssson	} else {
1110197240Ssson		kn->kn_sfflags = kev->fflags;
1111197240Ssson		kn->kn_sdata = kev->data;
1112197240Ssson	}
1113197240Ssson
1114197240Ssson	/*
1115197240Ssson	 * We can get here with kn->kn_knlist == NULL.  This can happen when
1116197240Ssson	 * the initial attach event decides that the event is "completed"
1117197240Ssson	 * already.  i.e. filt_procattach is called on a zombie process.  It
1118197240Ssson	 * will call filt_proc which will remove it from the list, and NULL
1119197240Ssson	 * kn_knlist.
1120197240Ssson	 */
1121197240Sssondone_ev_add:
1122197240Ssson	event = kn->kn_fop->f_event(kn, 0);
1123197240Ssson	KQ_LOCK(kq);
1124197240Ssson	if (event)
1125197240Ssson		KNOTE_ACTIVATE(kn, 1);
1126197240Ssson	kn->kn_status &= ~KN_INFLUX;
1127197240Ssson	KN_LIST_UNLOCK(kn);
1128197240Ssson
112959290Sjlemon	if ((kev->flags & EV_DISABLE) &&
113059290Sjlemon	    ((kn->kn_status & KN_DISABLED) == 0)) {
113159290Sjlemon		kn->kn_status |= KN_DISABLED;
113259290Sjlemon	}
113359290Sjlemon
113459290Sjlemon	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
113559290Sjlemon		kn->kn_status &= ~KN_DISABLED;
113659290Sjlemon		if ((kn->kn_status & KN_ACTIVE) &&
113759290Sjlemon		    ((kn->kn_status & KN_QUEUED) == 0))
113859290Sjlemon			knote_enqueue(kn);
113959290Sjlemon	}
1140133741Sjmg	KQ_UNLOCK_FLUX(kq);
114159290Sjlemon
114259290Sjlemondone:
1143133741Sjmg	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
114468883Sdillon	if (fp != NULL)
114583366Sjulian		fdrop(fp, td);
1146133741Sjmg	if (tkn != NULL)
1147133741Sjmg		knote_free(tkn);
1148133741Sjmg	if (fops != NULL)
1149133741Sjmg		kqueue_fo_release(filt);
115059290Sjlemon	return (error);
115159290Sjlemon}
115259290Sjlemon
115359290Sjlemonstatic int
1154170029Srwatsonkqueue_acquire(struct file *fp, struct kqueue **kqp)
115559290Sjlemon{
1156133741Sjmg	int error;
115789306Salfred	struct kqueue *kq;
1158133741Sjmg
1159133741Sjmg	error = 0;
1160133741Sjmg
1161174988Sjeff	kq = fp->f_data;
1162174988Sjeff	if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
1163174988Sjeff		return (EBADF);
1164174988Sjeff	*kqp = kq;
1165174988Sjeff	KQ_LOCK(kq);
1166174988Sjeff	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
1167133741Sjmg		KQ_UNLOCK(kq);
1168174988Sjeff		return (EBADF);
1169174988Sjeff	}
1170174988Sjeff	kq->kq_refcnt++;
1171174988Sjeff	KQ_UNLOCK(kq);
1172133741Sjmg
1173133741Sjmg	return error;
1174133741Sjmg}
1175133741Sjmg
1176133741Sjmgstatic void
1177133741Sjmgkqueue_release(struct kqueue *kq, int locked)
1178133741Sjmg{
1179133741Sjmg	if (locked)
1180133741Sjmg		KQ_OWNED(kq);
1181133741Sjmg	else
1182133741Sjmg		KQ_LOCK(kq);
1183133741Sjmg	kq->kq_refcnt--;
1184133741Sjmg	if (kq->kq_refcnt == 1)
1185133741Sjmg		wakeup(&kq->kq_refcnt);
1186133741Sjmg	if (!locked)
1187133741Sjmg		KQ_UNLOCK(kq);
1188133741Sjmg}
1189133741Sjmg
1190133741Sjmgstatic void
1191133741Sjmgkqueue_schedtask(struct kqueue *kq)
1192133741Sjmg{
1193133741Sjmg
1194133741Sjmg	KQ_OWNED(kq);
1195133741Sjmg	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
1196133741Sjmg	    ("scheduling kqueue task while draining"));
1197133741Sjmg
1198133741Sjmg	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
1199133741Sjmg		taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
1200133741Sjmg		kq->kq_state |= KQ_TASKSCHED;
1201133741Sjmg	}
1202133741Sjmg}
1203133741Sjmg
1204133741Sjmg/*
1205133741Sjmg * Expand the kq to make sure we have storage for fops/ident pair.
1206133741Sjmg *
1207133741Sjmg * Return 0 on success (or no work necessary), return errno on failure.
1208133741Sjmg *
1209133741Sjmg * Not calling hashinit w/ waitok (proper malloc flag) should be safe.
1210133741Sjmg * If kqueue_register is called from a non-fd context, there usually/should
1211133741Sjmg * be no locks held.
1212133741Sjmg */
1213133741Sjmgstatic int
1214133741Sjmgkqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
1215133741Sjmg	int waitok)
1216133741Sjmg{
1217133741Sjmg	struct klist *list, *tmp_knhash;
1218133741Sjmg	u_long tmp_knhashmask;
1219133741Sjmg	int size;
1220133741Sjmg	int fd;
1221133741Sjmg	int mflag = waitok ? M_WAITOK : M_NOWAIT;
1222133741Sjmg
1223133741Sjmg	KQ_NOTOWNED(kq);
1224133741Sjmg
1225133741Sjmg	if (fops->f_isfd) {
1226133741Sjmg		fd = ident;
1227133741Sjmg		if (kq->kq_knlistsize <= fd) {
1228133741Sjmg			size = kq->kq_knlistsize;
1229133741Sjmg			while (size <= fd)
1230133741Sjmg				size += KQEXTENT;
1231184214Sdes			list = malloc(size * sizeof list, M_KQUEUE, mflag);
1232133741Sjmg			if (list == NULL)
1233133741Sjmg				return ENOMEM;
1234133741Sjmg			KQ_LOCK(kq);
1235133741Sjmg			if (kq->kq_knlistsize > fd) {
1236184205Sdes				free(list, M_KQUEUE);
1237133741Sjmg				list = NULL;
1238133741Sjmg			} else {
1239133741Sjmg				if (kq->kq_knlist != NULL) {
1240133741Sjmg					bcopy(kq->kq_knlist, list,
1241133741Sjmg					    kq->kq_knlistsize * sizeof list);
1242184205Sdes					free(kq->kq_knlist, M_KQUEUE);
1243133741Sjmg					kq->kq_knlist = NULL;
1244133741Sjmg				}
1245133741Sjmg				bzero((caddr_t)list +
1246133741Sjmg				    kq->kq_knlistsize * sizeof list,
1247133741Sjmg				    (size - kq->kq_knlistsize) * sizeof list);
1248133741Sjmg				kq->kq_knlistsize = size;
1249133741Sjmg				kq->kq_knlist = list;
1250133741Sjmg			}
1251133741Sjmg			KQ_UNLOCK(kq);
1252133741Sjmg		}
1253133741Sjmg	} else {
1254133741Sjmg		if (kq->kq_knhashmask == 0) {
1255133741Sjmg			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1256133741Sjmg			    &tmp_knhashmask);
1257133741Sjmg			if (tmp_knhash == NULL)
1258133741Sjmg				return ENOMEM;
1259133741Sjmg			KQ_LOCK(kq);
1260133741Sjmg			if (kq->kq_knhashmask == 0) {
1261133741Sjmg				kq->kq_knhash = tmp_knhash;
1262133741Sjmg				kq->kq_knhashmask = tmp_knhashmask;
1263133741Sjmg			} else {
1264133741Sjmg				free(tmp_knhash, M_KQUEUE);
1265133741Sjmg			}
1266133741Sjmg			KQ_UNLOCK(kq);
1267133741Sjmg		}
1268133741Sjmg	}
1269133741Sjmg
1270133741Sjmg	KQ_NOTOWNED(kq);
1271133741Sjmg	return 0;
1272133741Sjmg}
1273133741Sjmg
1274133741Sjmgstatic void
1275133741Sjmgkqueue_task(void *arg, int pending)
1276133741Sjmg{
1277133741Sjmg	struct kqueue *kq;
1278133741Sjmg	int haskqglobal;
1279133741Sjmg
1280133741Sjmg	haskqglobal = 0;
1281133741Sjmg	kq = arg;
1282133741Sjmg
1283133741Sjmg	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1284133741Sjmg	KQ_LOCK(kq);
1285133741Sjmg
1286133741Sjmg	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);
1287133741Sjmg
1288133741Sjmg	kq->kq_state &= ~KQ_TASKSCHED;
1289133741Sjmg	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
1290133741Sjmg		wakeup(&kq->kq_state);
1291133741Sjmg	}
1292133741Sjmg	KQ_UNLOCK(kq);
1293133741Sjmg	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1294133741Sjmg}
1295133741Sjmg
1296133741Sjmg/*
1297133741Sjmg * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
1298133741Sjmg * We treat KN_MARKER knotes as if they are INFLUX.
1299133741Sjmg */
1300133741Sjmgstatic int
1301146950Spskqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
1302146950Sps    const struct timespec *tsp, struct kevent *keva, struct thread *td)
1303133741Sjmg{
130459290Sjlemon	struct kevent *kevp;
130559290Sjlemon	struct timeval atv, rtv, ttv;
1306133794Sgreen	struct knote *kn, *marker;
1307178914Skib	int count, timeout, nkev, error, influx;
1308197240Ssson	int haskqglobal, touch;
130959290Sjlemon
131059290Sjlemon	count = maxevents;
1311133741Sjmg	nkev = 0;
1312133741Sjmg	error = 0;
1313133741Sjmg	haskqglobal = 0;
131459290Sjlemon
1315133741Sjmg	if (maxevents == 0)
1316133741Sjmg		goto done_nl;
1317133741Sjmg
131864343Sjlemon	if (tsp != NULL) {
131959290Sjlemon		TIMESPEC_TO_TIMEVAL(&atv, tsp);
132064343Sjlemon		if (itimerfix(&atv)) {
132159290Sjlemon			error = EINVAL;
1322133741Sjmg			goto done_nl;
132359290Sjlemon		}
132464343Sjlemon		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
132564343Sjlemon			timeout = -1;
1326133590Srwatson		else
132764343Sjlemon			timeout = atv.tv_sec > 24 * 60 * 60 ?
132864343Sjlemon			    24 * 60 * 60 * hz : tvtohz(&atv);
132964343Sjlemon		getmicrouptime(&rtv);
133064343Sjlemon		timevaladd(&atv, &rtv);
133164343Sjlemon	} else {
133264343Sjlemon		atv.tv_sec = 0;
133364343Sjlemon		atv.tv_usec = 0;
133459290Sjlemon		timeout = 0;
133559290Sjlemon	}
1336133794Sgreen	marker = knote_alloc(1);
1337133794Sgreen	if (marker == NULL) {
1338133794Sgreen		error = ENOMEM;
1339133794Sgreen		goto done_nl;
1340133794Sgreen	}
1341133794Sgreen	marker->kn_status = KN_MARKER;
1342133741Sjmg	KQ_LOCK(kq);
134359290Sjlemon	goto start;
134459290Sjlemon
134559290Sjlemonretry:
134664343Sjlemon	if (atv.tv_sec || atv.tv_usec) {
134759290Sjlemon		getmicrouptime(&rtv);
134859290Sjlemon		if (timevalcmp(&rtv, &atv, >=))
134959290Sjlemon			goto done;
135059290Sjlemon		ttv = atv;
135159290Sjlemon		timevalsub(&ttv, &rtv);
135259290Sjlemon		timeout = ttv.tv_sec > 24 * 60 * 60 ?
135359290Sjlemon			24 * 60 * 60 * hz : tvtohz(&ttv);
135459290Sjlemon	}
135559290Sjlemon
135659290Sjlemonstart:
1357133741Sjmg	kevp = keva;
135859290Sjlemon	if (kq->kq_count == 0) {
1359133590Srwatson		if (timeout < 0) {
136064343Sjlemon			error = EWOULDBLOCK;
136164343Sjlemon		} else {
136264343Sjlemon			kq->kq_state |= KQ_SLEEP;
1363133741Sjmg			error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
1364133741Sjmg			    "kqread", timeout);
136564343Sjlemon		}
136664084Sjlemon		if (error == 0)
136759290Sjlemon			goto retry;
136864084Sjlemon		/* don't restart after signals... */
136964084Sjlemon		if (error == ERESTART)
137064084Sjlemon			error = EINTR;
137164084Sjlemon		else if (error == EWOULDBLOCK)
137259290Sjlemon			error = 0;
137359290Sjlemon		goto done;
137459290Sjlemon	}
137559290Sjlemon
1376133794Sgreen	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
1377178914Skib	influx = 0;
137859290Sjlemon	while (count) {
1379133741Sjmg		KQ_OWNED(kq);
138059290Sjlemon		kn = TAILQ_FIRST(&kq->kq_head);
1381133741Sjmg
1382133794Sgreen		if ((kn->kn_status == KN_MARKER && kn != marker) ||
1383133741Sjmg		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
1384178914Skib			if (influx) {
1385178914Skib				influx = 0;
1386178914Skib				KQ_FLUX_WAKEUP(kq);
1387178914Skib			}
1388180336Skib			kq->kq_state |= KQ_FLUXWAIT;
1389133741Sjmg			error = msleep(kq, &kq->kq_lock, PSOCK,
1390133741Sjmg			    "kqflxwt", 0);
1391133741Sjmg			continue;
1392133741Sjmg		}
1393133741Sjmg
1394133590Srwatson		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
1395133741Sjmg		if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
1396133741Sjmg			kn->kn_status &= ~KN_QUEUED;
1397133741Sjmg			kq->kq_count--;
1398133741Sjmg			continue;
1399133741Sjmg		}
1400133794Sgreen		if (kn == marker) {
1401133741Sjmg			KQ_FLUX_WAKEUP(kq);
140259290Sjlemon			if (count == maxevents)
140359290Sjlemon				goto retry;
140459290Sjlemon			goto done;
140559290Sjlemon		}
1406133741Sjmg		KASSERT((kn->kn_status & KN_INFLUX) == 0,
1407133741Sjmg		    ("KN_INFLUX set when not suppose to be"));
1408133741Sjmg
1409133741Sjmg		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
141059290Sjlemon			kn->kn_status &= ~KN_QUEUED;
1411133741Sjmg			kn->kn_status |= KN_INFLUX;
141259290Sjlemon			kq->kq_count--;
1413133741Sjmg			KQ_UNLOCK(kq);
1414133741Sjmg			/*
1415133741Sjmg			 * We don't need to lock the list since we've marked
1416133741Sjmg			 * it _INFLUX.
1417133741Sjmg			 */
1418133741Sjmg			*kevp = kn->kn_kevent;
1419134859Sjmg			if (!(kn->kn_status & KN_DETACHED))
1420134859Sjmg				kn->kn_fop->f_detach(kn);
142183366Sjulian			knote_drop(kn, td);
1422133741Sjmg			KQ_LOCK(kq);
1423133741Sjmg			kn = NULL;
142459290Sjlemon		} else {
1425133741Sjmg			kn->kn_status |= KN_INFLUX;
1426133741Sjmg			KQ_UNLOCK(kq);
1427133741Sjmg			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
1428133741Sjmg				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
1429133741Sjmg			KN_LIST_LOCK(kn);
1430133741Sjmg			if (kn->kn_fop->f_event(kn, 0) == 0) {
1431133741Sjmg				KQ_LOCK(kq);
1432157754Sjhb				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1433133741Sjmg				kn->kn_status &=
1434133741Sjmg				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
1435133741Sjmg				kq->kq_count--;
1436150199Sups				KN_LIST_UNLOCK(kn);
1437178914Skib				influx = 1;
1438133741Sjmg				continue;
1439133741Sjmg			}
1440197240Ssson			touch = (!kn->kn_fop->f_isfd &&
1441197240Ssson			    kn->kn_fop->f_touch != NULL);
1442197240Ssson			if (touch)
1443197240Ssson				kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS);
1444197240Ssson			else
1445197240Ssson				*kevp = kn->kn_kevent;
1446133741Sjmg			KQ_LOCK(kq);
1447157754Sjhb			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
1448197242Ssson			if (kn->kn_flags & (EV_CLEAR |  EV_DISPATCH)) {
1449197240Ssson				/*
1450197240Ssson				 * Manually clear knotes who weren't
1451197240Ssson				 * 'touch'ed.
1452197240Ssson				 */
1453197242Ssson				if (touch == 0 && kn->kn_flags & EV_CLEAR) {
1454197240Ssson					kn->kn_data = 0;
1455197240Ssson					kn->kn_fflags = 0;
1456197240Ssson				}
1457197242Ssson				if (kn->kn_flags & EV_DISPATCH)
1458197242Ssson					kn->kn_status |= KN_DISABLED;
1459133741Sjmg				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
1460133741Sjmg				kq->kq_count--;
1461133741Sjmg			} else
1462133741Sjmg				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
1463150199Sups
1464150199Sups			kn->kn_status &= ~(KN_INFLUX);
1465133741Sjmg			KN_LIST_UNLOCK(kn);
1466178914Skib			influx = 1;
146759290Sjlemon		}
1468133741Sjmg
1469133741Sjmg		/* we are returning a copy to the user */
1470133741Sjmg		kevp++;
1471133741Sjmg		nkev++;
147259290Sjlemon		count--;
1473133741Sjmg
147459290Sjlemon		if (nkev == KQ_NEVENTS) {
1475178914Skib			influx = 0;
1476133741Sjmg			KQ_UNLOCK_FLUX(kq);
1477146950Sps			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
147859290Sjlemon			nkev = 0;
1479133741Sjmg			kevp = keva;
1480133741Sjmg			KQ_LOCK(kq);
148159997Sjlemon			if (error)
148259997Sjlemon				break;
148359290Sjlemon		}
148459290Sjlemon	}
1485133794Sgreen	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
148659290Sjlemondone:
1487133741Sjmg	KQ_OWNED(kq);
1488133741Sjmg	KQ_UNLOCK_FLUX(kq);
1489133794Sgreen	knote_free(marker);
1490133741Sjmgdone_nl:
1491133741Sjmg	KQ_NOTOWNED(kq);
149259290Sjlemon	if (nkev != 0)
1493146950Sps		error = k_ops->k_copyout(k_ops->arg, keva, nkev);
1494133741Sjmg	td->td_retval[0] = maxevents - count;
149559290Sjlemon	return (error);
149659290Sjlemon}
149759290Sjlemon
149859290Sjlemon/*
149959290Sjlemon * XXX
150059290Sjlemon * This could be expanded to call kqueue_scan, if desired.
150159290Sjlemon */
150259290Sjlemon/*ARGSUSED*/
150359290Sjlemonstatic int
1504101941Srwatsonkqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
150583366Sjulian	int flags, struct thread *td)
150659290Sjlemon{
150759290Sjlemon	return (ENXIO);
150859290Sjlemon}
150959290Sjlemon
151059290Sjlemon/*ARGSUSED*/
151159290Sjlemonstatic int
1512101941Srwatsonkqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
151383366Sjulian	 int flags, struct thread *td)
151459290Sjlemon{
151559290Sjlemon	return (ENXIO);
151659290Sjlemon}
151759290Sjlemon
151859290Sjlemon/*ARGSUSED*/
151959290Sjlemonstatic int
1520175140Sjhbkqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
1521175140Sjhb	struct thread *td)
1522175140Sjhb{
1523175140Sjhb
1524175140Sjhb	return (EINVAL);
1525175140Sjhb}
1526175140Sjhb
1527175140Sjhb/*ARGSUSED*/
1528175140Sjhbstatic int
1529132138Salfredkqueue_ioctl(struct file *fp, u_long cmd, void *data,
1530102003Srwatson	struct ucred *active_cred, struct thread *td)
153159290Sjlemon{
1532132174Salfred	/*
1533132174Salfred	 * Enabling sigio causes two major problems:
1534132174Salfred	 * 1) infinite recursion:
1535132174Salfred	 * Synopsys: kevent is being used to track signals and have FIOASYNC
1536132174Salfred	 * set.  On receipt of a signal this will cause a kqueue to recurse
1537132174Salfred	 * into itself over and over.  Sending the sigio causes the kqueue
1538132174Salfred	 * to become ready, which in turn posts sigio again, forever.
1539132174Salfred	 * Solution: this can be solved by setting a flag in the kqueue that
1540132174Salfred	 * we have a SIGIO in progress.
1541132174Salfred	 * 2) locking problems:
1542132174Salfred	 * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
1543132174Salfred	 * us above the proc and pgrp locks.
1544132174Salfred	 * Solution: Post a signal using an async mechanism, being sure to
1545132174Salfred	 * record a generation count in the delivery so that we do not deliver
1546132174Salfred	 * a signal to the wrong process.
1547132174Salfred	 *
1548132174Salfred	 * Note, these two mechanisms are somewhat mutually exclusive!
1549132174Salfred	 */
1550132174Salfred#if 0
1551132138Salfred	struct kqueue *kq;
1552132138Salfred
1553132138Salfred	kq = fp->f_data;
1554132138Salfred	switch (cmd) {
1555132138Salfred	case FIOASYNC:
1556132138Salfred		if (*(int *)data) {
1557132138Salfred			kq->kq_state |= KQ_ASYNC;
1558132138Salfred		} else {
1559132138Salfred			kq->kq_state &= ~KQ_ASYNC;
1560132138Salfred		}
1561132138Salfred		return (0);
1562132138Salfred
1563132138Salfred	case FIOSETOWN:
1564132138Salfred		return (fsetown(*(int *)data, &kq->kq_sigio));
1565132138Salfred
1566132138Salfred	case FIOGETOWN:
1567132138Salfred		*(int *)data = fgetown(&kq->kq_sigio);
1568132138Salfred		return (0);
1569132138Salfred	}
1570132174Salfred#endif
1571132138Salfred
157259290Sjlemon	return (ENOTTY);
157359290Sjlemon}
157459290Sjlemon
157559290Sjlemon/*ARGSUSED*/
157659290Sjlemonstatic int
1577101983Srwatsonkqueue_poll(struct file *fp, int events, struct ucred *active_cred,
1578101987Srwatson	struct thread *td)
157959290Sjlemon{
158089306Salfred	struct kqueue *kq;
158159290Sjlemon	int revents = 0;
1582133741Sjmg	int error;
158359290Sjlemon
1584170029Srwatson	if ((error = kqueue_acquire(fp, &kq)))
1585133741Sjmg		return POLLERR;
1586133741Sjmg
1587133741Sjmg	KQ_LOCK(kq);
1588133741Sjmg	if (events & (POLLIN | POLLRDNORM)) {
1589133741Sjmg		if (kq->kq_count) {
1590133741Sjmg			revents |= events & (POLLIN | POLLRDNORM);
159159290Sjlemon		} else {
1592133741Sjmg			selrecord(td, &kq->kq_sel);
1593174647Sjeff			if (SEL_WAITING(&kq->kq_sel))
1594174647Sjeff				kq->kq_state |= KQ_SEL;
159559290Sjlemon		}
159659290Sjlemon	}
1597133741Sjmg	kqueue_release(kq, 1);
1598133741Sjmg	KQ_UNLOCK(kq);
159959290Sjlemon	return (revents);
160059290Sjlemon}
160159290Sjlemon
160259290Sjlemon/*ARGSUSED*/
160359290Sjlemonstatic int
1604101983Srwatsonkqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
1605101987Srwatson	struct thread *td)
160659290Sjlemon{
160759290Sjlemon
1608146603Sjmg	bzero((void *)st, sizeof *st);
1609146603Sjmg	/*
1610146603Sjmg	 * We no longer return kq_count because the unlocked value is useless.
1611146603Sjmg	 * If you spent all this time getting the count, why not spend your
1612146603Sjmg	 * syscall better by calling kevent?
1613146603Sjmg	 *
1614146603Sjmg	 * XXX - This is needed for libc_r.
1615146603Sjmg	 */
1616146603Sjmg	st->st_mode = S_IFIFO;
1617146603Sjmg	return (0);
161859290Sjlemon}
161959290Sjlemon
162059290Sjlemon/*ARGSUSED*/
162159290Sjlemonstatic int
162283366Sjuliankqueue_close(struct file *fp, struct thread *td)
162359290Sjlemon{
1624109153Sdillon	struct kqueue *kq = fp->f_data;
1625133741Sjmg	struct filedesc *fdp;
1626133741Sjmg	struct knote *kn;
162759290Sjlemon	int i;
1628133741Sjmg	int error;
162959290Sjlemon
1630170029Srwatson	if ((error = kqueue_acquire(fp, &kq)))
1631133741Sjmg		return error;
1632133741Sjmg
1633133741Sjmg	KQ_LOCK(kq);
1634133741Sjmg
1635133741Sjmg	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
1636133741Sjmg	    ("kqueue already closing"));
1637133741Sjmg	kq->kq_state |= KQ_CLOSING;
1638133741Sjmg	if (kq->kq_refcnt > 1)
1639133741Sjmg		msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);
1640133741Sjmg
1641133741Sjmg	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
1642133741Sjmg	fdp = kq->kq_fdp;
1643133741Sjmg
1644133741Sjmg	KASSERT(knlist_empty(&kq->kq_sel.si_note),
1645133741Sjmg	    ("kqueue's knlist not empty"));
1646133741Sjmg
1647133741Sjmg	for (i = 0; i < kq->kq_knlistsize; i++) {
1648133741Sjmg		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
1649178913Skib			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
1650178913Skib				kq->kq_state |= KQ_FLUXWAIT;
1651178913Skib				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
1652178913Skib				continue;
1653178913Skib			}
1654133741Sjmg			kn->kn_status |= KN_INFLUX;
1655133741Sjmg			KQ_UNLOCK(kq);
1656134859Sjmg			if (!(kn->kn_status & KN_DETACHED))
1657134859Sjmg				kn->kn_fop->f_detach(kn);
1658133741Sjmg			knote_drop(kn, td);
1659133741Sjmg			KQ_LOCK(kq);
166059290Sjlemon		}
166159290Sjlemon	}
1662133741Sjmg	if (kq->kq_knhashmask != 0) {
1663133741Sjmg		for (i = 0; i <= kq->kq_knhashmask; i++) {
1664133741Sjmg			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
1665178913Skib				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
1666178913Skib					kq->kq_state |= KQ_FLUXWAIT;
1667178913Skib					msleep(kq, &kq->kq_lock, PSOCK,
1668178913Skib					       "kqclo2", 0);
1669178913Skib					continue;
1670178913Skib				}
1671133741Sjmg				kn->kn_status |= KN_INFLUX;
1672133741Sjmg				KQ_UNLOCK(kq);
1673134859Sjmg				if (!(kn->kn_status & KN_DETACHED))
1674134859Sjmg					kn->kn_fop->f_detach(kn);
1675133741Sjmg				knote_drop(kn, td);
1676133741Sjmg				KQ_LOCK(kq);
167759290Sjlemon			}
167859290Sjlemon		}
167959290Sjlemon	}
1680133741Sjmg
1681133741Sjmg	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
1682133741Sjmg		kq->kq_state |= KQ_TASKDRAIN;
1683133741Sjmg		msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
1684133741Sjmg	}
1685133741Sjmg
1686133741Sjmg	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
1687126033Sgreen		selwakeuppri(&kq->kq_sel, PSOCK);
1688174647Sjeff		if (!SEL_WAITING(&kq->kq_sel))
1689174647Sjeff			kq->kq_state &= ~KQ_SEL;
1690126033Sgreen	}
1691133741Sjmg
1692133741Sjmg	KQ_UNLOCK(kq);
1693133741Sjmg
1694168355Srwatson	FILEDESC_XLOCK(fdp);
1695133741Sjmg	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
1696168355Srwatson	FILEDESC_XUNLOCK(fdp);
1697133741Sjmg
1698133741Sjmg	knlist_destroy(&kq->kq_sel.si_note);
1699133741Sjmg	mtx_destroy(&kq->kq_lock);
1700133741Sjmg	kq->kq_fdp = NULL;
1701133741Sjmg
1702133741Sjmg	if (kq->kq_knhash != NULL)
1703133741Sjmg		free(kq->kq_knhash, M_KQUEUE);
1704133741Sjmg	if (kq->kq_knlist != NULL)
1705133741Sjmg		free(kq->kq_knlist, M_KQUEUE);
1706133741Sjmg
1707132138Salfred	funsetown(&kq->kq_sigio);
170884138Sjlemon	free(kq, M_KQUEUE);
1709109153Sdillon	fp->f_data = NULL;
171059290Sjlemon
171159290Sjlemon	return (0);
171259290Sjlemon}
171359290Sjlemon
static void
kqueue_wakeup(struct kqueue *kq)
{
	/*
	 * Notify everything that may be waiting for activity on this
	 * kqueue: threads sleeping on it (KQ_SLEEP), select()/poll()
	 * waiters (KQ_SEL), knotes attached to the kqueue itself, and
	 * SIGIO listeners (KQ_ASYNC).  Called with the kq lock held.
	 */
	KQ_OWNED(kq);

	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		selwakeuppri(&kq->kq_sel, PSOCK);
		/* Keep KQ_SEL set while other threads are still selecting. */
		if (!SEL_WAITING(&kq->kq_sel))
			kq->kq_state &= ~KQ_SEL;
	}
	/* A kqueue watching this kqueue: activate its knotes via the task. */
	if (!knlist_empty(&kq->kq_sel.si_note))
		kqueue_schedtask(kq);
	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
		pgsigio(&kq->kq_sigio, SIGIO, 0);
	}
}
173459290Sjlemon
173559290Sjlemon/*
1736133741Sjmg * Walk down a list of knotes, activating them if their event has triggered.
1737133741Sjmg *
1738133741Sjmg * There is a possibility to optimize in the case of one kq watching another.
1739133741Sjmg * Instead of scheduling a task to wake it up, you could pass enough state
1740133741Sjmg * down the chain to make up the parent kqueue.  Make this code functional
1741133741Sjmg * first.
174259290Sjlemon */
void
knote(struct knlist *list, long hint, int lockflags)
{
	struct kqueue *kq;
	struct knote *kn;
	int error;

	if (list == NULL)
		return;

	/* Caller declares via KNF_LISTLOCKED whether it holds the list lock. */
	KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED);

	if ((lockflags & KNF_LISTLOCKED) == 0)
		list->kl_lock(list->kl_lockarg);

	/*
	 * If we unlock the list lock (and set KN_INFLUX), we can eliminate
	 * the kqueue scheduling, but this will introduce four
	 * lock/unlock's for each knote to test.  If we do, continue to use
	 * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is
	 * only safe if you want to remove the current item, which we are
	 * not doing.
	 */
	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		kq = kn->kn_kq;
		if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
			KQ_LOCK(kq);
			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
				/* Went in-flux after the unlocked check: skip. */
				KQ_UNLOCK(kq);
			} else if ((lockflags & KNF_NOKQLOCK) != 0) {
				/*
				 * Caller wants f_event called without the kq
				 * lock; mark the knote in-flux so it cannot
				 * be torn down while the lock is dropped.
				 */
				kn->kn_status |= KN_INFLUX;
				KQ_UNLOCK(kq);
				error = kn->kn_fop->f_event(kn, hint);
				KQ_LOCK(kq);
				kn->kn_status &= ~KN_INFLUX;
				if (error)
					KNOTE_ACTIVATE(kn, 1);
				KQ_UNLOCK_FLUX(kq);
			} else {
				/* f_event runs with the kq lock held. */
				kn->kn_status |= KN_HASKQLOCK;
				if (kn->kn_fop->f_event(kn, hint))
					KNOTE_ACTIVATE(kn, 1);
				kn->kn_status &= ~KN_HASKQLOCK;
				KQ_UNLOCK(kq);
			}
		}
		kq = NULL;
	}
	if ((lockflags & KNF_LISTLOCKED) == 0)
		list->kl_unlock(list->kl_lockarg);
}
179459290Sjlemon
179559290Sjlemon/*
1796133741Sjmg * add a knote to a knlist
1797133741Sjmg */
void
knlist_add(struct knlist *knl, struct knote *kn, int islocked)
{
	/* The knote must be in-flux and detached before it can be added. */
	KNL_ASSERT_LOCK(knl, islocked);
	KQ_NOTOWNED(kn->kn_kq);
	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
	if (!islocked)
		knl->kl_lock(knl->kl_lockarg);
	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
	if (!islocked)
		knl->kl_unlock(knl->kl_lockarg);
	/* Record the owning list and clear KN_DETACHED under the kq lock. */
	KQ_LOCK(kn->kn_kq);
	kn->kn_knlist = knl;
	kn->kn_status &= ~KN_DETACHED;
	KQ_UNLOCK(kn->kn_kq);
}
1815133741Sjmg
/*
 * Remove kn from knl.  knlislocked/kqislocked tell which locks the caller
 * already holds; holding the kq lock without the knl lock is not allowed.
 */
static void
knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
{
	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
	KNL_ASSERT_LOCK(knl, knlislocked);
	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
	/* Without the kq lock, only an in-flux knote may be removed safely. */
	if (!kqislocked)
		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
	if (!knlislocked)
		knl->kl_lock(knl->kl_lockarg);
	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
	kn->kn_knlist = NULL;
	if (!knlislocked)
		knl->kl_unlock(knl->kl_lockarg);
	/* Mark the knote detached under the kq lock. */
	if (!kqislocked)
		KQ_LOCK(kn->kn_kq);
	kn->kn_status |= KN_DETACHED;
	if (!kqislocked)
		KQ_UNLOCK(kn->kn_kq);
}
1837133741Sjmg
/*
 * remove a knote from a specified knlist
 */
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
{

	/* Caller must not hold the kq lock (kqislocked == 0). */
	knlist_remove_kq(knl, kn, islocked, 0);
}
1847133741Sjmg
1848133741Sjmg/*
1849133741Sjmg * remove knote from a specified klist while in f_event handler.
1850133741Sjmg */
void
knlist_remove_inevent(struct knlist *knl, struct knote *kn)
{

	/*
	 * Inside f_event the kq lock is held iff KN_HASKQLOCK is set
	 * (see knote()); pass that state down so locks are not retaken.
	 */
	knlist_remove_kq(knl, kn, 1,
	    (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
}
1858133741Sjmg
1859133741Sjmgint
1860133741Sjmgknlist_empty(struct knlist *knl)
1861133741Sjmg{
1862147730Sssouhlal	KNL_ASSERT_LOCKED(knl);
1863133741Sjmg	return SLIST_EMPTY(&knl->kl_list);
1864133741Sjmg}
1865133741Sjmg
/*
 * Shared fallback mutex and lock-op helpers used by knlists created
 * without a private lock (knlist_init() with lock == NULL).
 */
static struct mtx	knlist_lock;
MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
	MTX_DEF);
static void knlist_mtx_lock(void *arg);
static void knlist_mtx_unlock(void *arg);
1871133741Sjmg
/* Default kl_lock callback: arg is the knlist's mutex. */
static void
knlist_mtx_lock(void *arg)
{
	struct mtx *m;

	m = arg;
	mtx_lock(m);
}
1877147730Sssouhlal
/* Default kl_unlock callback: arg is the knlist's mutex. */
static void
knlist_mtx_unlock(void *arg)
{
	struct mtx *m;

	m = arg;
	mtx_unlock(m);
}
1883147730Sssouhlal
1884193951Skibstatic void
1885193951Skibknlist_mtx_assert_locked(void *arg)
1886147730Sssouhlal{
1887193951Skib	mtx_assert((struct mtx *)arg, MA_OWNED);
1888147730Sssouhlal}
1889147730Sssouhlal
1890193951Skibstatic void
1891193951Skibknlist_mtx_assert_unlocked(void *arg)
1892193951Skib{
1893193951Skib	mtx_assert((struct mtx *)arg, MA_NOTOWNED);
1894193951Skib}
1895193951Skib
1896133741Sjmgvoid
1897147730Sssouhlalknlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
1898193951Skib    void (*kl_unlock)(void *),
1899193951Skib    void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *))
1900133741Sjmg{
1901133741Sjmg
1902147730Sssouhlal	if (lock == NULL)
1903147730Sssouhlal		knl->kl_lockarg = &knlist_lock;
1904133741Sjmg	else
1905147730Sssouhlal		knl->kl_lockarg = lock;
1906133741Sjmg
1907147730Sssouhlal	if (kl_lock == NULL)
1908147730Sssouhlal		knl->kl_lock = knlist_mtx_lock;
1909147730Sssouhlal	else
1910147730Sssouhlal		knl->kl_lock = kl_lock;
1911157582Sjmg	if (kl_unlock == NULL)
1912147730Sssouhlal		knl->kl_unlock = knlist_mtx_unlock;
1913147730Sssouhlal	else
1914147730Sssouhlal		knl->kl_unlock = kl_unlock;
1915193951Skib	if (kl_assert_locked == NULL)
1916193951Skib		knl->kl_assert_locked = knlist_mtx_assert_locked;
1917147730Sssouhlal	else
1918193951Skib		knl->kl_assert_locked = kl_assert_locked;
1919193951Skib	if (kl_assert_unlocked == NULL)
1920193951Skib		knl->kl_assert_unlocked = knlist_mtx_assert_unlocked;
1921193951Skib	else
1922193951Skib		knl->kl_assert_unlocked = kl_assert_unlocked;
1923147730Sssouhlal
1924133741Sjmg	SLIST_INIT(&knl->kl_list);
1925133741Sjmg}
1926133741Sjmg
void
knlist_init_mtx(struct knlist *knl, struct mtx *lock)
{

	/* Convenience wrapper: knl is protected by the mutex "lock". */
	knlist_init(knl, lock, NULL, NULL, NULL, NULL);
}
1933193951Skib
1934193951Skibvoid
1935133741Sjmgknlist_destroy(struct knlist *knl)
1936133741Sjmg{
1937133741Sjmg
1938133741Sjmg#ifdef INVARIANTS
1939133741Sjmg	/*
1940133741Sjmg	 * if we run across this error, we need to find the offending
1941133741Sjmg	 * driver and have it call knlist_clear.
1942133741Sjmg	 */
1943133741Sjmg	if (!SLIST_EMPTY(&knl->kl_list))
1944133741Sjmg		printf("WARNING: destroying knlist w/ knotes on it!\n");
1945133741Sjmg#endif
1946133741Sjmg
1947147730Sssouhlal	knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL;
1948133741Sjmg	SLIST_INIT(&knl->kl_list);
1949133741Sjmg}
1950133741Sjmg
1951133741Sjmg/*
1952133741Sjmg * Even if we are locked, we may need to drop the lock to allow any influx
1953133741Sjmg * knotes time to "settle".
1954133741Sjmg */
void
knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
{
	struct knote *kn, *kn2;
	struct kqueue *kq;

	if (islocked)
		KNL_ASSERT_LOCKED(knl);
	else {
		KNL_ASSERT_UNLOCKED(knl);
again:		/* need to reacquire lock since we have dropped it */
		knl->kl_lock(knl->kl_lockarg);
	}

	/*
	 * Detach every knote that is not in-flux; in-flux knotes are
	 * skipped here and waited for below.
	 */
	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		if ((kn->kn_status & KN_INFLUX)) {
			KQ_UNLOCK(kq);
			continue;
		}
		knlist_remove_kq(knl, kn, 1, 1);
		if (killkn) {
			/* Destroy the knote outright. */
			kn->kn_status |= KN_INFLUX | KN_DETACHED;
			KQ_UNLOCK(kq);
			knote_drop(kn, td);
		} else {
			/* Make sure cleared knotes disappear soon */
			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
			KQ_UNLOCK(kq);
		}
		kq = NULL;
	}

	if (!SLIST_EMPTY(&knl->kl_list)) {
		/* there are still KN_INFLUX remaining */
		kn = SLIST_FIRST(&knl->kl_list);
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		KASSERT(kn->kn_status & KN_INFLUX,
		    ("knote removed w/o list lock"));
		/*
		 * Drop the list lock, sleep until the in-flux knote
		 * settles (KQ_FLUXWAIT wakeup), then start over.
		 */
		knl->kl_unlock(knl->kl_lockarg);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
		kq = NULL;
		goto again;
	}

	if (islocked)
		KNL_ASSERT_LOCKED(knl);
	else {
		knl->kl_unlock(knl->kl_lockarg);
		KNL_ASSERT_UNLOCKED(knl);
	}
}
201059290Sjlemon
/*
 * Remove all knotes referencing a specified fd.  Must be called with the
 * FILEDESC lock held: this prevents a race where a new fd comes along
 * and occupies the entry and we attach a knote to the fd.
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct kqueue *kq;
	struct knote *kn;
	int influx;

	FILEDESC_XLOCK_ASSERT(fdp);

	/*
	 * We shouldn't have to worry about new kevents appearing on fd
	 * since filedesc is locked.
	 */
	SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
		KQ_LOCK(kq);

again:
		influx = 0;
		while (kq->kq_knlistsize > fd &&
		    (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
			if (kn->kn_status & KN_INFLUX) {
				/* someone else might be waiting on our knote */
				if (influx)
					wakeup(kq);
				kq->kq_state |= KQ_FLUXWAIT;
				msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
				goto again;
			}
			/* Take ownership, then detach and drop unlocked. */
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			influx = 1;
			KQ_LOCK(kq);
		}
		KQ_UNLOCK_FLUX(kq);
	}
}
205659290Sjlemon
/*
 * Link kn into the kqueue's per-fd list or hash table.  Called with the
 * kq lock held and the knote marked KN_INFLUX.  Returns ENOMEM when the
 * backing table has not been sized to hold the identifier.
 */
static int
knote_attach(struct knote *kn, struct kqueue *kq)
{
	struct klist *list;

	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
	KQ_OWNED(kq);

	if (kn->kn_fop->f_isfd) {
		/* fd-backed knotes index the per-fd knlist array. */
		if (kn->kn_id >= kq->kq_knlistsize)
			return ENOMEM;
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		/* All other knotes hash by identifier. */
		if (kq->kq_knhash == NULL)
			return ENOMEM;
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}

	SLIST_INSERT_HEAD(list, kn, kn_link);

	return 0;
}
207959290Sjlemon
208059290Sjlemon/*
2081151260Sambrisko * knote must already have been detached using the f_detach method.
2082133741Sjmg * no lock need to be held, it is assumed that the KN_INFLUX flag is set
2083133741Sjmg * to prevent other removal.
208459290Sjlemon */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	KQ_NOTOWNED(kq);
	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
	    ("knote_drop called without KN_INFLUX set in kn_status"));

	KQ_LOCK(kq);
	/* Locate the list the knote was placed on by knote_attach(). */
	if (kn->kn_fop->f_isfd)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	if (!SLIST_EMPTY(list))
		SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	KQ_UNLOCK_FLUX(kq);

	/* Drop the file reference held for fd-based knotes. */
	if (kn->kn_fop->f_isfd) {
		fdrop(kn->kn_fp, td);
		kn->kn_fp = NULL;
	}
	/* Release the filter-ops reference and free the knote itself. */
	kqueue_fo_release(kn->kn_kevent.filter);
	kn->kn_fop = NULL;
	knote_free(kn);
}
211759290Sjlemon
/*
 * Put kn on its kqueue's active queue and wake up waiters.  Called with
 * the kq lock held; the knote must not already be queued.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_wakeup(kq);
}
213159290Sjlemon
/*
 * Remove kn from its kqueue's active queue.  Called with the kq lock
 * held; the knote must currently be queued.
 */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
}
214459290Sjlemon
static void
knote_init(void)
{

	/* Create the UMA zone backing all knote allocations. */
	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
215359290Sjlemon
215459290Sjlemonstatic struct knote *
2155133741Sjmgknote_alloc(int waitok)
215659290Sjlemon{
2157133741Sjmg	return ((struct knote *)uma_zalloc(knote_zone,
2158133741Sjmg	    (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
215959290Sjlemon}
216059290Sjlemon
216159290Sjlemonstatic void
216259290Sjlemonknote_free(struct knote *kn)
216359290Sjlemon{
2164133741Sjmg	if (kn != NULL)
2165133741Sjmg		uma_zfree(knote_zone, kn);
216659290Sjlemon}
2167162594Sjmg
2168162594Sjmg/*
2169162594Sjmg * Register the kev w/ the kq specified by fd.
2170162594Sjmg */
int
kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok)
{
	struct kqueue *kq;
	struct file *fp;
	int error;

	/* Resolve fd to a file, then to its kqueue, taking references. */
	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto noacquire;

	error = kqueue_register(kq, kev, td, waitok);

	kqueue_release(kq, 0);

noacquire:
	/* Balance the fget() reference on all paths. */
	fdrop(fp, td);

	return error;
}
2192