kern_event.c revision 147730
/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 147730 2005-07-01 16:28:32Z ssouhlal $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/sigio.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>

#include <vm/uma.h>

static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

/*
 * This lock is used if multiple kq locks are required.  This possibly
 * should be made into a per proc lock.
 */
static struct mtx	kq_global;
MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
#define KQ_GLOBAL_LOCK(lck, haslck)	do {	\
	if (!haslck)				\
		mtx_lock(lck);			\
	haslck = 1;				\
} while (0)
#define KQ_GLOBAL_UNLOCK(lck, haslck)	do {	\
	if (haslck)				\
		mtx_unlock(lck);		\
	haslck = 0;				\
} while (0)
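
/*
 * Usage sketch (illustrative, mirroring the callers below): the caller
 * tracks ownership in a local flag, so the unlock is a no-op when the
 * lock was never taken:
 *
 *	int haskqglobal = 0;
 *	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
 *	...
 *	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 */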

TASKQUEUE_DEFINE_THREAD(kqueue);

static int	kevent_copyout(void *arg, struct kevent *kevp, int count);
static int	kevent_copyin(void *arg, struct kevent *kevp, int count);
static int	kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void	kqueue_release(struct kqueue *kq, int locked);
static int	kqueue_expand(struct kqueue *kq, struct filterops *fops,
		    uintptr_t ident, int waitok);
static void	kqueue_task(void *arg, int pending);
static int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent_copyops *k_ops,
		    const struct timespec *timeout,
		    struct kevent *keva, struct thread *td);
static void 	kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void	kqueue_fo_release(int filt);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};

static int 	knote_attach(struct knote *kn, struct kqueue *kq);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(int waitok);
static void 	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
/* XXX - move to kern_proc.c?  */
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int 		kq_ncallouts = 0;
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

/* XXX - ensure not KN_INFLUX?? */
#define KNOTE_ACTIVATE(kn, islock) do { 				\
	if ((islock))							\
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
	else								\
		KQ_LOCK((kn)->kn_kq);					\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue((kn));					\
	if (!(islock))							\
		KQ_UNLOCK((kn)->kn_kq);					\
} while (0)
#define KQ_LOCK(kq) do {						\
	mtx_lock(&(kq)->kq_lock);					\
} while (0)
#define KQ_FLUX_WAKEUP(kq) do {						\
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {		\
		(kq)->kq_state &= ~KQ_FLUXWAIT;				\
		wakeup((kq));						\
	}								\
} while (0)
#define KQ_UNLOCK_FLUX(kq) do {						\
	KQ_FLUX_WAKEUP(kq);						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_UNLOCK(kq) do {						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_OWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_OWNED);				\
} while (0)
#define KQ_NOTOWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
} while (0)
#define KN_LIST_LOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define KN_LIST_UNLOCK(kn) do {						\
	if (kn->kn_knlist != NULL) 					\
		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);					\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	if (!knl->kl_locked((knl)->kl_lockarg))				\
		panic("knlist not locked, but should be");		\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	if (knl->kl_locked((knl)->kl_lockarg))				\
		panic("knlist locked, but should not be");		\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
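/*
 * Illustrative example (values chosen for exposition): with KN_HASHSIZE 64
 * the mask is 63, so an ident of 0x1234 hashes to (0x1234 ^ 0x12) & 63,
 * i.e. bucket 38; xoring in the high byte spreads idents that differ only
 * in bits above the mask.
 */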

static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters.
 */
static struct mtx	filterops_lock;
MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
	MTX_DEF);
static struct {
	struct filterops *for_fop;
	int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
	{ &file_filtops },			/* EVFILT_READ */
	{ &file_filtops },			/* EVFILT_WRITE */
	{ &null_filtops },			/* EVFILT_AIO */
	{ &file_filtops },			/* EVFILT_VNODE */
	{ &proc_filtops },			/* EVFILT_PROC */
	{ &sig_filtops },			/* EVFILT_SIGNAL */
	{ &timer_filtops },			/* EVFILT_TIMER */
	{ &file_filtops },			/* EVFILT_NETDEV */
	{ &fs_filtops },			/* EVFILT_FS */
};

/*
 * Simple redirection for all cdevsw style objects to call their fo_kqfilter
 * method.
 */
static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_status |= KN_KQUEUE;
	kn->kn_fop = &kqread_filtops;
	knlist_add(&kq->kq_sel.si_note, kn, 0);

	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	knlist_remove(&kq->kq_sel.si_note, kn, 0);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

/* XXX - move to kern_proc.c?  */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;
	int error;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
		immediate = 1;
	}

	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p)))
		return (error);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	if (immediate == 0)
		knlist_add(&p->p_klist, kn, 1);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn, 0);

	PROC_UNLOCK(p);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
/* XXX - move to kern_proc.c?  */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	knlist_remove(&p->p_klist, kn, 0);
	kn->kn_ptr.p_proc = NULL;
}

/* XXX - move to kern_proc.c?  */
static int
filt_proc(struct knote *kn, long hint)
{
	struct proc *p = kn->kn_ptr.p_proc;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		if (!(kn->kn_status & KN_DETACHED))
			knlist_remove_inevent(&p->p_klist, kn);
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_ptr.p_proc = NULL;
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL, 0);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static int
timertoticks(intptr_t data)
{
	struct timeval tv;
	int tticks;

	tv.tv_sec = data / 1000;
	tv.tv_usec = (data % 1000) * 1000;
	tticks = tvtohz(&tv);

	return tticks;
}
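
/*
 * Worked example (illustrative): data = 1500 (ms) gives
 * tv = { .tv_sec = 1, .tv_usec = 500000 }, so tvtohz() returns roughly
 * 1.5 * hz ticks (plus tvtohz()'s rounding); with hz = 100 that is ~150.
 */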

/* XXX - move to kern_timeout.c? */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */

	if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, timertoticks(kn->kn_sdata),
		    filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
/* XXX - move to kern_timeout.c? */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;

	atomic_add_int(&kq_ncallouts, 1);

	if (kq_ncallouts >= kq_calloutmax) {
		atomic_add_int(&kq_ncallouts, -1);
		return (ENOMEM);
	}

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	kn->kn_status &= ~KN_DETACHED;		/* knlist_add usually sets it */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp, CALLOUT_MPSAFE);
	kn->kn_hook = calloutp;
	callout_reset(calloutp, timertoticks(kn->kn_sdata), filt_timerexpire,
	    kn);

	return (0);
}

/* XXX - move to kern_timeout.c? */
static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_drain(calloutp);
	FREE(calloutp, M_KQUEUE);
	atomic_add_int(&kq_ncallouts, -1);
	kn->kn_status |= KN_DETACHED;	/* knlist_remove usually clears it */
}

/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
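
/*
 * Userland usage sketch (illustrative only, not part of this file):
 *
 *	int kq = kqueue();
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	(register the change)
 *
 * The syscalls below implement this interface.
 */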

/*
 * MPSAFE
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;

	/* An extra reference on `fp' has been held for us by falloc(). */
	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
	TAILQ_INIT(&kq->kq_head);
	kq->kq_fdp = fdp;
	knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

	FILEDESC_LOCK_FAST(fdp);
	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
	FILEDESC_UNLOCK_FAST(fdp);

	FILE_LOCK(fp);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	FILE_UNLOCK(fp);
	fdrop(fp, td);

	td->td_retval[0] = fd;
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * MPSAFE
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	struct kevent_copyops k_ops = { uap,
					kevent_copyout,
					kevent_copyin};
	int error;

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	return (kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
	    &k_ops, tsp));
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;
	return (error);
}
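
/*
 * In-kernel entry point.  The kevent_copyops indirection lets a caller
 * supply its own changelist/eventlist transfer routines, so consumers
 * other than the kevent() syscall above can reuse this path.
 */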

int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kevent keva[KQ_NEVENTS];
	struct kevent *kevp, *changes;
	struct kqueue *kq;
	struct file *fp;
	int i, n, nerrors, error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto done_norel;

	nerrors = 0;

	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = k_ops->k_copyin(k_ops->arg, keva, n);
		if (error)
			goto done;
		changes = keva;
		for (i = 0; i < n; i++) {
			kevp = &changes[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td, 1);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) k_ops->k_copyout(k_ops->arg,
					    kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
done:
	kqueue_release(kq, 0);
done_norel:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{
	int error;

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) {
		printf(
"trying to add a filterop that is out of range: %d is beyond %d\n",
		    ~filt, EVFILT_SYSCOUNT);
		return EINVAL;
	}
	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
	    sysfilt_ops[~filt].for_fop != NULL)
		error = EEXIST;
	else {
		sysfilt_ops[~filt].for_fop = filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return (0);
}

int
kqueue_del_filteropts(int filt)
{
	int error;

	error = 0;
	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return EINVAL;

	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop == &null_filtops ||
	    sysfilt_ops[~filt].for_fop == NULL)
		error = EINVAL;
	else if (sysfilt_ops[~filt].for_refcnt != 0)
		error = EBUSY;
	else {
		sysfilt_ops[~filt].for_fop = &null_filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return error;
}

static struct filterops *
kqueue_fo_find(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return NULL;

	mtx_lock(&filterops_lock);
	sysfilt_ops[~filt].for_refcnt++;
	if (sysfilt_ops[~filt].for_fop == NULL)
		sysfilt_ops[~filt].for_fop = &null_filtops;
	mtx_unlock(&filterops_lock);

	return sysfilt_ops[~filt].for_fop;
}

static void
kqueue_fo_release(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return;

	mtx_lock(&filterops_lock);
	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
	    ("filter object refcount not valid on release"));
	sysfilt_ops[~filt].for_refcnt--;
	mtx_unlock(&filterops_lock);
}

/*
 * A ref to kq (obtained via kqueue_acquire) should be held.  waitok will
 * influence whether memory allocation should wait.  Make sure it is 0 if you
 * hold any mutexes.
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok)
{
	struct filedesc *fdp;
	struct filterops *fops;
	struct file *fp;
	struct knote *kn, *tkn;
	int error, filt, event;
	int haskqglobal;
	int fd;

	fdp = NULL;
	fp = NULL;
	kn = NULL;
	error = 0;
	haskqglobal = 0;

	filt = kev->filter;
	fops = kqueue_fo_find(filt);
	if (fops == NULL)
		return EINVAL;

	tkn = knote_alloc(waitok);		/* prevent waiting with locks */

findkn:
	if (fops->f_isfd) {
		KASSERT(td != NULL, ("td is NULL"));
		fdp = td->td_proc->p_fd;
		FILEDESC_LOCK(fdp);
		/* validate descriptor */
		fd = kev->ident;
		if (fd < 0 || fd >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[fd]) == NULL) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			goto done;
		}
		fhold(fp);

		if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
		    kev->ident, 0) != 0) {
			/* unlock and try again */
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			fp = NULL;
			error = kqueue_expand(kq, fops, kev->ident, waitok);
			if (error)
				goto done;
			goto findkn;
		}

		if (fp->f_type == DTYPE_KQUEUE) {
			/*
			 * if we add some intelligence about what we are doing,
			 * we should be able to support events on ourselves.
			 * We need to know when we are doing this to prevent
			 * getting both the knlist lock and the kq lock since
			 * they are the same thing.
			 */
			if (fp->f_data == kq) {
				FILEDESC_UNLOCK(fdp);
				error = EINVAL;
				goto done_noglobal;
			}

			KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
		}

		FILEDESC_UNLOCK(fdp);
		KQ_LOCK(kq);
		if (kev->ident < kq->kq_knlistsize) {
			SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
				if (kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if ((kev->flags & EV_ADD) == EV_ADD)
			kqueue_expand(kq, fops, kev->ident, waitok);

		KQ_LOCK(kq);
		if (kq->kq_knhashmask != 0) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	/* knote is in the process of changing, wait for it to stabilize. */
	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
		if (fp != NULL) {
			fdrop(fp, td);
			fp = NULL;
		}
		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
		goto findkn;
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		KQ_UNLOCK(kq);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = tkn;
			tkn = NULL;
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;
			/*
			 * apply reference counts to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fops = NULL;
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_status = KN_INFLUX|KN_DETACHED;

			error = knote_attach(kn, kq);
			KQ_UNLOCK(kq);
			if (error != 0) {
				tkn = kn;
				goto done;
			}

			if ((error = kn->kn_fop->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
			KN_LIST_LOCK(kn);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			KN_LIST_LOCK(kn);
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		/*
		 * We can get here with kn->kn_knlist == NULL.
		 * This can happen when the initial attach event decides that
		 * the event is "completed" already.  i.e. filt_procattach
		 * is called on a zombie process.  It will call filt_proc
		 * which will remove it from the list, and NULL kn_knlist.
		 */
		event = kn->kn_fop->f_event(kn, 0);
		KN_LIST_UNLOCK(kn);
		KQ_LOCK(kq);
		if (event)
			KNOTE_ACTIVATE(kn, 1);
		kn->kn_status &= ~KN_INFLUX;
	} else if (kev->flags & EV_DELETE) {
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		if (!(kn->kn_status & KN_DETACHED))
			kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		kn->kn_status |= KN_DISABLED;
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
	}
	KQ_UNLOCK_FLUX(kq);

done:
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
done_noglobal:
	if (fp != NULL)
		fdrop(fp, td);
	if (tkn != NULL)
		knote_free(tkn);
	if (fops != NULL)
		kqueue_fo_release(filt);
	return (error);
}

static int
kqueue_acquire(struct file *fp, struct kqueue **kqp)
{
	int error;
	struct kqueue *kq;

	error = 0;

	FILE_LOCK(fp);
	do {
		kq = fp->f_data;
		if (fp->f_type != DTYPE_KQUEUE || kq == NULL) {
			error = EBADF;
			break;
		}
		*kqp = kq;
		KQ_LOCK(kq);
		if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
			KQ_UNLOCK(kq);
			error = EBADF;
			break;
		}
		kq->kq_refcnt++;
		KQ_UNLOCK(kq);
	} while (0);
	FILE_UNLOCK(fp);

	return error;
}

static void
kqueue_release(struct kqueue *kq, int locked)
{
	if (locked)
		KQ_OWNED(kq);
	else
		KQ_LOCK(kq);
	kq->kq_refcnt--;
	if (kq->kq_refcnt == 1)
		wakeup(&kq->kq_refcnt);
	if (!locked)
		KQ_UNLOCK(kq);
}

static void
kqueue_schedtask(struct kqueue *kq)
{

	KQ_OWNED(kq);
	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
	    ("scheduling kqueue task while draining"));

	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
		taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
		kq->kq_state |= KQ_TASKSCHED;
	}
}

/*
 * Expand the kq to make sure we have storage for fops/ident pair.
 *
 * Return 0 on success (or no work necessary), return errno on failure.
 *
 * Not calling hashinit w/ waitok (proper malloc flag) should be safe.
 * If kqueue_register is called from a non-fd context, there usually
 * should be no locks held.
 */
static int
kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
	int waitok)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;
	int fd;
	int mflag = waitok ? M_WAITOK : M_NOWAIT;

	KQ_NOTOWNED(kq);

	if (fops->f_isfd) {
		fd = ident;
		if (kq->kq_knlistsize <= fd) {
			size = kq->kq_knlistsize;
			while (size <= fd)
				size += KQEXTENT;
			MALLOC(list, struct klist *,
			    size * sizeof list, M_KQUEUE, mflag);
			if (list == NULL)
				return ENOMEM;
			KQ_LOCK(kq);
			if (kq->kq_knlistsize > fd) {
				FREE(list, M_KQUEUE);
				list = NULL;
			} else {
				if (kq->kq_knlist != NULL) {
					bcopy(kq->kq_knlist, list,
					    kq->kq_knlistsize * sizeof list);
					FREE(kq->kq_knlist, M_KQUEUE);
					kq->kq_knlist = NULL;
				}
				bzero((caddr_t)list +
				    kq->kq_knlistsize * sizeof list,
				    (size - kq->kq_knlistsize) * sizeof list);
				kq->kq_knlistsize = size;
				kq->kq_knlist = list;
			}
			KQ_UNLOCK(kq);
		}
	} else {
		if (kq->kq_knhashmask == 0) {
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			if (tmp_knhash == NULL)
				return ENOMEM;
			KQ_LOCK(kq);
			if (kq->kq_knhashmask == 0) {
				kq->kq_knhash = tmp_knhash;
				kq->kq_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
			KQ_UNLOCK(kq);
		}
	}

	KQ_NOTOWNED(kq);
	return 0;
}

static void
kqueue_task(void *arg, int pending)
{
	struct kqueue *kq;
	int haskqglobal;

	haskqglobal = 0;
	kq = arg;

	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
	KQ_LOCK(kq);

	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);

	kq->kq_state &= ~KQ_TASKSCHED;
	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
		wakeup(&kq->kq_state);
	}
	KQ_UNLOCK(kq);
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
}

/*
 * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
 * We treat KN_MARKER knotes as if they are INFLUX.
 */
static int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
    const struct timespec *tsp, struct kevent *keva, struct thread *td)
{
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, *marker;
	int count, timeout, nkev, error;
	int haskqglobal;

	count = maxevents;
	nkev = 0;
	error = 0;
	haskqglobal = 0;

	if (maxevents == 0)
		goto done_nl;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done_nl;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	marker = knote_alloc(1);
	if (marker == NULL) {
		error = ENOMEM;
		goto done_nl;
	}
	marker->kn_status = KN_MARKER;
	KQ_LOCK(kq);
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
			24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = keva;
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			kq->kq_state |= KQ_SLEEP;
			error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
			    "kqread", timeout);
		}
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
	while (count) {
		KQ_OWNED(kq);
		kn = TAILQ_FIRST(&kq->kq_head);

		if ((kn->kn_status == KN_MARKER && kn != marker) ||
		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			kq->kq_state |= KQ_FLUXWAIT;
			error = msleep(kq, &kq->kq_lock, PSOCK,
			    "kqflxwt", 0);
			continue;
		}

		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if (kn == marker) {
			KQ_FLUX_WAKEUP(kq);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		KASSERT((kn->kn_status & KN_INFLUX) == 0,
		    ("KN_INFLUX set when not supposed to be"));

		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kn->kn_status |= KN_INFLUX;
			kq->kq_count--;
			KQ_UNLOCK(kq);
			/*
			 * We don't need to lock the list since we've marked
			 * it _INFLUX.
			 */
			*kevp = kn->kn_kevent;
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
			kn = NULL;
		} else {
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
			KN_LIST_LOCK(kn);
			if (kn->kn_fop->f_event(kn, 0) == 0) {
				KN_LIST_UNLOCK(kn);
				KQ_LOCK(kq);
				kn->kn_status &=
				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
				kq->kq_count--;
				continue;
			}
			*kevp = kn->kn_kevent;
			KQ_LOCK(kq);
			if (kn->kn_flags & EV_CLEAR) {
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
				kq->kq_count--;
			} else
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			KN_LIST_UNLOCK(kn);
			kn->kn_status &= ~(KN_INFLUX);
		}

		/* we are returning a copy to the user */
		kevp++;
		nkev++;
		count--;

		if (nkev == KQ_NEVENTS) {
			KQ_UNLOCK_FLUX(kq);
			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
			nkev = 0;
			kevp = keva;
			KQ_LOCK(kq);
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
done:
	KQ_OWNED(kq);
	KQ_UNLOCK_FLUX(kq);
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
	knote_free(marker);
done_nl:
	KQ_NOTOWNED(kq);
	if (nkev != 0)
		error = k_ops->k_copyout(k_ops->arg, keva, nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	 int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
	struct ucred *active_cred, struct thread *td)
{
	/*
	 * Enabling sigio causes two major problems:
	 * 1) infinite recursion:
	 * Synopsis: kevent is being used to track signals and has FIOASYNC
	 * set.  On receipt of a signal this will cause a kqueue to recurse
	 * into itself over and over.  Sending the sigio causes the kqueue
	 * to become ready, which in turn posts sigio again, forever.
	 * Solution: this can be solved by setting a flag in the kqueue that
	 * we have a SIGIO in progress.
	 * 2) locking problems:
	 * Synopsis: Kqueue is a leaf subsystem, but adding signalling puts
	 * us above the proc and pgrp locks.
	 * Solution: Post a signal using an async mechanism, being sure to
	 * record a generation count in the delivery so that we do not deliver
	 * a signal to the wrong process.
	 *
	 * Note, these two mechanisms are somewhat mutually exclusive!
	 */
#if 0
	struct kqueue *kq;

	kq = fp->f_data;
	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			kq->kq_state |= KQ_ASYNC;
		} else {
			kq->kq_state &= ~KQ_ASYNC;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &kq->kq_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&kq->kq_sigio);
		return (0);
	}
#endif

	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return POLLERR;

	KQ_LOCK(kq);
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	kqueue_release(kq, 1);
	KQ_UNLOCK(kq);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{

	bzero((void *)st, sizeof *st);
	/*
	 * We no longer return kq_count because the unlocked value is useless.
	 * If you spent all this time getting the count, why not spend your
	 * syscall better by calling kevent?
	 *
	 * XXX - This is needed for libc_r.
	 */
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp;
	struct knote *kn;
	int i;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return error;

	KQ_LOCK(kq);

	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
	    ("kqueue already closing"));
	kq->kq_state |= KQ_CLOSING;
	if (kq->kq_refcnt > 1)
		msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);

	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
	fdp = kq->kq_fdp;

	KASSERT(knlist_empty(&kq->kq_sel.si_note),
	    ("kqueue's knlist not empty"));

	for (i = 0; i < kq->kq_knlistsize; i++) {
		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
			KASSERT((kn->kn_status & KN_INFLUX) == 0,
			    ("KN_INFLUX set when not supposed to be"));
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
		}
	}
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i <= kq->kq_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
				KASSERT((kn->kn_status & KN_INFLUX) == 0,
				    ("KN_INFLUX set when not supposed to be"));
1450133741Sjmg				kn->kn_status |= KN_INFLUX;
1451133741Sjmg				KQ_UNLOCK(kq);
1452134859Sjmg				if (!(kn->kn_status & KN_DETACHED))
1453134859Sjmg					kn->kn_fop->f_detach(kn);
1454133741Sjmg				knote_drop(kn, td);
1455133741Sjmg				KQ_LOCK(kq);
145659290Sjlemon			}
145759290Sjlemon		}
145859290Sjlemon	}
1459133741Sjmg
1460133741Sjmg	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
1461133741Sjmg		kq->kq_state |= KQ_TASKDRAIN;
1462133741Sjmg		msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
1463133741Sjmg	}
1464133741Sjmg
1465133741Sjmg	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
1466126033Sgreen		kq->kq_state &= ~KQ_SEL;
1467126033Sgreen		selwakeuppri(&kq->kq_sel, PSOCK);
1468126033Sgreen	}
1469133741Sjmg
1470133741Sjmg	KQ_UNLOCK(kq);
1471133741Sjmg
1472137647Sphk	FILEDESC_LOCK_FAST(fdp);
1473133741Sjmg	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
1474137647Sphk	FILEDESC_UNLOCK_FAST(fdp);
1475133741Sjmg
1476133741Sjmg	knlist_destroy(&kq->kq_sel.si_note);
1477133741Sjmg	mtx_destroy(&kq->kq_lock);
1478133741Sjmg	kq->kq_fdp = NULL;
1479133741Sjmg
1480133741Sjmg	if (kq->kq_knhash != NULL)
1481133741Sjmg		free(kq->kq_knhash, M_KQUEUE);
1482133741Sjmg	if (kq->kq_knlist != NULL)
1483133741Sjmg		free(kq->kq_knlist, M_KQUEUE);
1484133741Sjmg
1485132138Salfred	funsetown(&kq->kq_sigio);
148684138Sjlemon	free(kq, M_KQUEUE);
1487109153Sdillon	fp->f_data = NULL;
148859290Sjlemon
148959290Sjlemon	return (0);
149059290Sjlemon}
149159290Sjlemon
149259290Sjlemonstatic void
149359290Sjlemonkqueue_wakeup(struct kqueue *kq)
149459290Sjlemon{
1495133741Sjmg	KQ_OWNED(kq);
149659290Sjlemon
1497133741Sjmg	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
149859290Sjlemon		kq->kq_state &= ~KQ_SLEEP;
149959290Sjlemon		wakeup(kq);
150059290Sjlemon	}
1501133741Sjmg	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
150259290Sjlemon		kq->kq_state &= ~KQ_SEL;
1503122352Stanimura		selwakeuppri(&kq->kq_sel, PSOCK);
150459290Sjlemon	}
1505133741Sjmg	if (!knlist_empty(&kq->kq_sel.si_note))
1506133741Sjmg		kqueue_schedtask(kq);
1507133741Sjmg	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
1508132138Salfred		pgsigio(&kq->kq_sigio, SIGIO, 0);
1509132138Salfred	}
151059290Sjlemon}
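
/*
 * Illustrative userland sketch (assumptions noted): the KQ_ASYNC branch
 * above fires only if SIGIO delivery was requested on the kqueue
 * descriptor beforehand, via the FIOSETOWN/FIOASYNC ioctls handled by
 * the kqueue ioctl routine earlier in this file, roughly:
 *
 *	int on = 1, pgid = getpid();
 *	int kqfd = kqueue();
 *
 *	ioctl(kqfd, FIOSETOWN, &pgid);
 *	ioctl(kqfd, FIOASYNC, &on);
 *
 * after which a knote being enqueued raises SIGIO via pgsigio().
 */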
151159290Sjlemon
151259290Sjlemon/*
1513133741Sjmg * Walk down a list of knotes, activating them if their event has triggered.
1514133741Sjmg *
1515133741Sjmg * There is a possibility to optimize in the case of one kq watching another.
1516133741Sjmg * Instead of scheduling a task to wake it up, you could pass enough state
1517133741Sjmg * down the chain to wake up the parent kqueue.  Make this code functional
1518133741Sjmg * first.
151959290Sjlemon */
152059290Sjlemonvoid
1521133741Sjmgknote(struct knlist *list, long hint, int islocked)
152259290Sjlemon{
1523133741Sjmg	struct kqueue *kq;
152459290Sjlemon	struct knote *kn;
152559290Sjlemon
1526133741Sjmg	if (list == NULL)
1527133741Sjmg		return;
1528133741Sjmg
1529147730Sssouhlal	KNL_ASSERT_LOCK(list, islocked);
1530147730Sssouhlal
1531147730Sssouhlal	if (!islocked)
1532147730Sssouhlal		list->kl_lock(list->kl_lockarg);
1533147730Sssouhlal
1534133741Sjmg	/*
1535133741Sjmg	 * If we unlock the list lock (and set KN_INFLUX), we can eliminate
1536133741Sjmg	 * the kqueue scheduling, but this will introduce four
1537133741Sjmg	 * lock/unlock pairs for each knote to test.  If we do, continue to use
1538133741Sjmg	 * SLIST_FOREACH; SLIST_FOREACH_SAFE is not safe in our case, as it is
1539133741Sjmg	 * only safe when you want to remove the current item, which we are
1540133741Sjmg	 * not doing.
1541133741Sjmg	 */
1542133741Sjmg	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
1543133741Sjmg		kq = kn->kn_kq;
1544133741Sjmg		if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
1545133741Sjmg			KQ_LOCK(kq);
1546133741Sjmg			if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
1547133741Sjmg				kn->kn_status |= KN_HASKQLOCK;
1548133741Sjmg				if (kn->kn_fop->f_event(kn, hint))
1549133741Sjmg					KNOTE_ACTIVATE(kn, 1);
1550133741Sjmg				kn->kn_status &= ~KN_HASKQLOCK;
1551133741Sjmg			}
1552133741Sjmg			KQ_UNLOCK(kq);
1553133741Sjmg		}
1554133741Sjmg		kq = NULL;
1555133741Sjmg	}
1556133741Sjmg	if (!islocked)
1557147730Sssouhlal		list->kl_unlock(list->kl_lockarg);
155859290Sjlemon}
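
/*
 * Illustrative sketch (hypothetical driver, assumptions noted): event
 * sources normally reach this function through the KNOTE_LOCKED() /
 * KNOTE_UNLOCKED() wrappers rather than calling it directly, e.g. an
 * interrupt handler posting a "readable" event:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_rxready = 1;
 *	KNOTE_LOCKED(&sc->sc_rsel.si_note, 0);
 *	mtx_unlock(&sc->sc_mtx);
 *
 * where sc_rsel.si_note was initialized by knlist_init() with sc_mtx
 * as the list lock, so the islocked contract above is satisfied.
 */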
155959290Sjlemon
156059290Sjlemon/*
1561133741Sjmg * add a knote to a knlist
1562133741Sjmg */
1563133741Sjmgvoid
1564133741Sjmgknlist_add(struct knlist *knl, struct knote *kn, int islocked)
1565133741Sjmg{
1566147730Sssouhlal	KNL_ASSERT_LOCK(knl, islocked);
1567133741Sjmg	KQ_NOTOWNED(kn->kn_kq);
1568133741Sjmg	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
1569133741Sjmg	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
1570133741Sjmg	if (!islocked)
1571147730Sssouhlal		knl->kl_lock(knl->kl_lockarg);
1572133741Sjmg	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
1573133741Sjmg	if (!islocked)
1574147730Sssouhlal		knl->kl_unlock(knl->kl_lockarg);
1575133741Sjmg	KQ_LOCK(kn->kn_kq);
1576133741Sjmg	kn->kn_knlist = knl;
1577133741Sjmg	kn->kn_status &= ~KN_DETACHED;
1578133741Sjmg	KQ_UNLOCK(kn->kn_kq);
1579133741Sjmg}
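
/*
 * Illustrative sketch (hypothetical driver, simplified): a filter's
 * f_attach routine is the usual caller, with kqueue_register() having
 * already set KN_INFLUX and KN_DETACHED on the new knote:
 *
 *	static int
 *	filt_xxxattach(struct knote *kn)
 *	{
 *		struct xxx_softc *sc = xxx_lookup(kn->kn_id);
 *
 *		if (sc == NULL)
 *			return (ENXIO);
 *		kn->kn_hook = sc;
 *		knlist_add(&sc->sc_note, kn, 0);
 *		return (0);
 *	}
 */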
1580133741Sjmg
1581133741Sjmgstatic void
1582133741Sjmgknlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
1583133741Sjmg{
1584133741Sjmg	KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked"));
1585147730Sssouhlal	KNL_ASSERT_LOCK(knl, knlislocked);
1586133741Sjmg	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
1587133741Sjmg	if (!kqislocked)
1588133741Sjmg		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
1589133741Sjmg    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
1590133741Sjmg	if (!knlislocked)
1591147730Sssouhlal		knl->kl_lock(knl->kl_lockarg);
1592133741Sjmg	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
1593133741Sjmg	kn->kn_knlist = NULL;
1594133741Sjmg	if (!knlislocked)
1595147730Sssouhlal		knl->kl_unlock(knl->kl_lockarg);
1596133741Sjmg	if (!kqislocked)
1597133741Sjmg		KQ_LOCK(kn->kn_kq);
1598133741Sjmg	kn->kn_status |= KN_DETACHED;
1599133741Sjmg	if (!kqislocked)
1600133741Sjmg		KQ_UNLOCK(kn->kn_kq);
1601133741Sjmg}
1602133741Sjmg
1603133741Sjmg/*
160459290Sjlemon * remove a knote from a specified knlist
160559290Sjlemon */
160659290Sjlemonvoid
1607133741Sjmgknlist_remove(struct knlist *knl, struct knote *kn, int islocked)
160859290Sjlemon{
1609133741Sjmg
1610133741Sjmg	knlist_remove_kq(knl, kn, islocked, 0);
1611133741Sjmg}
1612133741Sjmg
1613133741Sjmg/*
1614133741Sjmg * remove a knote from a specified knlist while in the f_event handler.
1615133741Sjmg */
1616133741Sjmgvoid
1617133741Sjmgknlist_remove_inevent(struct knlist *knl, struct knote *kn)
1618133741Sjmg{
1619133741Sjmg
1620133741Sjmg	knlist_remove_kq(knl, kn, 1,
1621133741Sjmg	    (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
1622133741Sjmg}
1623133741Sjmg
1624133741Sjmgint
1625133741Sjmgknlist_empty(struct knlist *knl)
1626133741Sjmg{
1627147730Sssouhlal	KNL_ASSERT_LOCKED(knl);
1628133741Sjmg	return (SLIST_EMPTY(&knl->kl_list));
1629133741Sjmg}
1630133741Sjmg
1631133741Sjmgstatic struct mtx	knlist_lock;
1632133741SjmgMTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
1633133741Sjmg	MTX_DEF);
1634147730Sssouhlalstatic void knlist_mtx_lock(void *arg);
1635147730Sssouhlalstatic void knlist_mtx_unlock(void *arg);
1636147730Sssouhlalstatic int knlist_mtx_locked(void *arg);
1637133741Sjmg
1638147730Sssouhlalstatic void
1639147730Sssouhlalknlist_mtx_lock(void *arg)
1640147730Sssouhlal{
1641147730Sssouhlal	mtx_lock((struct mtx *)arg);
1642147730Sssouhlal}
1643147730Sssouhlal
1644147730Sssouhlalstatic void
1645147730Sssouhlalknlist_mtx_unlock(void *arg)
1646147730Sssouhlal{
1647147730Sssouhlal	mtx_unlock((struct mtx *)arg);
1648147730Sssouhlal}
1649147730Sssouhlal
1650147730Sssouhlalstatic int
1651147730Sssouhlalknlist_mtx_locked(void *arg)
1652147730Sssouhlal{
1653147730Sssouhlal	return (mtx_owned((struct mtx *)arg));
1654147730Sssouhlal}
1655147730Sssouhlal
1656133741Sjmgvoid
1657147730Sssouhlalknlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
1658147730Sssouhlal    void (*kl_unlock)(void *), int (*kl_locked)(void *))
1659133741Sjmg{
1660133741Sjmg
1661147730Sssouhlal	if (lock == NULL)
1662147730Sssouhlal		knl->kl_lockarg = &knlist_lock;
1663133741Sjmg	else
1664147730Sssouhlal		knl->kl_lockarg = lock;
1665133741Sjmg
1666147730Sssouhlal	if (kl_lock == NULL)
1667147730Sssouhlal		knl->kl_lock = knlist_mtx_lock;
1668147730Sssouhlal	else
1669147730Sssouhlal		knl->kl_lock = kl_lock;
1670147730Sssouhlal	if (kl_unlock == NULL)
1671147730Sssouhlal		knl->kl_unlock = knlist_mtx_unlock;
1672147730Sssouhlal	else
1673147730Sssouhlal		knl->kl_unlock = kl_unlock;
1674147730Sssouhlal	if (kl_locked == NULL)
1675147730Sssouhlal		knl->kl_locked = knlist_mtx_locked;
1676147730Sssouhlal	else
1677147730Sssouhlal		knl->kl_locked = kl_locked;
1678147730Sssouhlal
1679133741Sjmg	SLIST_INIT(&knl->kl_list);
1680133741Sjmg}
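
/*
 * Illustrative sketch (hypothetical driver): a driver that already
 * serializes its softc with a mutex can hand that mutex to the knlist;
 * passing NULL for the lock functions selects the mtx helpers above:
 *
 *	knlist_init(&sc->sc_rsel.si_note, &sc->sc_mtx, NULL, NULL, NULL);
 *
 * Passing a NULL lock as well falls back on the global knlist_lock,
 * which is only appropriate for objects with no locking of their own.
 */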
1681133741Sjmg
1682133741Sjmgvoid
1683133741Sjmgknlist_destroy(struct knlist *knl)
1684133741Sjmg{
1685133741Sjmg
1686133741Sjmg#ifdef INVARIANTS
1687133741Sjmg	/*
1688133741Sjmg	 * if we run across this error, we need to find the offending
1689133741Sjmg	 * driver and have it call knlist_clear.
1690133741Sjmg	 */
1691133741Sjmg	if (!SLIST_EMPTY(&knl->kl_list))
1692133741Sjmg		printf("WARNING: destroying knlist w/ knotes on it!\n");
1693133741Sjmg#endif
1694133741Sjmg
1695147730Sssouhlal	knl->kl_lockarg = NULL; knl->kl_lock = NULL; knl->kl_unlock = NULL;
1696133741Sjmg	SLIST_INIT(&knl->kl_list);
1697133741Sjmg}
1698133741Sjmg
1699133741Sjmg/*
1700133741Sjmg * Even if we are locked, we may need to drop the lock to allow any influx
1701133741Sjmg * knotes time to "settle".
1702133741Sjmg */
1703133741Sjmgvoid
1704143776Sjmgknlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
1705133741Sjmg{
170659290Sjlemon	struct knote *kn;
1707133741Sjmg	struct kqueue *kq;
170859290Sjlemon
1709133741Sjmg	if (islocked)
1710147730Sssouhlal		KNL_ASSERT_LOCKED(knl);
1711133741Sjmg	else {
1712147730Sssouhlal		KNL_ASSERT_UNLOCKED(knl);
1713133741Sjmgagain:		/* need to reacquire lock since we have dropped it */
1714147730Sssouhlal		knl->kl_lock(knl->kl_lockarg);
171559290Sjlemon	}
1716133741Sjmg
1717133741Sjmg	SLIST_FOREACH(kn, &knl->kl_list, kn_selnext) {
1718133741Sjmg		kq = kn->kn_kq;
1719133741Sjmg		KQ_LOCK(kq);
1720143776Sjmg		if (kn->kn_status & KN_INFLUX) {
1721133741Sjmg			KQ_UNLOCK(kq);
1722133741Sjmg			continue;
1723133741Sjmg		}
1724133741Sjmg		knlist_remove_kq(knl, kn, 1, 1);
1725143776Sjmg		if (killkn) {
1726143776Sjmg			kn->kn_status |= KN_INFLUX | KN_DETACHED;
1727143776Sjmg			KQ_UNLOCK(kq);
1728143776Sjmg			knote_drop(kn, td);
1729143776Sjmg		} else {
1730143776Sjmg			/* Make sure cleared knotes disappear soon */
1731143776Sjmg			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1732143776Sjmg			KQ_UNLOCK(kq);
1733143776Sjmg		}
1734133741Sjmg		kq = NULL;
1735133741Sjmg	}
1736133741Sjmg
1737133741Sjmg	if (!SLIST_EMPTY(&knl->kl_list)) {
1738133741Sjmg		/* there are still KN_INFLUX remaining */
1739133741Sjmg		kn = SLIST_FIRST(&knl->kl_list);
1740133741Sjmg		kq = kn->kn_kq;
1741133741Sjmg		KQ_LOCK(kq);
1742133741Sjmg		KASSERT(kn->kn_status & KN_INFLUX,
1743133741Sjmg		    ("knote removed w/o list lock"));
1744147730Sssouhlal		knl->kl_unlock(knl->kl_lockarg);
1745133741Sjmg		kq->kq_state |= KQ_FLUXWAIT;
1746133741Sjmg		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
1747133741Sjmg		kq = NULL;
1748133741Sjmg		goto again;
1749133741Sjmg	}
1750133741Sjmg
1751133741Sjmg	if (islocked)
1752147730Sssouhlal		KNL_ASSERT_LOCKED(knl);
1753133741Sjmg	else {
1754147730Sssouhlal		knl->kl_unlock(knl->kl_lockarg);
1755147730Sssouhlal		KNL_ASSERT_UNLOCKED(knl);
1756133741Sjmg	}
175759290Sjlemon}
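
/*
 * Consumers normally reach the function above through thin wrappers in
 * <sys/event.h> (sketched from memory; see that header for the
 * authoritative definitions):
 *
 *	knlist_clear(knl, islocked)	 detach the knotes, leaving them
 *					 on their kqueues flagged EV_EOF
 *	knlist_delete(knl, td, islocked) also drop the knotes entirely
 *
 * e.g. a driver detach routine would call knlist_clear() before
 * knlist_destroy() so that no knotes remain on the list.
 */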
175859290Sjlemon
175959290Sjlemon/*
176059290Sjlemon * remove all knotes referencing a specified fd
1761133741Sjmg * must be called with the FILEDESC lock held.  This prevents a race where
1762133741Sjmg * a new fd comes along, occupies the entry, and we attach a knote to it.
176359290Sjlemon */
176459290Sjlemonvoid
176583366Sjulianknote_fdclose(struct thread *td, int fd)
176659290Sjlemon{
176783366Sjulian	struct filedesc *fdp = td->td_proc->p_fd;
1768133741Sjmg	struct kqueue *kq;
1769133741Sjmg	struct knote *kn;
1770133741Sjmg	int influx;
177159290Sjlemon
1772133741Sjmg	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1773133741Sjmg
1774133741Sjmg	/*
1775133741Sjmg	 * We shouldn't have to worry about new kevents appearing on fd
1776133741Sjmg	 * since filedesc is locked.
1777133741Sjmg	 */
1778133741Sjmg	SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
1779133741Sjmg		KQ_LOCK(kq);
1780133741Sjmg
1781133741Sjmgagain:
1782133741Sjmg		influx = 0;
1783133741Sjmg		while (kq->kq_knlistsize > fd &&
1784133741Sjmg		    (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
1785133741Sjmg			if (kn->kn_status & KN_INFLUX) {
1786133741Sjmg				/* someone else might be waiting on our knote */
1787133741Sjmg				if (influx)
1788133741Sjmg					wakeup(kq);
1789133741Sjmg				kq->kq_state |= KQ_FLUXWAIT;
1790133741Sjmg				msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
1791133741Sjmg				goto again;
1792133741Sjmg			}
1793133741Sjmg			kn->kn_status |= KN_INFLUX;
1794133741Sjmg			KQ_UNLOCK(kq);
1795134859Sjmg			if (!(kn->kn_status & KN_DETACHED))
1796134859Sjmg				kn->kn_fop->f_detach(kn);
1797133741Sjmg			knote_drop(kn, td);
1798133741Sjmg			influx = 1;
1799133741Sjmg			KQ_LOCK(kq);
1800133741Sjmg		}
1801133741Sjmg		KQ_UNLOCK_FLUX(kq);
1802133741Sjmg	}
180359290Sjlemon}
180459290Sjlemon
1805133741Sjmgstatic int
1806133741Sjmgknote_attach(struct knote *kn, struct kqueue *kq)
180759290Sjlemon{
1808133741Sjmg	struct klist *list;
180959290Sjlemon
1810133741Sjmg	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
1811133741Sjmg	KQ_OWNED(kq);
181289306Salfred
1813133741Sjmg	if (kn->kn_fop->f_isfd) {
1814133741Sjmg		if (kn->kn_id >= kq->kq_knlistsize)
1815133741Sjmg			return (ENOMEM);
1816133741Sjmg		list = &kq->kq_knlist[kn->kn_id];
1817133741Sjmg	} else {
1818133741Sjmg		if (kq->kq_knhash == NULL)
1819133741Sjmg			return (ENOMEM);
1820133741Sjmg		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
182159290Sjlemon	}
182259290Sjlemon
182359290Sjlemon	SLIST_INSERT_HEAD(list, kn, kn_link);
1824133741Sjmg
1825133741Sjmg	return (0);
182659290Sjlemon}
182759290Sjlemon
182859290Sjlemon/*
1829133741Sjmg * knote must already have been detached using the f_detach method.
1830133741Sjmg * no lock needs to be held; it is assumed that the KN_INFLUX flag is set
1831133741Sjmg * to prevent concurrent removal.
183259290Sjlemon */
183359290Sjlemonstatic void
183483366Sjulianknote_drop(struct knote *kn, struct thread *td)
183559290Sjlemon{
1836133741Sjmg	struct kqueue *kq;
183759290Sjlemon	struct klist *list;
183859290Sjlemon
1839133741Sjmg	kq = kn->kn_kq;
1840133741Sjmg
1841133741Sjmg	KQ_NOTOWNED(kq);
1842133741Sjmg	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
1843133741Sjmg	    ("knote_drop called without KN_INFLUX set in kn_status"));
1844133741Sjmg
1845133741Sjmg	KQ_LOCK(kq);
184659290Sjlemon	if (kn->kn_fop->f_isfd)
1847133741Sjmg		list = &kq->kq_knlist[kn->kn_id];
184859290Sjlemon	else
1849133741Sjmg		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
185059290Sjlemon
185160938Sjake	SLIST_REMOVE(list, kn, knote, kn_link);
185259290Sjlemon	if (kn->kn_status & KN_QUEUED)
185359290Sjlemon		knote_dequeue(kn);
1854133741Sjmg	KQ_UNLOCK_FLUX(kq);
1855133741Sjmg
1856133741Sjmg	if (kn->kn_fop->f_isfd) {
1857133741Sjmg		fdrop(kn->kn_fp, td);
1858133741Sjmg		kn->kn_fp = NULL;
1859133741Sjmg	}
1860133741Sjmg	kqueue_fo_release(kn->kn_kevent.filter);
1861133741Sjmg	kn->kn_fop = NULL;
186259290Sjlemon	knote_free(kn);
186359290Sjlemon}
186459290Sjlemon
186559290Sjlemonstatic void
186659290Sjlemonknote_enqueue(struct knote *kn)
186759290Sjlemon{
186859290Sjlemon	struct kqueue *kq = kn->kn_kq;
186959290Sjlemon
1870133741Sjmg	KQ_OWNED(kn->kn_kq);
187159997Sjlemon	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
187259997Sjlemon
1873133590Srwatson	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
187459290Sjlemon	kn->kn_status |= KN_QUEUED;
187559290Sjlemon	kq->kq_count++;
187659290Sjlemon	kqueue_wakeup(kq);
187759290Sjlemon}
187859290Sjlemon
187959290Sjlemonstatic void
188059290Sjlemonknote_dequeue(struct knote *kn)
188159290Sjlemon{
188259290Sjlemon	struct kqueue *kq = kn->kn_kq;
188359290Sjlemon
1884133741Sjmg	KQ_OWNED(kn->kn_kq);
188559997Sjlemon	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
188659997Sjlemon
1887133590Srwatson	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
188859290Sjlemon	kn->kn_status &= ~KN_QUEUED;
188959290Sjlemon	kq->kq_count--;
189059290Sjlemon}
189159290Sjlemon
189259290Sjlemonstatic void
189359290Sjlemonknote_init(void)
189459290Sjlemon{
1895133741Sjmg
189692751Sjeff	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
189792751Sjeff	    NULL, NULL, UMA_ALIGN_PTR, 0);
189859290Sjlemon}
189959290SjlemonSYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
190059290Sjlemon
190159290Sjlemonstatic struct knote *
1902133741Sjmgknote_alloc(int waitok)
190359290Sjlemon{
1904133741Sjmg	return ((struct knote *)uma_zalloc(knote_zone,
1905133741Sjmg	    (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
190659290Sjlemon}
190759290Sjlemon
190859290Sjlemonstatic void
190959290Sjlemonknote_free(struct knote *kn)
191059290Sjlemon{
1911133741Sjmg	if (kn != NULL)
1912133741Sjmg		uma_zfree(knote_zone, kn);
191359290Sjlemon}
1914