kern_event.c revision 142217
159290Sjlemon/*- 272969Sjlemon * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3133741Sjmg * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org> 459290Sjlemon * All rights reserved. 559290Sjlemon * 659290Sjlemon * Redistribution and use in source and binary forms, with or without 759290Sjlemon * modification, are permitted provided that the following conditions 859290Sjlemon * are met: 959290Sjlemon * 1. Redistributions of source code must retain the above copyright 1059290Sjlemon * notice, this list of conditions and the following disclaimer. 1159290Sjlemon * 2. Redistributions in binary form must reproduce the above copyright 1259290Sjlemon * notice, this list of conditions and the following disclaimer in the 1359290Sjlemon * documentation and/or other materials provided with the distribution. 1459290Sjlemon * 1559290Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1659290Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1759290Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1859290Sjlemon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1959290Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2059290Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2159290Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2259290Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2359290Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2459290Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2559290Sjlemon * SUCH DAMAGE. 
2659290Sjlemon */ 2759290Sjlemon 28116182Sobrien#include <sys/cdefs.h> 29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 142217 2005-02-22 13:11:33Z rwatson $"); 30116182Sobrien 3159290Sjlemon#include <sys/param.h> 3259290Sjlemon#include <sys/systm.h> 3359290Sjlemon#include <sys/kernel.h> 3476166Smarkm#include <sys/lock.h> 3576166Smarkm#include <sys/mutex.h> 3659290Sjlemon#include <sys/proc.h> 37132138Salfred#include <sys/malloc.h> 3859290Sjlemon#include <sys/unistd.h> 3959290Sjlemon#include <sys/file.h> 40108524Salfred#include <sys/filedesc.h> 41132138Salfred#include <sys/filio.h> 4259290Sjlemon#include <sys/fcntl.h> 43133741Sjmg#include <sys/kthread.h> 4470834Swollman#include <sys/selinfo.h> 4559290Sjlemon#include <sys/queue.h> 4659290Sjlemon#include <sys/event.h> 4759290Sjlemon#include <sys/eventvar.h> 4859290Sjlemon#include <sys/poll.h> 4959290Sjlemon#include <sys/protosw.h> 50132138Salfred#include <sys/sigio.h> 51132138Salfred#include <sys/signalvar.h> 5259290Sjlemon#include <sys/socket.h> 5359290Sjlemon#include <sys/socketvar.h> 5459290Sjlemon#include <sys/stat.h> 5584138Sjlemon#include <sys/sysctl.h> 5659290Sjlemon#include <sys/sysproto.h> 57133741Sjmg#include <sys/taskqueue.h> 5859290Sjlemon#include <sys/uio.h> 5959290Sjlemon 6092751Sjeff#include <vm/uma.h> 6159290Sjlemon 62141616Sphkstatic MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 63141616Sphk 64133741Sjmg/* 65133741Sjmg * This lock is used if multiple kq locks are required. This possibly 66133741Sjmg * should be made into a per proc lock. 
67133741Sjmg */ 68133741Sjmgstatic struct mtx kq_global; 69133741SjmgMTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); 70133741Sjmg#define KQ_GLOBAL_LOCK(lck, haslck) do { \ 71133741Sjmg if (!haslck) \ 72133741Sjmg mtx_lock(lck); \ 73133741Sjmg haslck = 1; \ 74133741Sjmg} while (0) 75133741Sjmg#define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ 76133741Sjmg if (haslck) \ 77133741Sjmg mtx_unlock(lck); \ 78133741Sjmg haslck = 0; \ 79133741Sjmg} while (0) 8084138Sjlemon 81133741SjmgTASKQUEUE_DEFINE_THREAD(kqueue); 82133741Sjmg 83133741Sjmgstatic int kqueue_aquire(struct file *fp, struct kqueue **kqp); 84133741Sjmgstatic void kqueue_release(struct kqueue *kq, int locked); 85133741Sjmgstatic int kqueue_expand(struct kqueue *kq, struct filterops *fops, 86133741Sjmg uintptr_t ident, int waitok); 87133741Sjmgstatic void kqueue_task(void *arg, int pending); 88133741Sjmgstatic int kqueue_scan(struct kqueue *kq, int maxevents, 8963977Speter struct kevent *ulistp, const struct timespec *timeout, 90133741Sjmg struct kevent *keva, struct thread *td); 9159290Sjlemonstatic void kqueue_wakeup(struct kqueue *kq); 92133741Sjmgstatic struct filterops *kqueue_fo_find(int filt); 93133741Sjmgstatic void kqueue_fo_release(int filt); 9459290Sjlemon 95108255Sphkstatic fo_rdwr_t kqueue_read; 96108255Sphkstatic fo_rdwr_t kqueue_write; 97108255Sphkstatic fo_ioctl_t kqueue_ioctl; 98108255Sphkstatic fo_poll_t kqueue_poll; 99108255Sphkstatic fo_kqfilter_t kqueue_kqfilter; 100108255Sphkstatic fo_stat_t kqueue_stat; 101108255Sphkstatic fo_close_t kqueue_close; 102108238Sphk 10372521Sjlemonstatic struct fileops kqueueops = { 104116546Sphk .fo_read = kqueue_read, 105116546Sphk .fo_write = kqueue_write, 106116546Sphk .fo_ioctl = kqueue_ioctl, 107116546Sphk .fo_poll = kqueue_poll, 108116546Sphk .fo_kqfilter = kqueue_kqfilter, 109116546Sphk .fo_stat = kqueue_stat, 110116546Sphk .fo_close = kqueue_close, 11172521Sjlemon}; 11272521Sjlemon 113133741Sjmgstatic int knote_attach(struct knote *kn, struct 
kqueue *kq); 11483366Sjulianstatic void knote_drop(struct knote *kn, struct thread *td); 11559290Sjlemonstatic void knote_enqueue(struct knote *kn); 11659290Sjlemonstatic void knote_dequeue(struct knote *kn); 11759290Sjlemonstatic void knote_init(void); 118133741Sjmgstatic struct knote *knote_alloc(int waitok); 11959290Sjlemonstatic void knote_free(struct knote *kn); 12059290Sjlemon 12172521Sjlemonstatic void filt_kqdetach(struct knote *kn); 12272521Sjlemonstatic int filt_kqueue(struct knote *kn, long hint); 12372521Sjlemonstatic int filt_procattach(struct knote *kn); 12472521Sjlemonstatic void filt_procdetach(struct knote *kn); 12572521Sjlemonstatic int filt_proc(struct knote *kn, long hint); 12672521Sjlemonstatic int filt_fileattach(struct knote *kn); 12779989Sjlemonstatic void filt_timerexpire(void *knx); 12879989Sjlemonstatic int filt_timerattach(struct knote *kn); 12979989Sjlemonstatic void filt_timerdetach(struct knote *kn); 13079989Sjlemonstatic int filt_timer(struct knote *kn, long hint); 13172521Sjlemon 13279989Sjlemonstatic struct filterops file_filtops = 13379989Sjlemon { 1, filt_fileattach, NULL, NULL }; 13472521Sjlemonstatic struct filterops kqread_filtops = 13572521Sjlemon { 1, NULL, filt_kqdetach, filt_kqueue }; 136133741Sjmg/* XXX - move to kern_proc.c? */ 13772521Sjlemonstatic struct filterops proc_filtops = 13872521Sjlemon { 0, filt_procattach, filt_procdetach, filt_proc }; 13979989Sjlemonstatic struct filterops timer_filtops = 14079989Sjlemon { 0, filt_timerattach, filt_timerdetach, filt_timer }; 14172521Sjlemon 14292751Sjeffstatic uma_zone_t knote_zone; 14384138Sjlemonstatic int kq_ncallouts = 0; 14484138Sjlemonstatic int kq_calloutmax = (4 * 1024); 14584138SjlemonSYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, 14684138Sjlemon &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); 14759290Sjlemon 148133741Sjmg/* XXX - ensure not KN_INFLUX?? 
*/ 149133741Sjmg#define KNOTE_ACTIVATE(kn, islock) do { \ 150133741Sjmg if ((islock)) \ 151133741Sjmg mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ 152133741Sjmg else \ 153133741Sjmg KQ_LOCK((kn)->kn_kq); \ 154133741Sjmg (kn)->kn_status |= KN_ACTIVE; \ 155133741Sjmg if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ 156133741Sjmg knote_enqueue((kn)); \ 157133741Sjmg if (!(islock)) \ 158133741Sjmg KQ_UNLOCK((kn)->kn_kq); \ 15959290Sjlemon} while(0) 160133741Sjmg#define KQ_LOCK(kq) do { \ 161133741Sjmg mtx_lock(&(kq)->kq_lock); \ 162133741Sjmg} while (0) 163133741Sjmg#define KQ_FLUX_WAKEUP(kq) do { \ 164133741Sjmg if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ 165133741Sjmg (kq)->kq_state &= ~KQ_FLUXWAIT; \ 166133741Sjmg wakeup((kq)); \ 167133741Sjmg } \ 168133741Sjmg} while (0) 169133741Sjmg#define KQ_UNLOCK_FLUX(kq) do { \ 170133741Sjmg KQ_FLUX_WAKEUP(kq); \ 171133741Sjmg mtx_unlock(&(kq)->kq_lock); \ 172133741Sjmg} while (0) 173133741Sjmg#define KQ_UNLOCK(kq) do { \ 174133741Sjmg mtx_unlock(&(kq)->kq_lock); \ 175133741Sjmg} while (0) 176133741Sjmg#define KQ_OWNED(kq) do { \ 177133741Sjmg mtx_assert(&(kq)->kq_lock, MA_OWNED); \ 178133741Sjmg} while (0) 179133741Sjmg#define KQ_NOTOWNED(kq) do { \ 180133741Sjmg mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ 181133741Sjmg} while (0) 182133741Sjmg#define KN_LIST_LOCK(kn) do { \ 183133741Sjmg if (kn->kn_knlist != NULL) \ 184133741Sjmg mtx_lock(kn->kn_knlist->kl_lock); \ 185133741Sjmg} while (0) 186133741Sjmg#define KN_LIST_UNLOCK(kn) do { \ 187133741Sjmg if (kn->kn_knlist != NULL) \ 188133741Sjmg mtx_unlock(kn->kn_knlist->kl_lock); \ 189133741Sjmg} while (0) 19059290Sjlemon 19159290Sjlemon#define KN_HASHSIZE 64 /* XXX should be tunable */ 19259290Sjlemon#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 19359290Sjlemon 19488633Salfredstatic int 19588633Salfredfilt_nullattach(struct knote *kn) 19688633Salfred{ 19788633Salfred 19888633Salfred return (ENXIO); 19988633Salfred}; 20088633Salfred 
20188633Salfredstruct filterops null_filtops = 20288633Salfred { 0, filt_nullattach, NULL, NULL }; 20388633Salfred 204133741Sjmg/* XXX - make SYSINIT to add these, and move into respective modules. */ 20559290Sjlemonextern struct filterops sig_filtops; 206131562Salfredextern struct filterops fs_filtops; 20759290Sjlemon 20859290Sjlemon/* 20972521Sjlemon * Table for for all system-defined filters. 21059290Sjlemon */ 211133741Sjmgstatic struct mtx filterops_lock; 212133741SjmgMTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", 213133741Sjmg MTX_DEF); 214133741Sjmgstatic struct { 215133741Sjmg struct filterops *for_fop; 216133741Sjmg int for_refcnt; 217133741Sjmg} sysfilt_ops[EVFILT_SYSCOUNT] = { 218133741Sjmg { &file_filtops }, /* EVFILT_READ */ 219133741Sjmg { &file_filtops }, /* EVFILT_WRITE */ 220133741Sjmg { &null_filtops }, /* EVFILT_AIO */ 221133741Sjmg { &file_filtops }, /* EVFILT_VNODE */ 222133741Sjmg { &proc_filtops }, /* EVFILT_PROC */ 223133741Sjmg { &sig_filtops }, /* EVFILT_SIGNAL */ 224133741Sjmg { &timer_filtops }, /* EVFILT_TIMER */ 225133741Sjmg { &file_filtops }, /* EVFILT_NETDEV */ 226133741Sjmg { &fs_filtops }, /* EVFILT_FS */ 22759290Sjlemon}; 22859290Sjlemon 229133741Sjmg/* 230133741Sjmg * Simple redirection for all cdevsw style objects to call their fo_kqfilter 231133741Sjmg * method. 
232133741Sjmg */ 23359290Sjlemonstatic int 23472521Sjlemonfilt_fileattach(struct knote *kn) 23559290Sjlemon{ 236133635Sjmg 23772521Sjlemon return (fo_kqfilter(kn->kn_fp, kn)); 23859290Sjlemon} 23959290Sjlemon 24072521Sjlemon/*ARGSUSED*/ 24159290Sjlemonstatic int 24272521Sjlemonkqueue_kqfilter(struct file *fp, struct knote *kn) 24359290Sjlemon{ 244109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 24559290Sjlemon 24672521Sjlemon if (kn->kn_filter != EVFILT_READ) 247133741Sjmg return (EINVAL); 24859290Sjlemon 249133741Sjmg kn->kn_status |= KN_KQUEUE; 25072521Sjlemon kn->kn_fop = &kqread_filtops; 251133741Sjmg knlist_add(&kq->kq_sel.si_note, kn, 0); 252133741Sjmg 25359290Sjlemon return (0); 25459290Sjlemon} 25559290Sjlemon 25659290Sjlemonstatic void 25759290Sjlemonfilt_kqdetach(struct knote *kn) 25859290Sjlemon{ 259109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 26059290Sjlemon 261133741Sjmg knlist_remove(&kq->kq_sel.si_note, kn, 0); 26259290Sjlemon} 26359290Sjlemon 26459290Sjlemon/*ARGSUSED*/ 26559290Sjlemonstatic int 26659290Sjlemonfilt_kqueue(struct knote *kn, long hint) 26759290Sjlemon{ 268109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 26959290Sjlemon 27059290Sjlemon kn->kn_data = kq->kq_count; 27159290Sjlemon return (kn->kn_data > 0); 27259290Sjlemon} 27359290Sjlemon 274133741Sjmg/* XXX - move to kern_proc.c? 
*/ 27559290Sjlemonstatic int 27659290Sjlemonfilt_procattach(struct knote *kn) 27759290Sjlemon{ 27859290Sjlemon struct proc *p; 279113377Skbyanc int immediate; 28075451Srwatson int error; 28159290Sjlemon 282113377Skbyanc immediate = 0; 28359290Sjlemon p = pfind(kn->kn_id); 284113377Skbyanc if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { 285113377Skbyanc p = zpfind(kn->kn_id); 286113377Skbyanc immediate = 1; 287133741Sjmg } else if (p != NULL && (p->p_flag & P_WEXIT)) { 288133741Sjmg immediate = 1; 289113377Skbyanc } 290133741Sjmg 291122019Scognet if (p == NULL) 292122019Scognet return (ESRCH); 293133741Sjmg if ((error = p_cansee(curthread, p))) 29475451Srwatson return (error); 29559290Sjlemon 29659290Sjlemon kn->kn_ptr.p_proc = p; 29759290Sjlemon kn->kn_flags |= EV_CLEAR; /* automatically set */ 29859290Sjlemon 29959290Sjlemon /* 30059290Sjlemon * internal flag indicating registration done by kernel 30159290Sjlemon */ 30259290Sjlemon if (kn->kn_flags & EV_FLAG1) { 30359290Sjlemon kn->kn_data = kn->kn_sdata; /* ppid */ 30459290Sjlemon kn->kn_fflags = NOTE_CHILD; 30559290Sjlemon kn->kn_flags &= ~EV_FLAG1; 30659290Sjlemon } 30759290Sjlemon 308122686Scognet if (immediate == 0) 309133741Sjmg knlist_add(&p->p_klist, kn, 1); 310113377Skbyanc 311113377Skbyanc /* 312113377Skbyanc * Immediately activate any exit notes if the target process is a 313113377Skbyanc * zombie. This is necessary to handle the case where the target 314113377Skbyanc * process, e.g. a child, dies before the kevent is registered. 315113377Skbyanc */ 316113377Skbyanc if (immediate && filt_proc(kn, NOTE_EXIT)) 317133741Sjmg KNOTE_ACTIVATE(kn, 0); 318113377Skbyanc 31971500Sjhb PROC_UNLOCK(p); 32059290Sjlemon 32159290Sjlemon return (0); 32259290Sjlemon} 32359290Sjlemon 32459290Sjlemon/* 32559290Sjlemon * The knote may be attached to a different process, which may exit, 32659290Sjlemon * leaving nothing for the knote to be attached to. 
So when the process 32759290Sjlemon * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 32859290Sjlemon * it will be deleted when read out. However, as part of the knote deletion, 32959290Sjlemon * this routine is called, so a check is needed to avoid actually performing 33059290Sjlemon * a detach, because the original process does not exist any more. 33159290Sjlemon */ 332133741Sjmg/* XXX - move to kern_proc.c? */ 33359290Sjlemonstatic void 33459290Sjlemonfilt_procdetach(struct knote *kn) 33559290Sjlemon{ 336133741Sjmg struct proc *p; 33759290Sjlemon 338133741Sjmg p = kn->kn_ptr.p_proc; 339133741Sjmg knlist_remove(&p->p_klist, kn, 0); 340133741Sjmg kn->kn_ptr.p_proc = NULL; 34159290Sjlemon} 34259290Sjlemon 343133741Sjmg/* XXX - move to kern_proc.c? */ 34459290Sjlemonstatic int 34559290Sjlemonfilt_proc(struct knote *kn, long hint) 34659290Sjlemon{ 347133741Sjmg struct proc *p = kn->kn_ptr.p_proc; 34859290Sjlemon u_int event; 34959290Sjlemon 35059290Sjlemon /* 35159290Sjlemon * mask off extra data 35259290Sjlemon */ 35359290Sjlemon event = (u_int)hint & NOTE_PCTRLMASK; 35459290Sjlemon 35559290Sjlemon /* 35659290Sjlemon * if the user is interested in this event, record it. 35759290Sjlemon */ 35859290Sjlemon if (kn->kn_sfflags & event) 35959290Sjlemon kn->kn_fflags |= event; 36059290Sjlemon 36159290Sjlemon /* 36259290Sjlemon * process is gone, so flag the event as finished. 36359290Sjlemon */ 36459290Sjlemon if (event == NOTE_EXIT) { 365133741Sjmg if (!(kn->kn_status & KN_DETACHED)) 366133741Sjmg knlist_remove_inevent(&p->p_klist, kn); 367133590Srwatson kn->kn_flags |= (EV_EOF | EV_ONESHOT); 368133741Sjmg kn->kn_ptr.p_proc = NULL; 36959290Sjlemon return (1); 37059290Sjlemon } 37159290Sjlemon 37259290Sjlemon /* 37359290Sjlemon * process forked, and user wants to track the new process, 37459290Sjlemon * so attach a new knote to it, and immediately report an 37559290Sjlemon * event with the parent's pid. 
37659290Sjlemon */ 37759290Sjlemon if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 37859290Sjlemon struct kevent kev; 37959290Sjlemon int error; 38059290Sjlemon 38159290Sjlemon /* 38259290Sjlemon * register knote with new process. 38359290Sjlemon */ 38459290Sjlemon kev.ident = hint & NOTE_PDATAMASK; /* pid */ 38559290Sjlemon kev.filter = kn->kn_filter; 38659290Sjlemon kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 38759290Sjlemon kev.fflags = kn->kn_sfflags; 38859290Sjlemon kev.data = kn->kn_id; /* parent */ 38961962Sjlemon kev.udata = kn->kn_kevent.udata; /* preserve udata */ 390133741Sjmg error = kqueue_register(kn->kn_kq, &kev, NULL, 0); 39159290Sjlemon if (error) 39259290Sjlemon kn->kn_fflags |= NOTE_TRACKERR; 39359290Sjlemon } 39459290Sjlemon 39559290Sjlemon return (kn->kn_fflags != 0); 39659290Sjlemon} 39759290Sjlemon 398133741Sjmgstatic int 399133741Sjmgtimertoticks(intptr_t data) 400133741Sjmg{ 401133741Sjmg struct timeval tv; 402133741Sjmg int tticks; 403133741Sjmg 404133741Sjmg tv.tv_sec = data / 1000; 405133741Sjmg tv.tv_usec = (data % 1000) * 1000; 406133741Sjmg tticks = tvtohz(&tv); 407133741Sjmg 408133741Sjmg return tticks; 409133741Sjmg} 410133741Sjmg 411133741Sjmg/* XXX - move to kern_timeout.c? */ 41279989Sjlemonstatic void 41379989Sjlemonfilt_timerexpire(void *knx) 41479989Sjlemon{ 41579989Sjlemon struct knote *kn = knx; 41684138Sjlemon struct callout *calloutp; 41779989Sjlemon 41879989Sjlemon kn->kn_data++; 419133741Sjmg KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ 42079989Sjlemon 421133741Sjmg if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { 42284138Sjlemon calloutp = (struct callout *)kn->kn_hook; 423133741Sjmg callout_reset(calloutp, timertoticks(kn->kn_sdata), 424133741Sjmg filt_timerexpire, kn); 42579989Sjlemon } 42679989Sjlemon} 42779989Sjlemon 42879989Sjlemon/* 42979989Sjlemon * data contains amount of time to sleep, in milliseconds 430133590Srwatson */ 431133741Sjmg/* XXX - move to kern_timeout.c? 
*/ 43279989Sjlemonstatic int 43379989Sjlemonfilt_timerattach(struct knote *kn) 43479989Sjlemon{ 43584138Sjlemon struct callout *calloutp; 43679989Sjlemon 437133741Sjmg atomic_add_int(&kq_ncallouts, 1); 438133741Sjmg 439133741Sjmg if (kq_ncallouts >= kq_calloutmax) { 440133741Sjmg atomic_add_int(&kq_ncallouts, -1); 44184138Sjlemon return (ENOMEM); 442133741Sjmg } 44384138Sjlemon 44479989Sjlemon kn->kn_flags |= EV_CLEAR; /* automatically set */ 445136500Sjmg kn->kn_status &= ~KN_DETACHED; /* knlist_add usually sets it */ 44684138Sjlemon MALLOC(calloutp, struct callout *, sizeof(*calloutp), 447111119Simp M_KQUEUE, M_WAITOK); 448142217Srwatson callout_init(calloutp, CALLOUT_MPSAFE); 449127982Scperciva kn->kn_hook = calloutp; 450133741Sjmg callout_reset(calloutp, timertoticks(kn->kn_sdata), filt_timerexpire, 451133741Sjmg kn); 45279989Sjlemon 45379989Sjlemon return (0); 45479989Sjlemon} 45579989Sjlemon 456133741Sjmg/* XXX - move to kern_timeout.c? */ 45779989Sjlemonstatic void 45879989Sjlemonfilt_timerdetach(struct knote *kn) 45979989Sjlemon{ 46084138Sjlemon struct callout *calloutp; 46179989Sjlemon 46284138Sjlemon calloutp = (struct callout *)kn->kn_hook; 463127982Scperciva callout_drain(calloutp); 46484138Sjlemon FREE(calloutp, M_KQUEUE); 465133741Sjmg atomic_add_int(&kq_ncallouts, -1); 466136500Sjmg kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */ 46779989Sjlemon} 46879989Sjlemon 469133741Sjmg/* XXX - move to kern_timeout.c? 
*/ 47079989Sjlemonstatic int 47179989Sjlemonfilt_timer(struct knote *kn, long hint) 47279989Sjlemon{ 47379989Sjlemon 47479989Sjlemon return (kn->kn_data != 0); 47579989Sjlemon} 47679989Sjlemon 47782710Sdillon/* 47882710Sdillon * MPSAFE 47982710Sdillon */ 48061468Sjlemonint 48183366Sjuliankqueue(struct thread *td, struct kqueue_args *uap) 48259290Sjlemon{ 48382710Sdillon struct filedesc *fdp; 48459290Sjlemon struct kqueue *kq; 48561468Sjlemon struct file *fp; 48661468Sjlemon int fd, error; 48759290Sjlemon 48883366Sjulian fdp = td->td_proc->p_fd; 48983366Sjulian error = falloc(td, &fp, &fd); 49061468Sjlemon if (error) 49182710Sdillon goto done2; 492133741Sjmg 493121256Sdwmalone /* An extra reference on `nfp' has been held for us by falloc(). */ 494133741Sjmg kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); 495133741Sjmg mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); 49689306Salfred TAILQ_INIT(&kq->kq_head); 497133741Sjmg kq->kq_fdp = fdp; 498133741Sjmg knlist_init(&kq->kq_sel.si_note, &kq->kq_lock); 499133741Sjmg TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); 500133741Sjmg 501137647Sphk FILEDESC_LOCK_FAST(fdp); 502133741Sjmg SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); 503137647Sphk FILEDESC_UNLOCK_FAST(fdp); 504133741Sjmg 50589306Salfred FILE_LOCK(fp); 50661468Sjlemon fp->f_flag = FREAD | FWRITE; 50761468Sjlemon fp->f_type = DTYPE_KQUEUE; 50861468Sjlemon fp->f_ops = &kqueueops; 509109153Sdillon fp->f_data = kq; 51089306Salfred FILE_UNLOCK(fp); 511121256Sdwmalone fdrop(fp, td); 512133741Sjmg 51383366Sjulian td->td_retval[0] = fd; 51482710Sdillondone2: 51561468Sjlemon return (error); 51659290Sjlemon} 51759290Sjlemon 51859290Sjlemon#ifndef _SYS_SYSPROTO_H_ 51959290Sjlemonstruct kevent_args { 52059290Sjlemon int fd; 52163977Speter const struct kevent *changelist; 52259290Sjlemon int nchanges; 52363452Sjlemon struct kevent *eventlist; 52459290Sjlemon int nevents; 52563977Speter const struct timespec *timeout; 52659290Sjlemon}; 52759290Sjlemon#endif 
52882710Sdillon/* 52982710Sdillon * MPSAFE 53082710Sdillon */ 53159290Sjlemonint 53283366Sjuliankevent(struct thread *td, struct kevent_args *uap) 53359290Sjlemon{ 534133741Sjmg struct kevent keva[KQ_NEVENTS]; 53563452Sjlemon struct kevent *kevp; 53659290Sjlemon struct kqueue *kq; 53786341Sdillon struct file *fp; 53859290Sjlemon struct timespec ts; 53959290Sjlemon int i, n, nerrors, error; 54059290Sjlemon 54189319Salfred if ((error = fget(td, uap->fd, &fp)) != 0) 54289319Salfred return (error); 543133741Sjmg if ((error = kqueue_aquire(fp, &kq)) != 0) 544133741Sjmg goto done_norel; 545133741Sjmg 54659290Sjlemon if (uap->timeout != NULL) { 54763452Sjlemon error = copyin(uap->timeout, &ts, sizeof(ts)); 54859290Sjlemon if (error) 549133741Sjmg goto done; 55059290Sjlemon uap->timeout = &ts; 55159290Sjlemon } 55259290Sjlemon 55359290Sjlemon nerrors = 0; 55459290Sjlemon 55559290Sjlemon while (uap->nchanges > 0) { 55659290Sjlemon n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges; 557133741Sjmg error = copyin(uap->changelist, keva, 558133741Sjmg n * sizeof *keva); 55959290Sjlemon if (error) 56068883Sdillon goto done; 56159290Sjlemon for (i = 0; i < n; i++) { 562133741Sjmg kevp = &keva[i]; 56363452Sjlemon kevp->flags &= ~EV_SYSFLAGS; 564133741Sjmg error = kqueue_register(kq, kevp, td, 1); 56559290Sjlemon if (error) { 56659290Sjlemon if (uap->nevents != 0) { 56763452Sjlemon kevp->flags = EV_ERROR; 56863452Sjlemon kevp->data = error; 56998998Salfred (void) copyout(kevp, 57098998Salfred uap->eventlist, 57163452Sjlemon sizeof(*kevp)); 57259290Sjlemon uap->eventlist++; 57359290Sjlemon uap->nevents--; 57459290Sjlemon nerrors++; 57559290Sjlemon } else { 57668883Sdillon goto done; 57759290Sjlemon } 57859290Sjlemon } 57959290Sjlemon } 58059290Sjlemon uap->nchanges -= n; 58159290Sjlemon uap->changelist += n; 58259290Sjlemon } 58359290Sjlemon if (nerrors) { 584133741Sjmg td->td_retval[0] = nerrors; 58568883Sdillon error = 0; 58668883Sdillon goto done; 58759290Sjlemon } 
58859290Sjlemon 589133741Sjmg error = kqueue_scan(kq, uap->nevents, uap->eventlist, uap->timeout, 590133741Sjmg keva, td); 59168883Sdillondone: 592133741Sjmg kqueue_release(kq, 0); 593133741Sjmgdone_norel: 59468883Sdillon if (fp != NULL) 59583366Sjulian fdrop(fp, td); 59659290Sjlemon return (error); 59759290Sjlemon} 59859290Sjlemon 59959290Sjlemonint 60088633Salfredkqueue_add_filteropts(int filt, struct filterops *filtops) 60188633Salfred{ 602133741Sjmg int error; 60388633Salfred 604133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { 605133741Sjmg printf( 606133741Sjmg"trying to add a filterop that is out of range: %d is beyond %d\n", 607133741Sjmg ~filt, EVFILT_SYSCOUNT); 608133741Sjmg return EINVAL; 609133741Sjmg } 610133741Sjmg mtx_lock(&filterops_lock); 611133741Sjmg if (sysfilt_ops[~filt].for_fop != &null_filtops && 612133741Sjmg sysfilt_ops[~filt].for_fop != NULL) 613133741Sjmg error = EEXIST; 614133741Sjmg else { 615133741Sjmg sysfilt_ops[~filt].for_fop = filtops; 616133741Sjmg sysfilt_ops[~filt].for_refcnt = 0; 617133741Sjmg } 618133741Sjmg mtx_unlock(&filterops_lock); 619133741Sjmg 62088633Salfred return (0); 62188633Salfred} 62288633Salfred 62388633Salfredint 62488633Salfredkqueue_del_filteropts(int filt) 62588633Salfred{ 626133741Sjmg int error; 62788633Salfred 628133741Sjmg error = 0; 629133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 630133741Sjmg return EINVAL; 631133741Sjmg 632133741Sjmg mtx_lock(&filterops_lock); 633133741Sjmg if (sysfilt_ops[~filt].for_fop == &null_filtops || 634133741Sjmg sysfilt_ops[~filt].for_fop == NULL) 635133741Sjmg error = EINVAL; 636133741Sjmg else if (sysfilt_ops[~filt].for_refcnt != 0) 637133741Sjmg error = EBUSY; 638133741Sjmg else { 639133741Sjmg sysfilt_ops[~filt].for_fop = &null_filtops; 640133741Sjmg sysfilt_ops[~filt].for_refcnt = 0; 641133741Sjmg } 642133741Sjmg mtx_unlock(&filterops_lock); 643133741Sjmg 644133741Sjmg return error; 64588633Salfred} 64688633Salfred 647133741Sjmgstatic struct filterops * 
648133741Sjmgkqueue_fo_find(int filt) 649133741Sjmg{ 650133741Sjmg 651133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 652133741Sjmg return NULL; 653133741Sjmg 654133741Sjmg mtx_lock(&filterops_lock); 655133741Sjmg sysfilt_ops[~filt].for_refcnt++; 656133741Sjmg if (sysfilt_ops[~filt].for_fop == NULL) 657133741Sjmg sysfilt_ops[~filt].for_fop = &null_filtops; 658133741Sjmg mtx_unlock(&filterops_lock); 659133741Sjmg 660133741Sjmg return sysfilt_ops[~filt].for_fop; 661133741Sjmg} 662133741Sjmg 663133741Sjmgstatic void 664133741Sjmgkqueue_fo_release(int filt) 665133741Sjmg{ 666133741Sjmg 667133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 668133741Sjmg return; 669133741Sjmg 670133741Sjmg mtx_lock(&filterops_lock); 671133741Sjmg KASSERT(sysfilt_ops[~filt].for_refcnt > 0, 672133741Sjmg ("filter object refcount not valid on release")); 673133741Sjmg sysfilt_ops[~filt].for_refcnt--; 674133741Sjmg mtx_unlock(&filterops_lock); 675133741Sjmg} 676133741Sjmg 677133741Sjmg/* 678133741Sjmg * A ref to kq (obtained via kqueue_aquire) should be held. waitok will 679133741Sjmg * influence if memory allocation should wait. Make sure it is 0 if you 680133741Sjmg * hold any mutexes. 
681133741Sjmg */ 68288633Salfredint 683133741Sjmgkqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok) 68459290Sjlemon{ 685133741Sjmg struct filedesc *fdp; 68659290Sjlemon struct filterops *fops; 687133741Sjmg struct file *fp; 688133741Sjmg struct knote *kn, *tkn; 689133741Sjmg int error, filt, event; 690133741Sjmg int haskqglobal; 691133741Sjmg int fd; 69259290Sjlemon 693133741Sjmg fdp = NULL; 694133741Sjmg fp = NULL; 695133741Sjmg kn = NULL; 696133741Sjmg error = 0; 697133741Sjmg haskqglobal = 0; 69859290Sjlemon 699133741Sjmg filt = kev->filter; 700133741Sjmg fops = kqueue_fo_find(filt); 701133741Sjmg if (fops == NULL) 702133741Sjmg return EINVAL; 703133741Sjmg 704133741Sjmg tkn = knote_alloc(waitok); /* prevent waiting with locks */ 705133741Sjmg 706133741Sjmgfindkn: 70759290Sjlemon if (fops->f_isfd) { 708133741Sjmg KASSERT(td != NULL, ("td is NULL")); 709133741Sjmg fdp = td->td_proc->p_fd; 710133741Sjmg FILEDESC_LOCK(fdp); 71164343Sjlemon /* validate descriptor */ 712133741Sjmg fd = kev->ident; 713133741Sjmg if (fd < 0 || fd >= fdp->fd_nfiles || 714133741Sjmg (fp = fdp->fd_ofiles[fd]) == NULL) { 71589306Salfred FILEDESC_UNLOCK(fdp); 716133741Sjmg error = EBADF; 717133741Sjmg goto done; 71889306Salfred } 71968883Sdillon fhold(fp); 72059290Sjlemon 721133741Sjmg if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, 722133741Sjmg kev->ident, 0) != 0) { 723133741Sjmg /* unlock and try again */ 724133741Sjmg FILEDESC_UNLOCK(fdp); 725133741Sjmg fdrop(fp, td); 726133741Sjmg fp = NULL; 727133741Sjmg error = kqueue_expand(kq, fops, kev->ident, waitok); 728133741Sjmg if (error) 729133741Sjmg goto done; 730133741Sjmg goto findkn; 731133741Sjmg } 732133741Sjmg 733133741Sjmg if (fp->f_type == DTYPE_KQUEUE) { 734133741Sjmg /* 735133741Sjmg * if we add some inteligence about what we are doing, 736133741Sjmg * we should be able to support events on ourselves. 
737133741Sjmg * We need to know when we are doing this to prevent 738133741Sjmg * getting both the knlist lock and the kq lock since 739133741Sjmg * they are the same thing. 740133741Sjmg */ 741133741Sjmg if (fp->f_data == kq) { 742133741Sjmg FILEDESC_UNLOCK(fdp); 743133741Sjmg error = EINVAL; 744133741Sjmg goto done_noglobal; 745133741Sjmg } 746133741Sjmg 747133741Sjmg KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 748133741Sjmg } 749133741Sjmg 750137772Sphk FILEDESC_UNLOCK(fdp); 751133741Sjmg KQ_LOCK(kq); 752133741Sjmg if (kev->ident < kq->kq_knlistsize) { 753133741Sjmg SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) 754133741Sjmg if (kev->filter == kn->kn_filter) 75559290Sjlemon break; 75659290Sjlemon } 75759290Sjlemon } else { 758133741Sjmg if ((kev->flags & EV_ADD) == EV_ADD) 759133741Sjmg kqueue_expand(kq, fops, kev->ident, waitok); 760133741Sjmg 761133741Sjmg KQ_LOCK(kq); 762133741Sjmg if (kq->kq_knhashmask != 0) { 76359290Sjlemon struct klist *list; 764133635Sjmg 765133741Sjmg list = &kq->kq_knhash[ 766133741Sjmg KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 76759290Sjlemon SLIST_FOREACH(kn, list, kn_link) 76859290Sjlemon if (kev->ident == kn->kn_id && 76959290Sjlemon kev->filter == kn->kn_filter) 77059290Sjlemon break; 77159290Sjlemon } 77259290Sjlemon } 77359290Sjlemon 774133741Sjmg /* knote is in the process of changing, wait for it to stablize. 
 */
	/*
	 * If the matching knote is currently being changed by another
	 * thread (KN_INFLUX), drop our locks/refs, sleep until the flux
	 * clears, and retry the lookup from the top.
	 */
	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
		if (fp != NULL) {
			fdrop(fp, td);
			fp = NULL;
		}
		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
		goto findkn;
	}

	/* Only EV_ADD may create a knote; anything else needs a match. */
	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		KQ_UNLOCK(kq);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			/* Consume the pre-allocated knote (tkn). */
			kn = tkn;
			tkn = NULL;
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;
			/*
			 * apply reference counts to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fops = NULL;
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			/* New knote is in flux and not yet on any knlist. */
			kn->kn_status = KN_INFLUX|KN_DETACHED;

			error = knote_attach(kn, kq);
			KQ_UNLOCK(kq);
			if (error != 0) {
				/* Return the knote for freeing at done:. */
				tkn = kn;
				goto done;
			}

			if ((error = kn->kn_fop->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
			KN_LIST_LOCK(kn);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			KN_LIST_LOCK(kn);
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		/*
		 * We can get here with kn->kn_knlist == NULL.
		 * This can happen when the initial attach event decides that
		 * the event is "completed" already. i.e. filt_procattach
		 * is called on a zombie process. It will call filt_proc
		 * which will remove it from the list, and NULL kn_knlist.
		 */
		event = kn->kn_fop->f_event(kn, 0);
		KN_LIST_UNLOCK(kn);
		KQ_LOCK(kq);
		if (event)
			KNOTE_ACTIVATE(kn, 1);
		kn->kn_status &= ~KN_INFLUX;
	} else if (kev->flags & EV_DELETE) {
		/* Mark in flux so no one else touches it while we tear down. */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		if (!(kn->kn_status & KN_DETACHED))
			kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		kn->kn_status |= KN_DISABLED;
	}

	/* Re-enabling re-queues the knote if it is already active. */
	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
	}
	KQ_UNLOCK_FLUX(kq);

done:
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
done_noglobal:
	if (fp != NULL)
		fdrop(fp, td);
	if (tkn != NULL)
		knote_free(tkn);
	if (fops != NULL)
		kqueue_fo_release(filt);
	return (error);
}

/*
 * Resolve fp to its kqueue and take a reference on it, failing with
 * EBADF if fp is not a kqueue or the kqueue is already closing.
 * On success *kqp is set and the reference must be dropped with
 * kqueue_release().  (Historical spelling "aquire" is the public name.)
 */
static int
kqueue_aquire(struct file *fp, struct kqueue **kqp)
{
	int error;
	struct kqueue *kq;

	error = 0;

	FILE_LOCK(fp);
	do {
		kq = fp->f_data;
		if (fp->f_type != DTYPE_KQUEUE || kq == NULL) {
			error = EBADF;
			break;
		}
		*kqp = kq;
		KQ_LOCK(kq);
		if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
			KQ_UNLOCK(kq);
			error = EBADF;
			break;
		}
		kq->kq_refcnt++;
		KQ_UNLOCK(kq);
	} while (0);
	FILE_UNLOCK(fp);

	return error;
}

/*
 * Drop a reference obtained with kqueue_aquire().  When the count falls
 * back to 1 (only the file reference left) wake any thread sleeping in
 * kqueue_close() waiting for outstanding references to drain.
 */
static void
kqueue_release(struct kqueue *kq, int locked)
{
	if (locked)
		KQ_OWNED(kq);
	else
		KQ_LOCK(kq);
	kq->kq_refcnt--;
	if (kq->kq_refcnt == 1)
		wakeup(&kq->kq_refcnt);
	if (!locked)
		KQ_UNLOCK(kq);
}

/*
 * Schedule the kq's task on taskqueue_kqueue unless one is already
 * pending; the task (kqueue_task) notifies kqueues monitoring this one.
 * Must be called with the kq lock held and never while draining.
 */
static void
kqueue_schedtask(struct kqueue *kq)
{

	KQ_OWNED(kq);
	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
	    ("scheduling kqueue task while draining"));

	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
		taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
		kq->kq_state |= KQ_TASKSCHED;
	}
}

/*
 * Expand the kq to make sure we have storage for fops/ident pair.
 *
 * Return 0 on success (or no work necessary), return errno on failure.
 *
 * Not calling hashinit w/ waitok (proper malloc flag) should be safe.
 * If kqueue_register is called from a non-fd context, there usually/should
 * be no locks held.
 */
static int
kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
	int waitok)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;
	int fd;
	int mflag = waitok ? M_WAITOK : M_NOWAIT;

	KQ_NOTOWNED(kq);

	if (fops->f_isfd) {
		fd = ident;
		if (kq->kq_knlistsize <= fd) {
			size = kq->kq_knlistsize;
			while (size <= fd)
				size += KQEXTENT;
			/*
			 * NOTE(review): "sizeof list" is the size of a
			 * pointer, while the array elements are struct
			 * klist; this is only correct if struct klist is a
			 * single-pointer SLIST_HEAD — confirm against
			 * sys/event.h.
			 */
			MALLOC(list, struct klist *,
			    size * sizeof list, M_KQUEUE, mflag);
			if (list == NULL)
				return ENOMEM;
			/*
			 * Allocate unlocked, then re-check under the lock in
			 * case another thread grew the table first.
			 */
			KQ_LOCK(kq);
			if (kq->kq_knlistsize > fd) {
				FREE(list, M_KQUEUE);
				list = NULL;
			} else {
				if (kq->kq_knlist != NULL) {
					bcopy(kq->kq_knlist, list,
					    kq->kq_knlistsize * sizeof list);
					FREE(kq->kq_knlist, M_KQUEUE);
					kq->kq_knlist = NULL;
				}
				bzero((caddr_t)list +
				    kq->kq_knlistsize * sizeof list,
				    (size - kq->kq_knlistsize) * sizeof list);
				kq->kq_knlistsize = size;
				kq->kq_knlist = list;
			}
			KQ_UNLOCK(kq);
		}
	} else {
		/* Non-fd idents live in a hash; create it on first use. */
		if (kq->kq_knhashmask == 0) {
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			if (tmp_knhash == NULL)
				return ENOMEM;
			KQ_LOCK(kq);
			if (kq->kq_knhashmask == 0) {
				kq->kq_knhash = tmp_knhash;
				kq->kq_knhashmask = tmp_knhashmask;
			} else {
				/* Lost the race; discard our table. */
				free(tmp_knhash, M_KQUEUE);
			}
			KQ_UNLOCK(kq);
		}
	}

	KQ_NOTOWNED(kq);
	return 0;
}

/*
 * Taskqueue callback: deliver a KNOTE to kqueues monitoring this kqueue,
 * then clear KQ_TASKSCHED and wake a closer waiting in KQ_TASKDRAIN.
 */
static void
kqueue_task(void *arg, int pending)
{
	struct kqueue *kq;
	int haskqglobal;

	haskqglobal = 0;
	kq = arg;

	KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
	KQ_LOCK(kq);

	KNOTE_LOCKED(&kq->kq_sel.si_note, 0);

	kq->kq_state &= ~KQ_TASKSCHED;
	if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) {
		wakeup(&kq->kq_state);
	}
	KQ_UNLOCK(kq);
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
}

/*
 * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
 * We treat KN_MARKER knotes as if they are INFLUX.
 */
static int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct kevent *keva, struct thread *td)
{
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, *marker;
	int count, timeout, nkev, error;
	int haskqglobal;

	count = maxevents;
	nkev = 0;
	error = 0;
	haskqglobal = 0;

	if (maxevents == 0)
		goto done_nl;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done_nl;
		}
		/* A zero timespec means poll (no sleep): timeout = -1. */
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			/* Clamp to 24h worth of ticks to avoid overflow. */
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* NULL timespec: sleep indefinitely. */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	/*
	 * The marker knote delimits this pass over kq_head so concurrent
	 * scanners/enqueuers do not make us loop forever.
	 */
	marker = knote_alloc(1);
	if (marker == NULL) {
		error = ENOMEM;
		goto done_nl;
	}
	marker->kn_status = KN_MARKER;
	KQ_LOCK(kq);
	goto start;

retry:
	/* Recompute remaining sleep time from the absolute deadline. */
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = keva;
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			kq->kq_state |= KQ_SLEEP;
			error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
			    "kqread", timeout);
		}
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
	while (count) {
		KQ_OWNED(kq);
		kn = TAILQ_FIRST(&kq->kq_head);

		/*
		 * Wait out someone else's marker or an in-flux knote at the
		 * head of the queue.
		 * NOTE(review): the msleep return value stored in `error`
		 * here may be overwritten by later iterations — confirm
		 * signal handling in this path is intended.
		 */
		if ((kn->kn_status == KN_MARKER && kn != marker) ||
		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			kq->kq_state |= KQ_FLUXWAIT;
			error = msleep(kq, &kq->kq_lock, PSOCK,
			    "kqflxwt", 0);
			continue;
		}

		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
			/* Disabled knotes are silently dequeued. */
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if (kn == marker) {
			/* Reached our marker: this pass is complete. */
			KQ_FLUX_WAKEUP(kq);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		KASSERT((kn->kn_status & KN_INFLUX) == 0,
		    ("KN_INFLUX set when not suppose to be"));

		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kn->kn_status |= KN_INFLUX;
			kq->kq_count--;
			KQ_UNLOCK(kq);
			/*
			 * We don't need to lock the list since we've marked
			 * it _INFLUX.
			 */
			*kevp = kn->kn_kevent;
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
			kn = NULL;
		} else {
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
			KN_LIST_LOCK(kn);
			/* Re-validate: the filter may no longer be ready. */
			if (kn->kn_fop->f_event(kn, 0) == 0) {
				KN_LIST_UNLOCK(kn);
				KQ_LOCK(kq);
				kn->kn_status &=
				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
				kq->kq_count--;
				continue;
			}
			*kevp = kn->kn_kevent;
			KQ_LOCK(kq);
			if (kn->kn_flags & EV_CLEAR) {
				/* EV_CLEAR: reset state after delivery. */
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
				kq->kq_count--;
			} else
				/* Level-triggered: re-queue for next scan. */
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			KN_LIST_UNLOCK(kn);
			kn->kn_status &= ~(KN_INFLUX);
		}

		/* we are returning a copy to the user */
		kevp++;
		nkev++;
		count--;

		/* Flush a full batch to userland, then keep scanning. */
		if (nkev == KQ_NEVENTS) {
			KQ_UNLOCK_FLUX(kq);
			error = copyout(keva, ulistp, sizeof *keva * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = keva;
			KQ_LOCK(kq);
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
done:
	KQ_OWNED(kq);
	KQ_UNLOCK_FLUX(kq);
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
	knote_free(marker);
done_nl:
	KQ_NOTOWNED(kq);
	/* Copy out any partial batch still buffered in keva. */
	if (nkev != 0)
		error = copyout(keva, ulistp, sizeof *keva * nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	/* read(2) on a kqueue fd is not supported. */
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	/* write(2) on a kqueue fd is not supported. */
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
	struct ucred *active_cred, struct thread *td)
{
	/*
	 * Enabling sigio causes two major problems:
	 * 1) infinite recursion:
	 * Synopsys: kevent is being used to track signals and have FIOASYNC
	 * set. On receipt of a signal this will cause a kqueue to recurse
	 * into itself over and over. Sending the sigio causes the kqueue
	 * to become ready, which in turn posts sigio again, forever.
	 * Solution: this can be solved by setting a flag in the kqueue that
	 * we have a SIGIO in progress.
	 * 2) locking problems:
	 * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
	 * us above the proc and pgrp locks.
	 * Solution: Post a signal using an async mechanism, being sure to
	 * record a generation count in the delivery so that we do not deliver
	 * a signal to the wrong process.
	 *
	 * Note, these two mechanisms are somewhat mutually exclusive!
	 */
#if 0
	struct kqueue *kq;

	kq = fp->f_data;
	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			kq->kq_state |= KQ_ASYNC;
		} else {
			kq->kq_state &= ~KQ_ASYNC;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &kq->kq_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&kq->kq_sigio);
		return (0);
	}
#endif

	return (ENOTTY);
}

/*
 * poll(2)/select(2) support: a kqueue is readable when it has pending
 * events; otherwise record the selector for a later wakeup.
 */
/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int error;

	if ((error = kqueue_aquire(fp, &kq)))
		return POLLERR;

	KQ_LOCK(kq);
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	kqueue_release(kq, 1);
	KQ_UNLOCK(kq);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{

	/* fstat(2) on a kqueue fd is not supported. */
	return (ENXIO);
}

/*
 * Tear down a kqueue on last close of its file: mark it closing, wait for
 * outstanding references and the notification task to drain, drop every
 * attached knote, then free all per-kqueue storage.
 */
/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp;
	struct knote *kn;
	int i;
	int error;

	if ((error = kqueue_aquire(fp, &kq)))
		return error;

	KQ_LOCK(kq);

	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
	    ("kqueue already closing"));
	kq->kq_state |= KQ_CLOSING;
	/* Wait until our reference (from kqueue_aquire) is the last one. */
	if (kq->kq_refcnt > 1)
		msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);

	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
	fdp = kq->kq_fdp;

	KASSERT(knlist_empty(&kq->kq_sel.si_note),
	    ("kqueue's knlist not empty"));

	/* Detach and drop every fd-based knote. */
	for (i = 0; i < kq->kq_knlistsize; i++) {
		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
			KASSERT((kn->kn_status & KN_INFLUX) == 0,
			    ("KN_INFLUX set when not suppose to be"));
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
		}
	}
	/* Likewise for hashed (non-fd) knotes. */
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i <= kq->kq_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
				KASSERT((kn->kn_status & KN_INFLUX) == 0,
				    ("KN_INFLUX set when not suppose to be"));
				kn->kn_status |= KN_INFLUX;
				KQ_UNLOCK(kq);
				if (!(kn->kn_status & KN_DETACHED))
					kn->kn_fop->f_detach(kn);
				knote_drop(kn, td);
				KQ_LOCK(kq);
			}
		}
	}

	/* Wait for an already-scheduled kqueue_task to finish. */
	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
		kq->kq_state |= KQ_TASKDRAIN;
		msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
	}

	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeuppri(&kq->kq_sel, PSOCK);
	}

	KQ_UNLOCK(kq);

	FILEDESC_LOCK_FAST(fdp);
	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
	FILEDESC_UNLOCK_FAST(fdp);

	knlist_destroy(&kq->kq_sel.si_note);
	mtx_destroy(&kq->kq_lock);
	kq->kq_fdp = NULL;

	if (kq->kq_knhash != NULL)
		free(kq->kq_knhash, M_KQUEUE);
	if (kq->kq_knlist != NULL)
		free(kq->kq_knlist, M_KQUEUE);

	funsetown(&kq->kq_sigio);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

/*
 * Notify everything waiting on this kqueue: sleepers in kqueue_scan,
 * selectors, kqueues monitoring this one (via the task), and async
 * SIGIO consumers.  Called with the kq lock held.
 */
static void
kqueue_wakeup(struct kqueue *kq)
{
	KQ_OWNED(kq);

	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeuppri(&kq->kq_sel, PSOCK);
	}
	if (!knlist_empty(&kq->kq_sel.si_note))
		kqueue_schedtask(kq);
	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
		pgsigio(&kq->kq_sigio, SIGIO, 0);
	}
}

/*
 * Walk down a list of knotes, activating them if their event has triggered.
 *
 * There is a possibility to optimize in the case of one kq watching another.
 * Instead of scheduling a task to wake it up, you could pass enough state
 * down the chain to make up the parent kqueue.  Make this code functional
 * first.
 */
void
knote(struct knlist *list, long hint, int islocked)
{
	struct kqueue *kq;
	struct knote *kn;

	if (list == NULL)
		return;

	mtx_assert(list->kl_lock, islocked ? MA_OWNED : MA_NOTOWNED);
	if (!islocked)
		mtx_lock(list->kl_lock);
	/*
	 * If we unlock the list lock (and set KN_INFLUX), we can eliminate
	 * the kqueue scheduling, but this will introduce four
	 * lock/unlock's for each knote to test.  If we do, continue to use
	 * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is
	 * only safe if you want to remove the current item, which we are
	 * not doing.
	 */
	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		kq = kn->kn_kq;
		/* Check INFLUX twice: once cheap, once under the kq lock. */
		if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
			KQ_LOCK(kq);
			if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
				kn->kn_status |= KN_HASKQLOCK;
				if (kn->kn_fop->f_event(kn, hint))
					KNOTE_ACTIVATE(kn, 1);
				kn->kn_status &= ~KN_HASKQLOCK;
			}
			KQ_UNLOCK(kq);
		}
		kq = NULL;
	}
	if (!islocked)
		mtx_unlock(list->kl_lock);
}

/*
 * add a knote to a knlist
 */
void
knlist_add(struct knlist *knl, struct knote *kn, int islocked)
{
	mtx_assert(knl->kl_lock, islocked ? MA_OWNED : MA_NOTOWNED);
	KQ_NOTOWNED(kn->kn_kq);
	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
	if (!islocked)
		mtx_lock(knl->kl_lock);
	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
	if (!islocked)
		mtx_unlock(knl->kl_lock);
	/* Record membership and clear DETACHED under the kq lock. */
	KQ_LOCK(kn->kn_kq);
	kn->kn_knlist = knl;
	kn->kn_status &= ~KN_DETACHED;
	KQ_UNLOCK(kn->kn_kq);
}

/*
 * Common unlink of a knote from a knlist; callers tell us which of the
 * knlist lock and the kq lock they already hold.
 */
static void
knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
{
	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
	mtx_assert(knl->kl_lock, knlislocked ? MA_OWNED : MA_NOTOWNED);
	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
	if (!kqislocked)
		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
		    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
	if (!knlislocked)
		mtx_lock(knl->kl_lock);
	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
	kn->kn_knlist = NULL;
	if (!knlislocked)
		mtx_unlock(knl->kl_lock);
	if (!kqislocked)
		KQ_LOCK(kn->kn_kq);
	kn->kn_status |= KN_DETACHED;
	if (!kqislocked)
		KQ_UNLOCK(kn->kn_kq);
}

/*
 * remove all knotes from a specified klist
 */
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
{

	knlist_remove_kq(knl, kn, islocked, 0);
}

/*
 * remove knote from a specified klist while in f_event handler.
 */
void
knlist_remove_inevent(struct knlist *knl, struct knote *kn)
{

	/* kq lock is held by the caller iff knote() set KN_HASKQLOCK. */
	knlist_remove_kq(knl, kn, 1,
	    (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
}

/* Return non-zero if the (locked) knlist has no knotes. */
int
knlist_empty(struct knlist *knl)
{

	mtx_assert(knl->kl_lock, MA_OWNED);
	return SLIST_EMPTY(&knl->kl_list);
}

/* Shared fallback lock for knlists whose owner supplies no mutex. */
static struct mtx	knlist_lock;
MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
	MTX_DEF);

/* Initialize a knlist, using the shared lock when mtx is NULL. */
void
knlist_init(struct knlist *knl, struct mtx *mtx)
{

	if (mtx == NULL)
		knl->kl_lock = &knlist_lock;
	else
		knl->kl_lock = mtx;

	SLIST_INIT(&knl->kl_list);
}

void
knlist_destroy(struct knlist *knl)
{

#ifdef INVARIANTS
	/*
	 * if we run across this error, we need to find the offending
	 * driver and have it call knlist_clear.
	 */
	if (!SLIST_EMPTY(&knl->kl_list))
		printf("WARNING: destroying knlist w/ knotes on it!\n");
#endif

	knl->kl_lock = NULL;
	SLIST_INIT(&knl->kl_list);
}

/*
 * Even if we are locked, we may need to drop the lock to allow any influx
 * knotes time to "settle".
 */
void
knlist_clear(struct knlist *knl, int islocked)
{
	struct knote *kn;
	struct kqueue *kq;

	if (islocked)
		mtx_assert(knl->kl_lock, MA_OWNED);
	else {
		mtx_assert(knl->kl_lock, MA_NOTOWNED);
again:		/* need to reaquire lock since we have dropped it */
		mtx_lock(knl->kl_lock);
	}

	SLIST_FOREACH(kn, &knl->kl_list, kn_selnext) {
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		/* Skip influx knotes for now; handled below. */
		if ((kn->kn_status & KN_INFLUX) &&
		    (kn->kn_status & KN_DETACHED) != KN_DETACHED) {
			KQ_UNLOCK(kq);
			continue;
		}
		/* Make sure cleared knotes disappear soon */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		knlist_remove_kq(knl, kn, 1, 1);
		KQ_UNLOCK(kq);
		kq = NULL;
	}

	if (!SLIST_EMPTY(&knl->kl_list)) {
		/* there are still KN_INFLUX remaining */
		kn = SLIST_FIRST(&knl->kl_list);
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		KASSERT(kn->kn_status & KN_INFLUX,
		    ("knote removed w/o list lock"));
		mtx_unlock(knl->kl_lock);
		kq->kq_state |= KQ_FLUXWAIT;
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
		kq = NULL;
		goto again;
	}

	SLIST_INIT(&knl->kl_list);

	if (islocked)
		mtx_assert(knl->kl_lock, MA_OWNED);
	else {
		mtx_unlock(knl->kl_lock);
		mtx_assert(knl->kl_lock, MA_NOTOWNED);
	}
}

/*
 * remove all knotes referencing a specified fd
 * must be called with FILEDESC lock.  This prevents a race where a new fd
 * comes along and occupies the entry and we attach a knote to the fd.
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct kqueue *kq;
	struct knote *kn;
	int influx;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * We shouldn't have to worry about new kevents appearing on fd
	 * since filedesc is locked.
	 */
	SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
		KQ_LOCK(kq);

again:
		influx = 0;
		while (kq->kq_knlistsize > fd &&
		    (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
			if (kn->kn_status & KN_INFLUX) {
				/* someone else might be waiting on our knote */
				if (influx)
					wakeup(kq);
				kq->kq_state |= KQ_FLUXWAIT;
				msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
				goto again;
			}
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			influx = 1;
			KQ_LOCK(kq);
		}
		KQ_UNLOCK_FLUX(kq);
	}
}

/*
 * Link an INFLUX knote into the kq's fd table or hash.  Returns ENOMEM
 * if the table has not been expanded to cover this ident (see
 * kqueue_expand).  Called with the kq lock held.
 */
static int
knote_attach(struct knote *kn, struct kqueue *kq)
{
	struct klist *list;

	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
	KQ_OWNED(kq);

	if (kn->kn_fop->f_isfd) {
		if (kn->kn_id >= kq->kq_knlistsize)
			return ENOMEM;
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		if (kq->kq_knhash == NULL)
			return ENOMEM;
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}

	SLIST_INSERT_HEAD(list, kn, kn_link);

	return 0;
}

/*
 * knote must already have been detached using the f_detach method.
 * no lock need to be held, it is assumed that the KN_INFLUX flag is set
 * to prevent other removal.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	KQ_NOTOWNED(kq);
	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
	    ("knote_drop called without KN_INFLUX set in kn_status"));

	KQ_LOCK(kq);
	if (kn->kn_fop->f_isfd)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	KQ_UNLOCK_FLUX(kq);

	/* Release the fd reference taken at registration time. */
	if (kn->kn_fop->f_isfd) {
		fdrop(kn->kn_fp, td);
		kn->kn_fp = NULL;
	}
	kqueue_fo_release(kn->kn_kevent.filter);
	kn->kn_fop = NULL;
	knote_free(kn);
}

/*
 * Put an active knote on kq_head for delivery and wake any waiters.
 * Called with the kq lock held.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_wakeup(kq);
}

/* Remove a queued knote from kq_head.  Called with the kq lock held. */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KQ_OWNED(kn->kn_kq);
	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
}

/* Create the UMA zone knotes are allocated from. */
static void
knote_init(void)
{

	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

/* Allocate a zeroed knote; may sleep only if waitok is non-zero. */
static struct knote *
knote_alloc(int waitok)
{
	return ((struct knote *)uma_zalloc(knote_zone,
	    (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
}

/* Free a knote; NULL is tolerated. */
static void
knote_free(struct knote *kn)
{
	if (kn != NULL)
		uma_zfree(knote_zone, kn);
}