159290Sjlemon/*- 272969Sjlemon * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3133741Sjmg * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org> 4197240Ssson * Copyright (c) 2009 Apple, Inc. 559290Sjlemon * All rights reserved. 659290Sjlemon * 759290Sjlemon * Redistribution and use in source and binary forms, with or without 859290Sjlemon * modification, are permitted provided that the following conditions 959290Sjlemon * are met: 1059290Sjlemon * 1. Redistributions of source code must retain the above copyright 1159290Sjlemon * notice, this list of conditions and the following disclaimer. 1259290Sjlemon * 2. Redistributions in binary form must reproduce the above copyright 1359290Sjlemon * notice, this list of conditions and the following disclaimer in the 1459290Sjlemon * documentation and/or other materials provided with the distribution. 1559290Sjlemon * 1659290Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1759290Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1859290Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1959290Sjlemon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2059290Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2159290Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2259290Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2359290Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2459290Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2559290Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2659290Sjlemon * SUCH DAMAGE. 
2759290Sjlemon */ 2859290Sjlemon 29116182Sobrien#include <sys/cdefs.h> 30116182Sobrien__FBSDID("$FreeBSD$"); 31116182Sobrien 32162592Sjmg#include "opt_ktrace.h" 33162592Sjmg 3459290Sjlemon#include <sys/param.h> 3559290Sjlemon#include <sys/systm.h> 36224778Srwatson#include <sys/capability.h> 3759290Sjlemon#include <sys/kernel.h> 3876166Smarkm#include <sys/lock.h> 3976166Smarkm#include <sys/mutex.h> 4059290Sjlemon#include <sys/proc.h> 41132138Salfred#include <sys/malloc.h> 4259290Sjlemon#include <sys/unistd.h> 4359290Sjlemon#include <sys/file.h> 44108524Salfred#include <sys/filedesc.h> 45132138Salfred#include <sys/filio.h> 4659290Sjlemon#include <sys/fcntl.h> 47133741Sjmg#include <sys/kthread.h> 4870834Swollman#include <sys/selinfo.h> 4959290Sjlemon#include <sys/queue.h> 5059290Sjlemon#include <sys/event.h> 5159290Sjlemon#include <sys/eventvar.h> 5259290Sjlemon#include <sys/poll.h> 5359290Sjlemon#include <sys/protosw.h> 54132138Salfred#include <sys/sigio.h> 55132138Salfred#include <sys/signalvar.h> 5659290Sjlemon#include <sys/socket.h> 5759290Sjlemon#include <sys/socketvar.h> 5859290Sjlemon#include <sys/stat.h> 5984138Sjlemon#include <sys/sysctl.h> 6059290Sjlemon#include <sys/sysproto.h> 61142934Sps#include <sys/syscallsubr.h> 62133741Sjmg#include <sys/taskqueue.h> 6359290Sjlemon#include <sys/uio.h> 64162592Sjmg#ifdef KTRACE 65162592Sjmg#include <sys/ktrace.h> 66162592Sjmg#endif 6759290Sjlemon 6892751Sjeff#include <vm/uma.h> 6959290Sjlemon 70141616Sphkstatic MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 71141616Sphk 72133741Sjmg/* 73133741Sjmg * This lock is used if multiple kq locks are required. This possibly 74133741Sjmg * should be made into a per proc lock. 
 */
static struct mtx	kq_global;
MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
/*
 * Conditionally take/drop kq_global; `haslck' tracks whether the caller
 * already holds it so the macros can be used in loops without recursion.
 */
#define KQ_GLOBAL_LOCK(lck, haslck)	do {	\
	if (!haslck)				\
		mtx_lock(lck);			\
	haslck = 1;				\
} while (0)
#define KQ_GLOBAL_UNLOCK(lck, haslck)	do {	\
	if (haslck)				\
		mtx_unlock(lck);		\
	haslck = 0;				\
} while (0)

/* Dedicated taskqueue thread used for deferred kqueue wakeups. */
TASKQUEUE_DEFINE_THREAD(kqueue);

/* Forward declarations for the local kqueue implementation. */
static int	kevent_copyout(void *arg, struct kevent *kevp, int count);
static int	kevent_copyin(void *arg, struct kevent *kevp, int count);
static int	kqueue_register(struct kqueue *kq, struct kevent *kev,
		    struct thread *td, int waitok);
static int	kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void	kqueue_release(struct kqueue *kq, int locked);
static int	kqueue_expand(struct kqueue *kq, struct filterops *fops,
		    uintptr_t ident, int waitok);
static void	kqueue_task(void *arg, int pending);
static int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent_copyops *k_ops,
		    const struct timespec *timeout,
		    struct kevent *keva, struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void	kqueue_fo_release(int filt);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_truncate_t	kqueue_truncate;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

/*
 * File operations vector for kqueue descriptors.  chmod/chown are
 * rejected via the invfo_* stubs.
 */
static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_truncate = kqueue_truncate,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
};

static int 	knote_attach(struct knote *kn, struct kqueue *kq);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(int waitok);
static void 	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);
static int	filt_userattach(struct knote *kn);
static void	filt_userdetach(struct knote *kn);
static int	filt_user(struct knote *kn, long hint);
static void	filt_usertouch(struct knote *kn, struct kevent *kev,
		    u_long type);

/* Generic filter for fd-backed events: delegates to the file's fo_kqfilter. */
static struct filterops file_filtops = {
	.f_isfd = 1,
	.f_attach = filt_fileattach,
};
/* Filter used when a kqueue itself is monitored for readability. */
static struct filterops kqread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
};
/* XXX - move to kern_proc.c? */
static struct filterops proc_filtops = {
	.f_isfd = 0,
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
};
static struct filterops timer_filtops = {
	.f_isfd = 0,
	.f_attach = filt_timerattach,
	.f_detach = filt_timerdetach,
	.f_event = filt_timer,
};
static struct filterops user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
};

static uma_zone_t	knote_zone;
static int 		kq_ncallouts = 0;	/* current EVFILT_TIMER callouts */
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

/*
 * Mark a knote active and enqueue it unless it is already queued or
 * disabled.  `islock' indicates whether the owning kq's lock is already
 * held by the caller (asserted), otherwise it is taken and dropped here.
 */
/* XXX - ensure not KN_INFLUX?? */
#define KNOTE_ACTIVATE(kn, islock) do { 				\
	if ((islock))							\
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
	else								\
		KQ_LOCK((kn)->kn_kq);					\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue((kn));					\
	if (!(islock))							\
		KQ_UNLOCK((kn)->kn_kq);					\
} while(0)
#define KQ_LOCK(kq) do {						\
	mtx_lock(&(kq)->kq_lock);					\
} while (0)
/* Wake any thread sleeping in flux-wait on this kq. */
#define KQ_FLUX_WAKEUP(kq) do {						\
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {		\
		(kq)->kq_state &= ~KQ_FLUXWAIT;				\
		wakeup((kq));						\
	}								\
} while (0)
#define KQ_UNLOCK_FLUX(kq) do {						\
	KQ_FLUX_WAKEUP(kq);						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_UNLOCK(kq) do {						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_OWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_OWNED);				\
} while (0)
#define KQ_NOTOWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
} while (0)
/* Lock/unlock the knlist a knote is on, if it is on one. */
#define KN_LIST_LOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define KN_LIST_UNLOCK(kn) do {						\
	if (kn->kn_knlist != NULL) 					\
		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);					\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	knl->kl_assert_locked((knl)->kl_lockarg);			\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	knl->kl_assert_unlocked((knl)->kl_lockarg);			\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while(0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/* Attach routine for the placeholder filter: always refuses attachment. */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
};

struct filterops null_filtops = {
	.f_isfd = 0,
	.f_attach = filt_nullattach,
};

/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters.  System filter numbers are
 * negative, so entries are indexed by ~filt (see kqueue_fo_find()).
 * for_refcnt counts outstanding users of a dynamically added filter.
 */
static struct mtx	filterops_lock;
MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
	MTX_DEF);
static struct {
	struct filterops *for_fop;
	int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
	{ &file_filtops },			/* EVFILT_READ */
	{ &file_filtops },			/* EVFILT_WRITE */
	{ &null_filtops },			/* EVFILT_AIO */
	{ &file_filtops },			/* EVFILT_VNODE */
	{ &proc_filtops },			/* EVFILT_PROC */
	{ &sig_filtops },			/* EVFILT_SIGNAL */
	{ &timer_filtops },			/* EVFILT_TIMER */
	{ &null_filtops },			/* former EVFILT_NETDEV */
	{ &fs_filtops },			/* EVFILT_FS */
	{ &null_filtops },			/* EVFILT_LIO */
	{ &user_filtops },			/* EVFILT_USER */
};

/*
 * Simple redirection for all cdevsw style objects to call their fo_kqfilter
 * method.
 */
static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * Attach routine when a kqueue descriptor is itself kevent'ed: only
 * EVFILT_READ is supported, reporting pending-event count as readability.
 */
/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_status |= KN_KQUEUE;
	kn->kn_fop = &kqread_filtops;
	knlist_add(&kq->kq_sel.si_note, kn, 0);

	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	knlist_remove(&kq->kq_sel.si_note, kn, 0);
}

/* The monitored kqueue is "readable" when it has queued events. */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

/*
 * Attach an EVFILT_PROC knote to the process identified by kn_id.
 * NOTE(review): pfind()/zpfind() appear to return the process locked —
 * every exit path below drops it with PROC_UNLOCK(); confirm against
 * their definitions.
 */
/* XXX - move to kern_proc.c? */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;
	int error;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		/* Target may already be a zombie; deliver NOTE_EXIT at once. */
		p = zpfind(kn->kn_id);
		immediate = 1;
	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
		/* Target is exiting; treat like the zombie case. */
		immediate = 1;
	}

	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	if (immediate == 0)
		knlist_add(&p->p_klist, kn, 1);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn, 0);

	PROC_UNLOCK(p);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.
So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
/* XXX - move to kern_proc.c? */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	knlist_remove(&p->p_klist, kn, 0);
	kn->kn_ptr.p_proc = NULL;
}

/* Event routine for EVFILT_PROC; `hint' carries the NOTE_* event code. */
/* XXX - move to kern_proc.c? */
static int
filt_proc(struct knote *kn, long hint)
{
	struct proc *p = kn->kn_ptr.p_proc;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		if (!(kn->kn_status & KN_DETACHED))
			knlist_remove_inevent(&p->p_klist, kn);
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_ptr.p_proc = NULL;
		if (kn->kn_fflags & NOTE_EXIT)
			kn->kn_data = p->p_xstat;	/* exit status */
		if (kn->kn_fflags == 0)
			/* Nothing the user asked for fired; drop silently. */
			kn->kn_flags |= EV_DROP;
		return (1);
	}

	return (kn->kn_fflags != 0);
}

/*
 * Called when the process forked. It mostly does the same as the
 * knote(), activating all knotes registered to be activated when the
 * process forked. Additionally, for each knote attached to the
 * parent, check whether user wants to track the new process. If so
 * attach a new knote to it, and immediately report an event with the
 * child's pid.
 */
void
knote_fork(struct knlist *list, int pid)
{
	struct kqueue *kq;
	struct knote *kn;
	struct kevent kev;
	int error;

	if (list == NULL)
		return;
	list->kl_lock(list->kl_lockarg);

	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		/* Skip knotes another thread is already manipulating. */
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
			continue;
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The same as knote(), activate the event.
		 */
		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
			kn->kn_status |= KN_HASKQLOCK;
			if (kn->kn_fop->f_event(kn, NOTE_FORK))
				KNOTE_ACTIVATE(kn, 1);
			kn->kn_status &= ~KN_HASKQLOCK;
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The NOTE_TRACK case. In addition to the activation
		 * of the event, we need to register new event to
		 * track the child. Drop the locks in preparation for
		 * the call to kqueue_register().
		 */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		list->kl_unlock(list->kl_lockarg);

		/*
		 * Activate existing knote and register a knote with
		 * new process.
		 */
		kev.ident = pid;
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;		/* parent */
		kev.udata = kn->kn_kevent.udata;/* preserve udata */
		error = kqueue_register(kq, &kev, NULL, 0);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
		if (kn->kn_fop->f_event(kn, NOTE_FORK))
			KNOTE_ACTIVATE(kn, 0);
		KQ_LOCK(kq);
		kn->kn_status &= ~KN_INFLUX;
		KQ_UNLOCK_FLUX(kq);
		list->kl_lock(list->kl_lockarg);
	}
	list->kl_unlock(list->kl_lockarg);
}

/*
 * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
 * interval timer support code.
 */
/* Convert a timer period in milliseconds into clock ticks via tvtohz(). */
static int
timertoticks(intptr_t data)
{
	struct timeval tv;
	int tticks;

	tv.tv_sec = data / 1000;
	tv.tv_usec = (data % 1000) * 1000;
	tticks = tvtohz(&tv);

	return tticks;
}

/* Callout handler for EVFILT_TIMER: count the expiry and re-arm. */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */

	/*
	 * timertoticks() uses tvtohz() which always adds 1 to allow
	 * for the time until the next clock interrupt being strictly
	 * less than 1 clock tick.  We don't want that here since we
	 * want to appear to be in sync with the clock interrupt even
	 * when we're delayed.
	 */
	if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1,
		    filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;

	/* Enforce the global cap on timer callouts (kern.kq_calloutmax). */
	atomic_add_int(&kq_ncallouts, 1);

	if (kq_ncallouts >= kq_calloutmax) {
		atomic_add_int(&kq_ncallouts, -1);
		return (ENOMEM);
	}

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	kn->kn_status &= ~KN_DETACHED;		/* knlist_add usually sets it */
	calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK);
	callout_init(calloutp, CALLOUT_MPSAFE);
	kn->kn_hook = calloutp;
	callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
	    filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	/* callout_drain() waits for a concurrent filt_timerexpire() call. */
	callout_drain(calloutp);
	free(calloutp, M_KQUEUE);
	atomic_add_int(&kq_ncallouts, -1);
	kn->kn_status |= KN_DETACHED;	/* knlist_remove usually clears it */
}

/* Timer has fired at least once since last read. */
static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}

static int
filt_userattach(struct knote *kn)
{

	/*
	 * EVFILT_USER knotes are not attached to anything in the kernel.
	 */
	kn->kn_hook = NULL;
	if (kn->kn_fflags & NOTE_TRIGGER)
		kn->kn_hookid = 1;
	else
		kn->kn_hookid = 0;
	return (0);
}

static void
filt_userdetach(__unused struct knote *kn)
{

	/*
	 * EVFILT_USER knotes are not attached to anything in the kernel.
	 */
}

/* kn_hookid records whether NOTE_TRIGGER has fired. */
static int
filt_user(struct knote *kn, __unused long hint)
{

	return (kn->kn_hookid);
}

/*
 * Touch routine for EVFILT_USER: merge userland-supplied fflags/data on
 * registration (honouring the NOTE_FFCTRLMASK control bits) and copy
 * state back out when the event is processed.
 */
static void
filt_usertouch(struct knote *kn, struct kevent *kev, u_long type)
{
	u_int ffctrl;

	switch (type) {
	case EVENT_REGISTER:
		if (kev->fflags & NOTE_TRIGGER)
			kn->kn_hookid = 1;

		ffctrl = kev->fflags & NOTE_FFCTRLMASK;
		kev->fflags &= NOTE_FFLAGSMASK;
		switch (ffctrl) {
		case NOTE_FFNOP:
			break;

		case NOTE_FFAND:
			kn->kn_sfflags &= kev->fflags;
			break;

		case NOTE_FFOR:
			kn->kn_sfflags |= kev->fflags;
			break;

		case NOTE_FFCOPY:
			kn->kn_sfflags = kev->fflags;
			break;

		default:
			/* XXX Return error? */
			break;
		}
		kn->kn_sdata = kev->data;
		if (kev->flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

	case EVENT_PROCESS:
		*kev = kn->kn_kevent;
		kev->fflags = kn->kn_sfflags;
		kev->data = kn->kn_sdata;
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

	default:
		/* NOTE(review): `type' is u_long; %lu would match better than %ld. */
		panic("filt_usertouch() - invalid type (%ld)", type);
		break;
	}
}

/*
 * kqueue(2): allocate a new kqueue, link it onto the owning filedesc's
 * kq list, and return its descriptor in td_retval[0].
 */
int
sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd, 0);
	if (error)
		goto done2;

	/* An extra reference on `nfp' has been held for us by falloc().
 */
	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
	TAILQ_INIT(&kq->kq_head);
	kq->kq_fdp = fdp;
	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

	FILEDESC_XLOCK(fdp);
	TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
	FILEDESC_XUNLOCK(fdp);

	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
	fdrop(fp, td);

	td->td_retval[0] = fd;
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * kevent(2) system call: copy in the optional timeout, set up userland
 * copyin/copyout ops, and hand off to kern_kevent().  Under KTRACE the
 * change and event lists are recorded as GENIO before/after the call.
 */
int
sys_kevent(struct thread *td, struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	struct kevent_copyops k_ops = { uap,
					kevent_copyout,
					kevent_copyin};
	int error;
#ifdef KTRACE
	struct uio ktruio;
	struct iovec ktriov;
	struct uio *ktruioin = NULL;
	struct uio *ktruioout = NULL;
#endif

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		ktriov.iov_base = uap->changelist;
		ktriov.iov_len = uap->nchanges * sizeof(struct kevent);
		ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1,
		    .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ,
		    .uio_td = td };
		ktruioin = cloneuio(&ktruio);
		ktriov.iov_base = uap->eventlist;
		ktriov.iov_len = uap->nevents * sizeof(struct kevent);
		ktruioout = cloneuio(&ktruio);
	}
#endif

	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
	    &k_ops, tsp);

#ifdef KTRACE
	if (ktruioin != NULL) {
		ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0);
		ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_READ, ktruioout, error);
	}
#endif

	return (error);
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 * Advances the eventlist cursor on success.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;
	return (error);
}

/*
 * Common kevent implementation: apply `nchanges' changes in KQ_NEVENTS-
 * sized batches via k_ops, reporting per-change failures as EV_ERROR
 * events when room remains in the event list, then scan for up to
 * `nevents' triggered events.  Returns 0 with the event count in
 * td_retval[0] (set by kqueue_scan()), or an errno.
 */
int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kevent keva[KQ_NEVENTS];
	struct kevent *kevp, *changes;
	struct kqueue *kq;
	struct file *fp;
	int i, n, nerrors, error;

	if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto done_norel;

	nerrors = 0;

	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = k_ops->k_copyin(k_ops->arg, keva, n);
		if (error)
			goto done;
		changes = keva;
		for (i = 0; i < n; i++) {
			kevp = &changes[i];
			if (!kevp->filter)
				continue;
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td, 1);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (nevents != 0) {
					/* Report failure inline as EV_ERROR. */
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) k_ops->k_copyout(k_ops->arg,
					    kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
done:
	kqueue_release(kq, 0);
done_norel:
	fdrop(fp, td);
	return (error);
}

/*
 * Register `filtops' for system filter number `filt' (negative; stored
 * at sysfilt_ops[~filt]).  Fails with EEXIST if the slot is occupied.
 */
int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{
	int error;

	error = 0;
	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) {
		printf(
"trying to add a filterop that is out of range: %d is beyond %d\n",
		    ~filt, EVFILT_SYSCOUNT);
		return EINVAL;
	}
	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop != &null_filtops &&
	    sysfilt_ops[~filt].for_fop != NULL)
		error = EEXIST;
	else {
		sysfilt_ops[~filt].for_fop = filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return (error);
}

/*
 * Unregister filter `filt', restoring the null filter.  Fails with
 * EBUSY while knotes still reference it (for_refcnt != 0).
 */
int
kqueue_del_filteropts(int filt)
{
	int error;

	error = 0;
	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return EINVAL;

	mtx_lock(&filterops_lock);
	if (sysfilt_ops[~filt].for_fop == &null_filtops ||
	    sysfilt_ops[~filt].for_fop == NULL)
		error = EINVAL;
	else if (sysfilt_ops[~filt].for_refcnt != 0)
		error = EBUSY;
	else {
		sysfilt_ops[~filt].for_fop = &null_filtops;
		sysfilt_ops[~filt].for_refcnt = 0;
	}
	mtx_unlock(&filterops_lock);

	return error;
}

/*
 * Look up the filterops for `filt' and take a reference on the slot.
 * Returns NULL for out-of-range filters; pair with kqueue_fo_release().
 */
static struct filterops *
kqueue_fo_find(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return NULL;

	mtx_lock(&filterops_lock);
	sysfilt_ops[~filt].for_refcnt++;
	if (sysfilt_ops[~filt].for_fop == NULL)
		sysfilt_ops[~filt].for_fop = &null_filtops;
	mtx_unlock(&filterops_lock);

	return sysfilt_ops[~filt].for_fop;
}

/* Drop a reference obtained from kqueue_fo_find(). */
static void
kqueue_fo_release(int filt)
{

	if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
		return;

	mtx_lock(&filterops_lock);
	KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
	    ("filter object refcount not valid on release"));
	sysfilt_ops[~filt].for_refcnt--;
	mtx_unlock(&filterops_lock);
}

/*
 * A ref to kq (obtained via
kqueue_acquire) must be held. waitok will 961133741Sjmg * influence if memory allocation should wait. Make sure it is 0 if you 962133741Sjmg * hold any mutexes. 963133741Sjmg */ 964162594Sjmgstatic int 965133741Sjmgkqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok) 96659290Sjlemon{ 96759290Sjlemon struct filterops *fops; 968133741Sjmg struct file *fp; 969133741Sjmg struct knote *kn, *tkn; 970133741Sjmg int error, filt, event; 971256074Skib int haskqglobal, filedesc_unlock; 97259290Sjlemon 973133741Sjmg fp = NULL; 974133741Sjmg kn = NULL; 975133741Sjmg error = 0; 976133741Sjmg haskqglobal = 0; 977256074Skib filedesc_unlock = 0; 97859290Sjlemon 979133741Sjmg filt = kev->filter; 980133741Sjmg fops = kqueue_fo_find(filt); 981133741Sjmg if (fops == NULL) 982133741Sjmg return EINVAL; 983133741Sjmg 984133741Sjmg tkn = knote_alloc(waitok); /* prevent waiting with locks */ 985133741Sjmg 986133741Sjmgfindkn: 98759290Sjlemon if (fops->f_isfd) { 988133741Sjmg KASSERT(td != NULL, ("td is NULL")); 989224797Sjonathan error = fget(td, kev->ident, CAP_POLL_EVENT, &fp); 990159553Sjhb if (error) 991133741Sjmg goto done; 99259290Sjlemon 993133741Sjmg if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, 994133741Sjmg kev->ident, 0) != 0) { 995159553Sjhb /* try again */ 996133741Sjmg fdrop(fp, td); 997133741Sjmg fp = NULL; 998133741Sjmg error = kqueue_expand(kq, fops, kev->ident, waitok); 999133741Sjmg if (error) 1000133741Sjmg goto done; 1001133741Sjmg goto findkn; 1002133741Sjmg } 1003133741Sjmg 1004133741Sjmg if (fp->f_type == DTYPE_KQUEUE) { 1005133741Sjmg /* 1006133741Sjmg * if we add some inteligence about what we are doing, 1007133741Sjmg * we should be able to support events on ourselves. 1008133741Sjmg * We need to know when we are doing this to prevent 1009133741Sjmg * getting both the knlist lock and the kq lock since 1010133741Sjmg * they are the same thing. 
1011133741Sjmg */ 1012133741Sjmg if (fp->f_data == kq) { 1013133741Sjmg error = EINVAL; 1014159172Spjd goto done; 1015133741Sjmg } 1016133741Sjmg 1017256074Skib /* 1018256074Skib * Pre-lock the filedesc before the global 1019256074Skib * lock mutex, see the comment in 1020256074Skib * kqueue_close(). 1021256074Skib */ 1022256074Skib FILEDESC_XLOCK(td->td_proc->p_fd); 1023256074Skib filedesc_unlock = 1; 1024133741Sjmg KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 1025133741Sjmg } 1026133741Sjmg 1027133741Sjmg KQ_LOCK(kq); 1028133741Sjmg if (kev->ident < kq->kq_knlistsize) { 1029133741Sjmg SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) 1030133741Sjmg if (kev->filter == kn->kn_filter) 103159290Sjlemon break; 103259290Sjlemon } 103359290Sjlemon } else { 1034133741Sjmg if ((kev->flags & EV_ADD) == EV_ADD) 1035133741Sjmg kqueue_expand(kq, fops, kev->ident, waitok); 1036133741Sjmg 1037133741Sjmg KQ_LOCK(kq); 1038133741Sjmg if (kq->kq_knhashmask != 0) { 103959290Sjlemon struct klist *list; 1040133635Sjmg 1041133741Sjmg list = &kq->kq_knhash[ 1042133741Sjmg KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 104359290Sjlemon SLIST_FOREACH(kn, list, kn_link) 104459290Sjlemon if (kev->ident == kn->kn_id && 104559290Sjlemon kev->filter == kn->kn_filter) 104659290Sjlemon break; 104759290Sjlemon } 104859290Sjlemon } 104959290Sjlemon 1050133741Sjmg /* knote is in the process of changing, wait for it to stablize. 
*/ 1051133741Sjmg if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { 1052197930Skib KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1053256074Skib if (filedesc_unlock) { 1054256074Skib FILEDESC_XUNLOCK(td->td_proc->p_fd); 1055256074Skib filedesc_unlock = 0; 1056256074Skib } 1057197930Skib kq->kq_state |= KQ_FLUXWAIT; 1058197930Skib msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); 1059133741Sjmg if (fp != NULL) { 1060133741Sjmg fdrop(fp, td); 1061133741Sjmg fp = NULL; 1062133741Sjmg } 1063133741Sjmg goto findkn; 1064133741Sjmg } 1065133741Sjmg 106659290Sjlemon /* 106759290Sjlemon * kn now contains the matching knote, or NULL if no match 106859290Sjlemon */ 1069197240Ssson if (kn == NULL) { 1070197240Ssson if (kev->flags & EV_ADD) { 1071133741Sjmg kn = tkn; 1072133741Sjmg tkn = NULL; 107368883Sdillon if (kn == NULL) { 1074159173Spjd KQ_UNLOCK(kq); 107568883Sdillon error = ENOMEM; 107668883Sdillon goto done; 107768883Sdillon } 107859290Sjlemon kn->kn_fp = fp; 107959290Sjlemon kn->kn_kq = kq; 108059290Sjlemon kn->kn_fop = fops; 108168883Sdillon /* 1082133741Sjmg * apply reference counts to knote structure, and 108368883Sdillon * do not release it at the end of this routine. 
108468883Sdillon */ 1085133741Sjmg fops = NULL; 108668883Sdillon fp = NULL; 108768883Sdillon 108861962Sjlemon kn->kn_sfflags = kev->fflags; 108961962Sjlemon kn->kn_sdata = kev->data; 109061962Sjlemon kev->fflags = 0; 109161962Sjlemon kev->data = 0; 109261962Sjlemon kn->kn_kevent = *kev; 1093157383Sjmg kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | 1094157383Sjmg EV_ENABLE | EV_DISABLE); 1095133741Sjmg kn->kn_status = KN_INFLUX|KN_DETACHED; 109661962Sjlemon 1097133741Sjmg error = knote_attach(kn, kq); 1098133741Sjmg KQ_UNLOCK(kq); 1099133741Sjmg if (error != 0) { 1100133741Sjmg tkn = kn; 1101133741Sjmg goto done; 1102133741Sjmg } 1103133741Sjmg 1104133741Sjmg if ((error = kn->kn_fop->f_attach(kn)) != 0) { 110583366Sjulian knote_drop(kn, td); 110659290Sjlemon goto done; 110759290Sjlemon } 1108133741Sjmg KN_LIST_LOCK(kn); 1109197240Ssson goto done_ev_add; 111061962Sjlemon } else { 1111197240Ssson /* No matching knote and the EV_ADD flag is not set. */ 1112133741Sjmg KQ_UNLOCK(kq); 1113197240Ssson error = ENOENT; 1114197240Ssson goto done; 111559290Sjlemon } 1116197240Ssson } 1117197240Ssson 1118197240Ssson if (kev->flags & EV_DELETE) { 1119133741Sjmg kn->kn_status |= KN_INFLUX; 1120133741Sjmg KQ_UNLOCK(kq); 1121134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 1122134859Sjmg kn->kn_fop->f_detach(kn); 112383366Sjulian knote_drop(kn, td); 112459290Sjlemon goto done; 112559290Sjlemon } 112659290Sjlemon 1127197240Ssson /* 1128197240Ssson * The user may change some filter values after the initial EV_ADD, 1129197240Ssson * but doing so will not reset any filter which has already been 1130197240Ssson * triggered. 
1131197240Ssson */ 1132264369Skib kn->kn_status |= KN_INFLUX | KN_SCAN; 1133197240Ssson KQ_UNLOCK(kq); 1134197240Ssson KN_LIST_LOCK(kn); 1135197240Ssson kn->kn_kevent.udata = kev->udata; 1136197240Ssson if (!fops->f_isfd && fops->f_touch != NULL) { 1137197240Ssson fops->f_touch(kn, kev, EVENT_REGISTER); 1138197240Ssson } else { 1139197240Ssson kn->kn_sfflags = kev->fflags; 1140197240Ssson kn->kn_sdata = kev->data; 1141197240Ssson } 1142197240Ssson 1143197240Ssson /* 1144197240Ssson * We can get here with kn->kn_knlist == NULL. This can happen when 1145197240Ssson * the initial attach event decides that the event is "completed" 1146197240Ssson * already. i.e. filt_procattach is called on a zombie process. It 1147197240Ssson * will call filt_proc which will remove it from the list, and NULL 1148197240Ssson * kn_knlist. 1149197240Ssson */ 1150197240Sssondone_ev_add: 1151197240Ssson event = kn->kn_fop->f_event(kn, 0); 1152197240Ssson KQ_LOCK(kq); 1153197240Ssson if (event) 1154197240Ssson KNOTE_ACTIVATE(kn, 1); 1155264369Skib kn->kn_status &= ~(KN_INFLUX | KN_SCAN); 1156197240Ssson KN_LIST_UNLOCK(kn); 1157197240Ssson 115859290Sjlemon if ((kev->flags & EV_DISABLE) && 115959290Sjlemon ((kn->kn_status & KN_DISABLED) == 0)) { 116059290Sjlemon kn->kn_status |= KN_DISABLED; 116159290Sjlemon } 116259290Sjlemon 116359290Sjlemon if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 116459290Sjlemon kn->kn_status &= ~KN_DISABLED; 116559290Sjlemon if ((kn->kn_status & KN_ACTIVE) && 116659290Sjlemon ((kn->kn_status & KN_QUEUED) == 0)) 116759290Sjlemon knote_enqueue(kn); 116859290Sjlemon } 1169133741Sjmg KQ_UNLOCK_FLUX(kq); 117059290Sjlemon 117159290Sjlemondone: 1172133741Sjmg KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1173256074Skib if (filedesc_unlock) 1174256074Skib FILEDESC_XUNLOCK(td->td_proc->p_fd); 117568883Sdillon if (fp != NULL) 117683366Sjulian fdrop(fp, td); 1177133741Sjmg if (tkn != NULL) 1178133741Sjmg knote_free(tkn); 1179133741Sjmg if (fops != NULL) 
1180133741Sjmg kqueue_fo_release(filt); 118159290Sjlemon return (error); 118259290Sjlemon} 118359290Sjlemon 118459290Sjlemonstatic int 1185170029Srwatsonkqueue_acquire(struct file *fp, struct kqueue **kqp) 118659290Sjlemon{ 1187133741Sjmg int error; 118889306Salfred struct kqueue *kq; 1189133741Sjmg 1190133741Sjmg error = 0; 1191133741Sjmg 1192174988Sjeff kq = fp->f_data; 1193174988Sjeff if (fp->f_type != DTYPE_KQUEUE || kq == NULL) 1194174988Sjeff return (EBADF); 1195174988Sjeff *kqp = kq; 1196174988Sjeff KQ_LOCK(kq); 1197174988Sjeff if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { 1198133741Sjmg KQ_UNLOCK(kq); 1199174988Sjeff return (EBADF); 1200174988Sjeff } 1201174988Sjeff kq->kq_refcnt++; 1202174988Sjeff KQ_UNLOCK(kq); 1203133741Sjmg 1204133741Sjmg return error; 1205133741Sjmg} 1206133741Sjmg 1207133741Sjmgstatic void 1208133741Sjmgkqueue_release(struct kqueue *kq, int locked) 1209133741Sjmg{ 1210133741Sjmg if (locked) 1211133741Sjmg KQ_OWNED(kq); 1212133741Sjmg else 1213133741Sjmg KQ_LOCK(kq); 1214133741Sjmg kq->kq_refcnt--; 1215133741Sjmg if (kq->kq_refcnt == 1) 1216133741Sjmg wakeup(&kq->kq_refcnt); 1217133741Sjmg if (!locked) 1218133741Sjmg KQ_UNLOCK(kq); 1219133741Sjmg} 1220133741Sjmg 1221133741Sjmgstatic void 1222133741Sjmgkqueue_schedtask(struct kqueue *kq) 1223133741Sjmg{ 1224133741Sjmg 1225133741Sjmg KQ_OWNED(kq); 1226133741Sjmg KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), 1227133741Sjmg ("scheduling kqueue task while draining")); 1228133741Sjmg 1229133741Sjmg if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { 1230133741Sjmg taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task); 1231133741Sjmg kq->kq_state |= KQ_TASKSCHED; 1232133741Sjmg } 1233133741Sjmg} 1234133741Sjmg 1235133741Sjmg/* 1236133741Sjmg * Expand the kq to make sure we have storage for fops/ident pair. 1237133741Sjmg * 1238133741Sjmg * Return 0 on success (or no work necessary), return errno on failure. 
1239133741Sjmg * 1240133741Sjmg * Not calling hashinit w/ waitok (proper malloc flag) should be safe. 1241133741Sjmg * If kqueue_register is called from a non-fd context, there usually/should 1242133741Sjmg * be no locks held. 1243133741Sjmg */ 1244133741Sjmgstatic int 1245133741Sjmgkqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, 1246133741Sjmg int waitok) 1247133741Sjmg{ 1248205886Sjhb struct klist *list, *tmp_knhash, *to_free; 1249133741Sjmg u_long tmp_knhashmask; 1250133741Sjmg int size; 1251133741Sjmg int fd; 1252133741Sjmg int mflag = waitok ? M_WAITOK : M_NOWAIT; 1253133741Sjmg 1254133741Sjmg KQ_NOTOWNED(kq); 1255133741Sjmg 1256205886Sjhb to_free = NULL; 1257133741Sjmg if (fops->f_isfd) { 1258133741Sjmg fd = ident; 1259133741Sjmg if (kq->kq_knlistsize <= fd) { 1260133741Sjmg size = kq->kq_knlistsize; 1261133741Sjmg while (size <= fd) 1262133741Sjmg size += KQEXTENT; 1263197575Sdelphij list = malloc(size * sizeof(*list), M_KQUEUE, mflag); 1264133741Sjmg if (list == NULL) 1265133741Sjmg return ENOMEM; 1266133741Sjmg KQ_LOCK(kq); 1267133741Sjmg if (kq->kq_knlistsize > fd) { 1268205886Sjhb to_free = list; 1269133741Sjmg list = NULL; 1270133741Sjmg } else { 1271133741Sjmg if (kq->kq_knlist != NULL) { 1272133741Sjmg bcopy(kq->kq_knlist, list, 1273197575Sdelphij kq->kq_knlistsize * sizeof(*list)); 1274205886Sjhb to_free = kq->kq_knlist; 1275133741Sjmg kq->kq_knlist = NULL; 1276133741Sjmg } 1277133741Sjmg bzero((caddr_t)list + 1278197575Sdelphij kq->kq_knlistsize * sizeof(*list), 1279197575Sdelphij (size - kq->kq_knlistsize) * sizeof(*list)); 1280133741Sjmg kq->kq_knlistsize = size; 1281133741Sjmg kq->kq_knlist = list; 1282133741Sjmg } 1283133741Sjmg KQ_UNLOCK(kq); 1284133741Sjmg } 1285133741Sjmg } else { 1286133741Sjmg if (kq->kq_knhashmask == 0) { 1287133741Sjmg tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, 1288133741Sjmg &tmp_knhashmask); 1289133741Sjmg if (tmp_knhash == NULL) 1290133741Sjmg return ENOMEM; 1291133741Sjmg KQ_LOCK(kq); 
1292133741Sjmg if (kq->kq_knhashmask == 0) { 1293133741Sjmg kq->kq_knhash = tmp_knhash; 1294133741Sjmg kq->kq_knhashmask = tmp_knhashmask; 1295133741Sjmg } else { 1296205886Sjhb to_free = tmp_knhash; 1297133741Sjmg } 1298133741Sjmg KQ_UNLOCK(kq); 1299133741Sjmg } 1300133741Sjmg } 1301205886Sjhb free(to_free, M_KQUEUE); 1302133741Sjmg 1303133741Sjmg KQ_NOTOWNED(kq); 1304133741Sjmg return 0; 1305133741Sjmg} 1306133741Sjmg 1307133741Sjmgstatic void 1308133741Sjmgkqueue_task(void *arg, int pending) 1309133741Sjmg{ 1310133741Sjmg struct kqueue *kq; 1311133741Sjmg int haskqglobal; 1312133741Sjmg 1313133741Sjmg haskqglobal = 0; 1314133741Sjmg kq = arg; 1315133741Sjmg 1316133741Sjmg KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 1317133741Sjmg KQ_LOCK(kq); 1318133741Sjmg 1319133741Sjmg KNOTE_LOCKED(&kq->kq_sel.si_note, 0); 1320133741Sjmg 1321133741Sjmg kq->kq_state &= ~KQ_TASKSCHED; 1322133741Sjmg if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { 1323133741Sjmg wakeup(&kq->kq_state); 1324133741Sjmg } 1325133741Sjmg KQ_UNLOCK(kq); 1326133741Sjmg KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1327133741Sjmg} 1328133741Sjmg 1329133741Sjmg/* 1330133741Sjmg * Scan, update kn_data (if not ONESHOT), and copyout triggered events. 1331133741Sjmg * We treat KN_MARKER knotes as if they are INFLUX. 
1332133741Sjmg */ 1333133741Sjmgstatic int 1334146950Spskqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, 1335146950Sps const struct timespec *tsp, struct kevent *keva, struct thread *td) 1336133741Sjmg{ 133759290Sjlemon struct kevent *kevp; 133859290Sjlemon struct timeval atv, rtv, ttv; 1339133794Sgreen struct knote *kn, *marker; 1340178914Skib int count, timeout, nkev, error, influx; 1341197240Ssson int haskqglobal, touch; 134259290Sjlemon 134359290Sjlemon count = maxevents; 1344133741Sjmg nkev = 0; 1345133741Sjmg error = 0; 1346133741Sjmg haskqglobal = 0; 134759290Sjlemon 1348133741Sjmg if (maxevents == 0) 1349133741Sjmg goto done_nl; 1350133741Sjmg 135164343Sjlemon if (tsp != NULL) { 135259290Sjlemon TIMESPEC_TO_TIMEVAL(&atv, tsp); 135364343Sjlemon if (itimerfix(&atv)) { 135459290Sjlemon error = EINVAL; 1355133741Sjmg goto done_nl; 135659290Sjlemon } 135764343Sjlemon if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 135864343Sjlemon timeout = -1; 1359133590Srwatson else 136064343Sjlemon timeout = atv.tv_sec > 24 * 60 * 60 ? 136164343Sjlemon 24 * 60 * 60 * hz : tvtohz(&atv); 136264343Sjlemon getmicrouptime(&rtv); 136364343Sjlemon timevaladd(&atv, &rtv); 136464343Sjlemon } else { 136564343Sjlemon atv.tv_sec = 0; 136664343Sjlemon atv.tv_usec = 0; 136759290Sjlemon timeout = 0; 136859290Sjlemon } 1369133794Sgreen marker = knote_alloc(1); 1370133794Sgreen if (marker == NULL) { 1371133794Sgreen error = ENOMEM; 1372133794Sgreen goto done_nl; 1373133794Sgreen } 1374133794Sgreen marker->kn_status = KN_MARKER; 1375133741Sjmg KQ_LOCK(kq); 137659290Sjlemon goto start; 137759290Sjlemon 137859290Sjlemonretry: 137964343Sjlemon if (atv.tv_sec || atv.tv_usec) { 138059290Sjlemon getmicrouptime(&rtv); 138159290Sjlemon if (timevalcmp(&rtv, &atv, >=)) 138259290Sjlemon goto done; 138359290Sjlemon ttv = atv; 138459290Sjlemon timevalsub(&ttv, &rtv); 138559290Sjlemon timeout = ttv.tv_sec > 24 * 60 * 60 ? 
138659290Sjlemon 24 * 60 * 60 * hz : tvtohz(&ttv); 138759290Sjlemon } 138859290Sjlemon 138959290Sjlemonstart: 1390133741Sjmg kevp = keva; 139159290Sjlemon if (kq->kq_count == 0) { 1392133590Srwatson if (timeout < 0) { 139364343Sjlemon error = EWOULDBLOCK; 139464343Sjlemon } else { 139564343Sjlemon kq->kq_state |= KQ_SLEEP; 1396133741Sjmg error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH, 1397133741Sjmg "kqread", timeout); 139864343Sjlemon } 139964084Sjlemon if (error == 0) 140059290Sjlemon goto retry; 140164084Sjlemon /* don't restart after signals... */ 140264084Sjlemon if (error == ERESTART) 140364084Sjlemon error = EINTR; 140464084Sjlemon else if (error == EWOULDBLOCK) 140559290Sjlemon error = 0; 140659290Sjlemon goto done; 140759290Sjlemon } 140859290Sjlemon 1409133794Sgreen TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); 1410178914Skib influx = 0; 141159290Sjlemon while (count) { 1412133741Sjmg KQ_OWNED(kq); 141359290Sjlemon kn = TAILQ_FIRST(&kq->kq_head); 1414133741Sjmg 1415133794Sgreen if ((kn->kn_status == KN_MARKER && kn != marker) || 1416133741Sjmg (kn->kn_status & KN_INFLUX) == KN_INFLUX) { 1417178914Skib if (influx) { 1418178914Skib influx = 0; 1419178914Skib KQ_FLUX_WAKEUP(kq); 1420178914Skib } 1421180336Skib kq->kq_state |= KQ_FLUXWAIT; 1422133741Sjmg error = msleep(kq, &kq->kq_lock, PSOCK, 1423133741Sjmg "kqflxwt", 0); 1424133741Sjmg continue; 1425133741Sjmg } 1426133741Sjmg 1427133590Srwatson TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1428133741Sjmg if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { 1429133741Sjmg kn->kn_status &= ~KN_QUEUED; 1430133741Sjmg kq->kq_count--; 1431133741Sjmg continue; 1432133741Sjmg } 1433133794Sgreen if (kn == marker) { 1434133741Sjmg KQ_FLUX_WAKEUP(kq); 143559290Sjlemon if (count == maxevents) 143659290Sjlemon goto retry; 143759290Sjlemon goto done; 143859290Sjlemon } 1439133741Sjmg KASSERT((kn->kn_status & KN_INFLUX) == 0, 1440133741Sjmg ("KN_INFLUX set when not suppose to be")); 1441133741Sjmg 1442257759Sjhb if 
((kn->kn_flags & EV_DROP) == EV_DROP) { 144359290Sjlemon kn->kn_status &= ~KN_QUEUED; 1444133741Sjmg kn->kn_status |= KN_INFLUX; 144559290Sjlemon kq->kq_count--; 1446133741Sjmg KQ_UNLOCK(kq); 1447133741Sjmg /* 1448133741Sjmg * We don't need to lock the list since we've marked 1449133741Sjmg * it _INFLUX. 1450133741Sjmg */ 1451257759Sjhb if (!(kn->kn_status & KN_DETACHED)) 1452257759Sjhb kn->kn_fop->f_detach(kn); 1453257759Sjhb knote_drop(kn, td); 1454257759Sjhb KQ_LOCK(kq); 1455257759Sjhb continue; 1456257759Sjhb } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { 1457257759Sjhb kn->kn_status &= ~KN_QUEUED; 1458257759Sjhb kn->kn_status |= KN_INFLUX; 1459257759Sjhb kq->kq_count--; 1460257759Sjhb KQ_UNLOCK(kq); 1461257759Sjhb /* 1462257759Sjhb * We don't need to lock the list since we've marked 1463257759Sjhb * it _INFLUX. 1464257759Sjhb */ 1465133741Sjmg *kevp = kn->kn_kevent; 1466134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 1467134859Sjmg kn->kn_fop->f_detach(kn); 146883366Sjulian knote_drop(kn, td); 1469133741Sjmg KQ_LOCK(kq); 1470133741Sjmg kn = NULL; 147159290Sjlemon } else { 1472264369Skib kn->kn_status |= KN_INFLUX | KN_SCAN; 1473133741Sjmg KQ_UNLOCK(kq); 1474133741Sjmg if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) 1475133741Sjmg KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 1476133741Sjmg KN_LIST_LOCK(kn); 1477133741Sjmg if (kn->kn_fop->f_event(kn, 0) == 0) { 1478133741Sjmg KQ_LOCK(kq); 1479157754Sjhb KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1480133741Sjmg kn->kn_status &= 1481264369Skib ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | 1482264369Skib KN_SCAN); 1483133741Sjmg kq->kq_count--; 1484150199Sups KN_LIST_UNLOCK(kn); 1485178914Skib influx = 1; 1486133741Sjmg continue; 1487133741Sjmg } 1488197240Ssson touch = (!kn->kn_fop->f_isfd && 1489197240Ssson kn->kn_fop->f_touch != NULL); 1490197240Ssson if (touch) 1491197240Ssson kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); 1492197240Ssson else 1493197240Ssson *kevp = kn->kn_kevent; 1494133741Sjmg KQ_LOCK(kq); 
1495157754Sjhb KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1496197242Ssson if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { 1497197240Ssson /* 1498197240Ssson * Manually clear knotes who weren't 1499197240Ssson * 'touch'ed. 1500197240Ssson */ 1501197242Ssson if (touch == 0 && kn->kn_flags & EV_CLEAR) { 1502197240Ssson kn->kn_data = 0; 1503197240Ssson kn->kn_fflags = 0; 1504197240Ssson } 1505197242Ssson if (kn->kn_flags & EV_DISPATCH) 1506197242Ssson kn->kn_status |= KN_DISABLED; 1507133741Sjmg kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 1508133741Sjmg kq->kq_count--; 1509133741Sjmg } else 1510133741Sjmg TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1511150199Sups 1512264369Skib kn->kn_status &= ~(KN_INFLUX | KN_SCAN); 1513133741Sjmg KN_LIST_UNLOCK(kn); 1514178914Skib influx = 1; 151559290Sjlemon } 1516133741Sjmg 1517133741Sjmg /* we are returning a copy to the user */ 1518133741Sjmg kevp++; 1519133741Sjmg nkev++; 152059290Sjlemon count--; 1521133741Sjmg 152259290Sjlemon if (nkev == KQ_NEVENTS) { 1523178914Skib influx = 0; 1524133741Sjmg KQ_UNLOCK_FLUX(kq); 1525146950Sps error = k_ops->k_copyout(k_ops->arg, keva, nkev); 152659290Sjlemon nkev = 0; 1527133741Sjmg kevp = keva; 1528133741Sjmg KQ_LOCK(kq); 152959997Sjlemon if (error) 153059997Sjlemon break; 153159290Sjlemon } 153259290Sjlemon } 1533133794Sgreen TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); 153459290Sjlemondone: 1535133741Sjmg KQ_OWNED(kq); 1536133741Sjmg KQ_UNLOCK_FLUX(kq); 1537133794Sgreen knote_free(marker); 1538133741Sjmgdone_nl: 1539133741Sjmg KQ_NOTOWNED(kq); 154059290Sjlemon if (nkev != 0) 1541146950Sps error = k_ops->k_copyout(k_ops->arg, keva, nkev); 1542133741Sjmg td->td_retval[0] = maxevents - count; 154359290Sjlemon return (error); 154459290Sjlemon} 154559290Sjlemon 154659290Sjlemon/* 154759290Sjlemon * XXX 154859290Sjlemon * This could be expanded to call kqueue_scan, if desired. 
154959290Sjlemon */ 155059290Sjlemon/*ARGSUSED*/ 155159290Sjlemonstatic int 1552101941Srwatsonkqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 155383366Sjulian int flags, struct thread *td) 155459290Sjlemon{ 155559290Sjlemon return (ENXIO); 155659290Sjlemon} 155759290Sjlemon 155859290Sjlemon/*ARGSUSED*/ 155959290Sjlemonstatic int 1560101941Srwatsonkqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 156183366Sjulian int flags, struct thread *td) 156259290Sjlemon{ 156359290Sjlemon return (ENXIO); 156459290Sjlemon} 156559290Sjlemon 156659290Sjlemon/*ARGSUSED*/ 156759290Sjlemonstatic int 1568175140Sjhbkqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred, 1569175140Sjhb struct thread *td) 1570175140Sjhb{ 1571175140Sjhb 1572175140Sjhb return (EINVAL); 1573175140Sjhb} 1574175140Sjhb 1575175140Sjhb/*ARGSUSED*/ 1576175140Sjhbstatic int 1577132138Salfredkqueue_ioctl(struct file *fp, u_long cmd, void *data, 1578102003Srwatson struct ucred *active_cred, struct thread *td) 157959290Sjlemon{ 1580132174Salfred /* 1581132174Salfred * Enabling sigio causes two major problems: 1582132174Salfred * 1) infinite recursion: 1583132174Salfred * Synopsys: kevent is being used to track signals and have FIOASYNC 1584132174Salfred * set. On receipt of a signal this will cause a kqueue to recurse 1585132174Salfred * into itself over and over. Sending the sigio causes the kqueue 1586132174Salfred * to become ready, which in turn posts sigio again, forever. 1587132174Salfred * Solution: this can be solved by setting a flag in the kqueue that 1588132174Salfred * we have a SIGIO in progress. 1589132174Salfred * 2) locking problems: 1590132174Salfred * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts 1591132174Salfred * us above the proc and pgrp locks. 
1592132174Salfred * Solution: Post a signal using an async mechanism, being sure to 1593132174Salfred * record a generation count in the delivery so that we do not deliver 1594132174Salfred * a signal to the wrong process. 1595132174Salfred * 1596132174Salfred * Note, these two mechanisms are somewhat mutually exclusive! 1597132174Salfred */ 1598132174Salfred#if 0 1599132138Salfred struct kqueue *kq; 1600132138Salfred 1601132138Salfred kq = fp->f_data; 1602132138Salfred switch (cmd) { 1603132138Salfred case FIOASYNC: 1604132138Salfred if (*(int *)data) { 1605132138Salfred kq->kq_state |= KQ_ASYNC; 1606132138Salfred } else { 1607132138Salfred kq->kq_state &= ~KQ_ASYNC; 1608132138Salfred } 1609132138Salfred return (0); 1610132138Salfred 1611132138Salfred case FIOSETOWN: 1612132138Salfred return (fsetown(*(int *)data, &kq->kq_sigio)); 1613132138Salfred 1614132138Salfred case FIOGETOWN: 1615132138Salfred *(int *)data = fgetown(&kq->kq_sigio); 1616132138Salfred return (0); 1617132138Salfred } 1618132174Salfred#endif 1619132138Salfred 162059290Sjlemon return (ENOTTY); 162159290Sjlemon} 162259290Sjlemon 162359290Sjlemon/*ARGSUSED*/ 162459290Sjlemonstatic int 1625101983Srwatsonkqueue_poll(struct file *fp, int events, struct ucred *active_cred, 1626101987Srwatson struct thread *td) 162759290Sjlemon{ 162889306Salfred struct kqueue *kq; 162959290Sjlemon int revents = 0; 1630133741Sjmg int error; 163159290Sjlemon 1632170029Srwatson if ((error = kqueue_acquire(fp, &kq))) 1633133741Sjmg return POLLERR; 1634133741Sjmg 1635133741Sjmg KQ_LOCK(kq); 1636133741Sjmg if (events & (POLLIN | POLLRDNORM)) { 1637133741Sjmg if (kq->kq_count) { 1638133741Sjmg revents |= events & (POLLIN | POLLRDNORM); 163959290Sjlemon } else { 1640133741Sjmg selrecord(td, &kq->kq_sel); 1641174647Sjeff if (SEL_WAITING(&kq->kq_sel)) 1642174647Sjeff kq->kq_state |= KQ_SEL; 164359290Sjlemon } 164459290Sjlemon } 1645133741Sjmg kqueue_release(kq, 1); 1646133741Sjmg KQ_UNLOCK(kq); 164759290Sjlemon return (revents); 
164859290Sjlemon} 164959290Sjlemon 165059290Sjlemon/*ARGSUSED*/ 165159290Sjlemonstatic int 1652101983Srwatsonkqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 1653101987Srwatson struct thread *td) 165459290Sjlemon{ 165559290Sjlemon 1656146603Sjmg bzero((void *)st, sizeof *st); 1657146603Sjmg /* 1658146603Sjmg * We no longer return kq_count because the unlocked value is useless. 1659146603Sjmg * If you spent all this time getting the count, why not spend your 1660146603Sjmg * syscall better by calling kevent? 1661146603Sjmg * 1662146603Sjmg * XXX - This is needed for libc_r. 1663146603Sjmg */ 1664146603Sjmg st->st_mode = S_IFIFO; 1665146603Sjmg return (0); 166659290Sjlemon} 166759290Sjlemon 166859290Sjlemon/*ARGSUSED*/ 166959290Sjlemonstatic int 167083366Sjuliankqueue_close(struct file *fp, struct thread *td) 167159290Sjlemon{ 1672109153Sdillon struct kqueue *kq = fp->f_data; 1673133741Sjmg struct filedesc *fdp; 1674133741Sjmg struct knote *kn; 167559290Sjlemon int i; 1676133741Sjmg int error; 1677256074Skib int filedesc_unlock; 167859290Sjlemon 1679170029Srwatson if ((error = kqueue_acquire(fp, &kq))) 1680133741Sjmg return error; 1681133741Sjmg 1682256074Skib filedesc_unlock = 0; 1683133741Sjmg KQ_LOCK(kq); 1684133741Sjmg 1685133741Sjmg KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, 1686133741Sjmg ("kqueue already closing")); 1687133741Sjmg kq->kq_state |= KQ_CLOSING; 1688133741Sjmg if (kq->kq_refcnt > 1) 1689133741Sjmg msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); 1690133741Sjmg 1691133741Sjmg KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); 1692133741Sjmg fdp = kq->kq_fdp; 1693133741Sjmg 1694133741Sjmg KASSERT(knlist_empty(&kq->kq_sel.si_note), 1695133741Sjmg ("kqueue's knlist not empty")); 1696133741Sjmg 1697133741Sjmg for (i = 0; i < kq->kq_knlistsize; i++) { 1698133741Sjmg while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { 1699178913Skib if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { 1700178913Skib 
kq->kq_state |= KQ_FLUXWAIT; 1701178913Skib msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); 1702178913Skib continue; 1703178913Skib } 1704133741Sjmg kn->kn_status |= KN_INFLUX; 1705133741Sjmg KQ_UNLOCK(kq); 1706134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 1707134859Sjmg kn->kn_fop->f_detach(kn); 1708133741Sjmg knote_drop(kn, td); 1709133741Sjmg KQ_LOCK(kq); 171059290Sjlemon } 171159290Sjlemon } 1712133741Sjmg if (kq->kq_knhashmask != 0) { 1713133741Sjmg for (i = 0; i <= kq->kq_knhashmask; i++) { 1714133741Sjmg while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { 1715178913Skib if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { 1716178913Skib kq->kq_state |= KQ_FLUXWAIT; 1717178913Skib msleep(kq, &kq->kq_lock, PSOCK, 1718178913Skib "kqclo2", 0); 1719178913Skib continue; 1720178913Skib } 1721133741Sjmg kn->kn_status |= KN_INFLUX; 1722133741Sjmg KQ_UNLOCK(kq); 1723134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 1724134859Sjmg kn->kn_fop->f_detach(kn); 1725133741Sjmg knote_drop(kn, td); 1726133741Sjmg KQ_LOCK(kq); 172759290Sjlemon } 172859290Sjlemon } 172959290Sjlemon } 1730133741Sjmg 1731133741Sjmg if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { 1732133741Sjmg kq->kq_state |= KQ_TASKDRAIN; 1733133741Sjmg msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); 1734133741Sjmg } 1735133741Sjmg 1736133741Sjmg if ((kq->kq_state & KQ_SEL) == KQ_SEL) { 1737126033Sgreen selwakeuppri(&kq->kq_sel, PSOCK); 1738174647Sjeff if (!SEL_WAITING(&kq->kq_sel)) 1739174647Sjeff kq->kq_state &= ~KQ_SEL; 1740126033Sgreen } 1741133741Sjmg 1742133741Sjmg KQ_UNLOCK(kq); 1743133741Sjmg 1744256074Skib /* 1745256074Skib * We could be called due to the knote_drop() doing fdrop(), 1746256074Skib * called from kqueue_register(). In this case the global 1747256074Skib * lock is owned, and filedesc sx is locked before, to not 1748256074Skib * take the sleepable lock after non-sleepable. 
1749256074Skib */ 1750256074Skib if (!sx_xlocked(FILEDESC_LOCK(fdp))) { 1751256074Skib FILEDESC_XLOCK(fdp); 1752256074Skib filedesc_unlock = 1; 1753256074Skib } else 1754256074Skib filedesc_unlock = 0; 1755255729Skib TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); 1756256074Skib if (filedesc_unlock) 1757256074Skib FILEDESC_XUNLOCK(fdp); 1758133741Sjmg 1759225177Sattilio seldrain(&kq->kq_sel); 1760133741Sjmg knlist_destroy(&kq->kq_sel.si_note); 1761133741Sjmg mtx_destroy(&kq->kq_lock); 1762133741Sjmg kq->kq_fdp = NULL; 1763133741Sjmg 1764133741Sjmg if (kq->kq_knhash != NULL) 1765133741Sjmg free(kq->kq_knhash, M_KQUEUE); 1766133741Sjmg if (kq->kq_knlist != NULL) 1767133741Sjmg free(kq->kq_knlist, M_KQUEUE); 1768133741Sjmg 1769132138Salfred funsetown(&kq->kq_sigio); 177084138Sjlemon free(kq, M_KQUEUE); 1771109153Sdillon fp->f_data = NULL; 177259290Sjlemon 177359290Sjlemon return (0); 177459290Sjlemon} 177559290Sjlemon 177659290Sjlemonstatic void 177759290Sjlemonkqueue_wakeup(struct kqueue *kq) 177859290Sjlemon{ 1779133741Sjmg KQ_OWNED(kq); 178059290Sjlemon 1781133741Sjmg if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) { 178259290Sjlemon kq->kq_state &= ~KQ_SLEEP; 178359290Sjlemon wakeup(kq); 178459290Sjlemon } 1785133741Sjmg if ((kq->kq_state & KQ_SEL) == KQ_SEL) { 1786122352Stanimura selwakeuppri(&kq->kq_sel, PSOCK); 1787174647Sjeff if (!SEL_WAITING(&kq->kq_sel)) 1788174647Sjeff kq->kq_state &= ~KQ_SEL; 178959290Sjlemon } 1790133741Sjmg if (!knlist_empty(&kq->kq_sel.si_note)) 1791133741Sjmg kqueue_schedtask(kq); 1792133741Sjmg if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) { 1793132138Salfred pgsigio(&kq->kq_sigio, SIGIO, 0); 1794132138Salfred } 179559290Sjlemon} 179659290Sjlemon 179759290Sjlemon/* 1798133741Sjmg * Walk down a list of knotes, activating them if their event has triggered. 1799133741Sjmg * 1800133741Sjmg * There is a possibility to optimize in the case of one kq watching another. 
1801133741Sjmg * Instead of scheduling a task to wake it up, you could pass enough state 1802133741Sjmg * down the chain to make up the parent kqueue. Make this code functional 1803133741Sjmg * first. 180459290Sjlemon */ 180559290Sjlemonvoid 1806195148Sstasknote(struct knlist *list, long hint, int lockflags) 180759290Sjlemon{ 1808133741Sjmg struct kqueue *kq; 180959290Sjlemon struct knote *kn; 1810195148Sstas int error; 181159290Sjlemon 1812133741Sjmg if (list == NULL) 1813133741Sjmg return; 1814133741Sjmg 1815195148Sstas KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); 1816147730Sssouhlal 1817195148Sstas if ((lockflags & KNF_LISTLOCKED) == 0) 1818147730Sssouhlal list->kl_lock(list->kl_lockarg); 1819147730Sssouhlal 1820133741Sjmg /* 1821133741Sjmg * If we unlock the list lock (and set KN_INFLUX), we can eliminate 1822133741Sjmg * the kqueue scheduling, but this will introduce four 1823133741Sjmg * lock/unlock's for each knote to test. If we do, continue to use 1824133741Sjmg * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is 1825133741Sjmg * only safe if you want to remove the current item, which we are 1826133741Sjmg * not doing. 1827133741Sjmg */ 1828133741Sjmg SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { 1829133741Sjmg kq = kn->kn_kq; 1830264369Skib KQ_LOCK(kq); 1831264369Skib if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { 1832264369Skib /* 1833264369Skib * Do not process the influx notes, except for 1834264369Skib * the influx coming from the kq unlock in the 1835264369Skib * kqueue_scan(). In the later case, we do 1836264369Skib * not interfere with the scan, since the code 1837264369Skib * fragment in kqueue_scan() locks the knlist, 1838264369Skib * and cannot proceed until we finished. 
1839264369Skib */ 1840264369Skib KQ_UNLOCK(kq); 1841264369Skib } else if ((lockflags & KNF_NOKQLOCK) != 0) { 1842264369Skib kn->kn_status |= KN_INFLUX; 1843264369Skib KQ_UNLOCK(kq); 1844264369Skib error = kn->kn_fop->f_event(kn, hint); 1845133741Sjmg KQ_LOCK(kq); 1846264369Skib kn->kn_status &= ~KN_INFLUX; 1847264369Skib if (error) 1848264369Skib KNOTE_ACTIVATE(kn, 1); 1849264369Skib KQ_UNLOCK_FLUX(kq); 1850264369Skib } else { 1851264369Skib kn->kn_status |= KN_HASKQLOCK; 1852264369Skib if (kn->kn_fop->f_event(kn, hint)) 1853264369Skib KNOTE_ACTIVATE(kn, 1); 1854264369Skib kn->kn_status &= ~KN_HASKQLOCK; 1855264369Skib KQ_UNLOCK(kq); 1856133741Sjmg } 1857133741Sjmg } 1858195148Sstas if ((lockflags & KNF_LISTLOCKED) == 0) 1859147730Sssouhlal list->kl_unlock(list->kl_lockarg); 186059290Sjlemon} 186159290Sjlemon 186259290Sjlemon/* 1863133741Sjmg * add a knote to a knlist 1864133741Sjmg */ 1865133741Sjmgvoid 1866133741Sjmgknlist_add(struct knlist *knl, struct knote *kn, int islocked) 1867133741Sjmg{ 1868147730Sssouhlal KNL_ASSERT_LOCK(knl, islocked); 1869133741Sjmg KQ_NOTOWNED(kn->kn_kq); 1870133741Sjmg KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == 1871133741Sjmg (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); 1872133741Sjmg if (!islocked) 1873147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 1874133741Sjmg SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); 1875133741Sjmg if (!islocked) 1876147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 1877133741Sjmg KQ_LOCK(kn->kn_kq); 1878133741Sjmg kn->kn_knlist = knl; 1879133741Sjmg kn->kn_status &= ~KN_DETACHED; 1880133741Sjmg KQ_UNLOCK(kn->kn_kq); 1881133741Sjmg} 1882133741Sjmg 1883133741Sjmgstatic void 1884133741Sjmgknlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) 1885133741Sjmg{ 1886133741Sjmg KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); 1887147730Sssouhlal KNL_ASSERT_LOCK(knl, knlislocked); 1888133741Sjmg mtx_assert(&kn->kn_kq->kq_lock, 
kqislocked ? MA_OWNED : MA_NOTOWNED); 1889133741Sjmg if (!kqislocked) 1890133741Sjmg KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, 1891133741Sjmg ("knlist_remove called w/o knote being KN_INFLUX or already removed")); 1892133741Sjmg if (!knlislocked) 1893147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 1894133741Sjmg SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); 1895133741Sjmg kn->kn_knlist = NULL; 1896133741Sjmg if (!knlislocked) 1897147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 1898133741Sjmg if (!kqislocked) 1899133741Sjmg KQ_LOCK(kn->kn_kq); 1900133741Sjmg kn->kn_status |= KN_DETACHED; 1901133741Sjmg if (!kqislocked) 1902133741Sjmg KQ_UNLOCK(kn->kn_kq); 1903133741Sjmg} 1904133741Sjmg 1905133741Sjmg/* 190659290Sjlemon * remove all knotes from a specified klist 190759290Sjlemon */ 190859290Sjlemonvoid 1909133741Sjmgknlist_remove(struct knlist *knl, struct knote *kn, int islocked) 191059290Sjlemon{ 1911133741Sjmg 1912133741Sjmg knlist_remove_kq(knl, kn, islocked, 0); 1913133741Sjmg} 1914133741Sjmg 1915133741Sjmg/* 1916133741Sjmg * remove knote from a specified klist while in f_event handler. 
1917133741Sjmg */ 1918133741Sjmgvoid 1919133741Sjmgknlist_remove_inevent(struct knlist *knl, struct knote *kn) 1920133741Sjmg{ 1921133741Sjmg 1922133741Sjmg knlist_remove_kq(knl, kn, 1, 1923133741Sjmg (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK); 1924133741Sjmg} 1925133741Sjmg 1926133741Sjmgint 1927133741Sjmgknlist_empty(struct knlist *knl) 1928133741Sjmg{ 1929147730Sssouhlal KNL_ASSERT_LOCKED(knl); 1930133741Sjmg return SLIST_EMPTY(&knl->kl_list); 1931133741Sjmg} 1932133741Sjmg 1933133741Sjmgstatic struct mtx knlist_lock; 1934133741SjmgMTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", 1935133741Sjmg MTX_DEF); 1936147730Sssouhlalstatic void knlist_mtx_lock(void *arg); 1937147730Sssouhlalstatic void knlist_mtx_unlock(void *arg); 1938133741Sjmg 1939147730Sssouhlalstatic void 1940147730Sssouhlalknlist_mtx_lock(void *arg) 1941147730Sssouhlal{ 1942147730Sssouhlal mtx_lock((struct mtx *)arg); 1943147730Sssouhlal} 1944147730Sssouhlal 1945147730Sssouhlalstatic void 1946147730Sssouhlalknlist_mtx_unlock(void *arg) 1947147730Sssouhlal{ 1948147730Sssouhlal mtx_unlock((struct mtx *)arg); 1949147730Sssouhlal} 1950147730Sssouhlal 1951193951Skibstatic void 1952193951Skibknlist_mtx_assert_locked(void *arg) 1953147730Sssouhlal{ 1954193951Skib mtx_assert((struct mtx *)arg, MA_OWNED); 1955147730Sssouhlal} 1956147730Sssouhlal 1957193951Skibstatic void 1958193951Skibknlist_mtx_assert_unlocked(void *arg) 1959193951Skib{ 1960193951Skib mtx_assert((struct mtx *)arg, MA_NOTOWNED); 1961193951Skib} 1962193951Skib 1963133741Sjmgvoid 1964147730Sssouhlalknlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), 1965193951Skib void (*kl_unlock)(void *), 1966193951Skib void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)) 1967133741Sjmg{ 1968133741Sjmg 1969147730Sssouhlal if (lock == NULL) 1970147730Sssouhlal knl->kl_lockarg = &knlist_lock; 1971133741Sjmg else 1972147730Sssouhlal knl->kl_lockarg = lock; 1973133741Sjmg 1974147730Sssouhlal if 
(kl_lock == NULL) 1975147730Sssouhlal knl->kl_lock = knlist_mtx_lock; 1976147730Sssouhlal else 1977147730Sssouhlal knl->kl_lock = kl_lock; 1978157582Sjmg if (kl_unlock == NULL) 1979147730Sssouhlal knl->kl_unlock = knlist_mtx_unlock; 1980147730Sssouhlal else 1981147730Sssouhlal knl->kl_unlock = kl_unlock; 1982193951Skib if (kl_assert_locked == NULL) 1983193951Skib knl->kl_assert_locked = knlist_mtx_assert_locked; 1984147730Sssouhlal else 1985193951Skib knl->kl_assert_locked = kl_assert_locked; 1986193951Skib if (kl_assert_unlocked == NULL) 1987193951Skib knl->kl_assert_unlocked = knlist_mtx_assert_unlocked; 1988193951Skib else 1989193951Skib knl->kl_assert_unlocked = kl_assert_unlocked; 1990147730Sssouhlal 1991133741Sjmg SLIST_INIT(&knl->kl_list); 1992133741Sjmg} 1993133741Sjmg 1994133741Sjmgvoid 1995193951Skibknlist_init_mtx(struct knlist *knl, struct mtx *lock) 1996193951Skib{ 1997193951Skib 1998193951Skib knlist_init(knl, lock, NULL, NULL, NULL, NULL); 1999193951Skib} 2000193951Skib 2001193951Skibvoid 2002133741Sjmgknlist_destroy(struct knlist *knl) 2003133741Sjmg{ 2004133741Sjmg 2005133741Sjmg#ifdef INVARIANTS 2006133741Sjmg /* 2007133741Sjmg * if we run across this error, we need to find the offending 2008133741Sjmg * driver and have it call knlist_clear. 2009133741Sjmg */ 2010133741Sjmg if (!SLIST_EMPTY(&knl->kl_list)) 2011133741Sjmg printf("WARNING: destroying knlist w/ knotes on it!\n"); 2012133741Sjmg#endif 2013133741Sjmg 2014147730Sssouhlal knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; 2015133741Sjmg SLIST_INIT(&knl->kl_list); 2016133741Sjmg} 2017133741Sjmg 2018133741Sjmg/* 2019133741Sjmg * Even if we are locked, we may need to drop the lock to allow any influx 2020133741Sjmg * knotes time to "settle". 
2021133741Sjmg */ 2022133741Sjmgvoid 2023143776Sjmgknlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) 2024133741Sjmg{ 2025159171Spjd struct knote *kn, *kn2; 2026133741Sjmg struct kqueue *kq; 202759290Sjlemon 2028133741Sjmg if (islocked) 2029147730Sssouhlal KNL_ASSERT_LOCKED(knl); 2030133741Sjmg else { 2031147730Sssouhlal KNL_ASSERT_UNLOCKED(knl); 2032170029Srwatsonagain: /* need to reacquire lock since we have dropped it */ 2033147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 203459290Sjlemon } 2035133741Sjmg 2036159171Spjd SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { 2037133741Sjmg kq = kn->kn_kq; 2038133741Sjmg KQ_LOCK(kq); 2039143776Sjmg if ((kn->kn_status & KN_INFLUX)) { 2040133741Sjmg KQ_UNLOCK(kq); 2041133741Sjmg continue; 2042133741Sjmg } 2043133741Sjmg knlist_remove_kq(knl, kn, 1, 1); 2044143776Sjmg if (killkn) { 2045143776Sjmg kn->kn_status |= KN_INFLUX | KN_DETACHED; 2046143776Sjmg KQ_UNLOCK(kq); 2047143776Sjmg knote_drop(kn, td); 2048143776Sjmg } else { 2049143776Sjmg /* Make sure cleared knotes disappear soon */ 2050143776Sjmg kn->kn_flags |= (EV_EOF | EV_ONESHOT); 2051143776Sjmg KQ_UNLOCK(kq); 2052143776Sjmg } 2053133741Sjmg kq = NULL; 2054133741Sjmg } 2055133741Sjmg 2056133741Sjmg if (!SLIST_EMPTY(&knl->kl_list)) { 2057133741Sjmg /* there are still KN_INFLUX remaining */ 2058133741Sjmg kn = SLIST_FIRST(&knl->kl_list); 2059133741Sjmg kq = kn->kn_kq; 2060133741Sjmg KQ_LOCK(kq); 2061133741Sjmg KASSERT(kn->kn_status & KN_INFLUX, 2062133741Sjmg ("knote removed w/o list lock")); 2063147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 2064133741Sjmg kq->kq_state |= KQ_FLUXWAIT; 2065133741Sjmg msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); 2066133741Sjmg kq = NULL; 2067133741Sjmg goto again; 2068133741Sjmg } 2069133741Sjmg 2070133741Sjmg if (islocked) 2071147730Sssouhlal KNL_ASSERT_LOCKED(knl); 2072133741Sjmg else { 2073147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 2074147730Sssouhlal KNL_ASSERT_UNLOCKED(knl); 
2075133741Sjmg } 207659290Sjlemon} 207759290Sjlemon 207859290Sjlemon/* 2079168355Srwatson * Remove all knotes referencing a specified fd must be called with FILEDESC 2080168355Srwatson * lock. This prevents a race where a new fd comes along and occupies the 2081168355Srwatson * entry and we attach a knote to the fd. 208259290Sjlemon */ 208359290Sjlemonvoid 208483366Sjulianknote_fdclose(struct thread *td, int fd) 208559290Sjlemon{ 208683366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 2087133741Sjmg struct kqueue *kq; 2088133741Sjmg struct knote *kn; 2089133741Sjmg int influx; 209059290Sjlemon 2091168355Srwatson FILEDESC_XLOCK_ASSERT(fdp); 2092133741Sjmg 2093133741Sjmg /* 2094133741Sjmg * We shouldn't have to worry about new kevents appearing on fd 2095133741Sjmg * since filedesc is locked. 2096133741Sjmg */ 2097255729Skib TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { 2098133741Sjmg KQ_LOCK(kq); 2099133741Sjmg 2100133741Sjmgagain: 2101133741Sjmg influx = 0; 2102133741Sjmg while (kq->kq_knlistsize > fd && 2103133741Sjmg (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { 2104133741Sjmg if (kn->kn_status & KN_INFLUX) { 2105133741Sjmg /* someone else might be waiting on our knote */ 2106133741Sjmg if (influx) 2107133741Sjmg wakeup(kq); 2108133741Sjmg kq->kq_state |= KQ_FLUXWAIT; 2109133741Sjmg msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); 2110133741Sjmg goto again; 2111133741Sjmg } 2112133741Sjmg kn->kn_status |= KN_INFLUX; 2113133741Sjmg KQ_UNLOCK(kq); 2114134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 2115134859Sjmg kn->kn_fop->f_detach(kn); 2116133741Sjmg knote_drop(kn, td); 2117133741Sjmg influx = 1; 2118133741Sjmg KQ_LOCK(kq); 2119133741Sjmg } 2120133741Sjmg KQ_UNLOCK_FLUX(kq); 2121133741Sjmg } 212259290Sjlemon} 212359290Sjlemon 2124133741Sjmgstatic int 2125133741Sjmgknote_attach(struct knote *kn, struct kqueue *kq) 212659290Sjlemon{ 2127133741Sjmg struct klist *list; 212859290Sjlemon 2129133741Sjmg KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked 
INFLUX")); 2130133741Sjmg KQ_OWNED(kq); 213189306Salfred 2132133741Sjmg if (kn->kn_fop->f_isfd) { 2133133741Sjmg if (kn->kn_id >= kq->kq_knlistsize) 2134133741Sjmg return ENOMEM; 2135133741Sjmg list = &kq->kq_knlist[kn->kn_id]; 2136133741Sjmg } else { 2137133741Sjmg if (kq->kq_knhash == NULL) 2138133741Sjmg return ENOMEM; 2139133741Sjmg list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 214059290Sjlemon } 214159290Sjlemon 214259290Sjlemon SLIST_INSERT_HEAD(list, kn, kn_link); 2143133741Sjmg 2144133741Sjmg return 0; 214559290Sjlemon} 214659290Sjlemon 214759290Sjlemon/* 2148151260Sambrisko * knote must already have been detached using the f_detach method. 2149133741Sjmg * no lock need to be held, it is assumed that the KN_INFLUX flag is set 2150133741Sjmg * to prevent other removal. 215159290Sjlemon */ 215259290Sjlemonstatic void 215383366Sjulianknote_drop(struct knote *kn, struct thread *td) 215459290Sjlemon{ 2155133741Sjmg struct kqueue *kq; 215659290Sjlemon struct klist *list; 215759290Sjlemon 2158133741Sjmg kq = kn->kn_kq; 2159133741Sjmg 2160133741Sjmg KQ_NOTOWNED(kq); 2161133741Sjmg KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX, 2162133741Sjmg ("knote_drop called without KN_INFLUX set in kn_status")); 2163133741Sjmg 2164133741Sjmg KQ_LOCK(kq); 216559290Sjlemon if (kn->kn_fop->f_isfd) 2166133741Sjmg list = &kq->kq_knlist[kn->kn_id]; 216759290Sjlemon else 2168133741Sjmg list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 216959290Sjlemon 2170151260Sambrisko if (!SLIST_EMPTY(list)) 2171151260Sambrisko SLIST_REMOVE(list, kn, knote, kn_link); 217259290Sjlemon if (kn->kn_status & KN_QUEUED) 217359290Sjlemon knote_dequeue(kn); 2174133741Sjmg KQ_UNLOCK_FLUX(kq); 2175133741Sjmg 2176133741Sjmg if (kn->kn_fop->f_isfd) { 2177133741Sjmg fdrop(kn->kn_fp, td); 2178133741Sjmg kn->kn_fp = NULL; 2179133741Sjmg } 2180133741Sjmg kqueue_fo_release(kn->kn_kevent.filter); 2181133741Sjmg kn->kn_fop = NULL; 218259290Sjlemon knote_free(kn); 218359290Sjlemon} 
218459290Sjlemon 218559290Sjlemonstatic void 218659290Sjlemonknote_enqueue(struct knote *kn) 218759290Sjlemon{ 218859290Sjlemon struct kqueue *kq = kn->kn_kq; 218959290Sjlemon 2190133741Sjmg KQ_OWNED(kn->kn_kq); 219159997Sjlemon KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); 219259997Sjlemon 2193133590Srwatson TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 219459290Sjlemon kn->kn_status |= KN_QUEUED; 219559290Sjlemon kq->kq_count++; 219659290Sjlemon kqueue_wakeup(kq); 219759290Sjlemon} 219859290Sjlemon 219959290Sjlemonstatic void 220059290Sjlemonknote_dequeue(struct knote *kn) 220159290Sjlemon{ 220259290Sjlemon struct kqueue *kq = kn->kn_kq; 220359290Sjlemon 2204133741Sjmg KQ_OWNED(kn->kn_kq); 220559997Sjlemon KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); 220659997Sjlemon 2207133590Srwatson TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 220859290Sjlemon kn->kn_status &= ~KN_QUEUED; 220959290Sjlemon kq->kq_count--; 221059290Sjlemon} 221159290Sjlemon 221259290Sjlemonstatic void 221359290Sjlemonknote_init(void) 221459290Sjlemon{ 2215133741Sjmg 221692751Sjeff knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, 221792751Sjeff NULL, NULL, UMA_ALIGN_PTR, 0); 221859290Sjlemon} 2219177253SrwatsonSYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); 222059290Sjlemon 222159290Sjlemonstatic struct knote * 2222133741Sjmgknote_alloc(int waitok) 222359290Sjlemon{ 2224133741Sjmg return ((struct knote *)uma_zalloc(knote_zone, 2225133741Sjmg (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO)); 222659290Sjlemon} 222759290Sjlemon 222859290Sjlemonstatic void 222959290Sjlemonknote_free(struct knote *kn) 223059290Sjlemon{ 2231133741Sjmg if (kn != NULL) 2232133741Sjmg uma_zfree(knote_zone, kn); 223359290Sjlemon} 2234162594Sjmg 2235162594Sjmg/* 2236162594Sjmg * Register the kev w/ the kq specified by fd. 
2237162594Sjmg */ 2238162594Sjmgint 2239162594Sjmgkqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) 2240162594Sjmg{ 2241162594Sjmg struct kqueue *kq; 2242162594Sjmg struct file *fp; 2243162594Sjmg int error; 2244162594Sjmg 2245224797Sjonathan if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0) 2246162594Sjmg return (error); 2247170029Srwatson if ((error = kqueue_acquire(fp, &kq)) != 0) 2248170029Srwatson goto noacquire; 2249162594Sjmg 2250162594Sjmg error = kqueue_register(kq, kev, td, waitok); 2251162594Sjmg 2252162594Sjmg kqueue_release(kq, 0); 2253162594Sjmg 2254170029Srwatsonnoacquire: 2255162608Sjmg fdrop(fp, td); 2256162594Sjmg 2257162594Sjmg return error; 2258162594Sjmg} 2259