kern_event.c revision 197242
159290Sjlemon/*- 272969Sjlemon * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3133741Sjmg * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org> 4197240Ssson * Copyright (c) 2009 Apple, Inc. 559290Sjlemon * All rights reserved. 659290Sjlemon * 759290Sjlemon * Redistribution and use in source and binary forms, with or without 859290Sjlemon * modification, are permitted provided that the following conditions 959290Sjlemon * are met: 1059290Sjlemon * 1. Redistributions of source code must retain the above copyright 1159290Sjlemon * notice, this list of conditions and the following disclaimer. 1259290Sjlemon * 2. Redistributions in binary form must reproduce the above copyright 1359290Sjlemon * notice, this list of conditions and the following disclaimer in the 1459290Sjlemon * documentation and/or other materials provided with the distribution. 1559290Sjlemon * 1659290Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1759290Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1859290Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1959290Sjlemon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2059290Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2159290Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2259290Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2359290Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2459290Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2559290Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2659290Sjlemon * SUCH DAMAGE. 
2759290Sjlemon */ 2859290Sjlemon 29116182Sobrien#include <sys/cdefs.h> 30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_event.c 197242 2009-09-16 03:37:39Z sson $"); 31116182Sobrien 32162592Sjmg#include "opt_ktrace.h" 33162592Sjmg 3459290Sjlemon#include <sys/param.h> 3559290Sjlemon#include <sys/systm.h> 3659290Sjlemon#include <sys/kernel.h> 3776166Smarkm#include <sys/lock.h> 3876166Smarkm#include <sys/mutex.h> 3959290Sjlemon#include <sys/proc.h> 40132138Salfred#include <sys/malloc.h> 4159290Sjlemon#include <sys/unistd.h> 4259290Sjlemon#include <sys/file.h> 43108524Salfred#include <sys/filedesc.h> 44132138Salfred#include <sys/filio.h> 4559290Sjlemon#include <sys/fcntl.h> 46133741Sjmg#include <sys/kthread.h> 4770834Swollman#include <sys/selinfo.h> 4859290Sjlemon#include <sys/queue.h> 4959290Sjlemon#include <sys/event.h> 5059290Sjlemon#include <sys/eventvar.h> 5159290Sjlemon#include <sys/poll.h> 5259290Sjlemon#include <sys/protosw.h> 53132138Salfred#include <sys/sigio.h> 54132138Salfred#include <sys/signalvar.h> 5559290Sjlemon#include <sys/socket.h> 5659290Sjlemon#include <sys/socketvar.h> 5759290Sjlemon#include <sys/stat.h> 5884138Sjlemon#include <sys/sysctl.h> 5959290Sjlemon#include <sys/sysproto.h> 60142934Sps#include <sys/syscallsubr.h> 61133741Sjmg#include <sys/taskqueue.h> 6259290Sjlemon#include <sys/uio.h> 63162592Sjmg#ifdef KTRACE 64162592Sjmg#include <sys/ktrace.h> 65162592Sjmg#endif 6659290Sjlemon 6792751Sjeff#include <vm/uma.h> 6859290Sjlemon 69141616Sphkstatic MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 70141616Sphk 71133741Sjmg/* 72133741Sjmg * This lock is used if multiple kq locks are required. This possibly 73133741Sjmg * should be made into a per proc lock. 
 */
static struct mtx	kq_global;
MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);

/*
 * Acquire/release the global ordering lock at most once; 'haslck' is the
 * caller's flag tracking whether it already owns the lock.
 */
#define KQ_GLOBAL_LOCK(lck, haslck)	do {	\
	if (!haslck)				\
		mtx_lock(lck);			\
	haslck = 1;				\
} while (0)
#define KQ_GLOBAL_UNLOCK(lck, haslck)	do {	\
	if (haslck)				\
		mtx_unlock(lck);		\
	haslck = 0;				\
} while (0)

TASKQUEUE_DEFINE_THREAD(kqueue);

static int	kevent_copyout(void *arg, struct kevent *kevp, int count);
static int	kevent_copyin(void *arg, struct kevent *kevp, int count);
static int	kqueue_register(struct kqueue *kq, struct kevent *kev,
		    struct thread *td, int waitok);
static int	kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void	kqueue_release(struct kqueue *kq, int locked);
static int	kqueue_expand(struct kqueue *kq, struct filterops *fops,
		    uintptr_t ident, int waitok);
static void	kqueue_task(void *arg, int pending);
static int	kqueue_scan(struct kqueue *kq, int maxevents,
		    struct kevent_copyops *k_ops,
		    const struct timespec *timeout,
		    struct kevent *keva, struct thread *td);
static void 	kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void	kqueue_fo_release(int filt);

/* File-descriptor operations backing a kqueue descriptor. */
static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_truncate_t	kqueue_truncate;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_truncate = kqueue_truncate,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};

static int 	knote_attach(struct knote *kn, struct kqueue *kq);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(int waitok);
static void 	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);
static int	filt_userattach(struct knote *kn);
static void	filt_userdetach(struct knote *kn);
static int	filt_user(struct knote *kn, long hint);
static void	filt_usertouch(struct knote *kn, struct kevent *kev,
		    long type);

static struct filterops file_filtops = {
	.f_isfd = 1,
	.f_attach = filt_fileattach,
};
static struct filterops kqread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_kqdetach,
	.f_event = filt_kqueue,
};
/* XXX - move to kern_proc.c? */
static struct filterops proc_filtops = {
	.f_isfd = 0,
	.f_attach = filt_procattach,
	.f_detach = filt_procdetach,
	.f_event = filt_proc,
};
static struct filterops timer_filtops = {
	.f_isfd = 0,
	.f_attach = filt_timerattach,
	.f_detach = filt_timerdetach,
	.f_event = filt_timer,
};
static struct filterops user_filtops = {
	.f_attach = filt_userattach,
	.f_detach = filt_userdetach,
	.f_event = filt_user,
	.f_touch = filt_usertouch,
};

static uma_zone_t	knote_zone;
static int 		kq_ncallouts = 0;
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

/* XXX - ensure not KN_INFLUX?? */
/*
 * Mark a knote active and queue it on its kqueue unless it is already
 * queued or disabled.  'islock' says whether the caller already holds
 * the kq lock; otherwise it is taken and dropped here.
 */
#define KNOTE_ACTIVATE(kn, islock) do { 				\
	if ((islock))							\
		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
	else								\
		KQ_LOCK((kn)->kn_kq);					\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue((kn));					\
	if (!(islock))							\
		KQ_UNLOCK((kn)->kn_kq);					\
} while(0)
#define KQ_LOCK(kq) do {						\
	mtx_lock(&(kq)->kq_lock);					\
} while (0)
/* Wake any thread sleeping for a KN_INFLUX knote to settle. */
#define KQ_FLUX_WAKEUP(kq) do {						\
	if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) {		\
		(kq)->kq_state &= ~KQ_FLUXWAIT;				\
		wakeup((kq));						\
	}								\
} while (0)
#define KQ_UNLOCK_FLUX(kq) do {						\
	KQ_FLUX_WAKEUP(kq);						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_UNLOCK(kq) do {						\
	mtx_unlock(&(kq)->kq_lock);					\
} while (0)
#define KQ_OWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_OWNED);				\
} while (0)
#define KQ_NOTOWNED(kq) do {						\
	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
} while (0)
/* Lock/unlock the knlist the knote is on, if any. */
#define KN_LIST_LOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define KN_LIST_UNLOCK(kn) do {						\
	if (kn->kn_knlist != NULL)					\
		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
} while (0)
#define	KNL_ASSERT_LOCK(knl, islocked) do {				\
	if (islocked)							\
		KNL_ASSERT_LOCKED(knl);					\
	else								\
		KNL_ASSERT_UNLOCKED(knl);				\
} while (0)
#ifdef INVARIANTS
#define	KNL_ASSERT_LOCKED(knl) do {					\
	knl->kl_assert_locked((knl)->kl_lockarg);			\
} while (0)
#define	KNL_ASSERT_UNLOCKED(knl) do {					\
	knl->kl_assert_unlocked((knl)->kl_lockarg);			\
} while (0)
#else /* !INVARIANTS */
#define	KNL_ASSERT_LOCKED(knl) do {} while(0)
#define	KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/* Attach routine for filter slots with no backing implementation. */
static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
};

struct filterops null_filtops = {
	.f_isfd = 0,
	.f_attach = filt_nullattach,
};

/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters.  EVFILT_* values are small
 * negative numbers, so ~filt maps them onto indices 0..EVFILT_SYSCOUNT-1.
 * filterops_lock protects the table and the per-slot refcounts.
 */
static struct mtx	filterops_lock;
MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
	MTX_DEF);
static struct {
	struct filterops *for_fop;
	int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
	{ &file_filtops },			/* EVFILT_READ */
	{ &file_filtops },			/* EVFILT_WRITE */
	{ &null_filtops },			/* EVFILT_AIO */
	{ &file_filtops },			/* EVFILT_VNODE */
	{ &proc_filtops },			/* EVFILT_PROC */
	{ &sig_filtops },			/* EVFILT_SIGNAL */
	{ &timer_filtops },			/* EVFILT_TIMER */
	{ &file_filtops },			/* EVFILT_NETDEV */
	{ &fs_filtops },			/* EVFILT_FS */
	{ &null_filtops },			/* EVFILT_LIO */
	{ &user_filtops },			/* EVFILT_USER */
};

/*
 * Simple redirection for all cdevsw style objects to call their fo_kqfilter
 * method.
290133741Sjmg */ 29159290Sjlemonstatic int 29272521Sjlemonfilt_fileattach(struct knote *kn) 29359290Sjlemon{ 294133635Sjmg 29572521Sjlemon return (fo_kqfilter(kn->kn_fp, kn)); 29659290Sjlemon} 29759290Sjlemon 29872521Sjlemon/*ARGSUSED*/ 29959290Sjlemonstatic int 30072521Sjlemonkqueue_kqfilter(struct file *fp, struct knote *kn) 30159290Sjlemon{ 302109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 30359290Sjlemon 30472521Sjlemon if (kn->kn_filter != EVFILT_READ) 305133741Sjmg return (EINVAL); 30659290Sjlemon 307133741Sjmg kn->kn_status |= KN_KQUEUE; 30872521Sjlemon kn->kn_fop = &kqread_filtops; 309133741Sjmg knlist_add(&kq->kq_sel.si_note, kn, 0); 310133741Sjmg 31159290Sjlemon return (0); 31259290Sjlemon} 31359290Sjlemon 31459290Sjlemonstatic void 31559290Sjlemonfilt_kqdetach(struct knote *kn) 31659290Sjlemon{ 317109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 31859290Sjlemon 319133741Sjmg knlist_remove(&kq->kq_sel.si_note, kn, 0); 32059290Sjlemon} 32159290Sjlemon 32259290Sjlemon/*ARGSUSED*/ 32359290Sjlemonstatic int 32459290Sjlemonfilt_kqueue(struct knote *kn, long hint) 32559290Sjlemon{ 326109153Sdillon struct kqueue *kq = kn->kn_fp->f_data; 32759290Sjlemon 32859290Sjlemon kn->kn_data = kq->kq_count; 32959290Sjlemon return (kn->kn_data > 0); 33059290Sjlemon} 33159290Sjlemon 332133741Sjmg/* XXX - move to kern_proc.c? 
*/ 33359290Sjlemonstatic int 33459290Sjlemonfilt_procattach(struct knote *kn) 33559290Sjlemon{ 33659290Sjlemon struct proc *p; 337113377Skbyanc int immediate; 33875451Srwatson int error; 33959290Sjlemon 340113377Skbyanc immediate = 0; 34159290Sjlemon p = pfind(kn->kn_id); 342113377Skbyanc if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { 343113377Skbyanc p = zpfind(kn->kn_id); 344113377Skbyanc immediate = 1; 345133741Sjmg } else if (p != NULL && (p->p_flag & P_WEXIT)) { 346133741Sjmg immediate = 1; 347113377Skbyanc } 348133741Sjmg 349122019Scognet if (p == NULL) 350122019Scognet return (ESRCH); 351133741Sjmg if ((error = p_cansee(curthread, p))) 35275451Srwatson return (error); 35359290Sjlemon 35459290Sjlemon kn->kn_ptr.p_proc = p; 35559290Sjlemon kn->kn_flags |= EV_CLEAR; /* automatically set */ 35659290Sjlemon 35759290Sjlemon /* 35859290Sjlemon * internal flag indicating registration done by kernel 35959290Sjlemon */ 36059290Sjlemon if (kn->kn_flags & EV_FLAG1) { 36159290Sjlemon kn->kn_data = kn->kn_sdata; /* ppid */ 36259290Sjlemon kn->kn_fflags = NOTE_CHILD; 36359290Sjlemon kn->kn_flags &= ~EV_FLAG1; 36459290Sjlemon } 36559290Sjlemon 366122686Scognet if (immediate == 0) 367133741Sjmg knlist_add(&p->p_klist, kn, 1); 368113377Skbyanc 369113377Skbyanc /* 370113377Skbyanc * Immediately activate any exit notes if the target process is a 371113377Skbyanc * zombie. This is necessary to handle the case where the target 372113377Skbyanc * process, e.g. a child, dies before the kevent is registered. 373113377Skbyanc */ 374113377Skbyanc if (immediate && filt_proc(kn, NOTE_EXIT)) 375133741Sjmg KNOTE_ACTIVATE(kn, 0); 376113377Skbyanc 37771500Sjhb PROC_UNLOCK(p); 37859290Sjlemon 37959290Sjlemon return (0); 38059290Sjlemon} 38159290Sjlemon 38259290Sjlemon/* 38359290Sjlemon * The knote may be attached to a different process, which may exit, 38459290Sjlemon * leaving nothing for the knote to be attached to. 
 * So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
/* XXX - move to kern_proc.c? */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	knlist_remove(&p->p_klist, kn, 0);
	kn->kn_ptr.p_proc = NULL;
}

/* XXX - move to kern_proc.c? */
static int
filt_proc(struct knote *kn, long hint)
{
	struct proc *p = kn->kn_ptr.p_proc;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.  The knote is
	 * pulled off the klist here (unless already detached), marked
	 * EOF/ONESHOT, and reports the exit status in kn_data.
	 */
	if (event == NOTE_EXIT) {
		if (!(kn->kn_status & KN_DETACHED))
			knlist_remove_inevent(&p->p_klist, kn);
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = p->p_xstat;
		kn->kn_ptr.p_proc = NULL;
		return (1);
	}

	return (kn->kn_fflags != 0);
}

/*
 * Called when the process forked.  It mostly does the same as the
 * knote(), activating all knotes registered to be activated when the
 * process forked.  Additionally, for each knote attached to the
 * parent, check whether user wants to track the new process.  If so
 * attach a new knote to it, and immediately report an event with the
 * child's pid.
 */
void
knote_fork(struct knlist *list, int pid)
{
	struct kqueue *kq;
	struct knote *kn;
	struct kevent kev;
	int error;

	if (list == NULL)
		return;
	list->kl_lock(list->kl_lockarg);

	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
		/* Skip knotes another thread is currently working on. */
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
			continue;
		kq = kn->kn_kq;
		KQ_LOCK(kq);
		/* Re-check under the kq lock; KN_INFLUX may have been set. */
		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The same as knote(), activate the event.
		 */
		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
			kn->kn_status |= KN_HASKQLOCK;
			if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
				KNOTE_ACTIVATE(kn, 1);
			kn->kn_status &= ~KN_HASKQLOCK;
			KQ_UNLOCK(kq);
			continue;
		}

		/*
		 * The NOTE_TRACK case. In addition to the activation
		 * of the event, we need to register new event to
		 * track the child. Drop the locks in preparation for
		 * the call to kqueue_register().
		 */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		list->kl_unlock(list->kl_lockarg);

		/*
		 * Activate existing knote and register a knote with
		 * new process.
		 */
		kev.ident = pid;
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;		/* parent */
		kev.udata = kn->kn_kevent.udata;/* preserve udata */
		error = kqueue_register(kq, &kev, NULL, 0);
		if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
			KNOTE_ACTIVATE(kn, 0);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
		KQ_LOCK(kq);
		kn->kn_status &= ~KN_INFLUX;
		KQ_UNLOCK_FLUX(kq);
		list->kl_lock(list->kl_lockarg);
	}
	list->kl_unlock(list->kl_lockarg);
}

/*
 * Convert a timer interval in milliseconds to a tick count for callout(9).
 */
static int
timertoticks(intptr_t data)
{
	struct timeval tv;
	int tticks;

	tv.tv_sec = data / 1000;
	tv.tv_usec = (data % 1000) * 1000;
	tticks = tvtohz(&tv);

	return tticks;
}

/* XXX - move to kern_timeout.c?
 */
/*
 * Callout handler for EVFILT_TIMER: bump the fire count, activate the
 * knote, and re-arm the callout for periodic (non-ONESHOT) timers.
 */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */

	if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
		    filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
/* XXX - move to kern_timeout.c? */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;

	/*
	 * NOTE(review): the increment-then-check against kq_calloutmax is
	 * racy under concurrent attaches (two callers may both pass the
	 * limit check) — confirm whether this best-effort cap is intended.
	 */
	atomic_add_int(&kq_ncallouts, 1);

	if (kq_ncallouts >= kq_calloutmax) {
		atomic_add_int(&kq_ncallouts, -1);
		return (ENOMEM);
	}

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	kn->kn_status &= ~KN_DETACHED;		/* knlist_add usually sets it */
	calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK);
	callout_init(calloutp, CALLOUT_MPSAFE);
	kn->kn_hook = calloutp;
	callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
	    filt_timerexpire, kn);

	return (0);
}

/* XXX - move to kern_timeout.c?
 */
/* Tear down an EVFILT_TIMER knote: stop the callout and free it. */
static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	/* callout_drain() waits for a concurrent filt_timerexpire() run. */
	callout_drain(calloutp);
	free(calloutp, M_KQUEUE);
	atomic_add_int(&kq_ncallouts, -1);
	kn->kn_status |= KN_DETACHED;	/* knlist_remove usually clears it */
}

/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{

	/* Fires when the callout has expired at least once since last read. */
	return (kn->kn_data != 0);
}

static int
filt_userattach(struct knote *kn)
{

	/*
	 * EVFILT_USER knotes are not attached to anything in the kernel.
	 * kn_hookid doubles as the "triggered" flag.
	 */
	kn->kn_hook = NULL;
	if (kn->kn_fflags & NOTE_TRIGGER)
		kn->kn_hookid = 1;
	else
		kn->kn_hookid = 0;
	return (0);
}

static void
filt_userdetach(__unused struct knote *kn)
{

	/*
	 * EVFILT_USER knotes are not attached to anything in the kernel.
	 */
}

static int
filt_user(struct knote *kn, __unused long hint)
{

	/* Active iff userland has posted NOTE_TRIGGER since the last clear. */
	return (kn->kn_hookid);
}

/*
 * Merge userland-supplied state into an EVFILT_USER knote (EVENT_REGISTER)
 * or export the knote's state back to userland (EVENT_PROCESS), applying
 * the NOTE_FFCTRLMASK flag-control operation on registration.
 */
static void
filt_usertouch(struct knote *kn, struct kevent *kev, long type)
{
	int ffctrl;

	switch (type) {
	case EVENT_REGISTER:
		if (kev->fflags & NOTE_TRIGGER)
			kn->kn_hookid = 1;

		ffctrl = kev->fflags & NOTE_FFCTRLMASK;
		kev->fflags &= NOTE_FFLAGSMASK;
		switch (ffctrl) {
		case NOTE_FFNOP:
			break;

		case NOTE_FFAND:
			kn->kn_sfflags &= kev->fflags;
			break;

		case NOTE_FFOR:
			kn->kn_sfflags |= kev->fflags;
			break;

		case NOTE_FFCOPY:
			kn->kn_sfflags = kev->fflags;
			break;

		default:
			/* XXX Return error? */
			break;
		}
		kn->kn_sdata = kev->data;
		if (kev->flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

	case EVENT_PROCESS:
		*kev = kn->kn_kevent;
		kev->fflags = kn->kn_sfflags;
		kev->data = kn->kn_sdata;
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_hookid = 0;
			kn->kn_data = 0;
			kn->kn_fflags = 0;
		}
		break;

	default:
		panic("filt_usertouch() - invalid type (%ld)", type);
		break;
	}
}

/*
 * kqueue(2) system call: allocate a new kqueue, wire it to a file
 * descriptor, and return the descriptor to the caller.
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;

	/* An extra reference on `nfp' has been held for us by falloc(). */
	kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
	TAILQ_INIT(&kq->kq_head);
	kq->kq_fdp = fdp;
	knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

	/* Link the kqueue into the process' list for cleanup on fd close. */
	FILEDESC_XLOCK(fdp);
	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
	FILEDESC_XUNLOCK(fdp);

	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
	fdrop(fp, td);

	td->td_retval[0] = fd;
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * kevent(2) system call: thin wrapper around kern_kevent() that copies
 * in the optional timeout and (under KTRACE) records the change and
 * event lists as generic I/O.
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	struct kevent_copyops k_ops = { uap,
					kevent_copyout,
					kevent_copyin};
	int error;
#ifdef KTRACE
	struct uio ktruio;
	struct iovec ktriov;
	struct uio *ktruioin = NULL;
	struct uio *ktruioout = NULL;
#endif

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		ktriov.iov_base = uap->changelist;
		ktriov.iov_len = uap->nchanges * sizeof(struct kevent);
		ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1,
		    .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ,
		    .uio_td = td };
		ktruioin = cloneuio(&ktruio);
		ktriov.iov_base = uap->eventlist;
		ktriov.iov_len = uap->nevents * sizeof(struct kevent);
		ktruioout = cloneuio(&ktruio);
	}
#endif

	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
	    &k_ops, tsp);

#ifdef KTRACE
	if (ktruioin != NULL) {
		ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0);
		ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent);
		ktrgenio(uap->fd, UIO_READ, ktruioout, error);
	}
#endif

	return (error);
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;	/* advance the cursor */
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;	/* advance the cursor */
	return (error);
}

/*
 * Apply the change list in KQ_NEVENTS-sized batches through k_ops, then
 * scan for up to 'nevents' triggered events, honoring 'timeout'.
 */
int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
    struct kevent_copyops *k_ops, const struct timespec *timeout)
{
	struct kevent keva[KQ_NEVENTS];
	struct kevent *kevp, *changes;
	struct kqueue *kq;
	struct file *fp;
	int i, n, nerrors, error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((error = kqueue_acquire(fp, &kq)) != 0)
		goto done_norel;

	nerrors = 0;

	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ?
KQ_NEVENTS : nchanges; 825146950Sps error = k_ops->k_copyin(k_ops->arg, keva, n); 826146950Sps if (error) 827146950Sps goto done; 828146950Sps changes = keva; 82959290Sjlemon for (i = 0; i < n; i++) { 830142934Sps kevp = &changes[i]; 831151260Sambrisko if (!kevp->filter) 832151260Sambrisko continue; 83363452Sjlemon kevp->flags &= ~EV_SYSFLAGS; 834133741Sjmg error = kqueue_register(kq, kevp, td, 1); 83559290Sjlemon if (error) { 836142934Sps if (nevents != 0) { 83763452Sjlemon kevp->flags = EV_ERROR; 83863452Sjlemon kevp->data = error; 839146950Sps (void) k_ops->k_copyout(k_ops->arg, 840146950Sps kevp, 1); 841142934Sps nevents--; 84259290Sjlemon nerrors++; 84359290Sjlemon } else { 84468883Sdillon goto done; 84559290Sjlemon } 84659290Sjlemon } 84759290Sjlemon } 848142934Sps nchanges -= n; 84959290Sjlemon } 85059290Sjlemon if (nerrors) { 851133741Sjmg td->td_retval[0] = nerrors; 85268883Sdillon error = 0; 85368883Sdillon goto done; 85459290Sjlemon } 85559290Sjlemon 856146950Sps error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td); 85768883Sdillondone: 858133741Sjmg kqueue_release(kq, 0); 859133741Sjmgdone_norel: 860170066Srwatson fdrop(fp, td); 86159290Sjlemon return (error); 86259290Sjlemon} 86359290Sjlemon 86459290Sjlemonint 86588633Salfredkqueue_add_filteropts(int filt, struct filterops *filtops) 86688633Salfred{ 867133741Sjmg int error; 86888633Salfred 869133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { 870133741Sjmg printf( 871133741Sjmg"trying to add a filterop that is out of range: %d is beyond %d\n", 872133741Sjmg ~filt, EVFILT_SYSCOUNT); 873133741Sjmg return EINVAL; 874133741Sjmg } 875133741Sjmg mtx_lock(&filterops_lock); 876133741Sjmg if (sysfilt_ops[~filt].for_fop != &null_filtops && 877133741Sjmg sysfilt_ops[~filt].for_fop != NULL) 878133741Sjmg error = EEXIST; 879133741Sjmg else { 880133741Sjmg sysfilt_ops[~filt].for_fop = filtops; 881133741Sjmg sysfilt_ops[~filt].for_refcnt = 0; 882133741Sjmg } 883133741Sjmg mtx_unlock(&filterops_lock); 
884133741Sjmg 88588633Salfred return (0); 88688633Salfred} 88788633Salfred 88888633Salfredint 88988633Salfredkqueue_del_filteropts(int filt) 89088633Salfred{ 891133741Sjmg int error; 89288633Salfred 893133741Sjmg error = 0; 894133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 895133741Sjmg return EINVAL; 896133741Sjmg 897133741Sjmg mtx_lock(&filterops_lock); 898133741Sjmg if (sysfilt_ops[~filt].for_fop == &null_filtops || 899133741Sjmg sysfilt_ops[~filt].for_fop == NULL) 900133741Sjmg error = EINVAL; 901133741Sjmg else if (sysfilt_ops[~filt].for_refcnt != 0) 902133741Sjmg error = EBUSY; 903133741Sjmg else { 904133741Sjmg sysfilt_ops[~filt].for_fop = &null_filtops; 905133741Sjmg sysfilt_ops[~filt].for_refcnt = 0; 906133741Sjmg } 907133741Sjmg mtx_unlock(&filterops_lock); 908133741Sjmg 909133741Sjmg return error; 91088633Salfred} 91188633Salfred 912133741Sjmgstatic struct filterops * 913133741Sjmgkqueue_fo_find(int filt) 914133741Sjmg{ 915133741Sjmg 916133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 917133741Sjmg return NULL; 918133741Sjmg 919133741Sjmg mtx_lock(&filterops_lock); 920133741Sjmg sysfilt_ops[~filt].for_refcnt++; 921133741Sjmg if (sysfilt_ops[~filt].for_fop == NULL) 922133741Sjmg sysfilt_ops[~filt].for_fop = &null_filtops; 923133741Sjmg mtx_unlock(&filterops_lock); 924133741Sjmg 925133741Sjmg return sysfilt_ops[~filt].for_fop; 926133741Sjmg} 927133741Sjmg 928133741Sjmgstatic void 929133741Sjmgkqueue_fo_release(int filt) 930133741Sjmg{ 931133741Sjmg 932133741Sjmg if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 933133741Sjmg return; 934133741Sjmg 935133741Sjmg mtx_lock(&filterops_lock); 936133741Sjmg KASSERT(sysfilt_ops[~filt].for_refcnt > 0, 937133741Sjmg ("filter object refcount not valid on release")); 938133741Sjmg sysfilt_ops[~filt].for_refcnt--; 939133741Sjmg mtx_unlock(&filterops_lock); 940133741Sjmg} 941133741Sjmg 942133741Sjmg/* 943170029Srwatson * A ref to kq (obtained via kqueue_acquire) must be held. 
waitok will
 * influence if memory allocation should wait.  Make sure it is 0 if you
 * hold any mutexes.
 *
 * Applies one change (kev) to the kqueue: look up an existing knote for
 * (ident, filter), creating one on EV_ADD, deleting on EV_DELETE, and
 * applying EV_ENABLE/EV_DISABLE and updated filter arguments otherwise.
 */
static int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok)
{
	struct filterops *fops;
	struct file *fp;
	struct knote *kn, *tkn;
	int error, filt, event;
	int haskqglobal;

	fp = NULL;
	kn = NULL;
	error = 0;
	haskqglobal = 0;

	filt = kev->filter;
	fops = kqueue_fo_find(filt);	/* takes a ref on the filterops slot */
	if (fops == NULL)
		return EINVAL;

	tkn = knote_alloc(waitok);	/* prevent waiting with locks */

findkn:
	if (fops->f_isfd) {
		/* fd-based filters: the knote lives in kq_knlist[fd]. */
		KASSERT(td != NULL, ("td is NULL"));
		error = fget(td, kev->ident, &fp);
		if (error)
			goto done;

		if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops,
		    kev->ident, 0) != 0) {
			/*
			 * The non-sleeping expand failed; drop the file,
			 * retry the expand with waitok, and start over.
			 */
			fdrop(fp, td);
			fp = NULL;
			error = kqueue_expand(kq, fops, kev->ident, waitok);
			if (error)
				goto done;
			goto findkn;
		}

		if (fp->f_type == DTYPE_KQUEUE) {
			/*
			 * if we add some intelligence about what we are doing,
			 * we should be able to support events on ourselves.
			 * We need to know when we are doing this to prevent
			 * getting both the knlist lock and the kq lock since
			 * they are the same thing.
			 */
			if (fp->f_data == kq) {
				/* watching ourselves is not supported */
				error = EINVAL;
				goto done;
			}

			KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
		}

		KQ_LOCK(kq);
		if (kev->ident < kq->kq_knlistsize) {
			SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
				if (kev->filter == kn->kn_filter)
					break;
		}
	} else {
		/* Non-fd filters: the knote lives in the ident hash. */
		if ((kev->flags & EV_ADD) == EV_ADD)
			kqueue_expand(kq, fops, kev->ident, waitok);

		KQ_LOCK(kq);
		if (kq->kq_knhashmask != 0) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	/* knote is in the process of changing, wait for it to stabilize. */
	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
		if (fp != NULL) {
			fdrop(fp, td);
			fp = NULL;
		}
		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
		kq->kq_state |= KQ_FLUXWAIT;
		/* PDROP: msleep releases kq_lock; we re-take it at findkn. */
		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0);
		goto findkn;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kn == NULL) {
		if (kev->flags & EV_ADD) {
			/* Consume the preallocated knote. */
			kn = tkn;
			tkn = NULL;
			if (kn == NULL) {
				KQ_UNLOCK(kq);
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;
			/*
			 * apply reference counts to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fops = NULL;
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
			    EV_ENABLE | EV_DISABLE);
			kn->kn_status = KN_INFLUX|KN_DETACHED;

			error = knote_attach(kn, kq);
			KQ_UNLOCK(kq);
			if (error != 0) {
				/* hand the knote back for freeing at done */
				tkn = kn;
				goto done;
			}

			if ((error = kn->kn_fop->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
			KN_LIST_LOCK(kn);
			goto done_ev_add;
		} else {
			/* No matching knote and the EV_ADD flag is not set. */
			KQ_UNLOCK(kq);
			error = ENOENT;
			goto done;
		}
	}

	if (kev->flags & EV_DELETE) {
		/* Mark in-flux so nobody else touches it while we tear down. */
		kn->kn_status |= KN_INFLUX;
		KQ_UNLOCK(kq);
		if (!(kn->kn_status & KN_DETACHED))
			kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	/*
	 * The user may change some filter values after the initial EV_ADD,
	 * but doing so will not reset any filter which has already been
	 * triggered.
	 */
	kn->kn_status |= KN_INFLUX;
	KQ_UNLOCK(kq);
	KN_LIST_LOCK(kn);
	kn->kn_kevent.udata = kev->udata;
	if (!fops->f_isfd && fops->f_touch != NULL) {
		/* Let the filter merge the new arguments itself. */
		fops->f_touch(kn, kev, EVENT_REGISTER);
	} else {
		kn->kn_sfflags = kev->fflags;
		kn->kn_sdata = kev->data;
	}

	/*
	 * We can get here with kn->kn_knlist == NULL.  This can happen when
	 * the initial attach event decides that the event is "completed"
	 * already.  i.e. filt_procattach is called on a zombie process.  It
	 * will call filt_proc which will remove it from the list, and NULL
	 * kn_knlist.
	 */
done_ev_add:
	/* Re-evaluate the filter and activate if it already triggered. */
	event = kn->kn_fop->f_event(kn, 0);
	KQ_LOCK(kq);
	if (event)
		KNOTE_ACTIVATE(kn, 1);
	kn->kn_status &= ~KN_INFLUX;
	KN_LIST_UNLOCK(kn);

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		kn->kn_status |= KN_DISABLED;
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
	}
	KQ_UNLOCK_FLUX(kq);

done:
	/* Release anything we still own: file ref, spare knote, fops ref. */
	KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
	if (fp != NULL)
		fdrop(fp, td);
	if (tkn != NULL)
		knote_free(tkn);
	if (fops != NULL)
		kqueue_fo_release(filt);
	return (error);
}

115359290Sjlemonstatic int 1154170029Srwatsonkqueue_acquire(struct file *fp, struct kqueue **kqp) 115559290Sjlemon{ 1156133741Sjmg int error; 115789306Salfred struct kqueue *kq; 1158133741Sjmg 1159133741Sjmg error = 0; 1160133741Sjmg 1161174988Sjeff kq = fp->f_data; 1162174988Sjeff if (fp->f_type != DTYPE_KQUEUE || kq == NULL) 1163174988Sjeff return (EBADF); 1164174988Sjeff *kqp = kq; 1165174988Sjeff KQ_LOCK(kq); 1166174988Sjeff if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { 1167133741Sjmg KQ_UNLOCK(kq); 1168174988Sjeff return (EBADF); 1169174988Sjeff } 1170174988Sjeff kq->kq_refcnt++; 1171174988Sjeff KQ_UNLOCK(kq); 1172133741Sjmg 1173133741Sjmg return error; 1174133741Sjmg} 1175133741Sjmg 1176133741Sjmgstatic void 1177133741Sjmgkqueue_release(struct kqueue *kq, int locked) 1178133741Sjmg{ 1179133741Sjmg if (locked) 1180133741Sjmg KQ_OWNED(kq); 1181133741Sjmg else 1182133741Sjmg KQ_LOCK(kq); 1183133741Sjmg kq->kq_refcnt--; 1184133741Sjmg if (kq->kq_refcnt == 1) 1185133741Sjmg wakeup(&kq->kq_refcnt); 1186133741Sjmg if (!locked) 1187133741Sjmg KQ_UNLOCK(kq); 1188133741Sjmg} 1189133741Sjmg 1190133741Sjmgstatic void 1191133741Sjmgkqueue_schedtask(struct kqueue *kq) 1192133741Sjmg{ 1193133741Sjmg 1194133741Sjmg KQ_OWNED(kq); 1195133741Sjmg KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), 1196133741Sjmg ("scheduling kqueue task while draining")); 1197133741Sjmg 1198133741Sjmg if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { 1199133741Sjmg taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task); 1200133741Sjmg kq->kq_state |= KQ_TASKSCHED; 1201133741Sjmg } 1202133741Sjmg} 1203133741Sjmg 1204133741Sjmg/* 1205133741Sjmg * Expand the kq to make sure we have storage for fops/ident pair. 1206133741Sjmg * 1207133741Sjmg * Return 0 on success (or no work necessary), return errno on failure. 1208133741Sjmg * 1209133741Sjmg * Not calling hashinit w/ waitok (proper malloc flag) should be safe. 
1210133741Sjmg * If kqueue_register is called from a non-fd context, there usually/should 1211133741Sjmg * be no locks held. 1212133741Sjmg */ 1213133741Sjmgstatic int 1214133741Sjmgkqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, 1215133741Sjmg int waitok) 1216133741Sjmg{ 1217133741Sjmg struct klist *list, *tmp_knhash; 1218133741Sjmg u_long tmp_knhashmask; 1219133741Sjmg int size; 1220133741Sjmg int fd; 1221133741Sjmg int mflag = waitok ? M_WAITOK : M_NOWAIT; 1222133741Sjmg 1223133741Sjmg KQ_NOTOWNED(kq); 1224133741Sjmg 1225133741Sjmg if (fops->f_isfd) { 1226133741Sjmg fd = ident; 1227133741Sjmg if (kq->kq_knlistsize <= fd) { 1228133741Sjmg size = kq->kq_knlistsize; 1229133741Sjmg while (size <= fd) 1230133741Sjmg size += KQEXTENT; 1231184214Sdes list = malloc(size * sizeof list, M_KQUEUE, mflag); 1232133741Sjmg if (list == NULL) 1233133741Sjmg return ENOMEM; 1234133741Sjmg KQ_LOCK(kq); 1235133741Sjmg if (kq->kq_knlistsize > fd) { 1236184205Sdes free(list, M_KQUEUE); 1237133741Sjmg list = NULL; 1238133741Sjmg } else { 1239133741Sjmg if (kq->kq_knlist != NULL) { 1240133741Sjmg bcopy(kq->kq_knlist, list, 1241133741Sjmg kq->kq_knlistsize * sizeof list); 1242184205Sdes free(kq->kq_knlist, M_KQUEUE); 1243133741Sjmg kq->kq_knlist = NULL; 1244133741Sjmg } 1245133741Sjmg bzero((caddr_t)list + 1246133741Sjmg kq->kq_knlistsize * sizeof list, 1247133741Sjmg (size - kq->kq_knlistsize) * sizeof list); 1248133741Sjmg kq->kq_knlistsize = size; 1249133741Sjmg kq->kq_knlist = list; 1250133741Sjmg } 1251133741Sjmg KQ_UNLOCK(kq); 1252133741Sjmg } 1253133741Sjmg } else { 1254133741Sjmg if (kq->kq_knhashmask == 0) { 1255133741Sjmg tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, 1256133741Sjmg &tmp_knhashmask); 1257133741Sjmg if (tmp_knhash == NULL) 1258133741Sjmg return ENOMEM; 1259133741Sjmg KQ_LOCK(kq); 1260133741Sjmg if (kq->kq_knhashmask == 0) { 1261133741Sjmg kq->kq_knhash = tmp_knhash; 1262133741Sjmg kq->kq_knhashmask = tmp_knhashmask; 1263133741Sjmg } else 
{ 1264133741Sjmg free(tmp_knhash, M_KQUEUE); 1265133741Sjmg } 1266133741Sjmg KQ_UNLOCK(kq); 1267133741Sjmg } 1268133741Sjmg } 1269133741Sjmg 1270133741Sjmg KQ_NOTOWNED(kq); 1271133741Sjmg return 0; 1272133741Sjmg} 1273133741Sjmg 1274133741Sjmgstatic void 1275133741Sjmgkqueue_task(void *arg, int pending) 1276133741Sjmg{ 1277133741Sjmg struct kqueue *kq; 1278133741Sjmg int haskqglobal; 1279133741Sjmg 1280133741Sjmg haskqglobal = 0; 1281133741Sjmg kq = arg; 1282133741Sjmg 1283133741Sjmg KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 1284133741Sjmg KQ_LOCK(kq); 1285133741Sjmg 1286133741Sjmg KNOTE_LOCKED(&kq->kq_sel.si_note, 0); 1287133741Sjmg 1288133741Sjmg kq->kq_state &= ~KQ_TASKSCHED; 1289133741Sjmg if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { 1290133741Sjmg wakeup(&kq->kq_state); 1291133741Sjmg } 1292133741Sjmg KQ_UNLOCK(kq); 1293133741Sjmg KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1294133741Sjmg} 1295133741Sjmg 1296133741Sjmg/* 1297133741Sjmg * Scan, update kn_data (if not ONESHOT), and copyout triggered events. 1298133741Sjmg * We treat KN_MARKER knotes as if they are INFLUX. 
 */
static int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
    const struct timespec *tsp, struct kevent *keva, struct thread *td)
{
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, *marker;
	int count, timeout, nkev, error, influx;
	int haskqglobal, touch;

	count = maxevents;
	nkev = 0;
	error = 0;
	haskqglobal = 0;

	if (maxevents == 0)
		goto done_nl;

	/*
	 * Convert the timeout to an absolute deadline (atv) and an initial
	 * tick count.  A zero timespec means poll (timeout = -1 triggers
	 * EWOULDBLOCK below); NULL means block indefinitely.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done_nl;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	/* The marker bounds this pass over the queue. */
	marker = knote_alloc(1);
	if (marker == NULL) {
		error = ENOMEM;
		goto done_nl;
	}
	marker->kn_status = KN_MARKER;
	KQ_LOCK(kq);
	goto start;

retry:
	/* Recompute the remaining sleep time against the deadline. */
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = keva;
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
			    "kqread", timeout);
		}
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
	influx = 0;
	while (count) {
		KQ_OWNED(kq);
		kn = TAILQ_FIRST(&kq->kq_head);

		/*
		 * Another scanner's marker, or an in-flux knote: wake any
		 * flux waiters for knotes we already processed, then sleep
		 * until the queue head stabilizes.
		 */
		if ((kn->kn_status == KN_MARKER && kn != marker) ||
		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
			if (influx) {
				influx = 0;
				KQ_FLUX_WAKEUP(kq);
			}
			kq->kq_state |= KQ_FLUXWAIT;
			error = msleep(kq, &kq->kq_lock, PSOCK,
			    "kqflxwt", 0);
			continue;
		}

		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) {
			/* Disabled knotes are silently dequeued. */
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if (kn == marker) {
			/* Reached our marker: this pass is complete. */
			KQ_FLUX_WAKEUP(kq);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		KASSERT((kn->kn_status & KN_INFLUX) == 0,
		    ("KN_INFLUX set when not suppose to be"));

		if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
			/* Deliver once, then detach and destroy the knote. */
			kn->kn_status &= ~KN_QUEUED;
			kn->kn_status |= KN_INFLUX;
			kq->kq_count--;
			KQ_UNLOCK(kq);
			/*
			 * We don't need to lock the list since we've marked
			 * it _INFLUX.
			 */
			*kevp = kn->kn_kevent;
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
			kn = NULL;
		} else {
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
			KN_LIST_LOCK(kn);
			if (kn->kn_fop->f_event(kn, 0) == 0) {
				/* Filter no longer triggered: deactivate. */
				KQ_LOCK(kq);
				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
				kn->kn_status &=
				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
				kq->kq_count--;
				KN_LIST_UNLOCK(kn);
				influx = 1;
				continue;
			}
			/* Non-fd filters with f_touch fill in the kevent. */
			touch = (!kn->kn_fop->f_isfd &&
			    kn->kn_fop->f_touch != NULL);
			if (touch)
				kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS);
			else
				*kevp = kn->kn_kevent;
			KQ_LOCK(kq);
			KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
			if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
				/*
				 * Manually clear knotes who weren't
				 * 'touch'ed.
				 */
				if (touch == 0 && kn->kn_flags & EV_CLEAR) {
					kn->kn_data = 0;
					kn->kn_fflags = 0;
				}
				if (kn->kn_flags & EV_DISPATCH)
					kn->kn_status |= KN_DISABLED;
				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
				kq->kq_count--;
			} else
				/* Level-triggered: requeue after the marker. */
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);

			kn->kn_status &= ~(KN_INFLUX);
			KN_LIST_UNLOCK(kn);
			influx = 1;
		}

		/* we are returning a copy to the user */
		kevp++;
		nkev++;
		count--;

		if (nkev == KQ_NEVENTS) {
			/* Local buffer full: flush to the user unlocked. */
			influx = 0;
			KQ_UNLOCK_FLUX(kq);
			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
			nkev = 0;
			kevp = keva;
			KQ_LOCK(kq);
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
done:
	KQ_OWNED(kq);
	KQ_UNLOCK_FLUX(kq);
	knote_free(marker);
done_nl:
	KQ_NOTOWNED(kq);
	if (nkev != 0)
		error = k_ops->k_copyout(k_ops->arg, keva, nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	/* read(2) on a kqueue descriptor is not supported. */
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	/* write(2) on a kqueue descriptor is not supported. */
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

	/* ftruncate(2) on a kqueue descriptor is not supported. */
	return (EINVAL);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
	struct ucred *active_cred, struct thread *td)
{
	/*
	 * Enabling sigio causes two major problems:
	 * 1) infinite recursion:
	 * Synopsys: kevent is being used to track signals and have FIOASYNC
	 * set.  On receipt of a signal this will cause a kqueue to recurse
	 * into itself over and over.  Sending the sigio causes the kqueue
	 * to become ready, which in turn posts sigio again, forever.
	 * Solution: this can be solved by setting a flag in the kqueue that
	 * we have a SIGIO in progress.
	 * 2) locking problems:
	 * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts
	 * us above the proc and pgrp locks.
	 * Solution: Post a signal using an async mechanism, being sure to
	 * record a generation count in the delivery so that we do not deliver
	 * a signal to the wrong process.
	 *
	 * Note, these two mechanisms are somewhat mutually exclusive!
	 */
#if 0
	struct kqueue *kq;

	kq = fp->f_data;
	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			kq->kq_state |= KQ_ASYNC;
		} else {
			kq->kq_state &= ~KQ_ASYNC;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &kq->kq_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&kq->kq_sigio);
		return (0);
	}
#endif

	return (ENOTTY);
}

/*
 * poll(2)/select(2) backend: a kqueue is readable when it has pending
 * events (kq_count != 0); otherwise the caller is recorded via selrecord.
 */
/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return POLLERR;

	KQ_LOCK(kq);
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			if (SEL_WAITING(&kq->kq_sel))
				kq->kq_state |= KQ_SEL;
		}
	}
	/* Drop the acquire ref while still holding the lock. */
	kqueue_release(kq, 1);
	KQ_UNLOCK(kq);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{

	bzero((void *)st, sizeof *st);
	/*
	 * We no longer return kq_count because the unlocked value is useless.
	 * If you spent all this time getting the count, why not spend your
	 * syscall better by calling kevent?
	 *
	 * XXX - This is needed for libc_r.
	 */
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * Tear down a kqueue on last close: wait for other references to drain,
 * detach and drop every knote (fd table and ident hash), drain the
 * notification task, and free all storage.
 */
/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp;
	struct knote *kn;
	int i;
	int error;

	if ((error = kqueue_acquire(fp, &kq)))
		return error;

	KQ_LOCK(kq);

	KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
	    ("kqueue already closing"));
	kq->kq_state |= KQ_CLOSING;
	/* Wait for everyone else's kqueue_acquire refs to go away. */
	if (kq->kq_refcnt > 1)
		msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);

	KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
	fdp = kq->kq_fdp;

	KASSERT(knlist_empty(&kq->kq_sel.si_note),
	    ("kqueue's knlist not empty"));

	for (i = 0; i < kq->kq_knlistsize; i++) {
		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
			/* In-flux knotes must settle before we can drop them. */
			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
				kq->kq_state |= KQ_FLUXWAIT;
				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
				continue;
			}
			kn->kn_status |= KN_INFLUX;
			KQ_UNLOCK(kq);
			if (!(kn->kn_status & KN_DETACHED))
				kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			KQ_LOCK(kq);
		}
	}
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i <= kq->kq_knhashmask; i++) {
			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
					kq->kq_state |= KQ_FLUXWAIT;
					msleep(kq, &kq->kq_lock, PSOCK,
					    "kqclo2", 0);
					continue;
				}
				kn->kn_status |= KN_INFLUX;
				KQ_UNLOCK(kq);
				if (!(kn->kn_status & KN_DETACHED))
					kn->kn_fop->f_detach(kn);
				knote_drop(kn, td);
				KQ_LOCK(kq);
			}
		}
	}

	/* Wait for a scheduled kqueue_task() to finish. */
	if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) {
		kq->kq_state |= KQ_TASKDRAIN;
		msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0);
	}

	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		selwakeuppri(&kq->kq_sel, PSOCK);
		if (!SEL_WAITING(&kq->kq_sel))
			kq->kq_state &= ~KQ_SEL;
	}

	KQ_UNLOCK(kq);

	FILEDESC_XLOCK(fdp);
	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
	FILEDESC_XUNLOCK(fdp);

	knlist_destroy(&kq->kq_sel.si_note);
	mtx_destroy(&kq->kq_lock);
	kq->kq_fdp = NULL;

	if (kq->kq_knhash != NULL)
		free(kq->kq_knhash, M_KQUEUE);
	if (kq->kq_knlist != NULL)
		free(kq->kq_knlist, M_KQUEUE);

	funsetown(&kq->kq_sigio);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

/*
 * Notify everything waiting on this kqueue: sleepers in kqueue_scan,
 * select/poll waiters, watching kqueues (via the task), and SIGIO.
 * Caller holds the kq lock.
 */
static void
kqueue_wakeup(struct kqueue *kq)
{
	KQ_OWNED(kq);

	if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
		selwakeuppri(&kq->kq_sel, PSOCK);
		if (!SEL_WAITING(&kq->kq_sel))
			kq->kq_state &= ~KQ_SEL;
	}
	if (!knlist_empty(&kq->kq_sel.si_note))
		kqueue_schedtask(kq);
	if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
		pgsigio(&kq->kq_sigio, SIGIO, 0);
	}
}

/*
 * Walk down a list of knotes, activating them if their event has triggered.
 *
 * There is a possibility to optimize in the case of one kq watching another.
 * Instead of scheduling a task to wake it up, you could pass enough state
 * down the chain to make up the parent kqueue.  Make this code functional
 * first.
174259290Sjlemon */ 174359290Sjlemonvoid 1744195148Sstasknote(struct knlist *list, long hint, int lockflags) 174559290Sjlemon{ 1746133741Sjmg struct kqueue *kq; 174759290Sjlemon struct knote *kn; 1748195148Sstas int error; 174959290Sjlemon 1750133741Sjmg if (list == NULL) 1751133741Sjmg return; 1752133741Sjmg 1753195148Sstas KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); 1754147730Sssouhlal 1755195148Sstas if ((lockflags & KNF_LISTLOCKED) == 0) 1756147730Sssouhlal list->kl_lock(list->kl_lockarg); 1757147730Sssouhlal 1758133741Sjmg /* 1759133741Sjmg * If we unlock the list lock (and set KN_INFLUX), we can eliminate 1760133741Sjmg * the kqueue scheduling, but this will introduce four 1761133741Sjmg * lock/unlock's for each knote to test. If we do, continue to use 1762133741Sjmg * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is 1763133741Sjmg * only safe if you want to remove the current item, which we are 1764133741Sjmg * not doing. 1765133741Sjmg */ 1766133741Sjmg SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { 1767133741Sjmg kq = kn->kn_kq; 1768133741Sjmg if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) { 1769133741Sjmg KQ_LOCK(kq); 1770195148Sstas if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { 1771195148Sstas KQ_UNLOCK(kq); 1772195148Sstas } else if ((lockflags & KNF_NOKQLOCK) != 0) { 1773195148Sstas kn->kn_status |= KN_INFLUX; 1774195148Sstas KQ_UNLOCK(kq); 1775195148Sstas error = kn->kn_fop->f_event(kn, hint); 1776195148Sstas KQ_LOCK(kq); 1777195148Sstas kn->kn_status &= ~KN_INFLUX; 1778195148Sstas if (error) 1779195148Sstas KNOTE_ACTIVATE(kn, 1); 1780195148Sstas KQ_UNLOCK_FLUX(kq); 1781195148Sstas } else { 1782133741Sjmg kn->kn_status |= KN_HASKQLOCK; 1783133741Sjmg if (kn->kn_fop->f_event(kn, hint)) 1784133741Sjmg KNOTE_ACTIVATE(kn, 1); 1785133741Sjmg kn->kn_status &= ~KN_HASKQLOCK; 1786195148Sstas KQ_UNLOCK(kq); 1787133741Sjmg } 1788133741Sjmg } 1789133741Sjmg kq = NULL; 1790133741Sjmg } 1791195148Sstas if ((lockflags & KNF_LISTLOCKED) == 
0) 1792147730Sssouhlal list->kl_unlock(list->kl_lockarg); 179359290Sjlemon} 179459290Sjlemon 179559290Sjlemon/* 1796133741Sjmg * add a knote to a knlist 1797133741Sjmg */ 1798133741Sjmgvoid 1799133741Sjmgknlist_add(struct knlist *knl, struct knote *kn, int islocked) 1800133741Sjmg{ 1801147730Sssouhlal KNL_ASSERT_LOCK(knl, islocked); 1802133741Sjmg KQ_NOTOWNED(kn->kn_kq); 1803133741Sjmg KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == 1804133741Sjmg (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); 1805133741Sjmg if (!islocked) 1806147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 1807133741Sjmg SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); 1808133741Sjmg if (!islocked) 1809147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 1810133741Sjmg KQ_LOCK(kn->kn_kq); 1811133741Sjmg kn->kn_knlist = knl; 1812133741Sjmg kn->kn_status &= ~KN_DETACHED; 1813133741Sjmg KQ_UNLOCK(kn->kn_kq); 1814133741Sjmg} 1815133741Sjmg 1816133741Sjmgstatic void 1817133741Sjmgknlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) 1818133741Sjmg{ 1819133741Sjmg KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); 1820147730Sssouhlal KNL_ASSERT_LOCK(knl, knlislocked); 1821133741Sjmg mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? 
MA_OWNED : MA_NOTOWNED); 1822133741Sjmg if (!kqislocked) 1823133741Sjmg KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, 1824133741Sjmg ("knlist_remove called w/o knote being KN_INFLUX or already removed")); 1825133741Sjmg if (!knlislocked) 1826147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 1827133741Sjmg SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); 1828133741Sjmg kn->kn_knlist = NULL; 1829133741Sjmg if (!knlislocked) 1830147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 1831133741Sjmg if (!kqislocked) 1832133741Sjmg KQ_LOCK(kn->kn_kq); 1833133741Sjmg kn->kn_status |= KN_DETACHED; 1834133741Sjmg if (!kqislocked) 1835133741Sjmg KQ_UNLOCK(kn->kn_kq); 1836133741Sjmg} 1837133741Sjmg 1838133741Sjmg/* 183959290Sjlemon * remove all knotes from a specified klist 184059290Sjlemon */ 184159290Sjlemonvoid 1842133741Sjmgknlist_remove(struct knlist *knl, struct knote *kn, int islocked) 184359290Sjlemon{ 1844133741Sjmg 1845133741Sjmg knlist_remove_kq(knl, kn, islocked, 0); 1846133741Sjmg} 1847133741Sjmg 1848133741Sjmg/* 1849133741Sjmg * remove knote from a specified klist while in f_event handler. 
1850133741Sjmg */ 1851133741Sjmgvoid 1852133741Sjmgknlist_remove_inevent(struct knlist *knl, struct knote *kn) 1853133741Sjmg{ 1854133741Sjmg 1855133741Sjmg knlist_remove_kq(knl, kn, 1, 1856133741Sjmg (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK); 1857133741Sjmg} 1858133741Sjmg 1859133741Sjmgint 1860133741Sjmgknlist_empty(struct knlist *knl) 1861133741Sjmg{ 1862147730Sssouhlal KNL_ASSERT_LOCKED(knl); 1863133741Sjmg return SLIST_EMPTY(&knl->kl_list); 1864133741Sjmg} 1865133741Sjmg 1866133741Sjmgstatic struct mtx knlist_lock; 1867133741SjmgMTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", 1868133741Sjmg MTX_DEF); 1869147730Sssouhlalstatic void knlist_mtx_lock(void *arg); 1870147730Sssouhlalstatic void knlist_mtx_unlock(void *arg); 1871133741Sjmg 1872147730Sssouhlalstatic void 1873147730Sssouhlalknlist_mtx_lock(void *arg) 1874147730Sssouhlal{ 1875147730Sssouhlal mtx_lock((struct mtx *)arg); 1876147730Sssouhlal} 1877147730Sssouhlal 1878147730Sssouhlalstatic void 1879147730Sssouhlalknlist_mtx_unlock(void *arg) 1880147730Sssouhlal{ 1881147730Sssouhlal mtx_unlock((struct mtx *)arg); 1882147730Sssouhlal} 1883147730Sssouhlal 1884193951Skibstatic void 1885193951Skibknlist_mtx_assert_locked(void *arg) 1886147730Sssouhlal{ 1887193951Skib mtx_assert((struct mtx *)arg, MA_OWNED); 1888147730Sssouhlal} 1889147730Sssouhlal 1890193951Skibstatic void 1891193951Skibknlist_mtx_assert_unlocked(void *arg) 1892193951Skib{ 1893193951Skib mtx_assert((struct mtx *)arg, MA_NOTOWNED); 1894193951Skib} 1895193951Skib 1896133741Sjmgvoid 1897147730Sssouhlalknlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), 1898193951Skib void (*kl_unlock)(void *), 1899193951Skib void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)) 1900133741Sjmg{ 1901133741Sjmg 1902147730Sssouhlal if (lock == NULL) 1903147730Sssouhlal knl->kl_lockarg = &knlist_lock; 1904133741Sjmg else 1905147730Sssouhlal knl->kl_lockarg = lock; 1906133741Sjmg 1907147730Sssouhlal if 
(kl_lock == NULL) 1908147730Sssouhlal knl->kl_lock = knlist_mtx_lock; 1909147730Sssouhlal else 1910147730Sssouhlal knl->kl_lock = kl_lock; 1911157582Sjmg if (kl_unlock == NULL) 1912147730Sssouhlal knl->kl_unlock = knlist_mtx_unlock; 1913147730Sssouhlal else 1914147730Sssouhlal knl->kl_unlock = kl_unlock; 1915193951Skib if (kl_assert_locked == NULL) 1916193951Skib knl->kl_assert_locked = knlist_mtx_assert_locked; 1917147730Sssouhlal else 1918193951Skib knl->kl_assert_locked = kl_assert_locked; 1919193951Skib if (kl_assert_unlocked == NULL) 1920193951Skib knl->kl_assert_unlocked = knlist_mtx_assert_unlocked; 1921193951Skib else 1922193951Skib knl->kl_assert_unlocked = kl_assert_unlocked; 1923147730Sssouhlal 1924133741Sjmg SLIST_INIT(&knl->kl_list); 1925133741Sjmg} 1926133741Sjmg 1927133741Sjmgvoid 1928193951Skibknlist_init_mtx(struct knlist *knl, struct mtx *lock) 1929193951Skib{ 1930193951Skib 1931193951Skib knlist_init(knl, lock, NULL, NULL, NULL, NULL); 1932193951Skib} 1933193951Skib 1934193951Skibvoid 1935133741Sjmgknlist_destroy(struct knlist *knl) 1936133741Sjmg{ 1937133741Sjmg 1938133741Sjmg#ifdef INVARIANTS 1939133741Sjmg /* 1940133741Sjmg * if we run across this error, we need to find the offending 1941133741Sjmg * driver and have it call knlist_clear. 1942133741Sjmg */ 1943133741Sjmg if (!SLIST_EMPTY(&knl->kl_list)) 1944133741Sjmg printf("WARNING: destroying knlist w/ knotes on it!\n"); 1945133741Sjmg#endif 1946133741Sjmg 1947147730Sssouhlal knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; 1948133741Sjmg SLIST_INIT(&knl->kl_list); 1949133741Sjmg} 1950133741Sjmg 1951133741Sjmg/* 1952133741Sjmg * Even if we are locked, we may need to drop the lock to allow any influx 1953133741Sjmg * knotes time to "settle". 
1954133741Sjmg */ 1955133741Sjmgvoid 1956143776Sjmgknlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) 1957133741Sjmg{ 1958159171Spjd struct knote *kn, *kn2; 1959133741Sjmg struct kqueue *kq; 196059290Sjlemon 1961133741Sjmg if (islocked) 1962147730Sssouhlal KNL_ASSERT_LOCKED(knl); 1963133741Sjmg else { 1964147730Sssouhlal KNL_ASSERT_UNLOCKED(knl); 1965170029Srwatsonagain: /* need to reacquire lock since we have dropped it */ 1966147730Sssouhlal knl->kl_lock(knl->kl_lockarg); 196759290Sjlemon } 1968133741Sjmg 1969159171Spjd SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { 1970133741Sjmg kq = kn->kn_kq; 1971133741Sjmg KQ_LOCK(kq); 1972143776Sjmg if ((kn->kn_status & KN_INFLUX)) { 1973133741Sjmg KQ_UNLOCK(kq); 1974133741Sjmg continue; 1975133741Sjmg } 1976133741Sjmg knlist_remove_kq(knl, kn, 1, 1); 1977143776Sjmg if (killkn) { 1978143776Sjmg kn->kn_status |= KN_INFLUX | KN_DETACHED; 1979143776Sjmg KQ_UNLOCK(kq); 1980143776Sjmg knote_drop(kn, td); 1981143776Sjmg } else { 1982143776Sjmg /* Make sure cleared knotes disappear soon */ 1983143776Sjmg kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1984143776Sjmg KQ_UNLOCK(kq); 1985143776Sjmg } 1986133741Sjmg kq = NULL; 1987133741Sjmg } 1988133741Sjmg 1989133741Sjmg if (!SLIST_EMPTY(&knl->kl_list)) { 1990133741Sjmg /* there are still KN_INFLUX remaining */ 1991133741Sjmg kn = SLIST_FIRST(&knl->kl_list); 1992133741Sjmg kq = kn->kn_kq; 1993133741Sjmg KQ_LOCK(kq); 1994133741Sjmg KASSERT(kn->kn_status & KN_INFLUX, 1995133741Sjmg ("knote removed w/o list lock")); 1996147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 1997133741Sjmg kq->kq_state |= KQ_FLUXWAIT; 1998133741Sjmg msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); 1999133741Sjmg kq = NULL; 2000133741Sjmg goto again; 2001133741Sjmg } 2002133741Sjmg 2003133741Sjmg if (islocked) 2004147730Sssouhlal KNL_ASSERT_LOCKED(knl); 2005133741Sjmg else { 2006147730Sssouhlal knl->kl_unlock(knl->kl_lockarg); 2007147730Sssouhlal KNL_ASSERT_UNLOCKED(knl); 
2008133741Sjmg } 200959290Sjlemon} 201059290Sjlemon 201159290Sjlemon/* 2012168355Srwatson * Remove all knotes referencing a specified fd must be called with FILEDESC 2013168355Srwatson * lock. This prevents a race where a new fd comes along and occupies the 2014168355Srwatson * entry and we attach a knote to the fd. 201559290Sjlemon */ 201659290Sjlemonvoid 201783366Sjulianknote_fdclose(struct thread *td, int fd) 201859290Sjlemon{ 201983366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 2020133741Sjmg struct kqueue *kq; 2021133741Sjmg struct knote *kn; 2022133741Sjmg int influx; 202359290Sjlemon 2024168355Srwatson FILEDESC_XLOCK_ASSERT(fdp); 2025133741Sjmg 2026133741Sjmg /* 2027133741Sjmg * We shouldn't have to worry about new kevents appearing on fd 2028133741Sjmg * since filedesc is locked. 2029133741Sjmg */ 2030133741Sjmg SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) { 2031133741Sjmg KQ_LOCK(kq); 2032133741Sjmg 2033133741Sjmgagain: 2034133741Sjmg influx = 0; 2035133741Sjmg while (kq->kq_knlistsize > fd && 2036133741Sjmg (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { 2037133741Sjmg if (kn->kn_status & KN_INFLUX) { 2038133741Sjmg /* someone else might be waiting on our knote */ 2039133741Sjmg if (influx) 2040133741Sjmg wakeup(kq); 2041133741Sjmg kq->kq_state |= KQ_FLUXWAIT; 2042133741Sjmg msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); 2043133741Sjmg goto again; 2044133741Sjmg } 2045133741Sjmg kn->kn_status |= KN_INFLUX; 2046133741Sjmg KQ_UNLOCK(kq); 2047134859Sjmg if (!(kn->kn_status & KN_DETACHED)) 2048134859Sjmg kn->kn_fop->f_detach(kn); 2049133741Sjmg knote_drop(kn, td); 2050133741Sjmg influx = 1; 2051133741Sjmg KQ_LOCK(kq); 2052133741Sjmg } 2053133741Sjmg KQ_UNLOCK_FLUX(kq); 2054133741Sjmg } 205559290Sjlemon} 205659290Sjlemon 2057133741Sjmgstatic int 2058133741Sjmgknote_attach(struct knote *kn, struct kqueue *kq) 205959290Sjlemon{ 2060133741Sjmg struct klist *list; 206159290Sjlemon 2062133741Sjmg KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked 
INFLUX")); 2063133741Sjmg KQ_OWNED(kq); 206489306Salfred 2065133741Sjmg if (kn->kn_fop->f_isfd) { 2066133741Sjmg if (kn->kn_id >= kq->kq_knlistsize) 2067133741Sjmg return ENOMEM; 2068133741Sjmg list = &kq->kq_knlist[kn->kn_id]; 2069133741Sjmg } else { 2070133741Sjmg if (kq->kq_knhash == NULL) 2071133741Sjmg return ENOMEM; 2072133741Sjmg list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 207359290Sjlemon } 207459290Sjlemon 207559290Sjlemon SLIST_INSERT_HEAD(list, kn, kn_link); 2076133741Sjmg 2077133741Sjmg return 0; 207859290Sjlemon} 207959290Sjlemon 208059290Sjlemon/* 2081151260Sambrisko * knote must already have been detached using the f_detach method. 2082133741Sjmg * no lock need to be held, it is assumed that the KN_INFLUX flag is set 2083133741Sjmg * to prevent other removal. 208459290Sjlemon */ 208559290Sjlemonstatic void 208683366Sjulianknote_drop(struct knote *kn, struct thread *td) 208759290Sjlemon{ 2088133741Sjmg struct kqueue *kq; 208959290Sjlemon struct klist *list; 209059290Sjlemon 2091133741Sjmg kq = kn->kn_kq; 2092133741Sjmg 2093133741Sjmg KQ_NOTOWNED(kq); 2094133741Sjmg KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX, 2095133741Sjmg ("knote_drop called without KN_INFLUX set in kn_status")); 2096133741Sjmg 2097133741Sjmg KQ_LOCK(kq); 209859290Sjlemon if (kn->kn_fop->f_isfd) 2099133741Sjmg list = &kq->kq_knlist[kn->kn_id]; 210059290Sjlemon else 2101133741Sjmg list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 210259290Sjlemon 2103151260Sambrisko if (!SLIST_EMPTY(list)) 2104151260Sambrisko SLIST_REMOVE(list, kn, knote, kn_link); 210559290Sjlemon if (kn->kn_status & KN_QUEUED) 210659290Sjlemon knote_dequeue(kn); 2107133741Sjmg KQ_UNLOCK_FLUX(kq); 2108133741Sjmg 2109133741Sjmg if (kn->kn_fop->f_isfd) { 2110133741Sjmg fdrop(kn->kn_fp, td); 2111133741Sjmg kn->kn_fp = NULL; 2112133741Sjmg } 2113133741Sjmg kqueue_fo_release(kn->kn_kevent.filter); 2114133741Sjmg kn->kn_fop = NULL; 211559290Sjlemon knote_free(kn); 211659290Sjlemon} 
211759290Sjlemon 211859290Sjlemonstatic void 211959290Sjlemonknote_enqueue(struct knote *kn) 212059290Sjlemon{ 212159290Sjlemon struct kqueue *kq = kn->kn_kq; 212259290Sjlemon 2123133741Sjmg KQ_OWNED(kn->kn_kq); 212459997Sjlemon KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); 212559997Sjlemon 2126133590Srwatson TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 212759290Sjlemon kn->kn_status |= KN_QUEUED; 212859290Sjlemon kq->kq_count++; 212959290Sjlemon kqueue_wakeup(kq); 213059290Sjlemon} 213159290Sjlemon 213259290Sjlemonstatic void 213359290Sjlemonknote_dequeue(struct knote *kn) 213459290Sjlemon{ 213559290Sjlemon struct kqueue *kq = kn->kn_kq; 213659290Sjlemon 2137133741Sjmg KQ_OWNED(kn->kn_kq); 213859997Sjlemon KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); 213959997Sjlemon 2140133590Srwatson TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 214159290Sjlemon kn->kn_status &= ~KN_QUEUED; 214259290Sjlemon kq->kq_count--; 214359290Sjlemon} 214459290Sjlemon 214559290Sjlemonstatic void 214659290Sjlemonknote_init(void) 214759290Sjlemon{ 2148133741Sjmg 214992751Sjeff knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, 215092751Sjeff NULL, NULL, UMA_ALIGN_PTR, 0); 215159290Sjlemon} 2152177253SrwatsonSYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); 215359290Sjlemon 215459290Sjlemonstatic struct knote * 2155133741Sjmgknote_alloc(int waitok) 215659290Sjlemon{ 2157133741Sjmg return ((struct knote *)uma_zalloc(knote_zone, 2158133741Sjmg (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO)); 215959290Sjlemon} 216059290Sjlemon 216159290Sjlemonstatic void 216259290Sjlemonknote_free(struct knote *kn) 216359290Sjlemon{ 2164133741Sjmg if (kn != NULL) 2165133741Sjmg uma_zfree(knote_zone, kn); 216659290Sjlemon} 2167162594Sjmg 2168162594Sjmg/* 2169162594Sjmg * Register the kev w/ the kq specified by fd. 
2170162594Sjmg */ 2171162594Sjmgint 2172162594Sjmgkqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) 2173162594Sjmg{ 2174162594Sjmg struct kqueue *kq; 2175162594Sjmg struct file *fp; 2176162594Sjmg int error; 2177162594Sjmg 2178162594Sjmg if ((error = fget(td, fd, &fp)) != 0) 2179162594Sjmg return (error); 2180170029Srwatson if ((error = kqueue_acquire(fp, &kq)) != 0) 2181170029Srwatson goto noacquire; 2182162594Sjmg 2183162594Sjmg error = kqueue_register(kq, kev, td, waitok); 2184162594Sjmg 2185162594Sjmg kqueue_release(kq, 0); 2186162594Sjmg 2187170029Srwatsonnoacquire: 2188162608Sjmg fdrop(fp, td); 2189162594Sjmg 2190162594Sjmg return error; 2191162594Sjmg} 2192