155714Skris/*- 255714Skris * Copyright (c) 2011 NetApp, Inc. 355714Skris * All rights reserved. 455714Skris * 555714Skris * Redistribution and use in source and binary forms, with or without 655714Skris * modification, are permitted provided that the following conditions 755714Skris * are met: 8280297Sjkim * 1. Redistributions of source code must retain the above copyright 955714Skris * notice, this list of conditions and the following disclaimer. 1055714Skris * 2. Redistributions in binary form must reproduce the above copyright 1155714Skris * notice, this list of conditions and the following disclaimer in the 1255714Skris * documentation and/or other materials provided with the distribution. 1355714Skris * 1455714Skris * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15280297Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1655714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1755714Skris * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 1855714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1955714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2055714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2155714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22280297Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2355714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2455714Skris * SUCH DAMAGE. 2555714Skris * 2655714Skris * $FreeBSD: releng/11.0/usr.sbin/bhyve/mevent.c 265365 2014-05-05 16:30:03Z neel $ 2755714Skris */ 2855714Skris 2955714Skris/* 3055714Skris * Micro event library for FreeBSD, designed for a single i/o thread 3155714Skris * using kqueue, and having events be persistent by default. 3255714Skris */ 3355714Skris 3455714Skris#include <sys/cdefs.h> 3555714Skris__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/mevent.c 265365 2014-05-05 16:30:03Z neel $"); 3655714Skris 37280297Sjkim#include <assert.h> 3855714Skris#include <errno.h> 3955714Skris#include <stdlib.h> 40280297Sjkim#include <stdio.h> 4155714Skris#include <string.h> 4255714Skris#include <unistd.h> 4355714Skris 4455714Skris#include <sys/types.h> 4555714Skris#include <sys/event.h> 4655714Skris#include <sys/time.h> 4755714Skris 4855714Skris#include <pthread.h> 4955714Skris#include <pthread_np.h> 5055714Skris 5155714Skris#include "mevent.h" 52280297Sjkim 5355714Skris#define MEVENT_MAX 64 5455714Skris 5555714Skris#define MEV_ADD 1 5655714Skris#define MEV_ENABLE 2 5755714Skris#define MEV_DISABLE 3 5855714Skris#define MEV_DEL_PENDING 4 5955714Skris 60298998Sjkimextern char *vmname; 6155714Skris 6255714Skrisstatic pthread_t mevent_tid; 6355714Skrisstatic int mevent_timid = 43; 64291719Sjkimstatic int mevent_pipefd[2]; 6555714Skrisstatic pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; 66280297Sjkim 6755714Skrisstruct mevent { 68280297Sjkim void (*me_func)(int, enum ev_type, void *); 69280297Sjkim#define me_msecs me_fd 70280297Sjkim int me_fd; 71280297Sjkim int me_timid; 72280297Sjkim enum ev_type me_type; 7355714Skris void *me_param; 74280297Sjkim int me_cq; 7555714Skris int me_state; 7655714Skris int me_closefd; 77280297Sjkim LIST_ENTRY(mevent) me_list; 78280297Sjkim}; 7955714Skris 8055714Skrisstatic LIST_HEAD(listhead, mevent) global_head, change_head; 8155714Skris 8255714Skrisstatic void 8355714Skrismevent_qlock(void) 8455714Skris{ 8555714Skris pthread_mutex_lock(&mevent_lmutex); 8655714Skris} 8755714Skris 8855714Skrisstatic void 89280297Sjkimmevent_qunlock(void) 9055714Skris{ 9155714Skris pthread_mutex_unlock(&mevent_lmutex); 92280297Sjkim} 93280297Sjkim 9455714Skrisstatic void 9555714Skrismevent_pipe_read(int fd, enum ev_type type, void *param) 9655714Skris{ 9755714Skris char buf[MEVENT_MAX]; 9855714Skris int status; 9955714Skris 100280297Sjkim /* 101280297Sjkim * Drain the pipe read side. The fd is non-blocking so this is 102280297Sjkim * safe to do. 103280297Sjkim */ 104280297Sjkim do { 105280297Sjkim status = read(fd, buf, sizeof(buf)); 106291719Sjkim } while (status == MEVENT_MAX); 10755714Skris} 108280297Sjkim 109280297Sjkimstatic void 110280297Sjkimmevent_notify(void) 111280297Sjkim{ 112280297Sjkim char c; 113280297Sjkim 114280297Sjkim /* 115280297Sjkim * If calling from outside the i/o thread, write a byte on the 116280297Sjkim * pipe to force the i/o thread to exit the blocking kevent call. 117280297Sjkim */ 118280297Sjkim if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { 119280297Sjkim write(mevent_pipefd[1], &c, 1); 120280297Sjkim } 121280297Sjkim} 122280297Sjkim 123280297Sjkimstatic int 124280297Sjkimmevent_kq_filter(struct mevent *mevp) 125280297Sjkim{ 12655714Skris int retval; 127291719Sjkim 128291719Sjkim retval = 0; 129291719Sjkim 130291719Sjkim if (mevp->me_type == EVF_READ) 131291719Sjkim retval = EVFILT_READ; 132291719Sjkim 133291719Sjkim if (mevp->me_type == EVF_WRITE) 134291719Sjkim retval = EVFILT_WRITE; 135291719Sjkim 136291719Sjkim if (mevp->me_type == EVF_TIMER) 137291719Sjkim retval = EVFILT_TIMER; 138291719Sjkim 139291719Sjkim if (mevp->me_type == EVF_SIGNAL) 140291719Sjkim retval = EVFILT_SIGNAL; 141291719Sjkim 142291719Sjkim return (retval); 143291719Sjkim} 14455714Skris 145280297Sjkimstatic int 146280297Sjkimmevent_kq_flags(struct mevent *mevp) 147280297Sjkim{ 148280297Sjkim int ret; 149280297Sjkim 15055714Skris switch (mevp->me_state) { 15155714Skris case MEV_ADD: 152280297Sjkim ret = EV_ADD; /* implicitly enabled */ 153280297Sjkim break; 154280297Sjkim case MEV_ENABLE: 155298998Sjkim ret = EV_ENABLE; 15655714Skris break; 157280297Sjkim case MEV_DISABLE: 158284283Sjkim ret = EV_DISABLE; 159280297Sjkim break; 160280297Sjkim case MEV_DEL_PENDING: 161298998Sjkim ret = EV_DELETE; 162280297Sjkim break; 163280297Sjkim default: 164280297Sjkim assert(0); 165280297Sjkim break; 166280297Sjkim } 167280297Sjkim 168280297Sjkim return (ret); 169280297Sjkim} 170280297Sjkim 171280297Sjkimstatic int 172280297Sjkimmevent_kq_fflags(struct mevent *mevp) 173280297Sjkim{ 174280297Sjkim /* XXX nothing yet, perhaps EV_EOF for reads ? */ 175280297Sjkim return (0); 176280297Sjkim} 177280297Sjkim 178298998Sjkimstatic int 179280297Sjkimmevent_build(int mfd, struct kevent *kev) 180280297Sjkim{ 181280297Sjkim struct mevent *mevp, *tmpp; 182280297Sjkim int i; 183280297Sjkim 184280297Sjkim i = 0; 185280297Sjkim 186280297Sjkim mevent_qlock(); 187298998Sjkim 188298998Sjkim LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { 189298998Sjkim if (mevp->me_closefd) { 190298998Sjkim /* 191298998Sjkim * A close of the file descriptor will remove the 192280297Sjkim * event 193280297Sjkim */ 194280297Sjkim close(mevp->me_fd); 195280297Sjkim } else { 196280297Sjkim if (mevp->me_type == EVF_TIMER) { 19755714Skris kev[i].ident = mevp->me_timid; 19855714Skris kev[i].data = mevp->me_msecs; 199280297Sjkim } else { 200280297Sjkim kev[i].ident = mevp->me_fd; 20155714Skris kev[i].data = 0; 202280297Sjkim } 203280297Sjkim kev[i].filter = mevent_kq_filter(mevp); 204280297Sjkim kev[i].flags = mevent_kq_flags(mevp); 205280297Sjkim kev[i].fflags = mevent_kq_fflags(mevp); 206280297Sjkim kev[i].udata = mevp; 207280297Sjkim i++; 208280297Sjkim } 209280297Sjkim 21055714Skris mevp->me_cq = 0; 21159191Skris LIST_REMOVE(mevp, me_list); 212280297Sjkim 213280297Sjkim if (mevp->me_state == MEV_DEL_PENDING) { 214280297Sjkim free(mevp); 21555714Skris } else { 216280297Sjkim LIST_INSERT_HEAD(&global_head, mevp, me_list); 217280297Sjkim } 218280297Sjkim 219280297Sjkim assert(i < MEVENT_MAX); 220280297Sjkim } 221280297Sjkim 222280297Sjkim mevent_qunlock(); 223280297Sjkim 224280297Sjkim return (i); 225280297Sjkim} 226280297Sjkim 227280297Sjkimstatic void 22855714Skrismevent_handle(struct kevent *kev, int numev) 229280297Sjkim{ 230280297Sjkim struct mevent *mevp; 231280297Sjkim int i; 232280297Sjkim 233280297Sjkim for (i = 0; i < numev; i++) { 234280297Sjkim mevp = kev[i].udata; 235280297Sjkim 236280297Sjkim /* XXX check for EV_ERROR ? */ 23755714Skris 238280297Sjkim (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); 239280297Sjkim } 240280297Sjkim} 24155714Skris 24255714Skrisstruct mevent * 243280297Sjkimmevent_add(int tfd, enum ev_type type, 244291719Sjkim void (*func)(int, enum ev_type, void *), void *param) 245280297Sjkim{ 246291719Sjkim struct mevent *lp, *mevp; 247280297Sjkim 248280297Sjkim if (tfd < 0 || func == NULL) { 249280297Sjkim return (NULL); 25055714Skris } 251280297Sjkim 252280297Sjkim mevp = NULL; 25355714Skris 25455714Skris mevent_qlock(); 255291719Sjkim 256291719Sjkim /* 257291719Sjkim * Verify that the fd/type tuple is not present in any list 258291719Sjkim */ 259291719Sjkim LIST_FOREACH(lp, &global_head, me_list) { 260291719Sjkim if (type != EVF_TIMER && lp->me_fd == tfd && 261291719Sjkim lp->me_type == type) { 262291719Sjkim goto exit; 263291719Sjkim } 264291719Sjkim } 265291719Sjkim 266291719Sjkim LIST_FOREACH(lp, &change_head, me_list) { 267291719Sjkim if (type != EVF_TIMER && lp->me_fd == tfd && 268291719Sjkim lp->me_type == type) { 269291719Sjkim goto exit; 27055714Skris } 27155714Skris } 272280297Sjkim 273280297Sjkim /* 274291719Sjkim * Allocate an entry, populate it, and add it to the change list. 275280297Sjkim */ 27655714Skris mevp = calloc(1, sizeof(struct mevent)); 277280297Sjkim if (mevp == NULL) { 278280297Sjkim goto exit; 27955714Skris } 280291719Sjkim 281291719Sjkim if (type == EVF_TIMER) { 282291719Sjkim mevp->me_msecs = tfd; 283291719Sjkim mevp->me_timid = mevent_timid++; 284291719Sjkim } else 285291719Sjkim mevp->me_fd = tfd; 286291719Sjkim mevp->me_type = type; 287291719Sjkim mevp->me_func = func; 288280297Sjkim mevp->me_param = param; 289280297Sjkim 290280297Sjkim LIST_INSERT_HEAD(&change_head, mevp, me_list); 29155714Skris mevp->me_cq = 1; 292280297Sjkim mevp->me_state = MEV_ADD; 293280297Sjkim mevent_notify(); 294280297Sjkim 295291719Sjkimexit: 296280297Sjkim mevent_qunlock(); 297280297Sjkim 298280297Sjkim return (mevp); 29955714Skris} 300280297Sjkim 301280297Sjkimstatic int 302291719Sjkimmevent_update(struct mevent *evp, int newstate) 303291719Sjkim{ 304291719Sjkim /* 305291719Sjkim * It's not possible to enable/disable a deleted event 306280297Sjkim */ 307100928Snectar if (evp->me_state == MEV_DEL_PENDING) 308291719Sjkim return (EINVAL); 309291719Sjkim 310291719Sjkim /* 311280297Sjkim * No update needed if state isn't changing 31255714Skris */ 313291719Sjkim if (evp->me_state == newstate) 314291719Sjkim return (0); 315291719Sjkim 316280297Sjkim mevent_qlock(); 31755714Skris 318291719Sjkim evp->me_state = newstate; 319291719Sjkim 320291719Sjkim /* 321291719Sjkim * Place the entry onto the changed list if not already there. 322291719Sjkim */ 323291719Sjkim if (evp->me_cq == 0) { 324291719Sjkim evp->me_cq = 1; 325291719Sjkim LIST_REMOVE(evp, me_list); 326291719Sjkim LIST_INSERT_HEAD(&change_head, evp, me_list); 327291719Sjkim mevent_notify(); 328291719Sjkim } 329291719Sjkim 330291719Sjkim mevent_qunlock(); 331280297Sjkim 33255714Skris return (0); 333291719Sjkim} 334291719Sjkim 335291719Sjkimint 336291719Sjkimmevent_enable(struct mevent *evp) 337291719Sjkim{ 338291719Sjkim 339280297Sjkim return (mevent_update(evp, MEV_ENABLE)); 340291719Sjkim} 341291719Sjkim 342291719Sjkimint 343291719Sjkimmevent_disable(struct mevent *evp) 34455714Skris{ 345291719Sjkim 346291719Sjkim return (mevent_update(evp, MEV_DISABLE)); 347291719Sjkim} 348291719Sjkim 349291719Sjkimstatic int 350291719Sjkimmevent_delete_event(struct mevent *evp, int closefd) 351291719Sjkim{ 352291719Sjkim mevent_qlock(); 353291719Sjkim 354291719Sjkim /* 355291719Sjkim * Place the entry onto the changed list if not already there, and 356291719Sjkim * mark as to be deleted. 357280297Sjkim */ 358280297Sjkim if (evp->me_cq == 0) { 359291719Sjkim evp->me_cq = 1; 360291719Sjkim LIST_REMOVE(evp, me_list); 361291719Sjkim LIST_INSERT_HEAD(&change_head, evp, me_list); 362291719Sjkim mevent_notify(); 363291719Sjkim } 364280297Sjkim evp->me_state = MEV_DEL_PENDING; 365280297Sjkim 366291719Sjkim if (closefd) 367291719Sjkim evp->me_closefd = 1; 368291719Sjkim 369291719Sjkim mevent_qunlock(); 370280297Sjkim 371280297Sjkim return (0); 372280297Sjkim} 373280297Sjkim 374280297Sjkimint 37559191Skrismevent_delete(struct mevent *evp) 376280297Sjkim{ 377280297Sjkim 378280297Sjkim return (mevent_delete_event(evp, 0)); 37955714Skris} 380280297Sjkim 381280297Sjkimint 382280297Sjkimmevent_delete_close(struct mevent *evp) 383280297Sjkim{ 384280297Sjkim 38555714Skris return (mevent_delete_event(evp, 1)); 386280297Sjkim} 387280297Sjkim 388280297Sjkimstatic void 389280297Sjkimmevent_set_name(void) 390280297Sjkim{ 391280297Sjkim 39255714Skris pthread_set_name_np(mevent_tid, "mevent"); 393280297Sjkim} 394280297Sjkim 39555714Skrisvoid 396280297Sjkimmevent_dispatch(void) 397280297Sjkim{ 398280297Sjkim struct kevent changelist[MEVENT_MAX]; 399280297Sjkim struct kevent eventlist[MEVENT_MAX]; 400280297Sjkim struct mevent *pipev; 401280297Sjkim int mfd; 402280297Sjkim int numev; 403280297Sjkim int ret; 404280297Sjkim 405280297Sjkim mevent_tid = pthread_self(); 406280297Sjkim mevent_set_name(); 407280297Sjkim 408280297Sjkim mfd = kqueue(); 409280297Sjkim assert(mfd > 0); 410280297Sjkim 411280297Sjkim /* 412280297Sjkim * Open the pipe that will be used for other threads to force 41355714Skris * the blocking kqueue call to exit by writing to it. Set the 41455714Skris * descriptor to non-blocking. 415280297Sjkim */ 416280297Sjkim ret = pipe(mevent_pipefd); 41755714Skris if (ret < 0) { 418280297Sjkim perror("pipe"); 419280297Sjkim exit(0); 420280297Sjkim } 421280297Sjkim 422280297Sjkim /* 423280297Sjkim * Add internal event handler for the pipe write fd 424280297Sjkim */ 425280297Sjkim pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); 426280297Sjkim assert(pipev != NULL); 427280297Sjkim 428280297Sjkim for (;;) { 42955714Skris /* 43055714Skris * Build changelist if required. 43155714Skris * XXX the changelist can be put into the blocking call 432280297Sjkim * to eliminate the extra syscall. Currently better for 433280297Sjkim * debug. 43455714Skris */ 435280297Sjkim numev = mevent_build(mfd, changelist); 436280297Sjkim if (numev) { 437280297Sjkim ret = kevent(mfd, changelist, numev, NULL, 0, NULL); 438280297Sjkim if (ret == -1) { 439280297Sjkim perror("Error return from kevent change"); 440280297Sjkim } 441280297Sjkim } 442280297Sjkim 44355714Skris /* 444280297Sjkim * Block awaiting events 445280297Sjkim */ 446280297Sjkim ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); 447280297Sjkim if (ret == -1 && errno != EINTR) { 448280297Sjkim perror("Error return from kevent monitor"); 449280297Sjkim } 450280297Sjkim 451280297Sjkim /* 452280297Sjkim * Handle reported events 453280297Sjkim */ 454280297Sjkim mevent_handle(eventlist, ret); 455280297Sjkim } 456280297Sjkim} 457280297Sjkim