xenevt.c revision 1.3
/*	$NetBSD: xenevt.c,v 1.3 2005/04/11 12:10:31 yamt Exp $	*/

/*
 * Copyright (c) 2005 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * 31 */ 32 33#include <sys/param.h> 34#include <sys/kernel.h> 35#include <sys/malloc.h> 36#include <sys/systm.h> 37#include <sys/device.h> 38#include <sys/file.h> 39#include <sys/filedesc.h> 40#include <sys/poll.h> 41#include <sys/select.h> 42#include <sys/proc.h> 43#include <sys/conf.h> 44 45#include <machine/hypervisor.h> 46#include <machine/xenio.h> 47#include <machine/xen.h> 48 49/* 50 * Interface between the event channel and userland. 51 * Each process with a xenevt device instance open can regiter events it 52 * wants to receive. It will get pending events by read(), eventually blocking 53 * until some event is available. Pending events are ack'd by a bitmask 54 * write()en to the device. Some special operations (such as events binding) 55 * are done though ioctl(). 56 * Processes get a device instance by opening a cloning device. 57 */ 58 59void xenevtattach(int); 60static int xenevt_read(struct file *, off_t *, struct uio *, 61 struct ucred *, int); 62static int xenevt_write(struct file *, off_t *, struct uio *, 63 struct ucred *, int); 64static int xenevt_ioctl(struct file *, u_long, void *, struct proc *); 65static int xenevt_poll(struct file *, int, struct proc *); 66static int xenevt_close(struct file *, struct proc *); 67/* static int xenevt_kqfilter(struct file *, struct knote *); */ 68 69static const struct fileops xenevt_fileops = { 70 xenevt_read, 71 xenevt_write, 72 xenevt_ioctl, 73 fnullop_fcntl, 74 xenevt_poll, 75 fbadop_stat, 76 xenevt_close, 77 /* xenevt_kqfilter */ fnullop_kqfilter 78}; 79 80dev_type_open(xenevtopen); 81const struct cdevsw xenevt_cdevsw = { 82 xenevtopen, noclose, noread, nowrite, noioctl, 83 nostop, notty, nopoll, nommap, nokqfilter, 84}; 85 86/* per-instance datas */ 87#define XENEVT_RING_SIZE 2048 88#define XENEVT_RING_MASK 2047 89struct xenevt_d { 90 struct simplelock lock; 91 STAILQ_ENTRY(xenevt_d) pendingq; 92 boolean_t pending; 93 u_int16_t ring[2048]; 94 u_int ring_read; /* pointer of the reader */ 95 u_int 
ring_write; /* pointer of the writer */ 96 u_int flags; 97#define XENEVT_F_OVERFLOW 0x01 /* ring overflow */ 98 struct selinfo sel; /* used by poll */ 99}; 100 101/* event -> user device mapping */ 102static struct xenevt_d *devevent[NR_EVENT_CHANNELS]; 103 104/* pending events */ 105struct simplelock devevent_pending_lock = SIMPLELOCK_INITIALIZER; 106STAILQ_HEAD(, xenevt_d) devevent_pending = 107 STAILQ_HEAD_INITIALIZER(devevent_pending); 108 109static void xenevt_donotify(struct xenevt_d *); 110static void xenevt_record(struct xenevt_d *, int); 111 112/* called at boot time */ 113void 114xenevtattach(int n) 115{ 116 memset(devevent, 0, sizeof(devevent)); 117} 118 119/* event callback */ 120void 121xenevt_event(int port) 122{ 123 struct xenevt_d *d; 124 struct cpu_info *ci; 125 126 hypervisor_mask_event(port); 127 hypervisor_clear_event(port); 128 d = devevent[port]; 129 if (d != NULL) { 130 xenevt_record(d, port); 131 132 if (d->pending) { 133 return; 134 } 135 136 ci = curcpu(); 137 138 if (ci->ci_ilevel < IPL_SOFTXENEVT) { 139 /* fast and common path */ 140 ci->ci_isources[SIR_XENEVT]->is_evcnt.ev_count++; 141 xenevt_donotify(d); 142 } else { 143 simple_lock(&devevent_pending_lock); 144 STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq); 145 simple_unlock(&devevent_pending_lock); 146 d->pending = TRUE; 147 softintr(SIR_XENEVT); 148 } 149 } 150} 151 152void 153xenevt_notify() 154{ 155 156 cli(); 157 simple_lock(&devevent_pending_lock); 158 while (/* CONSTCOND */ 1) { 159 struct xenevt_d *d; 160 161 d = STAILQ_FIRST(&devevent_pending); 162 if (d == NULL) { 163 break; 164 } 165 STAILQ_REMOVE_HEAD(&devevent_pending, pendingq); 166 simple_unlock(&devevent_pending_lock); 167 sti(); 168 169 d->pending = FALSE; 170 xenevt_donotify(d); 171 172 cli(); 173 simple_lock(&devevent_pending_lock); 174 } 175 simple_unlock(&devevent_pending_lock); 176 sti(); 177} 178 179static void 180xenevt_donotify(struct xenevt_d *d) 181{ 182 int s; 183 184 s = splsoftxenevt(); 185 
simple_lock(&d->lock); 186 187 selnotify(&d->sel, 1); 188 wakeup(&d->ring_read); 189 190 simple_unlock(&d->lock); 191 splx(s); 192} 193 194static void 195xenevt_record(struct xenevt_d *d, int port) 196{ 197 198 /* 199 * This algorithm overflows for one less slot than available. 200 * Not really an issue, and the correct algorithm would be more 201 * complex 202 */ 203 204 if (d->ring_read == 205 ((d->ring_write + 1) & XENEVT_RING_MASK)) { 206 d->flags |= XENEVT_F_OVERFLOW; 207 printf("xenevt_event: ring overflow port %d\n", port); 208 } else { 209 d->ring[d->ring_write] = port; 210 d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK; 211 } 212} 213 214/* open the xenevt device; this is where we clone */ 215int 216xenevtopen(dev_t dev, int flags, int mode, struct proc *p) 217{ 218 struct xenevt_d *d; 219 struct file *fp; 220 int fd, error; 221 222 /* falloc() will use the descriptor for us. */ 223 if ((error = falloc(p, &fp, &fd)) != 0) 224 return error; 225 226 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_ZERO); 227 simple_lock_init(&d->lock); 228 229 return fdclone(p, fp, fd, flags, &xenevt_fileops, d); 230} 231 232static int 233xenevt_close(struct file *fp, struct proc *p) 234{ 235 struct xenevt_d *d = fp->f_data; 236 int i; 237 238 for (i = 0; i < NR_EVENT_CHANNELS; i++ ) { 239 if (devevent[i] == d) { 240 hypervisor_mask_event(i); 241 devevent[i] = NULL; 242 } 243 } 244 free(d, M_DEVBUF); 245 fp->f_data = NULL; 246 247 return (0); 248} 249 250static int 251xenevt_read(struct file *fp, off_t *offp, struct uio *uio, 252 struct ucred *cred, int flags) 253{ 254 struct xenevt_d *d = fp->f_data; 255 int error; 256 size_t len, uio_len; 257 int ring_read; 258 int ring_write; 259 int s; 260 261 error = 0; 262 s = splsoftxenevt(); 263 simple_lock(&d->lock); 264 while (error == 0) { 265 ring_read = d->ring_read; 266 ring_write = d->ring_write; 267 if (ring_read != ring_write) { 268 break; 269 } 270 if (d->flags & XENEVT_F_OVERFLOW) { 271 break; 272 } 273 274 /* nothing 
to read */ 275 if (fp->f_flag & FNONBLOCK) { 276 error = EAGAIN; 277 } else { 278 error = ltsleep(&d->ring_read, PRIBIO | PCATCH, 279 "xenevt", 0, &d->lock); 280 } 281 } 282 if (error == 0 && (d->flags & XENEVT_F_OVERFLOW)) { 283 error = EFBIG; 284 } 285 simple_unlock(&d->lock); 286 splx(s); 287 288 if (error) { 289 return error; 290 } 291 292 uio_len = uio->uio_resid >> 1; 293 if (ring_read <= ring_write) 294 len = ring_write - ring_read; 295 else 296 len = XENEVT_RING_SIZE - ring_read; 297 if (len > uio_len) 298 len = uio_len; 299 error = uiomove(&d->ring[ring_read], len << 1, uio); 300 if (error) 301 return error; 302 ring_read = (ring_read + len) & XENEVT_RING_MASK; 303 uio_len = uio->uio_resid >> 1; 304 if (uio_len == 0) 305 goto done; 306 /* ring wrapped, read the second part */ 307 len = ring_write - ring_read; 308 if (len > uio_len) 309 len = uio_len; 310 error = uiomove(&d->ring[ring_read], len << 1, uio); 311 if (error) 312 return error; 313 ring_read = (ring_read + len) & XENEVT_RING_MASK; 314 315done: 316 s = splsoftxenevt(); 317 simple_lock(&d->lock); 318 d->ring_read = ring_read; 319 simple_unlock(&d->lock); 320 splx(s); 321 322 return 0; 323} 324 325static int 326xenevt_write(struct file *fp, off_t *offp, struct uio *uio, 327 struct ucred *cred, int flags) 328{ 329 struct xenevt_d *d = fp->f_data; 330 u_int16_t chans[NR_EVENT_CHANNELS]; 331 int i, nentries, error; 332 333 if (uio->uio_resid == 0) 334 return (0); 335 nentries = uio->uio_resid / sizeof(u_int16_t); 336 if (nentries > NR_EVENT_CHANNELS) 337 return EMSGSIZE; 338 error = uiomove(chans, uio->uio_resid, uio); 339 if (error) 340 return error; 341 for (i = 0; i < nentries; i++) { 342 if (chans[i] < NR_EVENT_CHANNELS && 343 devevent[chans[i]] == d) { 344 hypervisor_unmask_event(chans[i]); 345 } 346 } 347 return 0; 348} 349 350static int 351xenevt_ioctl(struct file *fp, u_long cmd, void *addr, struct proc *p) 352{ 353 struct xenevt_d *d = fp->f_data; 354 u_int *arg = addr; 355 356 switch(cmd) { 
357 case EVTCHN_RESET: 358 d->ring_read = d->ring_write = 0; 359 d->flags = 0; 360 break; 361 case EVTCHN_BIND: 362 if (*arg > NR_EVENT_CHANNELS) 363 return EINVAL; 364 if (devevent[*arg] != NULL) 365 return EISCONN; 366 devevent[*arg] = d; 367 hypervisor_unmask_event(*arg); 368 break; 369 case EVTCHN_UNBIND: 370 if (*arg > NR_EVENT_CHANNELS) 371 return EINVAL; 372 if (devevent[*arg] != d) 373 return ENOTCONN; 374 devevent[*arg] = NULL; 375 hypervisor_mask_event(*arg); 376 break; 377 case FIONBIO: 378 break; 379 default: 380 return EINVAL; 381 } 382 return 0; 383} 384 385/* 386 * Support for poll() system call 387 * 388 * Return true if the specific operation will not block indefinitely. 389 */ 390 391static int 392xenevt_poll(struct file *fp, int events, struct proc *p) 393{ 394 struct xenevt_d *d = fp->f_data; 395 int revents = events & (POLLOUT | POLLWRNORM); /* we can always write */ 396 397 if (events & (POLLIN | POLLRDNORM)) { 398 if (d->ring_read != d->ring_write) { 399 revents |= events & (POLLIN | POLLRDNORM); 400 } else { 401 /* Record that someone is waiting */ 402 selrecord(p, &d->sel); 403 } 404 } 405 return (revents); 406} 407