xenevt.c revision 1.3
1/*      $NetBSD: xenevt.c,v 1.3 2005/04/11 12:10:31 yamt Exp $      */
2
3/*
4 * Copyright (c) 2005 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 *    must display the following acknowledgement:
16 *      This product includes software developed by Manuel Bouyer.
17 * 4. The name of the author may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 */
32
33#include <sys/param.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/systm.h>
37#include <sys/device.h>
38#include <sys/file.h>
39#include <sys/filedesc.h>
40#include <sys/poll.h>
41#include <sys/select.h>
42#include <sys/proc.h>
43#include <sys/conf.h>
44
45#include <machine/hypervisor.h>
46#include <machine/xenio.h>
47#include <machine/xen.h>
48
/*
 * Interface between the event channel and userland.
 * Each process with a xenevt device instance open can register events it
 * wants to receive. It will get pending events by read(), eventually blocking
 * until some event is available. Pending events are ack'd by a bitmask
 * write()en to the device. Some special operations (such as events binding)
 * are done through ioctl().
 * Processes get a device instance by opening a cloning device.
 */
58
/* attach hook, called once from autoconf at boot */
void		xenevtattach(int);
/* fileops backing each cloned per-process xenevt instance */
static int	xenevt_read(struct file *, off_t *, struct uio *,
    struct ucred *, int);
static int	xenevt_write(struct file *, off_t *, struct uio *,
    struct ucred *, int);
static int	xenevt_ioctl(struct file *, u_long, void *, struct proc *);
static int	xenevt_poll(struct file *, int, struct proc *);
static int	xenevt_close(struct file *, struct proc *);
/* static int	xenevt_kqfilter(struct file *, struct knote *); */
68
/*
 * File operations for cloned instances. Positional initializer; the
 * slot order is: read, write, ioctl, fcntl, poll, stat, close, kqfilter.
 */
static const struct fileops xenevt_fileops = {
	xenevt_read,
	xenevt_write,
	xenevt_ioctl,
	fnullop_fcntl,	/* fcntl: accepted, does nothing */
	xenevt_poll,
	fbadop_stat,	/* stat: not supported, returns an error */
	xenevt_close,
	/* xenevt_kqfilter */ fnullop_kqfilter
};
79
dev_type_open(xenevtopen);
/*
 * Character device switch: only open() is implemented here. open()
 * clones a new instance, and every later operation goes through
 * xenevt_fileops on the cloned descriptor instead of the cdevsw.
 */
const struct cdevsw xenevt_cdevsw = {
	xenevtopen, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter,
};
85
86/* per-instance datas */
87#define XENEVT_RING_SIZE 2048
88#define XENEVT_RING_MASK 2047
89struct xenevt_d {
90	struct simplelock lock;
91	STAILQ_ENTRY(xenevt_d) pendingq;
92	boolean_t pending;
93	u_int16_t ring[2048];
94	u_int ring_read; /* pointer of the reader */
95	u_int ring_write; /* pointer of the writer */
96	u_int flags;
97#define XENEVT_F_OVERFLOW 0x01 /* ring overflow */
98	struct selinfo sel; /* used by poll */
99};
100
/* event -> user device mapping (one slot per event channel port) */
static struct xenevt_d *devevent[NR_EVENT_CHANNELS];

/* pending events: instances waiting for the soft interrupt to notify them */
struct simplelock devevent_pending_lock = SIMPLELOCK_INITIALIZER;
STAILQ_HEAD(, xenevt_d) devevent_pending =
    STAILQ_HEAD_INITIALIZER(devevent_pending);

static void xenevt_donotify(struct xenevt_d *);
static void xenevt_record(struct xenevt_d *, int);
111
/* called at boot time; "n" (the match count from config) is unused */
void
xenevtattach(int n)
{
	/* start with no event channel bound to any instance */
	memset(devevent, 0, sizeof(devevent));
}
118
/*
 * Event callback: called from the event channel interrupt path to hand
 * a port to the user instance bound to it (if any).
 */
void
xenevt_event(int port)
{
	struct xenevt_d *d;
	struct cpu_info *ci;

	/* mask and ack the port; userland re-enables it via write() */
	hypervisor_mask_event(port);
	hypervisor_clear_event(port);
	d = devevent[port];
	if (d != NULL) {
		xenevt_record(d, port);

		/* already queued for the soft interrupt: nothing more to do */
		if (d->pending) {
			return;
		}

		ci = curcpu();

		if (ci->ci_ilevel < IPL_SOFTXENEVT) {
			/* fast and common path */
			ci->ci_isources[SIR_XENEVT]->is_evcnt.ev_count++;
			xenevt_donotify(d);
		} else {
			/*
			 * Soft interrupt level is blocked; queue the
			 * instance for xenevt_notify() and schedule the
			 * soft interrupt.
			 */
			simple_lock(&devevent_pending_lock);
			STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
			simple_unlock(&devevent_pending_lock);
			d->pending = TRUE;
			softintr(SIR_XENEVT);
		}
	}
}
151
/*
 * Soft interrupt handler: drain devevent_pending, notifying each queued
 * instance. Interrupts are disabled (cli) while the queue is
 * manipulated, and re-enabled around each xenevt_donotify() call.
 */
void
xenevt_notify()
{

	cli();
	simple_lock(&devevent_pending_lock);
	while (/* CONSTCOND */ 1) {
		struct xenevt_d *d;

		d = STAILQ_FIRST(&devevent_pending);
		if (d == NULL) {
			break;
		}
		STAILQ_REMOVE_HEAD(&devevent_pending, pendingq);
		/* drop the lock and re-enable interrupts for the notify */
		simple_unlock(&devevent_pending_lock);
		sti();

		d->pending = FALSE;
		xenevt_donotify(d);

		cli();
		simple_lock(&devevent_pending_lock);
	}
	simple_unlock(&devevent_pending_lock);
	sti();
}
178
/* wake up poll()ers and sleeping readers of one instance */
static void
xenevt_donotify(struct xenevt_d *d)
{
	int s;

	s = splsoftxenevt();
	simple_lock(&d->lock);

	selnotify(&d->sel, 1);
	wakeup(&d->ring_read);	/* xenevt_read() sleeps on &d->ring_read */

	simple_unlock(&d->lock);
	splx(s);
}
193
194static void
195xenevt_record(struct xenevt_d *d, int port)
196{
197
198	/*
199	 * This algorithm overflows for one less slot than available.
200	 * Not really an issue, and the correct algorithm would be more
201	 * complex
202	 */
203
204	if (d->ring_read ==
205	    ((d->ring_write + 1) & XENEVT_RING_MASK)) {
206		d->flags |= XENEVT_F_OVERFLOW;
207		printf("xenevt_event: ring overflow port %d\n", port);
208	} else {
209		d->ring[d->ring_write] = port;
210		d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK;
211	}
212}
213
/* open the xenevt device; this is where we clone */
int
xenevtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct xenevt_d *d;
	struct file *fp;
	int fd, error;

	/* falloc() will use the descriptor for us. */
	if ((error = falloc(p, &fp, &fd)) != 0)
		return error;

	/* M_WAITOK: malloc() sleeps rather than failing */
	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_ZERO);
	simple_lock_init(&d->lock);

	/* attach the instance to the new descriptor as fp->f_data */
	return fdclone(p, fp, fd, flags, &xenevt_fileops, d);
}
231
232static int
233xenevt_close(struct file *fp, struct proc *p)
234{
235	struct xenevt_d *d = fp->f_data;
236	int i;
237
238	for (i = 0; i < NR_EVENT_CHANNELS; i++ ) {
239		if (devevent[i] == d) {
240			hypervisor_mask_event(i);
241			devevent[i] = NULL;
242		}
243	}
244	free(d, M_DEVBUF);
245	fp->f_data = NULL;
246
247	return (0);
248}
249
/*
 * read(): return pending event ports as an array of u_int16_t. Blocks
 * (unless FNONBLOCK) until at least one event is available; returns
 * EFBIG once the ring has overflowed. The copyout is done in up to two
 * chunks because the ring may wrap.
 */
static int
xenevt_read(struct file *fp, off_t *offp, struct uio *uio,
    struct ucred *cred, int flags)
{
	struct xenevt_d *d = fp->f_data;
	int error;
	size_t len, uio_len;
	int ring_read;
	int ring_write;
	int s;

	error = 0;
	s = splsoftxenevt();
	simple_lock(&d->lock);
	/* wait until the ring is non-empty or an overflow is flagged */
	while (error == 0) {
		ring_read = d->ring_read;
		ring_write = d->ring_write;
		if (ring_read != ring_write) {
			break;
		}
		if (d->flags & XENEVT_F_OVERFLOW) {
			break;
		}

		/* nothing to read */
		if (fp->f_flag & FNONBLOCK) {
			error = EAGAIN;
		} else {
			/* ltsleep releases and re-takes d->lock */
			error = ltsleep(&d->ring_read, PRIBIO | PCATCH,
			    "xenevt", 0, &d->lock);
		}
	}
	if (error == 0 && (d->flags & XENEVT_F_OVERFLOW)) {
		error = EFBIG;
	}
	simple_unlock(&d->lock);
	splx(s);

	if (error) {
		return error;
	}

	/*
	 * Copy out without the lock, using the snapshotted pointers.
	 * Only the reader moves ring_read, so this is safe; the writer
	 * may append concurrently but never past our ring_write snapshot.
	 */
	uio_len = uio->uio_resid >> 1;	/* bytes -> entries */
	if (ring_read <= ring_write)
		len = ring_write - ring_read;
	else
		len = XENEVT_RING_SIZE - ring_read; /* first part, up to the wrap */
	if (len > uio_len)
		len = uio_len;
	error = uiomove(&d->ring[ring_read], len << 1, uio);
	if (error)
		return error;
	ring_read = (ring_read + len) & XENEVT_RING_MASK;
	uio_len = uio->uio_resid >> 1;
	if (uio_len == 0)
		goto done;
	/* ring wrapped, read the second part */
	len = ring_write - ring_read;
	if (len > uio_len)
		len = uio_len;
	error = uiomove(&d->ring[ring_read], len << 1, uio);
	if (error)
		return error;
	ring_read = (ring_read + len) & XENEVT_RING_MASK;

done:
	/* publish the new read pointer under the lock */
	s = splsoftxenevt();
	simple_lock(&d->lock);
	d->ring_read = ring_read;
	simple_unlock(&d->lock);
	splx(s);

	return 0;
}
324
325static int
326xenevt_write(struct file *fp, off_t *offp, struct uio *uio,
327    struct ucred *cred, int flags)
328{
329	struct xenevt_d *d = fp->f_data;
330	u_int16_t chans[NR_EVENT_CHANNELS];
331	int i, nentries, error;
332
333	if (uio->uio_resid == 0)
334		return (0);
335	nentries = uio->uio_resid / sizeof(u_int16_t);
336	if (nentries > NR_EVENT_CHANNELS)
337		return EMSGSIZE;
338	error = uiomove(chans, uio->uio_resid, uio);
339	if (error)
340		return error;
341	for (i = 0; i < nentries; i++) {
342		if (chans[i] < NR_EVENT_CHANNELS &&
343		    devevent[chans[i]] == d) {
344			hypervisor_unmask_event(chans[i]);
345		}
346	}
347	return 0;
348}
349
350static int
351xenevt_ioctl(struct file *fp, u_long cmd, void *addr, struct proc *p)
352{
353	struct xenevt_d *d = fp->f_data;
354	u_int *arg = addr;
355
356	switch(cmd) {
357	case EVTCHN_RESET:
358		d->ring_read = d->ring_write = 0;
359		d->flags = 0;
360		break;
361	case EVTCHN_BIND:
362		if (*arg > NR_EVENT_CHANNELS)
363			return EINVAL;
364		if (devevent[*arg] != NULL)
365			return EISCONN;
366		devevent[*arg] = d;
367		hypervisor_unmask_event(*arg);
368		break;
369	case EVTCHN_UNBIND:
370		if (*arg > NR_EVENT_CHANNELS)
371			return EINVAL;
372		if (devevent[*arg] != d)
373			return ENOTCONN;
374		devevent[*arg] = NULL;
375		hypervisor_mask_event(*arg);
376		break;
377	case FIONBIO:
378		break;
379	default:
380		return EINVAL;
381	}
382	return 0;
383}
384
385/*
386 * Support for poll() system call
387 *
388 * Return true if the specific operation will not block indefinitely.
389 */
390
391static int
392xenevt_poll(struct file *fp, int events, struct proc *p)
393{
394	struct xenevt_d *d = fp->f_data;
395	int revents = events & (POLLOUT | POLLWRNORM); /* we can always write */
396
397	if (events & (POLLIN | POLLRDNORM)) {
398		if (d->ring_read != d->ring_write) {
399			revents |= events & (POLLIN | POLLRDNORM);
400		} else {
401			/* Record that someone is waiting */
402			selrecord(p, &d->sel);
403		}
404	}
405	return (revents);
406}
407