mevent.c revision 259496
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: releng/10.0/usr.sbin/bhyve/mevent.c 259496 2013-12-17 06:39:48Z grehan $
27 */
28
29/*
30 * Micro event library for FreeBSD, designed for a single i/o thread
31 * using kqueue, and having events be persistent by default.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: releng/10.0/usr.sbin/bhyve/mevent.c 259496 2013-12-17 06:39:48Z grehan $");
36
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
43
44#include <sys/types.h>
45#include <sys/event.h>
46#include <sys/time.h>
47
48#include <pthread.h>
49#include <pthread_np.h>
50
51#include "mevent.h"
52
/*
 * Size of the kevent change/event arrays used by mevent_dispatch() and of
 * the scratch buffer used to drain the wake-up pipe.
 */
#define	MEVENT_MAX	64

/*
 * Values stored in mevent.me_state. mevent_kq_flags() translates them
 * into kevent flags when the change list is built.
 */
#define MEV_ENABLE	1
#define MEV_DISABLE	2
#define MEV_DEL_PENDING	3
58
/* Defined elsewhere; not referenced by the code reviewed in this file. */
extern char *vmname;

/*
 * Thread that called mevent_dispatch(). mevent_notify() compares against
 * it so the i/o thread does not write a wake-up byte to itself.
 */
static pthread_t mevent_tid;
/* Next kevent ident for a timer; mevent_add() consumes one per EVF_TIMER. */
static int mevent_timid = 43;
/*
 * Wake-up pipe created in mevent_dispatch(): [0] is registered as a read
 * event and drained by mevent_pipe_read(), [1] is written by
 * mevent_notify().
 */
static int mevent_pipefd[2];
/* Taken by mevent_qlock()/mevent_qunlock() around event list updates. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
65
/*
 * One registered event. Entries are allocated by mevent_add() and freed by
 * mevent_build() once a pending delete has been processed.
 */
struct mevent {
	/* Callback run by mevent_handle() as me_func(me_fd, me_type, me_param). */
	void	(*me_func)(int, enum ev_type, void *);
	/* Timer events reuse me_fd to carry the value given to mevent_add(). */
#define me_msecs me_fd
	int	me_fd;
	/* kevent ident used for EVF_TIMER entries (taken from mevent_timid). */
	int	me_timid;
	enum ev_type me_type;
	/* Opaque argument passed through to me_func. */
	void    *me_param;
	/* Non-zero while the entry sits on change_head. */
	int	me_cq;
	/* One of MEV_ENABLE, MEV_DISABLE or MEV_DEL_PENDING. */
	int	me_state;
	/* Non-zero: mevent_build() closes me_fd instead of queueing a kevent. */
	int	me_closefd;
	LIST_ENTRY(mevent) me_list;
};
78
/*
 * change_head collects entries whose state changed and still has to be
 * turned into a kevent by mevent_build(); global_head holds the entries
 * mevent_build() has already processed and kept.
 */
static LIST_HEAD(listhead, mevent) global_head, change_head;
80
81static void
82mevent_qlock(void)
83{
84	pthread_mutex_lock(&mevent_lmutex);
85}
86
87static void
88mevent_qunlock(void)
89{
90	pthread_mutex_unlock(&mevent_lmutex);
91}
92
93static void
94mevent_pipe_read(int fd, enum ev_type type, void *param)
95{
96	char buf[MEVENT_MAX];
97	int status;
98
99	/*
100	 * Drain the pipe read side. The fd is non-blocking so this is
101	 * safe to do.
102	 */
103	do {
104		status = read(fd, buf, sizeof(buf));
105	} while (status == MEVENT_MAX);
106}
107
108static void
109mevent_notify(void)
110{
111	char c;
112
113	/*
114	 * If calling from outside the i/o thread, write a byte on the
115	 * pipe to force the i/o thread to exit the blocking kevent call.
116	 */
117	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
118		write(mevent_pipefd[1], &c, 1);
119	}
120}
121
122static int
123mevent_kq_filter(struct mevent *mevp)
124{
125	int retval;
126
127	retval = 0;
128
129	if (mevp->me_type == EVF_READ)
130		retval = EVFILT_READ;
131
132	if (mevp->me_type == EVF_WRITE)
133		retval = EVFILT_WRITE;
134
135	if (mevp->me_type == EVF_TIMER)
136		retval = EVFILT_TIMER;
137
138	return (retval);
139}
140
141static int
142mevent_kq_flags(struct mevent *mevp)
143{
144	int ret;
145
146	switch (mevp->me_state) {
147	case MEV_ENABLE:
148		ret = EV_ADD;
149		if (mevp->me_type == EVF_TIMER)
150			ret |= EV_ENABLE;
151		break;
152	case MEV_DISABLE:
153		ret = EV_DISABLE;
154		break;
155	case MEV_DEL_PENDING:
156		ret = EV_DELETE;
157		break;
158	}
159
160	return (ret);
161}
162
/*
 * Filter-specific flags for an event's kevent entry. No event type uses
 * any at present, so the argument is ignored and the result is always 0.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	int fflags = 0;

	return (fflags);
}
169
170static int
171mevent_build(int mfd, struct kevent *kev)
172{
173	struct mevent *mevp, *tmpp;
174	int i;
175
176	i = 0;
177
178	mevent_qlock();
179
180	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
181		if (mevp->me_closefd) {
182			/*
183			 * A close of the file descriptor will remove the
184			 * event
185			 */
186			close(mevp->me_fd);
187		} else {
188			if (mevp->me_type == EVF_TIMER) {
189				kev[i].ident = mevp->me_timid;
190				kev[i].data = mevp->me_msecs;
191			} else {
192				kev[i].ident = mevp->me_fd;
193				kev[i].data = 0;
194			}
195			kev[i].filter = mevent_kq_filter(mevp);
196			kev[i].flags = mevent_kq_flags(mevp);
197			kev[i].fflags = mevent_kq_fflags(mevp);
198			kev[i].udata = mevp;
199			i++;
200		}
201
202		mevp->me_cq = 0;
203		LIST_REMOVE(mevp, me_list);
204
205		if (mevp->me_state == MEV_DEL_PENDING) {
206			free(mevp);
207		} else {
208			LIST_INSERT_HEAD(&global_head, mevp, me_list);
209		}
210
211		assert(i < MEVENT_MAX);
212	}
213
214	mevent_qunlock();
215
216	return (i);
217}
218
219static void
220mevent_handle(struct kevent *kev, int numev)
221{
222	struct mevent *mevp;
223	int i;
224
225	for (i = 0; i < numev; i++) {
226		mevp = kev[i].udata;
227
228		/* XXX check for EV_ERROR ? */
229
230		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
231	}
232}
233
234struct mevent *
235mevent_add(int tfd, enum ev_type type,
236	   void (*func)(int, enum ev_type, void *), void *param)
237{
238	struct mevent *lp, *mevp;
239
240	if (tfd < 0 || func == NULL) {
241		return (NULL);
242	}
243
244	mevp = NULL;
245
246	mevent_qlock();
247
248	/*
249	 * Verify that the fd/type tuple is not present in any list
250	 */
251	LIST_FOREACH(lp, &global_head, me_list) {
252		if (type != EVF_TIMER && lp->me_fd == tfd &&
253		    lp->me_type == type) {
254			goto exit;
255		}
256	}
257
258	LIST_FOREACH(lp, &change_head, me_list) {
259		if (type != EVF_TIMER && lp->me_fd == tfd &&
260		    lp->me_type == type) {
261			goto exit;
262		}
263	}
264
265	/*
266	 * Allocate an entry, populate it, and add it to the change list.
267	 */
268	mevp = malloc(sizeof(struct mevent));
269	if (mevp == NULL) {
270		goto exit;
271	}
272
273	memset(mevp, 0, sizeof(struct mevent));
274	if (type == EVF_TIMER) {
275		mevp->me_msecs = tfd;
276		mevp->me_timid = mevent_timid++;
277	} else
278		mevp->me_fd = tfd;
279	mevp->me_type = type;
280	mevp->me_func = func;
281	mevp->me_param = param;
282
283	LIST_INSERT_HEAD(&change_head, mevp, me_list);
284	mevp->me_cq = 1;
285	mevp->me_state = MEV_ENABLE;
286	mevent_notify();
287
288exit:
289	mevent_qunlock();
290
291	return (mevp);
292}
293
294static int
295mevent_update(struct mevent *evp, int newstate)
296{
297	/*
298	 * It's not possible to enable/disable a deleted event
299	 */
300	if (evp->me_state == MEV_DEL_PENDING)
301		return (EINVAL);
302
303	/*
304	 * No update needed if state isn't changing
305	 */
306	if (evp->me_state == newstate)
307		return (0);
308
309	mevent_qlock();
310
311	evp->me_state = newstate;
312
313	/*
314	 * Place the entry onto the changed list if not already there.
315	 */
316	if (evp->me_cq == 0) {
317		evp->me_cq = 1;
318		LIST_REMOVE(evp, me_list);
319		LIST_INSERT_HEAD(&change_head, evp, me_list);
320		mevent_notify();
321	}
322
323	mevent_qunlock();
324
325	return (0);
326}
327
328int
329mevent_enable(struct mevent *evp)
330{
331
332	return (mevent_update(evp, MEV_ENABLE));
333}
334
335int
336mevent_disable(struct mevent *evp)
337{
338
339	return (mevent_update(evp, MEV_DISABLE));
340}
341
342static int
343mevent_delete_event(struct mevent *evp, int closefd)
344{
345	mevent_qlock();
346
347	/*
348         * Place the entry onto the changed list if not already there, and
349	 * mark as to be deleted.
350         */
351        if (evp->me_cq == 0) {
352		evp->me_cq = 1;
353		LIST_REMOVE(evp, me_list);
354		LIST_INSERT_HEAD(&change_head, evp, me_list);
355		mevent_notify();
356        }
357	evp->me_state = MEV_DEL_PENDING;
358
359	if (closefd)
360		evp->me_closefd = 1;
361
362	mevent_qunlock();
363
364	return (0);
365}
366
/*
 * Schedule 'evp' for deletion without requesting that its descriptor be
 * closed. Returns the result of mevent_delete_event().
 */
int
mevent_delete(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 0);
	return (error);
}
373
/*
 * Schedule 'evp' for deletion and request that its descriptor be closed.
 * Returns the result of mevent_delete_event().
 */
int
mevent_delete_close(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 1);
	return (error);
}
380
381static void
382mevent_set_name(void)
383{
384
385	pthread_set_name_np(mevent_tid, "mevent");
386}
387
388void
389mevent_dispatch(void)
390{
391	struct kevent changelist[MEVENT_MAX];
392	struct kevent eventlist[MEVENT_MAX];
393	struct mevent *pipev;
394	int mfd;
395	int numev;
396	int ret;
397
398	mevent_tid = pthread_self();
399	mevent_set_name();
400
401	mfd = kqueue();
402	assert(mfd > 0);
403
404	/*
405	 * Open the pipe that will be used for other threads to force
406	 * the blocking kqueue call to exit by writing to it. Set the
407	 * descriptor to non-blocking.
408	 */
409	ret = pipe(mevent_pipefd);
410	if (ret < 0) {
411		perror("pipe");
412		exit(0);
413	}
414
415	/*
416	 * Add internal event handler for the pipe write fd
417	 */
418	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
419	assert(pipev != NULL);
420
421	for (;;) {
422		/*
423		 * Build changelist if required.
424		 * XXX the changelist can be put into the blocking call
425		 * to eliminate the extra syscall. Currently better for
426		 * debug.
427		 */
428		numev = mevent_build(mfd, changelist);
429		if (numev) {
430			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
431			if (ret == -1) {
432				perror("Error return from kevent change");
433			}
434		}
435
436		/*
437		 * Block awaiting events
438		 */
439		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
440		if (ret == -1) {
441			perror("Error return from kevent monitor");
442		}
443
444		/*
445		 * Handle reported events
446		 */
447		mevent_handle(eventlist, ret);
448	}
449}
450