1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD: releng/10.3/usr.sbin/bhyve/mevent.c 268953 2014-07-21 19:08:02Z jhb $
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan/*
30221828Sgrehan * Micro event library for FreeBSD, designed for a single i/o thread
31221828Sgrehan * using kqueue, and having events be persistent by default.
32221828Sgrehan */
33221828Sgrehan
34221828Sgrehan#include <sys/cdefs.h>
35221828Sgrehan__FBSDID("$FreeBSD: releng/10.3/usr.sbin/bhyve/mevent.c 268953 2014-07-21 19:08:02Z jhb $");
36221828Sgrehan
37221828Sgrehan#include <assert.h>
38221828Sgrehan#include <errno.h>
39221828Sgrehan#include <stdlib.h>
40221828Sgrehan#include <stdio.h>
41221828Sgrehan#include <string.h>
42221828Sgrehan#include <unistd.h>
43221828Sgrehan
44221828Sgrehan#include <sys/types.h>
45221828Sgrehan#include <sys/event.h>
46221828Sgrehan#include <sys/time.h>
47221828Sgrehan
48221828Sgrehan#include <pthread.h>
49244520Sgrehan#include <pthread_np.h>
50221828Sgrehan
51221828Sgrehan#include "mevent.h"
52221828Sgrehan
/*
 * Capacity of the kevent arrays used by mevent_dispatch() and of the
 * buffer used to drain the wakeup pipe.
 */
#define	MEVENT_MAX	64

/*
 * Values stored in the me_state field of struct mevent.
 * mevent_kq_flags() maps each one onto the corresponding kevent flag.
 */
enum {
	MEV_ADD = 1,
	MEV_ENABLE = 2,
	MEV_DISABLE = 3,
	MEV_DEL_PENDING = 4
};
59221828Sgrehan
/* Not referenced by any code in this file as shown. */
extern char *vmname;

/* Thread that runs mevent_dispatch(); recorded when that function starts. */
static pthread_t mevent_tid;

/* Next kevent ident that mevent_add() assigns to an EVF_TIMER event. */
static int mevent_timid = 43;

/*
 * Wakeup pipe created by mevent_dispatch().  Index 0 is registered as a
 * read event; index 1 is written by mevent_notify().
 */
static int mevent_pipefd[2];

/* Mutex taken by mevent_qlock() and dropped by mevent_qunlock(). */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
66221828Sgrehan
67221828Sgrehanstruct mevent {
68221828Sgrehan	void	(*me_func)(int, enum ev_type, void *);
69255690Sgrehan#define me_msecs me_fd
70221828Sgrehan	int	me_fd;
71255690Sgrehan	int	me_timid;
72221828Sgrehan	enum ev_type me_type;
73221828Sgrehan	void    *me_param;
74221828Sgrehan	int	me_cq;
75221828Sgrehan	int	me_state;
76221828Sgrehan	int	me_closefd;
77221828Sgrehan	LIST_ENTRY(mevent) me_list;
78221828Sgrehan};
79221828Sgrehan
/*
 * global_head: events whose latest state has already been turned into a
 * kevent change by mevent_build().
 * change_head: events with a change still waiting for mevent_build().
 */
static LIST_HEAD(listhead, mevent) global_head;
static struct listhead change_head;
81221828Sgrehan
82221828Sgrehanstatic void
83221828Sgrehanmevent_qlock(void)
84221828Sgrehan{
85221828Sgrehan	pthread_mutex_lock(&mevent_lmutex);
86221828Sgrehan}
87221828Sgrehan
88221828Sgrehanstatic void
89221828Sgrehanmevent_qunlock(void)
90221828Sgrehan{
91221828Sgrehan	pthread_mutex_unlock(&mevent_lmutex);
92221828Sgrehan}
93221828Sgrehan
94221828Sgrehanstatic void
95221828Sgrehanmevent_pipe_read(int fd, enum ev_type type, void *param)
96221828Sgrehan{
97221828Sgrehan	char buf[MEVENT_MAX];
98221828Sgrehan	int status;
99221828Sgrehan
100221828Sgrehan	/*
101221828Sgrehan	 * Drain the pipe read side. The fd is non-blocking so this is
102221828Sgrehan	 * safe to do.
103221828Sgrehan	 */
104221828Sgrehan	do {
105221828Sgrehan		status = read(fd, buf, sizeof(buf));
106221828Sgrehan	} while (status == MEVENT_MAX);
107221828Sgrehan}
108221828Sgrehan
109221828Sgrehanstatic void
110221828Sgrehanmevent_notify(void)
111221828Sgrehan{
112221828Sgrehan	char c;
113221828Sgrehan
114221828Sgrehan	/*
115221828Sgrehan	 * If calling from outside the i/o thread, write a byte on the
116221828Sgrehan	 * pipe to force the i/o thread to exit the blocking kevent call.
117221828Sgrehan	 */
118221828Sgrehan	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
119221828Sgrehan		write(mevent_pipefd[1], &c, 1);
120221828Sgrehan	}
121221828Sgrehan}
122221828Sgrehan
/*
 * Map the event's me_type onto the kqueue filter that watches it.
 * Returns 0 for a type that has no mapping.
 */
static int
mevent_kq_filter(struct mevent *mevp)
{

	switch (mevp->me_type) {
	case EVF_READ:
		return (EVFILT_READ);
	case EVF_WRITE:
		return (EVFILT_WRITE);
	case EVF_TIMER:
		return (EVFILT_TIMER);
	case EVF_SIGNAL:
		return (EVFILT_SIGNAL);
	default:
		return (0);
	}
}
144221828Sgrehan
/*
 * Translate the event's pending state (me_state) into the kevent action
 * flag that applies it.
 *
 * Returns EV_ADD, EV_ENABLE, EV_DISABLE or EV_DELETE.  An unknown state
 * trips an assertion; when assertions are compiled out it yields 0.
 */
static int
mevent_kq_flags(struct mevent *mevp)
{
	int ret;

	/*
	 * Defined fallback: with NDEBUG the assert in the default case
	 * vanishes and the function would otherwise return an
	 * indeterminate value.
	 */
	ret = 0;

	switch (mevp->me_state) {
	case MEV_ADD:
		ret = EV_ADD;		/* implicitly enabled */
		break;
	case MEV_ENABLE:
		ret = EV_ENABLE;
		break;
	case MEV_DISABLE:
		ret = EV_DISABLE;
		break;
	case MEV_DEL_PENDING:
		ret = EV_DELETE;
		break;
	default:
		assert(0);
		break;
	}

	return (ret);
}
170221828Sgrehan
/*
 * Filter-specific flags for the event's kevent entry.
 * No event type uses any yet, so the result is always 0.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{

	(void)mevp;

	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	return (0);
}
177221828Sgrehan
/*
 * Drain the change list into a kevent change array.
 *
 * Every queued entry is taken off the change list with the list mutex
 * held.  Entries marked me_closefd have their descriptor closed and
 * produce no array slot; all others fill the next slot of 'kev'.
 * Entries pending deletion are then freed, the rest move to the global
 * list.
 *
 * mfd - kqueue descriptor; not used here.
 * kev - output array, expected to hold MEVENT_MAX entries.
 *
 * Returns the number of slots filled in 'kev'.
 */
static int
mevent_build(int mfd, struct kevent *kev)
{
	struct mevent *cur, *next;
	struct kevent *slot;
	int nchanges;

	(void)mfd;
	nchanges = 0;

	mevent_qlock();

	LIST_FOREACH_SAFE(cur, &change_head, me_list, next) {
		if (!cur->me_closefd) {
			slot = &kev[nchanges];
			nchanges++;

			/* Timers carry their own ident and pass the period as data. */
			if (cur->me_type != EVF_TIMER) {
				slot->ident = cur->me_fd;
				slot->data = 0;
			} else {
				slot->ident = cur->me_timid;
				slot->data = cur->me_msecs;
			}
			slot->filter = mevent_kq_filter(cur);
			slot->flags = mevent_kq_flags(cur);
			slot->fflags = mevent_kq_fflags(cur);
			slot->udata = cur;
		} else {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(cur->me_fd);
		}

		/* The entry is no longer queued for a change. */
		LIST_REMOVE(cur, me_list);
		cur->me_cq = 0;

		if (cur->me_state != MEV_DEL_PENDING) {
			LIST_INSERT_HEAD(&global_head, cur, me_list);
		} else {
			free(cur);
		}

		assert(nchanges < MEVENT_MAX);
	}

	mevent_qunlock();

	return (nchanges);
}
226221828Sgrehan
/*
 * Run the callback of every event reported by kevent.
 *
 * kev   - array of returned events; udata holds the struct mevent.
 * numev - number of valid entries; a value of zero or less runs nothing.
 */
static void
mevent_handle(struct kevent *kev, int numev)
{
	struct mevent *ev;
	int idx;

	idx = 0;
	while (idx < numev) {
		ev = kev[idx].udata;

		/* XXX check for EV_ERROR ? */

		(*ev->me_func)(ev->me_fd, ev->me_type, ev->me_param);
		idx++;
	}
}
241221828Sgrehan
242221828Sgrehanstruct mevent *
243255690Sgrehanmevent_add(int tfd, enum ev_type type,
244221828Sgrehan	   void (*func)(int, enum ev_type, void *), void *param)
245221828Sgrehan{
246221828Sgrehan	struct mevent *lp, *mevp;
247221828Sgrehan
248255690Sgrehan	if (tfd < 0 || func == NULL) {
249221828Sgrehan		return (NULL);
250221828Sgrehan	}
251221828Sgrehan
252221828Sgrehan	mevp = NULL;
253221828Sgrehan
254221828Sgrehan	mevent_qlock();
255221828Sgrehan
256221828Sgrehan	/*
257221828Sgrehan	 * Verify that the fd/type tuple is not present in any list
258221828Sgrehan	 */
259221828Sgrehan	LIST_FOREACH(lp, &global_head, me_list) {
260255690Sgrehan		if (type != EVF_TIMER && lp->me_fd == tfd &&
261255690Sgrehan		    lp->me_type == type) {
262221828Sgrehan			goto exit;
263221828Sgrehan		}
264221828Sgrehan	}
265221828Sgrehan
266221828Sgrehan	LIST_FOREACH(lp, &change_head, me_list) {
267255690Sgrehan		if (type != EVF_TIMER && lp->me_fd == tfd &&
268255690Sgrehan		    lp->me_type == type) {
269221828Sgrehan			goto exit;
270221828Sgrehan		}
271221828Sgrehan	}
272221828Sgrehan
273221828Sgrehan	/*
274221828Sgrehan	 * Allocate an entry, populate it, and add it to the change list.
275221828Sgrehan	 */
276268953Sjhb	mevp = calloc(1, sizeof(struct mevent));
277221828Sgrehan	if (mevp == NULL) {
278221828Sgrehan		goto exit;
279221828Sgrehan	}
280221828Sgrehan
281255690Sgrehan	if (type == EVF_TIMER) {
282255690Sgrehan		mevp->me_msecs = tfd;
283255690Sgrehan		mevp->me_timid = mevent_timid++;
284255690Sgrehan	} else
285255690Sgrehan		mevp->me_fd = tfd;
286221828Sgrehan	mevp->me_type = type;
287221828Sgrehan	mevp->me_func = func;
288221828Sgrehan	mevp->me_param = param;
289221828Sgrehan
290221828Sgrehan	LIST_INSERT_HEAD(&change_head, mevp, me_list);
291221828Sgrehan	mevp->me_cq = 1;
292268953Sjhb	mevp->me_state = MEV_ADD;
293221828Sgrehan	mevent_notify();
294221828Sgrehan
295221828Sgrehanexit:
296221828Sgrehan	mevent_qunlock();
297221828Sgrehan
298221828Sgrehan	return (mevp);
299221828Sgrehan}
300221828Sgrehan
301221828Sgrehanstatic int
302221828Sgrehanmevent_update(struct mevent *evp, int newstate)
303221828Sgrehan{
304221828Sgrehan	/*
305221828Sgrehan	 * It's not possible to enable/disable a deleted event
306221828Sgrehan	 */
307221828Sgrehan	if (evp->me_state == MEV_DEL_PENDING)
308221828Sgrehan		return (EINVAL);
309221828Sgrehan
310221828Sgrehan	/*
311221828Sgrehan	 * No update needed if state isn't changing
312221828Sgrehan	 */
313221828Sgrehan	if (evp->me_state == newstate)
314221828Sgrehan		return (0);
315221828Sgrehan
316221828Sgrehan	mevent_qlock();
317221828Sgrehan
318221828Sgrehan	evp->me_state = newstate;
319221828Sgrehan
320221828Sgrehan	/*
321221828Sgrehan	 * Place the entry onto the changed list if not already there.
322221828Sgrehan	 */
323221828Sgrehan	if (evp->me_cq == 0) {
324221828Sgrehan		evp->me_cq = 1;
325221828Sgrehan		LIST_REMOVE(evp, me_list);
326221828Sgrehan		LIST_INSERT_HEAD(&change_head, evp, me_list);
327221828Sgrehan		mevent_notify();
328221828Sgrehan	}
329221828Sgrehan
330221828Sgrehan	mevent_qunlock();
331221828Sgrehan
332221828Sgrehan	return (0);
333221828Sgrehan}
334221828Sgrehan
335221828Sgrehanint
336221828Sgrehanmevent_enable(struct mevent *evp)
337221828Sgrehan{
338221828Sgrehan
339221828Sgrehan	return (mevent_update(evp, MEV_ENABLE));
340221828Sgrehan}
341221828Sgrehan
342221828Sgrehanint
343221828Sgrehanmevent_disable(struct mevent *evp)
344221828Sgrehan{
345221828Sgrehan
346221828Sgrehan	return (mevent_update(evp, MEV_DISABLE));
347221828Sgrehan}
348221828Sgrehan
349221828Sgrehanstatic int
350221828Sgrehanmevent_delete_event(struct mevent *evp, int closefd)
351221828Sgrehan{
352221828Sgrehan	mevent_qlock();
353221828Sgrehan
354221828Sgrehan	/*
355221828Sgrehan         * Place the entry onto the changed list if not already there, and
356221828Sgrehan	 * mark as to be deleted.
357221828Sgrehan         */
358221828Sgrehan        if (evp->me_cq == 0) {
359221828Sgrehan		evp->me_cq = 1;
360221828Sgrehan		LIST_REMOVE(evp, me_list);
361221828Sgrehan		LIST_INSERT_HEAD(&change_head, evp, me_list);
362221828Sgrehan		mevent_notify();
363221828Sgrehan        }
364221828Sgrehan	evp->me_state = MEV_DEL_PENDING;
365221828Sgrehan
366221828Sgrehan	if (closefd)
367221828Sgrehan		evp->me_closefd = 1;
368221828Sgrehan
369221828Sgrehan	mevent_qunlock();
370221828Sgrehan
371221828Sgrehan	return (0);
372221828Sgrehan}
373221828Sgrehan
/*
 * Delete an event, leaving its file descriptor open.
 * Returns the result of mevent_delete_event(), which is always 0.
 */
int
mevent_delete(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 0);
	return (error);
}
380221828Sgrehan
/*
 * Delete an event and have its file descriptor closed when the deletion
 * is processed.
 * Returns the result of mevent_delete_event(), which is always 0.
 */
int
mevent_delete_close(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 1);
	return (error);
}
387221828Sgrehan
388244520Sgrehanstatic void
389244520Sgrehanmevent_set_name(void)
390244520Sgrehan{
391244520Sgrehan
392259301Sgrehan	pthread_set_name_np(mevent_tid, "mevent");
393244520Sgrehan}
394244520Sgrehan
/*
 * Event loop of the i/o thread.
 *
 * Records the calling thread as the dispatch thread, creates the kqueue
 * and the wakeup pipe, registers the pipe's read end as an internal
 * event, and then loops forever: apply queued changes, block in kevent,
 * run the callbacks of the events that fired.
 *
 * Does not return.  The process exits if the pipe cannot be created.
 */
void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;

	mevent_tid = pthread_self();
	mevent_set_name();

	/*
	 * kqueue() reports failure with -1; descriptor 0 is a valid
	 * result and must not trip the assertion.
	 */
	mfd = kqueue();
	assert(mfd >= 0);

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it.  Both ends
	 * are left in the default (blocking) mode.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		/*
		 * NOTE(review): status 0 is reported on this failure path;
		 * confirm how the exit status of this process is interpreted
		 * before changing it.
		 */
		exit(0);
	}

	/*
	 * Add internal event handler for the pipe read fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);

	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}

		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1 && errno != EINTR) {
			perror("Error return from kevent monitor");
		}

		/*
		 * Handle reported events.  After a failed kevent call ret
		 * is -1 and mevent_handle() runs no callbacks.
		 */
		mevent_handle(eventlist, ret);
	}
}
457