155714Skris/*-
255714Skris * Copyright (c) 2011 NetApp, Inc.
355714Skris * All rights reserved.
455714Skris *
555714Skris * Redistribution and use in source and binary forms, with or without
655714Skris * modification, are permitted provided that the following conditions
755714Skris * are met:
8280297Sjkim * 1. Redistributions of source code must retain the above copyright
955714Skris *    notice, this list of conditions and the following disclaimer.
1055714Skris * 2. Redistributions in binary form must reproduce the above copyright
1155714Skris *    notice, this list of conditions and the following disclaimer in the
1255714Skris *    documentation and/or other materials provided with the distribution.
1355714Skris *
1455714Skris * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15280297Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1655714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1755714Skris * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
1855714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1955714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2055714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2155714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22280297Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2355714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2455714Skris * SUCH DAMAGE.
2555714Skris *
2655714Skris * $FreeBSD: releng/11.0/usr.sbin/bhyve/mevent.c 265365 2014-05-05 16:30:03Z neel $
2755714Skris */
2855714Skris
/*
 * Micro event library for FreeBSD, designed for a single i/o thread
 * using kqueue, and having events be persistent by default.
 */
3355714Skris
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/mevent.c 265365 2014-05-05 16:30:03Z neel $");

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <pthread.h>
#include <pthread_np.h>

#include "mevent.h"
52280297Sjkim
/*
 * Size of the kevent change and event arrays declared in mevent_dispatch();
 * also the size of the scratch buffer used to drain the wakeup pipe.
 */
#define	MEVENT_MAX	64

/*
 * Values stored in mevent.me_state. They name the kqueue operation that is
 * pending for an event; mevent_kq_flags() translates them to
 * EV_ADD, EV_ENABLE, EV_DISABLE and EV_DELETE respectively.
 */
#define	MEV_ADD		1
#define	MEV_ENABLE	2
#define	MEV_DISABLE	3
#define	MEV_DEL_PENDING	4

/* Defined elsewhere; not referenced in the visible part of this file. */
extern char *vmname;

/* Thread that entered mevent_dispatch(); used to detect cross-thread calls. */
static pthread_t mevent_tid;
/* Next kqueue ident to hand to an EVF_TIMER event; incremented per timer. */
static int mevent_timid = 43;
/*
 * Wakeup pipe created in mevent_dispatch(): [0] is registered as a read
 * event, [1] is written by mevent_notify().
 */
static int mevent_pipefd[2];
/* Taken via mevent_qlock()/mevent_qunlock() around event list updates. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
66280297Sjkim
/*
 * One registered event. An entry lives on exactly one of the two lists
 * declared below: change_head while a kqueue change is pending for it
 * (me_cq != 0), global_head otherwise.
 */
struct mevent {
	/* Callback run by mevent_handle() as me_func(me_fd, me_type, me_param). */
	void	(*me_func)(int, enum ev_type, void *);
/* For EVF_TIMER events me_fd holds the value passed as the kevent data. */
#define me_msecs me_fd
	int	me_fd;		/* descriptor (or timer value, see me_msecs) */
	int	me_timid;	/* kqueue ident used for EVF_TIMER events */
	enum ev_type me_type;	/* event type, mapped by mevent_kq_filter() */
	void    *me_param;	/* opaque argument passed to me_func */
	int	me_cq;		/* non-zero while queued on change_head */
	int	me_state;	/* one of the MEV_* values */
	int	me_closefd;	/* non-zero: mevent_build() closes me_fd */
	LIST_ENTRY(mevent) me_list;	/* linkage for global_head/change_head */
};

/*
 * global_head: events with no pending change.
 * change_head: events whose pending change is consumed by mevent_build().
 */
static LIST_HEAD(listhead, mevent) global_head, change_head;
8155714Skris
8255714Skrisstatic void
8355714Skrismevent_qlock(void)
8455714Skris{
8555714Skris	pthread_mutex_lock(&mevent_lmutex);
8655714Skris}
8755714Skris
8855714Skrisstatic void
89280297Sjkimmevent_qunlock(void)
9055714Skris{
9155714Skris	pthread_mutex_unlock(&mevent_lmutex);
92280297Sjkim}
93280297Sjkim
9455714Skrisstatic void
9555714Skrismevent_pipe_read(int fd, enum ev_type type, void *param)
9655714Skris{
9755714Skris	char buf[MEVENT_MAX];
9855714Skris	int status;
9955714Skris
100280297Sjkim	/*
101280297Sjkim	 * Drain the pipe read side. The fd is non-blocking so this is
102280297Sjkim	 * safe to do.
103280297Sjkim	 */
104280297Sjkim	do {
105280297Sjkim		status = read(fd, buf, sizeof(buf));
106291719Sjkim	} while (status == MEVENT_MAX);
10755714Skris}
108280297Sjkim
109280297Sjkimstatic void
110280297Sjkimmevent_notify(void)
111280297Sjkim{
112280297Sjkim	char c;
113280297Sjkim
114280297Sjkim	/*
115280297Sjkim	 * If calling from outside the i/o thread, write a byte on the
116280297Sjkim	 * pipe to force the i/o thread to exit the blocking kevent call.
117280297Sjkim	 */
118280297Sjkim	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
119280297Sjkim		write(mevent_pipefd[1], &c, 1);
120280297Sjkim	}
121280297Sjkim}
122280297Sjkim
123280297Sjkimstatic int
124280297Sjkimmevent_kq_filter(struct mevent *mevp)
125280297Sjkim{
12655714Skris	int retval;
127291719Sjkim
128291719Sjkim	retval = 0;
129291719Sjkim
130291719Sjkim	if (mevp->me_type == EVF_READ)
131291719Sjkim		retval = EVFILT_READ;
132291719Sjkim
133291719Sjkim	if (mevp->me_type == EVF_WRITE)
134291719Sjkim		retval = EVFILT_WRITE;
135291719Sjkim
136291719Sjkim	if (mevp->me_type == EVF_TIMER)
137291719Sjkim		retval = EVFILT_TIMER;
138291719Sjkim
139291719Sjkim	if (mevp->me_type == EVF_SIGNAL)
140291719Sjkim		retval = EVFILT_SIGNAL;
141291719Sjkim
142291719Sjkim	return (retval);
143291719Sjkim}
14455714Skris
145280297Sjkimstatic int
146280297Sjkimmevent_kq_flags(struct mevent *mevp)
147280297Sjkim{
148280297Sjkim	int ret;
149280297Sjkim
15055714Skris	switch (mevp->me_state) {
15155714Skris	case MEV_ADD:
152280297Sjkim		ret = EV_ADD;		/* implicitly enabled */
153280297Sjkim		break;
154280297Sjkim	case MEV_ENABLE:
155298998Sjkim		ret = EV_ENABLE;
15655714Skris		break;
157280297Sjkim	case MEV_DISABLE:
158284283Sjkim		ret = EV_DISABLE;
159280297Sjkim		break;
160280297Sjkim	case MEV_DEL_PENDING:
161298998Sjkim		ret = EV_DELETE;
162280297Sjkim		break;
163280297Sjkim	default:
164280297Sjkim		assert(0);
165280297Sjkim		break;
166280297Sjkim	}
167280297Sjkim
168280297Sjkim	return (ret);
169280297Sjkim}
170280297Sjkim
/*
 * Filter-specific flags for an event. No event type uses any yet, so the
 * result is always 0 and mevp is not examined.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	const int no_fflags = 0;

	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	return (no_fflags);
}
177280297Sjkim
178298998Sjkimstatic int
179280297Sjkimmevent_build(int mfd, struct kevent *kev)
180280297Sjkim{
181280297Sjkim	struct mevent *mevp, *tmpp;
182280297Sjkim	int i;
183280297Sjkim
184280297Sjkim	i = 0;
185280297Sjkim
186280297Sjkim	mevent_qlock();
187298998Sjkim
188298998Sjkim	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
189298998Sjkim		if (mevp->me_closefd) {
190298998Sjkim			/*
191298998Sjkim			 * A close of the file descriptor will remove the
192280297Sjkim			 * event
193280297Sjkim			 */
194280297Sjkim			close(mevp->me_fd);
195280297Sjkim		} else {
196280297Sjkim			if (mevp->me_type == EVF_TIMER) {
19755714Skris				kev[i].ident = mevp->me_timid;
19855714Skris				kev[i].data = mevp->me_msecs;
199280297Sjkim			} else {
200280297Sjkim				kev[i].ident = mevp->me_fd;
20155714Skris				kev[i].data = 0;
202280297Sjkim			}
203280297Sjkim			kev[i].filter = mevent_kq_filter(mevp);
204280297Sjkim			kev[i].flags = mevent_kq_flags(mevp);
205280297Sjkim			kev[i].fflags = mevent_kq_fflags(mevp);
206280297Sjkim			kev[i].udata = mevp;
207280297Sjkim			i++;
208280297Sjkim		}
209280297Sjkim
21055714Skris		mevp->me_cq = 0;
21159191Skris		LIST_REMOVE(mevp, me_list);
212280297Sjkim
213280297Sjkim		if (mevp->me_state == MEV_DEL_PENDING) {
214280297Sjkim			free(mevp);
21555714Skris		} else {
216280297Sjkim			LIST_INSERT_HEAD(&global_head, mevp, me_list);
217280297Sjkim		}
218280297Sjkim
219280297Sjkim		assert(i < MEVENT_MAX);
220280297Sjkim	}
221280297Sjkim
222280297Sjkim	mevent_qunlock();
223280297Sjkim
224280297Sjkim	return (i);
225280297Sjkim}
226280297Sjkim
227280297Sjkimstatic void
22855714Skrismevent_handle(struct kevent *kev, int numev)
229280297Sjkim{
230280297Sjkim	struct mevent *mevp;
231280297Sjkim	int i;
232280297Sjkim
233280297Sjkim	for (i = 0; i < numev; i++) {
234280297Sjkim		mevp = kev[i].udata;
235280297Sjkim
236280297Sjkim		/* XXX check for EV_ERROR ? */
23755714Skris
238280297Sjkim		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
239280297Sjkim	}
240280297Sjkim}
24155714Skris
24255714Skrisstruct mevent *
243280297Sjkimmevent_add(int tfd, enum ev_type type,
244291719Sjkim	   void (*func)(int, enum ev_type, void *), void *param)
245280297Sjkim{
246291719Sjkim	struct mevent *lp, *mevp;
247280297Sjkim
248280297Sjkim	if (tfd < 0 || func == NULL) {
249280297Sjkim		return (NULL);
25055714Skris	}
251280297Sjkim
252280297Sjkim	mevp = NULL;
25355714Skris
25455714Skris	mevent_qlock();
255291719Sjkim
256291719Sjkim	/*
257291719Sjkim	 * Verify that the fd/type tuple is not present in any list
258291719Sjkim	 */
259291719Sjkim	LIST_FOREACH(lp, &global_head, me_list) {
260291719Sjkim		if (type != EVF_TIMER && lp->me_fd == tfd &&
261291719Sjkim		    lp->me_type == type) {
262291719Sjkim			goto exit;
263291719Sjkim		}
264291719Sjkim	}
265291719Sjkim
266291719Sjkim	LIST_FOREACH(lp, &change_head, me_list) {
267291719Sjkim		if (type != EVF_TIMER && lp->me_fd == tfd &&
268291719Sjkim		    lp->me_type == type) {
269291719Sjkim			goto exit;
27055714Skris		}
27155714Skris	}
272280297Sjkim
273280297Sjkim	/*
274291719Sjkim	 * Allocate an entry, populate it, and add it to the change list.
275280297Sjkim	 */
27655714Skris	mevp = calloc(1, sizeof(struct mevent));
277280297Sjkim	if (mevp == NULL) {
278280297Sjkim		goto exit;
27955714Skris	}
280291719Sjkim
281291719Sjkim	if (type == EVF_TIMER) {
282291719Sjkim		mevp->me_msecs = tfd;
283291719Sjkim		mevp->me_timid = mevent_timid++;
284291719Sjkim	} else
285291719Sjkim		mevp->me_fd = tfd;
286291719Sjkim	mevp->me_type = type;
287291719Sjkim	mevp->me_func = func;
288280297Sjkim	mevp->me_param = param;
289280297Sjkim
290280297Sjkim	LIST_INSERT_HEAD(&change_head, mevp, me_list);
29155714Skris	mevp->me_cq = 1;
292280297Sjkim	mevp->me_state = MEV_ADD;
293280297Sjkim	mevent_notify();
294280297Sjkim
295291719Sjkimexit:
296280297Sjkim	mevent_qunlock();
297280297Sjkim
298280297Sjkim	return (mevp);
29955714Skris}
300280297Sjkim
301280297Sjkimstatic int
302291719Sjkimmevent_update(struct mevent *evp, int newstate)
303291719Sjkim{
304291719Sjkim	/*
305291719Sjkim	 * It's not possible to enable/disable a deleted event
306280297Sjkim	 */
307100928Snectar	if (evp->me_state == MEV_DEL_PENDING)
308291719Sjkim		return (EINVAL);
309291719Sjkim
310291719Sjkim	/*
311280297Sjkim	 * No update needed if state isn't changing
31255714Skris	 */
313291719Sjkim	if (evp->me_state == newstate)
314291719Sjkim		return (0);
315291719Sjkim
316280297Sjkim	mevent_qlock();
31755714Skris
318291719Sjkim	evp->me_state = newstate;
319291719Sjkim
320291719Sjkim	/*
321291719Sjkim	 * Place the entry onto the changed list if not already there.
322291719Sjkim	 */
323291719Sjkim	if (evp->me_cq == 0) {
324291719Sjkim		evp->me_cq = 1;
325291719Sjkim		LIST_REMOVE(evp, me_list);
326291719Sjkim		LIST_INSERT_HEAD(&change_head, evp, me_list);
327291719Sjkim		mevent_notify();
328291719Sjkim	}
329291719Sjkim
330291719Sjkim	mevent_qunlock();
331280297Sjkim
33255714Skris	return (0);
333291719Sjkim}
334291719Sjkim
335291719Sjkimint
336291719Sjkimmevent_enable(struct mevent *evp)
337291719Sjkim{
338291719Sjkim
339280297Sjkim	return (mevent_update(evp, MEV_ENABLE));
340291719Sjkim}
341291719Sjkim
342291719Sjkimint
343291719Sjkimmevent_disable(struct mevent *evp)
34455714Skris{
345291719Sjkim
346291719Sjkim	return (mevent_update(evp, MEV_DISABLE));
347291719Sjkim}
348291719Sjkim
349291719Sjkimstatic int
350291719Sjkimmevent_delete_event(struct mevent *evp, int closefd)
351291719Sjkim{
352291719Sjkim	mevent_qlock();
353291719Sjkim
354291719Sjkim	/*
355291719Sjkim         * Place the entry onto the changed list if not already there, and
356291719Sjkim	 * mark as to be deleted.
357280297Sjkim         */
358280297Sjkim        if (evp->me_cq == 0) {
359291719Sjkim		evp->me_cq = 1;
360291719Sjkim		LIST_REMOVE(evp, me_list);
361291719Sjkim		LIST_INSERT_HEAD(&change_head, evp, me_list);
362291719Sjkim		mevent_notify();
363291719Sjkim        }
364280297Sjkim	evp->me_state = MEV_DEL_PENDING;
365280297Sjkim
366291719Sjkim	if (closefd)
367291719Sjkim		evp->me_closefd = 1;
368291719Sjkim
369291719Sjkim	mevent_qunlock();
370280297Sjkim
371280297Sjkim	return (0);
372280297Sjkim}
373280297Sjkim
/*
 * Delete an event and leave its descriptor open. Returns the result of
 * mevent_delete_event(evp, 0).
 */
int
mevent_delete(struct mevent *evp)
{
	const int closefd = 0;

	return (mevent_delete_event(evp, closefd));
}
380280297Sjkim
/*
 * Delete an event and request that its descriptor be closed. Returns the
 * result of mevent_delete_event(evp, 1).
 */
int
mevent_delete_close(struct mevent *evp)
{
	const int closefd = 1;

	return (mevent_delete_event(evp, closefd));
}
387280297Sjkim
388280297Sjkimstatic void
389280297Sjkimmevent_set_name(void)
390280297Sjkim{
391280297Sjkim
39255714Skris	pthread_set_name_np(mevent_tid, "mevent");
393280297Sjkim}
394280297Sjkim
39555714Skrisvoid
396280297Sjkimmevent_dispatch(void)
397280297Sjkim{
398280297Sjkim	struct kevent changelist[MEVENT_MAX];
399280297Sjkim	struct kevent eventlist[MEVENT_MAX];
400280297Sjkim	struct mevent *pipev;
401280297Sjkim	int mfd;
402280297Sjkim	int numev;
403280297Sjkim	int ret;
404280297Sjkim
405280297Sjkim	mevent_tid = pthread_self();
406280297Sjkim	mevent_set_name();
407280297Sjkim
408280297Sjkim	mfd = kqueue();
409280297Sjkim	assert(mfd > 0);
410280297Sjkim
411280297Sjkim	/*
412280297Sjkim	 * Open the pipe that will be used for other threads to force
41355714Skris	 * the blocking kqueue call to exit by writing to it. Set the
41455714Skris	 * descriptor to non-blocking.
415280297Sjkim	 */
416280297Sjkim	ret = pipe(mevent_pipefd);
41755714Skris	if (ret < 0) {
418280297Sjkim		perror("pipe");
419280297Sjkim		exit(0);
420280297Sjkim	}
421280297Sjkim
422280297Sjkim	/*
423280297Sjkim	 * Add internal event handler for the pipe write fd
424280297Sjkim	 */
425280297Sjkim	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
426280297Sjkim	assert(pipev != NULL);
427280297Sjkim
428280297Sjkim	for (;;) {
42955714Skris		/*
43055714Skris		 * Build changelist if required.
43155714Skris		 * XXX the changelist can be put into the blocking call
432280297Sjkim		 * to eliminate the extra syscall. Currently better for
433280297Sjkim		 * debug.
43455714Skris		 */
435280297Sjkim		numev = mevent_build(mfd, changelist);
436280297Sjkim		if (numev) {
437280297Sjkim			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
438280297Sjkim			if (ret == -1) {
439280297Sjkim				perror("Error return from kevent change");
440280297Sjkim			}
441280297Sjkim		}
442280297Sjkim
44355714Skris		/*
444280297Sjkim		 * Block awaiting events
445280297Sjkim		 */
446280297Sjkim		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
447280297Sjkim		if (ret == -1 && errno != EINTR) {
448280297Sjkim			perror("Error return from kevent monitor");
449280297Sjkim		}
450280297Sjkim
451280297Sjkim		/*
452280297Sjkim		 * Handle reported events
453280297Sjkim		 */
454280297Sjkim		mevent_handle(eventlist, ret);
455280297Sjkim	}
456280297Sjkim}
457280297Sjkim