1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31/*
32 * Micro event library for FreeBSD, designed for a single i/o thread
33 * using kqueue, and having events be persistent by default.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38
39#include <assert.h>
40#ifndef WITHOUT_CAPSICUM
41#include <capsicum_helpers.h>
42#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
51
52#include <sys/types.h>
53#ifndef WITHOUT_CAPSICUM
54#include <sys/capsicum.h>
55#endif
56#include <sys/event.h>
57#include <sys/time.h>
58
59#include <pthread.h>
60#include <pthread_np.h>
61
62#include "mevent.h"
63
/*
 * Size of the kevent change/event arrays used by the dispatch loop and of
 * the scratch buffer used to drain the wakeup pipe.
 */
#define	MEVENT_MAX	64

extern const char *vmname;

/* Serializes access to the event lists and to the entries linked on them. */
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;

/* Thread that runs mevent_dispatch(); recorded when the loop starts. */
static pthread_t mevent_tid;

/* Wakeup pipe: [0] is watched by the dispatch loop, [1] is written to. */
static int mevent_pipefd[2];

/* Next kqueue identifier to hand out to an EVF_TIMER event. */
static int mevent_timid = 43;
72
73struct mevent {
74	void	(*me_func)(int, enum ev_type, void *);
75#define me_msecs me_fd
76	int	me_fd;
77	int	me_timid;
78	enum ev_type me_type;
79	void    *me_param;
80	int	me_cq;
81	int	me_state; /* Desired kevent flags. */
82	int	me_closefd;
83	LIST_ENTRY(mevent) me_list;
84};
85
86static LIST_HEAD(listhead, mevent) global_head, change_head;
87
88static void
89mevent_qlock(void)
90{
91	pthread_mutex_lock(&mevent_lmutex);
92}
93
94static void
95mevent_qunlock(void)
96{
97	pthread_mutex_unlock(&mevent_lmutex);
98}
99
100static void
101mevent_pipe_read(int fd, enum ev_type type, void *param)
102{
103	char buf[MEVENT_MAX];
104	int status;
105
106	/*
107	 * Drain the pipe read side. The fd is non-blocking so this is
108	 * safe to do.
109	 */
110	do {
111		status = read(fd, buf, sizeof(buf));
112	} while (status == MEVENT_MAX);
113}
114
115static void
116mevent_notify(void)
117{
118	char c = '\0';
119
120	/*
121	 * If calling from outside the i/o thread, write a byte on the
122	 * pipe to force the i/o thread to exit the blocking kevent call.
123	 */
124	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
125		write(mevent_pipefd[1], &c, 1);
126	}
127}
128
129static int
130mevent_kq_filter(struct mevent *mevp)
131{
132	int retval;
133
134	retval = 0;
135
136	if (mevp->me_type == EVF_READ)
137		retval = EVFILT_READ;
138
139	if (mevp->me_type == EVF_WRITE)
140		retval = EVFILT_WRITE;
141
142	if (mevp->me_type == EVF_TIMER)
143		retval = EVFILT_TIMER;
144
145	if (mevp->me_type == EVF_SIGNAL)
146		retval = EVFILT_SIGNAL;
147
148	return (retval);
149}
150
151static int
152mevent_kq_flags(struct mevent *mevp)
153{
154	return (mevp->me_state);
155}
156
/*
 * Filter-specific kevent flags for mevp.  None are used at present, so
 * the result is always zero regardless of the entry.
 */
static int
mevent_kq_fflags(struct mevent *mevp)
{
	int fflags;

	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	fflags = 0;

	return (fflags);
}
163
164static int
165mevent_build(int mfd, struct kevent *kev)
166{
167	struct mevent *mevp, *tmpp;
168	int i;
169
170	i = 0;
171
172	mevent_qlock();
173
174	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
175		if (mevp->me_closefd) {
176			/*
177			 * A close of the file descriptor will remove the
178			 * event
179			 */
180			close(mevp->me_fd);
181		} else {
182			if (mevp->me_type == EVF_TIMER) {
183				kev[i].ident = mevp->me_timid;
184				kev[i].data = mevp->me_msecs;
185			} else {
186				kev[i].ident = mevp->me_fd;
187				kev[i].data = 0;
188			}
189			kev[i].filter = mevent_kq_filter(mevp);
190			kev[i].flags = mevent_kq_flags(mevp);
191			kev[i].fflags = mevent_kq_fflags(mevp);
192			kev[i].udata = mevp;
193			i++;
194		}
195
196		mevp->me_cq = 0;
197		LIST_REMOVE(mevp, me_list);
198
199		if (mevp->me_state & EV_DELETE) {
200			free(mevp);
201		} else {
202			/*
203			 * We need to add the event only once, so we can
204			 * reset the EV_ADD bit after it has been propagated
205			 * to the kevent() arguments the first time.
206			 */
207			mevp->me_state &= ~EV_ADD;
208			LIST_INSERT_HEAD(&global_head, mevp, me_list);
209		}
210
211		assert(i < MEVENT_MAX);
212	}
213
214	mevent_qunlock();
215
216	return (i);
217}
218
219static void
220mevent_handle(struct kevent *kev, int numev)
221{
222	struct mevent *mevp;
223	int i;
224
225	for (i = 0; i < numev; i++) {
226		mevp = kev[i].udata;
227
228		/* XXX check for EV_ERROR ? */
229
230		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
231	}
232}
233
234static struct mevent *
235mevent_add_state(int tfd, enum ev_type type,
236	   void (*func)(int, enum ev_type, void *), void *param,
237	   int state)
238{
239	struct mevent *lp, *mevp;
240
241	if (tfd < 0 || func == NULL) {
242		return (NULL);
243	}
244
245	mevp = NULL;
246
247	mevent_qlock();
248
249	/*
250	 * Verify that the fd/type tuple is not present in any list
251	 */
252	LIST_FOREACH(lp, &global_head, me_list) {
253		if (type != EVF_TIMER && lp->me_fd == tfd &&
254		    lp->me_type == type) {
255			goto exit;
256		}
257	}
258
259	LIST_FOREACH(lp, &change_head, me_list) {
260		if (type != EVF_TIMER && lp->me_fd == tfd &&
261		    lp->me_type == type) {
262			goto exit;
263		}
264	}
265
266	/*
267	 * Allocate an entry, populate it, and add it to the change list.
268	 */
269	mevp = calloc(1, sizeof(struct mevent));
270	if (mevp == NULL) {
271		goto exit;
272	}
273
274	if (type == EVF_TIMER) {
275		mevp->me_msecs = tfd;
276		mevp->me_timid = mevent_timid++;
277	} else
278		mevp->me_fd = tfd;
279	mevp->me_type = type;
280	mevp->me_func = func;
281	mevp->me_param = param;
282
283	LIST_INSERT_HEAD(&change_head, mevp, me_list);
284	mevp->me_cq = 1;
285	mevp->me_state = state;
286	mevent_notify();
287
288exit:
289	mevent_qunlock();
290
291	return (mevp);
292}
293
294struct mevent *
295mevent_add(int tfd, enum ev_type type,
296	   void (*func)(int, enum ev_type, void *), void *param)
297{
298
299	return (mevent_add_state(tfd, type, func, param, EV_ADD));
300}
301
302struct mevent *
303mevent_add_disabled(int tfd, enum ev_type type,
304		    void (*func)(int, enum ev_type, void *), void *param)
305{
306
307	return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE));
308}
309
310static int
311mevent_update(struct mevent *evp, bool enable)
312{
313	int newstate;
314
315	mevent_qlock();
316
317	/*
318	 * It's not possible to enable/disable a deleted event
319	 */
320	assert((evp->me_state & EV_DELETE) == 0);
321
322	newstate = evp->me_state;
323	if (enable) {
324		newstate |= EV_ENABLE;
325		newstate &= ~EV_DISABLE;
326	} else {
327		newstate |= EV_DISABLE;
328		newstate &= ~EV_ENABLE;
329	}
330
331	/*
332	 * No update needed if state isn't changing
333	 */
334	if (evp->me_state != newstate) {
335		evp->me_state = newstate;
336
337		/*
338		 * Place the entry onto the changed list if not
339		 * already there.
340		 */
341		if (evp->me_cq == 0) {
342			evp->me_cq = 1;
343			LIST_REMOVE(evp, me_list);
344			LIST_INSERT_HEAD(&change_head, evp, me_list);
345			mevent_notify();
346		}
347	}
348
349	mevent_qunlock();
350
351	return (0);
352}
353
/*
 * Enable a previously registered event.  Always returns 0.
 */
int
mevent_enable(struct mevent *evp)
{
	int error;

	error = mevent_update(evp, true);

	return (error);
}
360
/*
 * Disable a previously registered event.  Always returns 0.
 */
int
mevent_disable(struct mevent *evp)
{
	int error;

	error = mevent_update(evp, false);

	return (error);
}
367
368static int
369mevent_delete_event(struct mevent *evp, int closefd)
370{
371	mevent_qlock();
372
373	/*
374         * Place the entry onto the changed list if not already there, and
375	 * mark as to be deleted.
376         */
377        if (evp->me_cq == 0) {
378		evp->me_cq = 1;
379		LIST_REMOVE(evp, me_list);
380		LIST_INSERT_HEAD(&change_head, evp, me_list);
381		mevent_notify();
382        }
383	evp->me_state = EV_DELETE;
384
385	if (closefd)
386		evp->me_closefd = 1;
387
388	mevent_qunlock();
389
390	return (0);
391}
392
/*
 * Delete an event, leaving its descriptor open.  Always returns 0.
 */
int
mevent_delete(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 0);

	return (error);
}
399
/*
 * Delete an event and have its descriptor closed.  Always returns 0.
 */
int
mevent_delete_close(struct mevent *evp)
{
	int error;

	error = mevent_delete_event(evp, 1);

	return (error);
}
406
407static void
408mevent_set_name(void)
409{
410
411	pthread_set_name_np(mevent_tid, "mevent");
412}
413
414void
415mevent_dispatch(void)
416{
417	struct kevent changelist[MEVENT_MAX];
418	struct kevent eventlist[MEVENT_MAX];
419	struct mevent *pipev;
420	int mfd;
421	int numev;
422	int ret;
423#ifndef WITHOUT_CAPSICUM
424	cap_rights_t rights;
425#endif
426
427	mevent_tid = pthread_self();
428	mevent_set_name();
429
430	mfd = kqueue();
431	assert(mfd > 0);
432
433#ifndef WITHOUT_CAPSICUM
434	cap_rights_init(&rights, CAP_KQUEUE);
435	if (caph_rights_limit(mfd, &rights) == -1)
436		errx(EX_OSERR, "Unable to apply rights for sandbox");
437#endif
438
439	/*
440	 * Open the pipe that will be used for other threads to force
441	 * the blocking kqueue call to exit by writing to it. Set the
442	 * descriptor to non-blocking.
443	 */
444	ret = pipe(mevent_pipefd);
445	if (ret < 0) {
446		perror("pipe");
447		exit(0);
448	}
449
450#ifndef WITHOUT_CAPSICUM
451	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
452	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
453		errx(EX_OSERR, "Unable to apply rights for sandbox");
454	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
455		errx(EX_OSERR, "Unable to apply rights for sandbox");
456#endif
457
458	/*
459	 * Add internal event handler for the pipe write fd
460	 */
461	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
462	assert(pipev != NULL);
463
464	for (;;) {
465		/*
466		 * Build changelist if required.
467		 * XXX the changelist can be put into the blocking call
468		 * to eliminate the extra syscall. Currently better for
469		 * debug.
470		 */
471		numev = mevent_build(mfd, changelist);
472		if (numev) {
473			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
474			if (ret == -1) {
475				perror("Error return from kevent change");
476			}
477		}
478
479		/*
480		 * Block awaiting events
481		 */
482		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
483		if (ret == -1 && errno != EINTR) {
484			perror("Error return from kevent monitor");
485		}
486
487		/*
488		 * Handle reported events
489		 */
490		mevent_handle(eventlist, ret);
491	}
492}
493