1/*
2 * Submitted by David Pacheco (dp.spambait@gmail.com)
3 *
4 * Copyright 2006-2007 Niels Provos
5 * Copyright 2007-2012 Niels Provos and Nick Mathewson
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/*
31 * Copyright (c) 2007 Sun Microsystems. All rights reserved.
32 * Use is subject to license terms.
33 */
34
35/*
36 * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
37 * This implementation is loosely modeled after the one used for select(2) (in
38 * select.c).
39 *
 * The outstanding events are tracked in a data structure called evport_data.
 * Each entry in the ed_fds array corresponds to a file descriptor, and records
 * (in fdi_what) whether that fd is being watched for reading (EV_READ),
 * writing (EV_WRITE), or both.
44 *
45 * evport_add and evport_del update this data structure. evport_dispatch uses it
46 * to determine where to callback when an event occurs (which it gets from
47 * port_getn).
48 *
49 * Helper functions are used: grow() grows the file descriptor array as
50 * necessary when large fd's come in. reassociate() takes care of maintaining
51 * the proper file-descriptor/event-port associations.
52 *
53 * As in the select(2) implementation, signals are handled by evsignal.
54 */
55
56#include "event2/event-config.h"
57
58#include <sys/time.h>
59#include <sys/queue.h>
60#include <errno.h>
61#include <poll.h>
62#include <port.h>
63#include <signal.h>
64#include <stdio.h>
65#include <stdlib.h>
66#include <string.h>
67#include <time.h>
68#include <unistd.h>
69
70#include "event2/thread.h"
71
72#include "evthread-internal.h"
73#include "event-internal.h"
74#include "log-internal.h"
75#include "evsignal-internal.h"
76#include "evmap-internal.h"
77
/*
 * Default value for ed_nevents, the number of entries allocated in the ed_fds
 * table. The table is indexed by file descriptor, so the largest fd it can
 * hold is ed_nevents - 1. If an event is added for a file descriptor
 * F >= ed_nevents, evport_add() grows the table by a power-of-two factor
 * until F fits.
 */
#define DEFAULT_NFDS	16
84
85
/*
 * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on
 * any particular call. You can speed things up by increasing this, but it will
 * (obviously) require more memory: it sizes both the ed_pending array in
 * struct evport_data and the port_event_t buffer that evport_dispatch() keeps
 * on its stack.
 */
#define EVENTS_PER_GETN 8
92
/*
 * Per-file-descriptor information about what events we're subscribed to.
 * fdi_what is 0 when nothing is subscribed to on the descriptor.
 */

struct fd_info {
	short fdi_what;		/* combinations of EV_READ and EV_WRITE */
};
101
/*
 * Accessors for a struct fd_info pointer.
 *
 * FDI_HAS_READ / FDI_HAS_WRITE yield the masked bit itself (non-zero when
 * set), not a normalized 0/1. FDI_HAS_EVENTS is 1 if either bit is set.
 * FDI_TO_SYSEVENTS converts the subscription mask into the poll(2)-style
 * event mask (POLLIN and/or POLLOUT) handed to port_associate.
 *
 * Every expansion is fully parenthesized so the macros can be used inside
 * larger expressions, e.g. FDI_TO_SYSEVENTS(fdi) & POLLOUT.
 */
#define FDI_HAS_READ(fdi)  ((fdi)->fdi_what & EV_READ)
#define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE)
#define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
#define FDI_TO_SYSEVENTS(fdi) \
    ((FDI_HAS_READ(fdi) ? POLLIN : 0) | (FDI_HAS_WRITE(fdi) ? POLLOUT : 0))
107
/*
 * Backend state for one event_base. It is allocated by evport_init() and
 * reached by the other entry points through base->evbase.
 */
struct evport_data {
	int		ed_port;	/* event port for system events  */
	int		ed_nevents;	/* number of allocated fdi's	 */
	struct fd_info *ed_fds;		/* allocated fdi table		 */
	/*
	 * fd's reported by the last port_getn call; evport_dispatch()
	 * reassociates them with the port on its next invocation. A slot
	 * holding -1 is unused.
	 */
	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */
};
115
/* Backend entry points; they are defined further down in this file. */
static void *evport_init(struct event_base *base);
static int evport_add(struct event_base *base, int fd, short old,
    short events, void *p);
static int evport_del(struct event_base *base, int fd, short old,
    short events, void *p);
static int evport_dispatch(struct event_base *base, struct timeval *tv);
static void evport_dealloc(struct event_base *base);
121
/*
 * Operations table for this backend: its name, followed by the
 * init/add/del/dispatch/dealloc entry points defined in this file, and then
 * three settings described by the per-field comments below.
 */
const struct eventop evportops = {
	"evport",
	evport_init,
	evport_add,
	evport_del,
	evport_dispatch,
	evport_dealloc,
	1, /* need reinit */
	0, /* features */
	0, /* fdinfo length */
};
133
134/*
135 * Initialize the event port implementation.
136 */
137
138static void*
139evport_init(struct event_base *base)
140{
141	struct evport_data *evpd;
142	int i;
143
144	if (!(evpd = mm_calloc(1, sizeof(struct evport_data))))
145		return (NULL);
146
147	if ((evpd->ed_port = port_create()) == -1) {
148		mm_free(evpd);
149		return (NULL);
150	}
151
152	/*
153	 * Initialize file descriptor structure
154	 */
155	evpd->ed_fds = mm_calloc(DEFAULT_NFDS, sizeof(struct fd_info));
156	if (evpd->ed_fds == NULL) {
157		close(evpd->ed_port);
158		mm_free(evpd);
159		return (NULL);
160	}
161	evpd->ed_nevents = DEFAULT_NFDS;
162	for (i = 0; i < EVENTS_PER_GETN; i++)
163		evpd->ed_pending[i] = -1;
164
165	evsig_init(base);
166
167	return (evpd);
168}
169
170#ifdef CHECK_INVARIANTS
171/*
172 * Checks some basic properties about the evport_data structure. Because it
173 * checks all file descriptors, this function can be expensive when the maximum
174 * file descriptor ever used is rather large.
175 */
176
177static void
178check_evportop(struct evport_data *evpd)
179{
180	EVUTIL_ASSERT(evpd);
181	EVUTIL_ASSERT(evpd->ed_nevents > 0);
182	EVUTIL_ASSERT(evpd->ed_port > 0);
183	EVUTIL_ASSERT(evpd->ed_fds > 0);
184}
185
/*
 * Verifies very basic integrity of a given port_event: it must come from a
 * file descriptor source and carry no user pointer.
 */
static void
check_event(port_event_t* pevt)
{
	/*
	 * We've only registered for PORT_SOURCE_FD events. The only
	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
	 * but since we're not using port_alert either, we can assume
	 * PORT_SOURCE_FD.
	 */
	EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD);
	/* reassociate() always passes NULL as the user pointer. */
	EVUTIL_ASSERT(pevt->portev_user == NULL);
}
201
202#else
/* Invariant checking is compiled out: both checks are no-op expressions. */
#define check_evportop(evpd)	((void)0)
#define check_event(pevt)	((void)0)
205#endif /* CHECK_INVARIANTS */
206
207/*
208 * Doubles the size of the allocated file descriptor array.
209 */
210static int
211grow(struct evport_data *epdp, int factor)
212{
213	struct fd_info *tmp;
214	int oldsize = epdp->ed_nevents;
215	int newsize = factor * oldsize;
216	EVUTIL_ASSERT(factor > 1);
217
218	check_evportop(epdp);
219
220	tmp = mm_realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
221	if (NULL == tmp)
222		return -1;
223	epdp->ed_fds = tmp;
224	memset((char*) (epdp->ed_fds + oldsize), 0,
225	    (newsize - oldsize)*sizeof(struct fd_info));
226	epdp->ed_nevents = newsize;
227
228	check_evportop(epdp);
229
230	return 0;
231}
232
233
234/*
235 * (Re)associates the given file descriptor with the event port. The OS events
236 * are specified (implicitly) from the fd_info struct.
237 */
238static int
239reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
240{
241	int sysevents = FDI_TO_SYSEVENTS(fdip);
242
243	if (sysevents != 0) {
244		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
245				   fd, sysevents, NULL) == -1) {
246			event_warn("port_associate");
247			return (-1);
248		}
249	}
250
251	check_evportop(epdp);
252
253	return (0);
254}
255
256/*
257 * Main event loop - polls port_getn for some number of events, and processes
258 * them.
259 */
260
261static int
262evport_dispatch(struct event_base *base, struct timeval *tv)
263{
264	int i, res;
265	struct evport_data *epdp = base->evbase;
266	port_event_t pevtlist[EVENTS_PER_GETN];
267
268	/*
269	 * port_getn will block until it has at least nevents events. It will
270	 * also return how many it's given us (which may be more than we asked
271	 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
272	 * nevents.
273	 */
274	int nevents = 1;
275
276	/*
277	 * We have to convert a struct timeval to a struct timespec
278	 * (only difference is nanoseconds vs. microseconds). If no time-based
279	 * events are active, we should wait for I/O (and tv == NULL).
280	 */
281	struct timespec ts;
282	struct timespec *ts_p = NULL;
283	if (tv != NULL) {
284		ts.tv_sec = tv->tv_sec;
285		ts.tv_nsec = tv->tv_usec * 1000;
286		ts_p = &ts;
287	}
288
289	/*
290	 * Before doing anything else, we need to reassociate the events we hit
291	 * last time which need reassociation. See comment at the end of the
292	 * loop below.
293	 */
294	for (i = 0; i < EVENTS_PER_GETN; ++i) {
295		struct fd_info *fdi = NULL;
296		if (epdp->ed_pending[i] != -1) {
297			fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
298		}
299
300		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
301			int fd = epdp->ed_pending[i];
302			reassociate(epdp, fdi, fd);
303			epdp->ed_pending[i] = -1;
304		}
305	}
306
307	EVBASE_RELEASE_LOCK(base, th_base_lock);
308
309	res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
310	    (unsigned int *) &nevents, ts_p);
311
312	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
313
314	if (res == -1) {
315		if (errno == EINTR || errno == EAGAIN) {
316			return (0);
317		} else if (errno == ETIME) {
318			if (nevents == 0)
319				return (0);
320		} else {
321			event_warn("port_getn");
322			return (-1);
323		}
324	}
325
326	event_debug(("%s: port_getn reports %d events", __func__, nevents));
327
328	for (i = 0; i < nevents; ++i) {
329		struct fd_info *fdi;
330		port_event_t *pevt = &pevtlist[i];
331		int fd = (int) pevt->portev_object;
332
333		check_evportop(epdp);
334		check_event(pevt);
335		epdp->ed_pending[i] = fd;
336
337		/*
338		 * Figure out what kind of event it was
339		 * (because we have to pass this to the callback)
340		 */
341		res = 0;
342		if (pevt->portev_events & (POLLERR|POLLHUP)) {
343			res = EV_READ | EV_WRITE;
344		} else {
345			if (pevt->portev_events & POLLIN)
346				res |= EV_READ;
347			if (pevt->portev_events & POLLOUT)
348				res |= EV_WRITE;
349		}
350
351		/*
352		 * Check for the error situations or a hangup situation
353		 */
354		if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL))
355			res |= EV_READ|EV_WRITE;
356
357		EVUTIL_ASSERT(epdp->ed_nevents > fd);
358		fdi = &(epdp->ed_fds[fd]);
359
360		evmap_io_active(base, fd, res);
361	} /* end of all events gotten */
362
363	check_evportop(epdp);
364
365	return (0);
366}
367
368
369/*
370 * Adds the given event (so that you will be notified when it happens via
371 * the callback function).
372 */
373
374static int
375evport_add(struct event_base *base, int fd, short old, short events, void *p)
376{
377	struct evport_data *evpd = base->evbase;
378	struct fd_info *fdi;
379	int factor;
380	(void)p;
381
382	check_evportop(evpd);
383
384	/*
385	 * If necessary, grow the file descriptor info table
386	 */
387
388	factor = 1;
389	while (fd >= factor * evpd->ed_nevents)
390		factor *= 2;
391
392	if (factor > 1) {
393		if (-1 == grow(evpd, factor)) {
394			return (-1);
395		}
396	}
397
398	fdi = &evpd->ed_fds[fd];
399	fdi->fdi_what |= events;
400
401	return reassociate(evpd, fdi, fd);
402}
403
404/*
405 * Removes the given event from the list of events to wait for.
406 */
407
408static int
409evport_del(struct event_base *base, int fd, short old, short events, void *p)
410{
411	struct evport_data *evpd = base->evbase;
412	struct fd_info *fdi;
413	int i;
414	int associated = 1;
415	(void)p;
416
417	check_evportop(evpd);
418
419	if (evpd->ed_nevents < fd) {
420		return (-1);
421	}
422
423	for (i = 0; i < EVENTS_PER_GETN; ++i) {
424		if (evpd->ed_pending[i] == fd) {
425			associated = 0;
426			break;
427		}
428	}
429
430	fdi = &evpd->ed_fds[fd];
431	if (events & EV_READ)
432		fdi->fdi_what &= ~EV_READ;
433	if (events & EV_WRITE)
434		fdi->fdi_what &= ~EV_WRITE;
435
436	if (associated) {
437		if (!FDI_HAS_EVENTS(fdi) &&
438		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) {
439			/*
440			 * Ignore EBADFD error the fd could have been closed
441			 * before event_del() was called.
442			 */
443			if (errno != EBADFD) {
444				event_warn("port_dissociate");
445				return (-1);
446			}
447		} else {
448			if (FDI_HAS_EVENTS(fdi)) {
449				return (reassociate(evpd, fdi, fd));
450			}
451		}
452	} else {
453		if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) {
454			evpd->ed_pending[i] = -1;
455		}
456	}
457	return 0;
458}
459
460
461static void
462evport_dealloc(struct event_base *base)
463{
464	struct evport_data *evpd = base->evbase;
465
466	evsig_dealloc(base);
467
468	close(evpd->ed_port);
469
470	if (evpd->ed_fds)
471		mm_free(evpd->ed_fds);
472	mm_free(evpd);
473}
474