1/*
2   Unix SMB/CIFS implementation.
3   main select loop and event handling
4   Copyright (C) Andrew Tridgell	2003-2005
5   Copyright (C) Stefan Metzmacher	2005-2009
6
7     ** NOTE! The following LGPL license applies to the tevent
8     ** library. This does NOT imply that all of Samba is released
9     ** under the LGPL
10
11   This library is free software; you can redistribute it and/or
12   modify it under the terms of the GNU Lesser General Public
13   License as published by the Free Software Foundation; either
14   version 3 of the License, or (at your option) any later version.
15
16   This library is distributed in the hope that it will be useful,
17   but WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public
22   License along with this library; if not, see <http://www.gnu.org/licenses/>.
23*/
24
25/*
26  This is SAMBA's default event loop code
27
  - we try to use epoll if configure detected support for it,
    otherwise we use select()
  - if epoll is broken on the system or the kernel doesn't support it
    at runtime we fall back to select()
32*/
33
#include "replace.h"
#include <limits.h>
#include "system/filesys.h"
#include "system/select.h"
#include "tevent.h"
#include "tevent_util.h"
#include "tevent_internal.h"
40
/*
  private state of the "standard" backend; std_event_context_init()
  allocates it and stores it in tevent_context->additional_data
*/
struct std_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/*
	 * the maximum file descriptor number in fd_events, used as the
	 * nfds argument for select(); EVENT_INVALID_MAXFD means it has
	 * to be recalculated by calc_maxfd()
	 */
	int maxfd;

	/*
	 * information for exiting from the event loop
	 * (set to EBADF by the select() path on fatal errors)
	 */
	int exit_code;

	/*
	 * when using epoll this is the handle from epoll_create,
	 * -1 means epoll is not in use and select() is used instead
	 */
	int epoll_fd;

	/*
	 * our pid at the time the epoll_fd was created, compared with
	 * getpid() by epoll_check_reopen() to notice a fork
	 */
	pid_t pid;
};
57
58/* use epoll if it is available */
59#if HAVE_EPOLL
60/*
61  called when a epoll call fails, and we should fallback
62  to using select
63*/
64static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason)
65{
66	tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
67		     "%s (%s) - falling back to select()\n",
68		     reason, strerror(errno));
69	close(std_ev->epoll_fd);
70	std_ev->epoll_fd = -1;
71	talloc_set_destructor(std_ev, NULL);
72}
73
74/*
75  map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
76*/
77static uint32_t epoll_map_flags(uint16_t flags)
78{
79	uint32_t ret = 0;
80	if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
81	if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
82	return ret;
83}
84
85/*
86 free the epoll fd
87*/
88static int epoll_ctx_destructor(struct std_event_context *std_ev)
89{
90	if (std_ev->epoll_fd != -1) {
91		close(std_ev->epoll_fd);
92	}
93	std_ev->epoll_fd = -1;
94	return 0;
95}
96
97/*
98 init the epoll fd
99*/
100static void epoll_init_ctx(struct std_event_context *std_ev)
101{
102	std_ev->epoll_fd = epoll_create(64);
103	std_ev->pid = getpid();
104	talloc_set_destructor(std_ev, epoll_ctx_destructor);
105}
106
107static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde);
108
109/*
110  reopen the epoll handle when our pid changes
111  see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
112  demonstration of why this is needed
113 */
114static void epoll_check_reopen(struct std_event_context *std_ev)
115{
116	struct tevent_fd *fde;
117
118	if (std_ev->pid == getpid()) {
119		return;
120	}
121
122	close(std_ev->epoll_fd);
123	std_ev->epoll_fd = epoll_create(64);
124	if (std_ev->epoll_fd == -1) {
125		tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
126			     "Failed to recreate epoll handle after fork\n");
127		return;
128	}
129	std_ev->pid = getpid();
130	for (fde=std_ev->ev->fd_events;fde;fde=fde->next) {
131		epoll_add_event(std_ev, fde);
132	}
133}
134
/*
 * epoll specific bits kept in tevent_fd->additional_flags
 *
 * HAS_EVENT:    set after EPOLL_CTL_ADD, cleared after EPOLL_CTL_DEL
 * REPORT_ERROR: set while the fde waits for TEVENT_FD_READ; only then
 *               EPOLLERR/EPOLLHUP are passed on to the handler
 * GOT_ERROR:    set once epoll_wait() reported EPOLLERR or EPOLLHUP
 *               for the fde
 */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
138
139/*
140 add the epoll event to the given fd_event
141*/
142static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde)
143{
144	struct epoll_event event;
145	if (std_ev->epoll_fd == -1) return;
146
147	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
148
149	/* if we don't want events yet, don't add an epoll_event */
150	if (fde->flags == 0) return;
151
152	ZERO_STRUCT(event);
153	event.events = epoll_map_flags(fde->flags);
154	event.data.ptr = fde;
155	if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
156		epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed");
157	}
158	fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
159
160	/* only if we want to read we want to tell the event handler about errors */
161	if (fde->flags & TEVENT_FD_READ) {
162		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
163	}
164}
165
166/*
167 delete the epoll event for given fd_event
168*/
169static void epoll_del_event(struct std_event_context *std_ev, struct tevent_fd *fde)
170{
171	struct epoll_event event;
172	if (std_ev->epoll_fd == -1) return;
173
174	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
175
176	/* if there's no epoll_event, we don't need to delete it */
177	if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
178
179	ZERO_STRUCT(event);
180	event.events = epoll_map_flags(fde->flags);
181	event.data.ptr = fde;
182	epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
183	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
184}
185
186/*
187 change the epoll event to the given fd_event
188*/
189static void epoll_mod_event(struct std_event_context *std_ev, struct tevent_fd *fde)
190{
191	struct epoll_event event;
192	if (std_ev->epoll_fd == -1) return;
193
194	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
195
196	ZERO_STRUCT(event);
197	event.events = epoll_map_flags(fde->flags);
198	event.data.ptr = fde;
199	if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
200		epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed");
201	}
202
203	/* only if we want to read we want to tell the event handler about errors */
204	if (fde->flags & TEVENT_FD_READ) {
205		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
206	}
207}
208
209static void epoll_change_event(struct std_event_context *std_ev, struct tevent_fd *fde)
210{
211	bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
212	bool want_read = (fde->flags & TEVENT_FD_READ);
213	bool want_write= (fde->flags & TEVENT_FD_WRITE);
214
215	if (std_ev->epoll_fd == -1) return;
216
217	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
218
219	/* there's already an event */
220	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
221		if (want_read || (want_write && !got_error)) {
222			epoll_mod_event(std_ev, fde);
223			return;
224		}
225		/*
226		 * if we want to match the select behavior, we need to remove the epoll_event
227		 * when the caller isn't interested in events.
228		 *
229		 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
230		 */
231		epoll_del_event(std_ev, fde);
232		return;
233	}
234
235	/* there's no epoll_event attached to the fde */
236	if (want_read || (want_write && !got_error)) {
237		epoll_add_event(std_ev, fde);
238		return;
239	}
240}
241
242/*
243  event loop handling using epoll
244*/
245static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp)
246{
247	int ret, i;
248#define MAXEVENTS 1
249	struct epoll_event events[MAXEVENTS];
250	int timeout = -1;
251
252	if (std_ev->epoll_fd == -1) return -1;
253
254	if (tvalp) {
255		/* it's better to trigger timed events a bit later than to early */
256		timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
257	}
258
259	if (std_ev->ev->signal_events &&
260	    tevent_common_check_signal(std_ev->ev)) {
261		return 0;
262	}
263
264	ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
265
266	if (ret == -1 && errno == EINTR && std_ev->ev->signal_events) {
267		if (tevent_common_check_signal(std_ev->ev)) {
268			return 0;
269		}
270	}
271
272	if (ret == -1 && errno != EINTR) {
273		epoll_fallback_to_select(std_ev, "epoll_wait() failed");
274		return -1;
275	}
276
277	if (ret == 0 && tvalp) {
278		/* we don't care about a possible delay here */
279		tevent_common_loop_timer_delay(std_ev->ev);
280		return 0;
281	}
282
283	for (i=0;i<ret;i++) {
284		struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
285						       struct tevent_fd);
286		uint16_t flags = 0;
287
288		if (fde == NULL) {
289			epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data");
290			return -1;
291		}
292		if (events[i].events & (EPOLLHUP|EPOLLERR)) {
293			fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
294			/*
295			 * if we only wait for TEVENT_FD_WRITE, we should not tell the
296			 * event handler about it, and remove the epoll_event,
297			 * as we only report errors when waiting for read events,
298			 * to match the select() behavior
299			 */
300			if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
301				epoll_del_event(std_ev, fde);
302				continue;
303			}
304			flags |= TEVENT_FD_READ;
305		}
306		if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
307		if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
308		if (flags) {
309			fde->handler(std_ev->ev, fde, flags, fde->private_data);
310			break;
311		}
312	}
313
314	return 0;
315}
316#else
/*
 * no epoll support at build time: the epoll helpers compile away to
 * nothing and epoll_event_loop() always evaluates to -1 (failure), so
 * std_event_loop_once() always ends up in the select() based loop
 */
#define epoll_init_ctx(std_ev)
#define epoll_add_event(std_ev,fde)
#define epoll_del_event(std_ev,fde)
#define epoll_change_event(std_ev,fde)
#define epoll_event_loop(std_ev,tvalp) (-1)
#define epoll_check_reopen(std_ev)
323#endif
324
325/*
326  create a std_event_context structure.
327*/
328static int std_event_context_init(struct tevent_context *ev)
329{
330	struct std_event_context *std_ev;
331
332	std_ev = talloc_zero(ev, struct std_event_context);
333	if (!std_ev) return -1;
334	std_ev->ev = ev;
335	std_ev->epoll_fd = -1;
336
337	epoll_init_ctx(std_ev);
338
339	ev->additional_data = std_ev;
340	return 0;
341}
342
343/*
344  recalculate the maxfd
345*/
346static void calc_maxfd(struct std_event_context *std_ev)
347{
348	struct tevent_fd *fde;
349
350	std_ev->maxfd = 0;
351	for (fde = std_ev->ev->fd_events; fde; fde = fde->next) {
352		if (fde->fd > std_ev->maxfd) {
353			std_ev->maxfd = fde->fd;
354		}
355	}
356}
357
358
/*
 * marks std_ev->maxfd as invalid; it is set when the fd event owning
 * the current maximum is destroyed and means that calc_maxfd() has to
 * recalculate the value before the next select()
 */
#define EVENT_INVALID_MAXFD (-1)
363
364/*
365  destroy an fd_event
366*/
367static int std_event_fd_destructor(struct tevent_fd *fde)
368{
369	struct tevent_context *ev = fde->event_ctx;
370	struct std_event_context *std_ev = NULL;
371
372	if (ev) {
373		std_ev = talloc_get_type(ev->additional_data,
374					 struct std_event_context);
375
376		epoll_check_reopen(std_ev);
377
378		if (std_ev->maxfd == fde->fd) {
379			std_ev->maxfd = EVENT_INVALID_MAXFD;
380		}
381
382		epoll_del_event(std_ev, fde);
383	}
384
385	return tevent_common_fd_destructor(fde);
386}
387
388/*
389  add a fd based event
390  return NULL on failure (memory allocation error)
391*/
392static struct tevent_fd *std_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
393					  int fd, uint16_t flags,
394					  tevent_fd_handler_t handler,
395					  void *private_data,
396					  const char *handler_name,
397					  const char *location)
398{
399	struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
400							   struct std_event_context);
401	struct tevent_fd *fde;
402
403	epoll_check_reopen(std_ev);
404
405	fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
406				   handler, private_data,
407				   handler_name, location);
408	if (!fde) return NULL;
409
410	if ((std_ev->maxfd != EVENT_INVALID_MAXFD)
411	    && (fde->fd > std_ev->maxfd)) {
412		std_ev->maxfd = fde->fd;
413	}
414	talloc_set_destructor(fde, std_event_fd_destructor);
415
416	epoll_add_event(std_ev, fde);
417
418	return fde;
419}
420
421/*
422  set the fd event flags
423*/
424static void std_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
425{
426	struct tevent_context *ev;
427	struct std_event_context *std_ev;
428
429	if (fde->flags == flags) return;
430
431	ev = fde->event_ctx;
432	std_ev = talloc_get_type(ev->additional_data, struct std_event_context);
433
434	fde->flags = flags;
435
436	epoll_check_reopen(std_ev);
437
438	epoll_change_event(std_ev, fde);
439}
440
441/*
442  event loop handling using select()
443*/
444static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp)
445{
446	fd_set r_fds, w_fds;
447	struct tevent_fd *fde;
448	int selrtn;
449
450	/* we maybe need to recalculate the maxfd */
451	if (std_ev->maxfd == EVENT_INVALID_MAXFD) {
452		calc_maxfd(std_ev);
453	}
454
455	FD_ZERO(&r_fds);
456	FD_ZERO(&w_fds);
457
458	/* setup any fd events */
459	for (fde = std_ev->ev->fd_events; fde; fde = fde->next) {
460		if (fde->fd < 0 || fde->fd >= FD_SETSIZE) {
461			std_ev->exit_code = EBADF;
462			return -1;
463		}
464
465		if (fde->flags & TEVENT_FD_READ) {
466			FD_SET(fde->fd, &r_fds);
467		}
468		if (fde->flags & TEVENT_FD_WRITE) {
469			FD_SET(fde->fd, &w_fds);
470		}
471	}
472
473	if (std_ev->ev->signal_events &&
474	    tevent_common_check_signal(std_ev->ev)) {
475		return 0;
476	}
477
478	selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
479
480	if (selrtn == -1 && errno == EINTR &&
481	    std_ev->ev->signal_events) {
482		tevent_common_check_signal(std_ev->ev);
483		return 0;
484	}
485
486	if (selrtn == -1 && errno == EBADF) {
487		/* the socket is dead! this should never
488		   happen as the socket should have first been
489		   made readable and that should have removed
490		   the event, so this must be a bug. This is a
491		   fatal error. */
492		tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL,
493			     "ERROR: EBADF on std_event_loop_once\n");
494		std_ev->exit_code = EBADF;
495		return -1;
496	}
497
498	if (selrtn == 0 && tvalp) {
499		/* we don't care about a possible delay here */
500		tevent_common_loop_timer_delay(std_ev->ev);
501		return 0;
502	}
503
504	if (selrtn > 0) {
505		/* at least one file descriptor is ready - check
506		   which ones and call the handler, being careful to allow
507		   the handler to remove itself when called */
508		for (fde = std_ev->ev->fd_events; fde; fde = fde->next) {
509			uint16_t flags = 0;
510
511			if (FD_ISSET(fde->fd, &r_fds)) flags |= TEVENT_FD_READ;
512			if (FD_ISSET(fde->fd, &w_fds)) flags |= TEVENT_FD_WRITE;
513			if (flags) {
514				fde->handler(std_ev->ev, fde, flags, fde->private_data);
515				break;
516			}
517		}
518	}
519
520	return 0;
521}
522
523/*
524  do a single event loop using the events defined in ev
525*/
526static int std_event_loop_once(struct tevent_context *ev, const char *location)
527{
528	struct std_event_context *std_ev = talloc_get_type(ev->additional_data,
529		 					   struct std_event_context);
530	struct timeval tval;
531
532	if (ev->signal_events &&
533	    tevent_common_check_signal(ev)) {
534		return 0;
535	}
536
537	if (ev->immediate_events &&
538	    tevent_common_loop_immediate(ev)) {
539		return 0;
540	}
541
542	tval = tevent_common_loop_timer_delay(ev);
543	if (tevent_timeval_is_zero(&tval)) {
544		return 0;
545	}
546
547	epoll_check_reopen(std_ev);
548
549	if (epoll_event_loop(std_ev, &tval) == 0) {
550		return 0;
551	}
552
553	return std_event_loop_select(std_ev, &tval);
554}
555
/*
 * the operations of the "standard" backend, registered by
 * tevent_standard_init(); context_init, add_fd, set_fd_flags and
 * loop_once are implemented in this file, everything else uses the
 * tevent_common_* functions
 */
static const struct tevent_ops std_event_ops = {
	.context_init		= std_event_context_init,
	.add_fd			= std_event_add_fd,
	.set_fd_close_fn	= tevent_common_fd_set_close_fn,
	.get_fd_flags		= tevent_common_fd_get_flags,
	.set_fd_flags		= std_event_set_fd_flags,
	.add_timer		= tevent_common_add_timer,
	.schedule_immediate	= tevent_common_schedule_immediate,
	.add_signal		= tevent_common_add_signal,
	.loop_once		= std_event_loop_once,
	.loop_wait		= tevent_common_loop_wait,
};
568
569
570bool tevent_standard_init(void)
571{
572	return tevent_register_backend("standard", &std_event_ops);
573}
574
575