1/* 2 Unix SMB/CIFS implementation. 3 main select loop and event handling 4 Copyright (C) Andrew Tridgell 2003-2005 5 Copyright (C) Stefan Metzmacher 2005-2009 6 7 ** NOTE! The following LGPL license applies to the tevent 8 ** library. This does NOT imply that all of Samba is released 9 ** under the LGPL 10 11 This library is free software; you can redistribute it and/or 12 modify it under the terms of the GNU Lesser General Public 13 License as published by the Free Software Foundation; either 14 version 3 of the License, or (at your option) any later version. 15 16 This library is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 Lesser General Public License for more details. 20 21 You should have received a copy of the GNU Lesser General Public 22 License along with this library; if not, see <http://www.gnu.org/licenses/>. 23*/ 24 25/* 26 This is SAMBA's default event loop code 27 28 - we try to use epoll if configure detected support for it 29 otherwise we use select() 30 - if epoll is broken on the system or the kernel doesn't support it 31 at runtime we fallback to select() 32*/ 33 34#include "replace.h" 35#include "system/filesys.h" 36#include "system/select.h" 37#include "tevent.h" 38#include "tevent_util.h" 39#include "tevent_internal.h" 40 41struct std_event_context { 42 /* a pointer back to the generic event_context */ 43 struct tevent_context *ev; 44 45 /* the maximum file descriptor number in fd_events */ 46 int maxfd; 47 48 /* information for exiting from the event loop */ 49 int exit_code; 50 51 /* when using epoll this is the handle from epoll_create */ 52 int epoll_fd; 53 54 /* our pid at the time the epoll_fd was created */ 55 pid_t pid; 56}; 57 58/* use epoll if it is available */ 59#if HAVE_EPOLL 60/* 61 called when a epoll call fails, and we should fallback 62 to using select 63*/ 64static void epoll_fallback_to_select(struct std_event_context *std_ev, const char *reason) 65{ 66 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL, 67 "%s (%s) - falling back to select()\n", 68 reason, strerror(errno)); 69 close(std_ev->epoll_fd); 70 std_ev->epoll_fd = -1; 71 talloc_set_destructor(std_ev, NULL); 72} 73 74/* 75 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT 76*/ 77static uint32_t epoll_map_flags(uint16_t flags) 78{ 79 uint32_t ret = 0; 80 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP); 81 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP); 82 return ret; 83} 84 85/* 86 free the epoll fd 87*/ 88static int epoll_ctx_destructor(struct std_event_context *std_ev) 89{ 90 if (std_ev->epoll_fd != -1) { 91 close(std_ev->epoll_fd); 92 } 93 std_ev->epoll_fd = -1; 94 return 0; 95} 96 97/* 98 init the epoll fd 99*/ 100static void epoll_init_ctx(struct std_event_context *std_ev) 101{ 102 std_ev->epoll_fd = epoll_create(64); 103 std_ev->pid = getpid(); 104 talloc_set_destructor(std_ev, epoll_ctx_destructor); 105} 106 107static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde); 108 109/* 110 reopen the epoll handle when our pid changes 111 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an 112 demonstration of why this is needed 113 */ 114static void epoll_check_reopen(struct std_event_context *std_ev) 115{ 116 struct tevent_fd *fde; 117 118 if (std_ev->pid == getpid()) { 119 return; 120 } 121 122 close(std_ev->epoll_fd); 123 std_ev->epoll_fd = epoll_create(64); 124 if (std_ev->epoll_fd == -1) { 125 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL, 126 "Failed to recreate epoll handle after fork\n"); 127 return; 128 } 129 std_ev->pid = getpid(); 130 for (fde=std_ev->ev->fd_events;fde;fde=fde->next) { 131 epoll_add_event(std_ev, fde); 132 } 133} 134 135#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0) 136#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1) 137#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2) 138 139/* 140 add the epoll event to the given fd_event 141*/ 142static void epoll_add_event(struct std_event_context *std_ev, struct tevent_fd *fde) 143{ 144 struct epoll_event event; 145 if (std_ev->epoll_fd == -1) return; 146 147 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 148 149 /* if we don't want events yet, don't add an epoll_event */ 150 if (fde->flags == 0) return; 151 152 ZERO_STRUCT(event); 153 event.events = epoll_map_flags(fde->flags); 154 event.data.ptr = fde; 155 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) { 156 epoll_fallback_to_select(std_ev, "EPOLL_CTL_ADD failed"); 157 } 158 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT; 159 160 /* only if we want to read we want to tell the event handler about errors */ 161 if (fde->flags & TEVENT_FD_READ) { 162 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 163 } 164} 165 166/* 167 delete the epoll event for given fd_event 168*/ 169static void epoll_del_event(struct std_event_context *std_ev, struct tevent_fd *fde) 170{ 171 struct epoll_event event; 172 if (std_ev->epoll_fd == -1) return; 173 174 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 175 176 /* if there's no epoll_event, we don't need to delete it */ 177 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return; 178 179 ZERO_STRUCT(event); 180 event.events = epoll_map_flags(fde->flags); 181 event.data.ptr = fde; 182 epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event); 183 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT; 184} 185 186/* 187 change the epoll event to the given fd_event 188*/ 189static void epoll_mod_event(struct std_event_context *std_ev, struct tevent_fd *fde) 190{ 191 struct epoll_event event; 192 if (std_ev->epoll_fd == -1) return; 193 194 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 195 196 ZERO_STRUCT(event); 197 event.events = epoll_map_flags(fde->flags); 198 event.data.ptr = fde; 199 if (epoll_ctl(std_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) { 200 epoll_fallback_to_select(std_ev, "EPOLL_CTL_MOD failed"); 201 } 202 203 /* only if we want to read we want to tell the event handler about errors */ 204 if (fde->flags & TEVENT_FD_READ) { 205 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 206 } 207} 208 209static void epoll_change_event(struct std_event_context *std_ev, struct tevent_fd *fde) 210{ 211 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR); 212 bool want_read = (fde->flags & TEVENT_FD_READ); 213 bool want_write= (fde->flags & TEVENT_FD_WRITE); 214 215 if (std_ev->epoll_fd == -1) return; 216 217 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR; 218 219 /* there's already an event */ 220 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) { 221 if (want_read || (want_write && !got_error)) { 222 epoll_mod_event(std_ev, fde); 223 return; 224 } 225 /* 226 * if we want to match the select behavior, we need to remove the epoll_event 227 * when the caller isn't interested in events. 228 * 229 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them 230 */ 231 epoll_del_event(std_ev, fde); 232 return; 233 } 234 235 /* there's no epoll_event attached to the fde */ 236 if (want_read || (want_write && !got_error)) { 237 epoll_add_event(std_ev, fde); 238 return; 239 } 240} 241 242/* 243 event loop handling using epoll 244*/ 245static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tvalp) 246{ 247 int ret, i; 248#define MAXEVENTS 1 249 struct epoll_event events[MAXEVENTS]; 250 int timeout = -1; 251 252 if (std_ev->epoll_fd == -1) return -1; 253 254 if (tvalp) { 255 /* it's better to trigger timed events a bit later than to early */ 256 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000); 257 } 258 259 if (std_ev->ev->signal_events && 260 tevent_common_check_signal(std_ev->ev)) { 261 return 0; 262 } 263 264 ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout); 265 266 if (ret == -1 && errno == EINTR && std_ev->ev->signal_events) { 267 if (tevent_common_check_signal(std_ev->ev)) { 268 return 0; 269 } 270 } 271 272 if (ret == -1 && errno != EINTR) { 273 epoll_fallback_to_select(std_ev, "epoll_wait() failed"); 274 return -1; 275 } 276 277 if (ret == 0 && tvalp) { 278 /* we don't care about a possible delay here */ 279 tevent_common_loop_timer_delay(std_ev->ev); 280 return 0; 281 } 282 283 for (i=0;i<ret;i++) { 284 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr, 285 struct tevent_fd); 286 uint16_t flags = 0; 287 288 if (fde == NULL) { 289 epoll_fallback_to_select(std_ev, "epoll_wait() gave bad data"); 290 return -1; 291 } 292 if (events[i].events & (EPOLLHUP|EPOLLERR)) { 293 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR; 294 /* 295 * if we only wait for TEVENT_FD_WRITE, we should not tell the 296 * event handler about it, and remove the epoll_event, 297 * as we only report errors when waiting for read events, 298 * to match the select() behavior 299 */ 300 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) { 301 epoll_del_event(std_ev, fde); 302 continue; 303 } 304 flags |= TEVENT_FD_READ; 305 } 306 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ; 307 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE; 308 if (flags) { 309 fde->handler(std_ev->ev, fde, flags, fde->private_data); 310 break; 311 } 312 } 313 314 return 0; 315} 316#else 317#define epoll_init_ctx(std_ev) 318#define epoll_add_event(std_ev,fde) 319#define epoll_del_event(std_ev,fde) 320#define epoll_change_event(std_ev,fde) 321#define epoll_event_loop(std_ev,tvalp) (-1) 322#define epoll_check_reopen(std_ev) 323#endif 324 325/* 326 create a std_event_context structure. 327*/ 328static int std_event_context_init(struct tevent_context *ev) 329{ 330 struct std_event_context *std_ev; 331 332 std_ev = talloc_zero(ev, struct std_event_context); 333 if (!std_ev) return -1; 334 std_ev->ev = ev; 335 std_ev->epoll_fd = -1; 336 337 epoll_init_ctx(std_ev); 338 339 ev->additional_data = std_ev; 340 return 0; 341} 342 343/* 344 recalculate the maxfd 345*/ 346static void calc_maxfd(struct std_event_context *std_ev) 347{ 348 struct tevent_fd *fde; 349 350 std_ev->maxfd = 0; 351 for (fde = std_ev->ev->fd_events; fde; fde = fde->next) { 352 if (fde->fd > std_ev->maxfd) { 353 std_ev->maxfd = fde->fd; 354 } 355 } 356} 357 358 359/* to mark the ev->maxfd invalid 360 * this means we need to recalculate it 361 */ 362#define EVENT_INVALID_MAXFD (-1) 363 364/* 365 destroy an fd_event 366*/ 367static int std_event_fd_destructor(struct tevent_fd *fde) 368{ 369 struct tevent_context *ev = fde->event_ctx; 370 struct std_event_context *std_ev = NULL; 371 372 if (ev) { 373 std_ev = talloc_get_type(ev->additional_data, 374 struct std_event_context); 375 376 epoll_check_reopen(std_ev); 377 378 if (std_ev->maxfd == fde->fd) { 379 std_ev->maxfd = EVENT_INVALID_MAXFD; 380 } 381 382 epoll_del_event(std_ev, fde); 383 } 384 385 return tevent_common_fd_destructor(fde); 386} 387 388/* 389 add a fd based event 390 return NULL on failure (memory allocation error) 391*/ 392static struct tevent_fd *std_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx, 393 int fd, uint16_t flags, 394 tevent_fd_handler_t handler, 395 void *private_data, 396 const char *handler_name, 397 const char *location) 398{ 399 struct std_event_context *std_ev = talloc_get_type(ev->additional_data, 400 struct std_event_context); 401 struct tevent_fd *fde; 402 403 epoll_check_reopen(std_ev); 404 405 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags, 406 handler, private_data, 407 handler_name, location); 408 if (!fde) return NULL; 409 410 if ((std_ev->maxfd != EVENT_INVALID_MAXFD) 411 && (fde->fd > std_ev->maxfd)) { 412 std_ev->maxfd = fde->fd; 413 } 414 talloc_set_destructor(fde, std_event_fd_destructor); 415 416 epoll_add_event(std_ev, fde); 417 418 return fde; 419} 420 421/* 422 set the fd event flags 423*/ 424static void std_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags) 425{ 426 struct tevent_context *ev; 427 struct std_event_context *std_ev; 428 429 if (fde->flags == flags) return; 430 431 ev = fde->event_ctx; 432 std_ev = talloc_get_type(ev->additional_data, struct std_event_context); 433 434 fde->flags = flags; 435 436 epoll_check_reopen(std_ev); 437 438 epoll_change_event(std_ev, fde); 439} 440 441/* 442 event loop handling using select() 443*/ 444static int std_event_loop_select(struct std_event_context *std_ev, struct timeval *tvalp) 445{ 446 fd_set r_fds, w_fds; 447 struct tevent_fd *fde; 448 int selrtn; 449 450 /* we maybe need to recalculate the maxfd */ 451 if (std_ev->maxfd == EVENT_INVALID_MAXFD) { 452 calc_maxfd(std_ev); 453 } 454 455 FD_ZERO(&r_fds); 456 FD_ZERO(&w_fds); 457 458 /* setup any fd events */ 459 for (fde = std_ev->ev->fd_events; fde; fde = fde->next) { 460 if (fde->fd < 0 || fde->fd >= FD_SETSIZE) { 461 std_ev->exit_code = EBADF; 462 return -1; 463 } 464 465 if (fde->flags & TEVENT_FD_READ) { 466 FD_SET(fde->fd, &r_fds); 467 } 468 if (fde->flags & TEVENT_FD_WRITE) { 469 FD_SET(fde->fd, &w_fds); 470 } 471 } 472 473 if (std_ev->ev->signal_events && 474 tevent_common_check_signal(std_ev->ev)) { 475 return 0; 476 } 477 478 selrtn = select(std_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp); 479 480 if (selrtn == -1 && errno == EINTR && 481 std_ev->ev->signal_events) { 482 tevent_common_check_signal(std_ev->ev); 483 return 0; 484 } 485 486 if (selrtn == -1 && errno == EBADF) { 487 /* the socket is dead! this should never 488 happen as the socket should have first been 489 made readable and that should have removed 490 the event, so this must be a bug. This is a 491 fatal error. */ 492 tevent_debug(std_ev->ev, TEVENT_DEBUG_FATAL, 493 "ERROR: EBADF on std_event_loop_once\n"); 494 std_ev->exit_code = EBADF; 495 return -1; 496 } 497 498 if (selrtn == 0 && tvalp) { 499 /* we don't care about a possible delay here */ 500 tevent_common_loop_timer_delay(std_ev->ev); 501 return 0; 502 } 503 504 if (selrtn > 0) { 505 /* at least one file descriptor is ready - check 506 which ones and call the handler, being careful to allow 507 the handler to remove itself when called */ 508 for (fde = std_ev->ev->fd_events; fde; fde = fde->next) { 509 uint16_t flags = 0; 510 511 if (FD_ISSET(fde->fd, &r_fds)) flags |= TEVENT_FD_READ; 512 if (FD_ISSET(fde->fd, &w_fds)) flags |= TEVENT_FD_WRITE; 513 if (flags) { 514 fde->handler(std_ev->ev, fde, flags, fde->private_data); 515 break; 516 } 517 } 518 } 519 520 return 0; 521} 522 523/* 524 do a single event loop using the events defined in ev 525*/ 526static int std_event_loop_once(struct tevent_context *ev, const char *location) 527{ 528 struct std_event_context *std_ev = talloc_get_type(ev->additional_data, 529 struct std_event_context); 530 struct timeval tval; 531 532 if (ev->signal_events && 533 tevent_common_check_signal(ev)) { 534 return 0; 535 } 536 537 if (ev->immediate_events && 538 tevent_common_loop_immediate(ev)) { 539 return 0; 540 } 541 542 tval = tevent_common_loop_timer_delay(ev); 543 if (tevent_timeval_is_zero(&tval)) { 544 return 0; 545 } 546 547 epoll_check_reopen(std_ev); 548 549 if (epoll_event_loop(std_ev, &tval) == 0) { 550 return 0; 551 } 552 553 return std_event_loop_select(std_ev, &tval); 554} 555 556static const struct tevent_ops std_event_ops = { 557 .context_init = std_event_context_init, 558 .add_fd = std_event_add_fd, 559 .set_fd_close_fn = tevent_common_fd_set_close_fn, 560 .get_fd_flags = tevent_common_fd_get_flags, 561 .set_fd_flags = std_event_set_fd_flags, 562 .add_timer = tevent_common_add_timer, 563 .schedule_immediate = tevent_common_schedule_immediate, 564 .add_signal = tevent_common_add_signal, 565 .loop_once = std_event_loop_once, 566 .loop_wait = tevent_common_loop_wait, 567}; 568 569 570bool tevent_standard_init(void) 571{ 572 return tevent_register_backend("standard", &std_event_ops); 573} 574 575