/* epoll.c revision 289166 */
1178825Sdfr/* Licensed to the Apache Software Foundation (ASF) under one or more 2233294Sstas * contributor license agreements. See the NOTICE file distributed with 3233294Sstas * this work for additional information regarding copyright ownership. 4233294Sstas * The ASF licenses this file to You under the Apache License, Version 2.0 5178825Sdfr * (the "License"); you may not use this file except in compliance with 6233294Sstas * the License. You may obtain a copy of the License at 7233294Sstas * 8233294Sstas * http://www.apache.org/licenses/LICENSE-2.0 9178825Sdfr * 10233294Sstas * Unless required by applicable law or agreed to in writing, software 11233294Sstas * distributed under the License is distributed on an "AS IS" BASIS, 12178825Sdfr * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13233294Sstas * See the License for the specific language governing permissions and 14233294Sstas * limitations under the License. 15233294Sstas */ 16178825Sdfr 17233294Sstas#include "apr.h" 18233294Sstas#include "apr_poll.h" 19233294Sstas#include "apr_time.h" 20178825Sdfr#include "apr_portable.h" 21233294Sstas#include "apr_arch_file_io.h" 22233294Sstas#include "apr_arch_networkio.h" 23233294Sstas#include "apr_arch_poll_private.h" 24233294Sstas#include "apr_arch_inherit.h" 25233294Sstas 26233294Sstas#if defined(HAVE_EPOLL) 27233294Sstas 28233294Sstasstatic apr_int16_t get_epoll_event(apr_int16_t event) 29233294Sstas{ 30233294Sstas apr_int16_t rv = 0; 31233294Sstas 32178825Sdfr if (event & APR_POLLIN) 33178825Sdfr rv |= EPOLLIN; 34178825Sdfr if (event & APR_POLLPRI) 35178825Sdfr rv |= EPOLLPRI; 36178825Sdfr if (event & APR_POLLOUT) 37178825Sdfr rv |= EPOLLOUT; 38178825Sdfr /* APR_POLLNVAL is not handled by epoll. 
EPOLLERR and EPOLLHUP are return-only */ 39178825Sdfr 40233294Sstas return rv; 41178825Sdfr} 42178825Sdfr 43178825Sdfrstatic apr_int16_t get_epoll_revent(apr_int16_t event) 44178825Sdfr{ 45233294Sstas apr_int16_t rv = 0; 46178825Sdfr 47178825Sdfr if (event & EPOLLIN) 48178825Sdfr rv |= APR_POLLIN; 49178825Sdfr if (event & EPOLLPRI) 50233294Sstas rv |= APR_POLLPRI; 51178825Sdfr if (event & EPOLLOUT) 52178825Sdfr rv |= APR_POLLOUT; 53178825Sdfr if (event & EPOLLERR) 54233294Sstas rv |= APR_POLLERR; 55233294Sstas if (event & EPOLLHUP) 56233294Sstas rv |= APR_POLLHUP; 57178825Sdfr /* APR_POLLNVAL is not handled by epoll. */ 58178825Sdfr 59178825Sdfr return rv; 60178825Sdfr} 61178825Sdfr 62178825Sdfrstruct apr_pollset_private_t 63178825Sdfr{ 64178825Sdfr int epoll_fd; 65178825Sdfr struct epoll_event *pollset; 66233294Sstas apr_pollfd_t *result_set; 67178825Sdfr#if APR_HAS_THREADS 68178825Sdfr /* A thread mutex to protect operations on the rings */ 69178825Sdfr apr_thread_mutex_t *ring_lock; 70178825Sdfr#endif 71178825Sdfr /* A ring containing all of the pollfd_t that are active */ 72233294Sstas APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring; 73178825Sdfr /* A ring of pollfd_t that have been used, and then _remove()'d */ 74178825Sdfr APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring; 75178825Sdfr /* A ring of pollfd_t where rings that have been _remove()`ed but 76178825Sdfr might still be inside a _poll() */ 77178825Sdfr APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring; 78178825Sdfr}; 79178825Sdfr 80178825Sdfrstatic apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset) 81178825Sdfr{ 82178825Sdfr close(pollset->p->epoll_fd); 83178825Sdfr return APR_SUCCESS; 84233294Sstas} 85233294Sstas 86178825Sdfr 87178825Sdfrstatic apr_status_t impl_pollset_create(apr_pollset_t *pollset, 88178825Sdfr apr_uint32_t size, 89178825Sdfr apr_pool_t *p, 90178825Sdfr apr_uint32_t flags) 91178825Sdfr{ 92178825Sdfr apr_status_t rv; 93178825Sdfr int fd; 94178825Sdfr 
95178825Sdfr#ifdef HAVE_EPOLL_CREATE1 96178825Sdfr fd = epoll_create1(EPOLL_CLOEXEC); 97233294Sstas#else 98178825Sdfr fd = epoll_create(size); 99178825Sdfr#endif 100178825Sdfr if (fd < 0) { 101178825Sdfr pollset->p = NULL; 102178825Sdfr return apr_get_netos_error(); 103233294Sstas } 104178825Sdfr 105178825Sdfr#ifndef HAVE_EPOLL_CREATE1 106178825Sdfr { 107178825Sdfr int fd_flags; 108178825Sdfr 109178825Sdfr if ((fd_flags = fcntl(fd, F_GETFD)) == -1) { 110178825Sdfr rv = errno; 111178825Sdfr close(fd); 112178825Sdfr pollset->p = NULL; 113178825Sdfr return rv; 114178825Sdfr } 115233294Sstas 116178825Sdfr fd_flags |= FD_CLOEXEC; 117178825Sdfr if (fcntl(fd, F_SETFD, fd_flags) == -1) { 118178825Sdfr rv = errno; 119178825Sdfr close(fd); 120178825Sdfr pollset->p = NULL; 121178825Sdfr return rv; 122233294Sstas } 123233294Sstas } 124178825Sdfr#endif 125178825Sdfr 126178825Sdfr pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t)); 127178825Sdfr#if APR_HAS_THREADS 128178825Sdfr if ((flags & APR_POLLSET_THREADSAFE) && 129178825Sdfr !(flags & APR_POLLSET_NOCOPY) && 130178825Sdfr ((rv = apr_thread_mutex_create(&pollset->p->ring_lock, 131178825Sdfr APR_THREAD_MUTEX_DEFAULT, 132178825Sdfr p)) != APR_SUCCESS)) { 133178825Sdfr close(fd); 134178825Sdfr pollset->p = NULL; 135178825Sdfr return rv; 136178825Sdfr } 137178825Sdfr#else 138233294Sstas if (flags & APR_POLLSET_THREADSAFE) { 139233294Sstas close(fd); 140178825Sdfr pollset->p = NULL; 141178825Sdfr return APR_ENOTIMPL; 142178825Sdfr } 143178825Sdfr#endif 144178825Sdfr pollset->p->epoll_fd = fd; 145178825Sdfr pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event)); 146178825Sdfr pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t)); 147178825Sdfr 148178825Sdfr if (!(flags & APR_POLLSET_NOCOPY)) { 149178825Sdfr APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link); 150178825Sdfr APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link); 151178825Sdfr APR_RING_INIT(&pollset->p->dead_ring, 
pfd_elem_t, link); 152178825Sdfr } 153178825Sdfr return APR_SUCCESS; 154178825Sdfr} 155233294Sstas 156233294Sstasstatic apr_status_t impl_pollset_add(apr_pollset_t *pollset, 157178825Sdfr const apr_pollfd_t *descriptor) 158178825Sdfr{ 159178825Sdfr struct epoll_event ev = {0}; 160233294Sstas int ret = -1; 161178825Sdfr pfd_elem_t *elem = NULL; 162178825Sdfr apr_status_t rv = APR_SUCCESS; 163233294Sstas 164178825Sdfr ev.events = get_epoll_event(descriptor->reqevents); 165178825Sdfr 166178825Sdfr if (pollset->flags & APR_POLLSET_NOCOPY) { 167178825Sdfr ev.data.ptr = (void *)descriptor; 168178825Sdfr } 169178825Sdfr else { 170178825Sdfr pollset_lock_rings(); 171178825Sdfr 172178825Sdfr if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) { 173233294Sstas elem = APR_RING_FIRST(&(pollset->p->free_ring)); 174178825Sdfr APR_RING_REMOVE(elem, link); 175178825Sdfr } 176178825Sdfr else { 177178825Sdfr elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t)); 178178825Sdfr APR_RING_ELEM_INIT(elem, link); 179233294Sstas } 180233294Sstas elem->pfd = *descriptor; 181178825Sdfr ev.data.ptr = elem; 182178825Sdfr } 183233294Sstas if (descriptor->desc_type == APR_POLL_SOCKET) { 184178825Sdfr ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 185178825Sdfr descriptor->desc.s->socketdes, &ev); 186178825Sdfr } 187178825Sdfr else { 188178825Sdfr ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 189233294Sstas descriptor->desc.f->filedes, &ev); 190178825Sdfr } 191178825Sdfr 192178825Sdfr if (0 != ret) { 193178825Sdfr rv = apr_get_netos_error(); 194178825Sdfr } 195178825Sdfr 196178825Sdfr if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 197178825Sdfr if (rv != APR_SUCCESS) { 198178825Sdfr APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link); 199178825Sdfr } 200178825Sdfr else { 201178825Sdfr APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link); 202178825Sdfr } 203178825Sdfr pollset_unlock_rings(); 204178825Sdfr } 205178825Sdfr 
206178825Sdfr return rv; 207178825Sdfr} 208178825Sdfr 209178825Sdfrstatic apr_status_t impl_pollset_remove(apr_pollset_t *pollset, 210178825Sdfr const apr_pollfd_t *descriptor) 211178825Sdfr{ 212178825Sdfr pfd_elem_t *ep; 213178825Sdfr apr_status_t rv = APR_SUCCESS; 214178825Sdfr struct epoll_event ev = {0}; /* ignored, but must be passed with 215178825Sdfr * kernel < 2.6.9 216178825Sdfr */ 217178825Sdfr int ret = -1; 218178825Sdfr 219178825Sdfr if (descriptor->desc_type == APR_POLL_SOCKET) { 220178825Sdfr ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 221178825Sdfr descriptor->desc.s->socketdes, &ev); 222178825Sdfr } 223178825Sdfr else { 224178825Sdfr ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 225178825Sdfr descriptor->desc.f->filedes, &ev); 226178825Sdfr } 227178825Sdfr if (ret < 0) { 228178825Sdfr rv = APR_NOTFOUND; 229178825Sdfr } 230178825Sdfr 231178825Sdfr if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 232178825Sdfr pollset_lock_rings(); 233178825Sdfr 234178825Sdfr for (ep = APR_RING_FIRST(&(pollset->p->query_ring)); 235178825Sdfr ep != APR_RING_SENTINEL(&(pollset->p->query_ring), 236233294Sstas pfd_elem_t, link); 237233294Sstas ep = APR_RING_NEXT(ep, link)) { 238233294Sstas 239233294Sstas if (descriptor->desc.s == ep->pfd.desc.s) { 240233294Sstas APR_RING_REMOVE(ep, link); 241233294Sstas APR_RING_INSERT_TAIL(&(pollset->p->dead_ring), 242233294Sstas ep, pfd_elem_t, link); 243233294Sstas break; 244233294Sstas } 245233294Sstas } 246233294Sstas 247233294Sstas pollset_unlock_rings(); 248233294Sstas } 249233294Sstas 250178825Sdfr return rv; 251178825Sdfr} 252178825Sdfr 253178825Sdfrstatic apr_status_t impl_pollset_poll(apr_pollset_t *pollset, 254178825Sdfr apr_interval_time_t timeout, 255178825Sdfr apr_int32_t *num, 256178825Sdfr const apr_pollfd_t **descriptors) 257178825Sdfr{ 258178825Sdfr int ret, i, j; 259178825Sdfr apr_status_t rv = APR_SUCCESS; 260178825Sdfr apr_pollfd_t *fdptr; 261178825Sdfr 262178825Sdfr if (timeout > 0) { 263178825Sdfr 
timeout /= 1000; 264233294Sstas } 265178825Sdfr 266178825Sdfr ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc, 267178825Sdfr timeout); 268178825Sdfr (*num) = ret; 269178825Sdfr 270178825Sdfr if (ret < 0) { 271178825Sdfr rv = apr_get_netos_error(); 272178825Sdfr } 273178825Sdfr else if (ret == 0) { 274178825Sdfr rv = APR_TIMEUP; 275178825Sdfr } 276178825Sdfr else { 277178825Sdfr for (i = 0, j = 0; i < ret; i++) { 278178825Sdfr if (pollset->flags & APR_POLLSET_NOCOPY) { 279178825Sdfr fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr); 280233294Sstas } 281178825Sdfr else { 282178825Sdfr fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd); 283178825Sdfr } 284178825Sdfr /* Check if the polled descriptor is our 285178825Sdfr * wakeup pipe. In that case do not put it result set. 286178825Sdfr */ 287178825Sdfr if ((pollset->flags & APR_POLLSET_WAKEABLE) && 288178825Sdfr fdptr->desc_type == APR_POLL_FILE && 289233294Sstas fdptr->desc.f == pollset->wakeup_pipe[0]) { 290178825Sdfr apr_pollset_drain_wakeup_pipe(pollset); 291178825Sdfr rv = APR_EINTR; 292178825Sdfr } 293178825Sdfr else { 294178825Sdfr pollset->p->result_set[j] = *fdptr; 295233294Sstas pollset->p->result_set[j].rtnevents = 296178825Sdfr get_epoll_revent(pollset->p->pollset[i].events); 297178825Sdfr j++; 298178825Sdfr } 299233294Sstas } 300233294Sstas if (((*num) = j)) { /* any event besides wakeup pipe? 
*/ 301233294Sstas rv = APR_SUCCESS; 302178825Sdfr 303178825Sdfr if (descriptors) { 304178825Sdfr *descriptors = pollset->p->result_set; 305178825Sdfr } 306178825Sdfr } 307178825Sdfr } 308178825Sdfr 309178825Sdfr if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 310178825Sdfr pollset_lock_rings(); 311178825Sdfr 312178825Sdfr /* Shift all PFDs in the Dead Ring to the Free Ring */ 313233294Sstas APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link); 314178825Sdfr 315178825Sdfr pollset_unlock_rings(); 316178825Sdfr } 317233294Sstas 318233294Sstas return rv; 319233294Sstas} 320178825Sdfr 321178825Sdfrstatic apr_pollset_provider_t impl = { 322178825Sdfr impl_pollset_create, 323233294Sstas impl_pollset_add, 324178825Sdfr impl_pollset_remove, 325178825Sdfr impl_pollset_poll, 326178825Sdfr impl_pollset_cleanup, 327233294Sstas "epoll" 328178825Sdfr}; 329178825Sdfr 330178825Sdfrapr_pollset_provider_t *apr_pollset_provider_epoll = &impl; 331178825Sdfr 332178825Sdfrstatic apr_status_t cb_cleanup(void *p_) 333178825Sdfr{ 334178825Sdfr apr_pollcb_t *pollcb = (apr_pollcb_t *) p_; 335233294Sstas close(pollcb->fd); 336233294Sstas return APR_SUCCESS; 337178825Sdfr} 338233294Sstas 339233294Sstasstatic apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb, 340178825Sdfr apr_uint32_t size, 341178825Sdfr apr_pool_t *p, 342178825Sdfr apr_uint32_t flags) 343178825Sdfr{ 344178825Sdfr int fd; 345233294Sstas 346178825Sdfr#ifdef HAVE_EPOLL_CREATE1 347178825Sdfr fd = epoll_create1(EPOLL_CLOEXEC); 348178825Sdfr#else 349178825Sdfr fd = epoll_create(size); 350178825Sdfr#endif 351178825Sdfr 352178825Sdfr if (fd < 0) { 353178825Sdfr return apr_get_netos_error(); 354178825Sdfr } 355178825Sdfr 356178825Sdfr#ifndef HAVE_EPOLL_CREATE1 357178825Sdfr { 358178825Sdfr int fd_flags; 359233294Sstas apr_status_t rv; 360233294Sstas 361233294Sstas if ((fd_flags = fcntl(fd, F_GETFD)) == -1) { 362178825Sdfr rv = errno; 363178825Sdfr close(fd); 364178825Sdfr pollcb->fd = -1; 365178825Sdfr 
return rv; 366233294Sstas } 367178825Sdfr 368178825Sdfr fd_flags |= FD_CLOEXEC; 369178825Sdfr if (fcntl(fd, F_SETFD, fd_flags) == -1) { 370178825Sdfr rv = errno; 371233294Sstas close(fd); 372178825Sdfr pollcb->fd = -1; 373178825Sdfr return rv; 374178825Sdfr } 375233294Sstas } 376178825Sdfr#endif 377178825Sdfr 378178825Sdfr pollcb->fd = fd; 379178825Sdfr pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event)); 380178825Sdfr apr_pool_cleanup_register(p, pollcb, cb_cleanup, apr_pool_cleanup_null); 381178825Sdfr 382178825Sdfr return APR_SUCCESS; 383233294Sstas} 384178825Sdfr 385178825Sdfrstatic apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb, 386233294Sstas apr_pollfd_t *descriptor) 387178825Sdfr{ 388178825Sdfr struct epoll_event ev; 389178825Sdfr int ret; 390178825Sdfr 391178825Sdfr ev.events = get_epoll_event(descriptor->reqevents); 392178825Sdfr ev.data.ptr = (void *)descriptor; 393178825Sdfr 394178825Sdfr if (descriptor->desc_type == APR_POLL_SOCKET) { 395178825Sdfr ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 396178825Sdfr descriptor->desc.s->socketdes, &ev); 397178825Sdfr } 398178825Sdfr else { 399178825Sdfr ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 400178825Sdfr descriptor->desc.f->filedes, &ev); 401178825Sdfr } 402233294Sstas 403178825Sdfr if (ret == -1) { 404178825Sdfr return apr_get_netos_error(); 405178825Sdfr } 406178825Sdfr 407233294Sstas return APR_SUCCESS; 408178825Sdfr} 409178825Sdfr 410178825Sdfrstatic apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb, 411178825Sdfr apr_pollfd_t *descriptor) 412233294Sstas{ 413178825Sdfr apr_status_t rv = APR_SUCCESS; 414178825Sdfr struct epoll_event ev = {0}; /* ignored, but must be passed with 415178825Sdfr * kernel < 2.6.9 416178825Sdfr */ 417178825Sdfr int ret = -1; 418178825Sdfr 419178825Sdfr if (descriptor->desc_type == APR_POLL_SOCKET) { 420178825Sdfr ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 421178825Sdfr descriptor->desc.s->socketdes, &ev); 422178825Sdfr } 423178825Sdfr else { 
424178825Sdfr ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 425178825Sdfr descriptor->desc.f->filedes, &ev); 426178825Sdfr } 427178825Sdfr 428178825Sdfr if (ret < 0) { 429178825Sdfr rv = APR_NOTFOUND; 430178825Sdfr } 431178825Sdfr 432178825Sdfr return rv; 433178825Sdfr} 434178825Sdfr 435 436static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb, 437 apr_interval_time_t timeout, 438 apr_pollcb_cb_t func, 439 void *baton) 440{ 441 int ret, i; 442 apr_status_t rv = APR_SUCCESS; 443 444 if (timeout > 0) { 445 timeout /= 1000; 446 } 447 448 ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc, 449 timeout); 450 if (ret < 0) { 451 rv = apr_get_netos_error(); 452 } 453 else if (ret == 0) { 454 rv = APR_TIMEUP; 455 } 456 else { 457 for (i = 0; i < ret; i++) { 458 apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr); 459 pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events); 460 461 rv = func(baton, pollfd); 462 if (rv) { 463 return rv; 464 } 465 } 466 } 467 468 return rv; 469} 470 471static apr_pollcb_provider_t impl_cb = { 472 impl_pollcb_create, 473 impl_pollcb_add, 474 impl_pollcb_remove, 475 impl_pollcb_poll, 476 "epoll" 477}; 478 479apr_pollcb_provider_t *apr_pollcb_provider_epoll = &impl_cb; 480 481#endif /* HAVE_EPOLL */ 482