/* epoll.c revision 362181 */
1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "apr.h" 18#include "apr_poll.h" 19#include "apr_time.h" 20#include "apr_portable.h" 21#include "apr_arch_file_io.h" 22#include "apr_arch_networkio.h" 23#include "apr_arch_poll_private.h" 24#include "apr_arch_inherit.h" 25 26#if defined(HAVE_EPOLL) 27 28static apr_int16_t get_epoll_event(apr_int16_t event) 29{ 30 apr_int16_t rv = 0; 31 32 if (event & APR_POLLIN) 33 rv |= EPOLLIN; 34 if (event & APR_POLLPRI) 35 rv |= EPOLLPRI; 36 if (event & APR_POLLOUT) 37 rv |= EPOLLOUT; 38 /* APR_POLLNVAL is not handled by epoll. EPOLLERR and EPOLLHUP are return-only */ 39 40 return rv; 41} 42 43static apr_int16_t get_epoll_revent(apr_int16_t event) 44{ 45 apr_int16_t rv = 0; 46 47 if (event & EPOLLIN) 48 rv |= APR_POLLIN; 49 if (event & EPOLLPRI) 50 rv |= APR_POLLPRI; 51 if (event & EPOLLOUT) 52 rv |= APR_POLLOUT; 53 if (event & EPOLLERR) 54 rv |= APR_POLLERR; 55 if (event & EPOLLHUP) 56 rv |= APR_POLLHUP; 57 /* APR_POLLNVAL is not handled by epoll. 
*/ 58 59 return rv; 60} 61 62struct apr_pollset_private_t 63{ 64 int epoll_fd; 65 struct epoll_event *pollset; 66 apr_pollfd_t *result_set; 67#if APR_HAS_THREADS 68 /* A thread mutex to protect operations on the rings */ 69 apr_thread_mutex_t *ring_lock; 70#endif 71 /* A ring containing all of the pollfd_t that are active */ 72 APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring; 73 /* A ring of pollfd_t that have been used, and then _remove()'d */ 74 APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring; 75 /* A ring of pollfd_t where rings that have been _remove()`ed but 76 might still be inside a _poll() */ 77 APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring; 78}; 79 80static apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset) 81{ 82 close(pollset->p->epoll_fd); 83 return APR_SUCCESS; 84} 85 86 87static apr_status_t impl_pollset_create(apr_pollset_t *pollset, 88 apr_uint32_t size, 89 apr_pool_t *p, 90 apr_uint32_t flags) 91{ 92 apr_status_t rv; 93 int fd; 94 95#ifdef HAVE_EPOLL_CREATE1 96 fd = epoll_create1(EPOLL_CLOEXEC); 97#else 98 fd = epoll_create(size); 99#endif 100 if (fd < 0) { 101 pollset->p = NULL; 102 return apr_get_netos_error(); 103 } 104 105#ifndef HAVE_EPOLL_CREATE1 106 { 107 int fd_flags; 108 109 if ((fd_flags = fcntl(fd, F_GETFD)) == -1) { 110 rv = errno; 111 close(fd); 112 pollset->p = NULL; 113 return rv; 114 } 115 116 fd_flags |= FD_CLOEXEC; 117 if (fcntl(fd, F_SETFD, fd_flags) == -1) { 118 rv = errno; 119 close(fd); 120 pollset->p = NULL; 121 return rv; 122 } 123 } 124#endif 125 126 pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t)); 127#if APR_HAS_THREADS 128 if ((flags & APR_POLLSET_THREADSAFE) && 129 !(flags & APR_POLLSET_NOCOPY) && 130 ((rv = apr_thread_mutex_create(&pollset->p->ring_lock, 131 APR_THREAD_MUTEX_DEFAULT, 132 p)) != APR_SUCCESS)) { 133 close(fd); 134 pollset->p = NULL; 135 return rv; 136 } 137#else 138 if (flags & APR_POLLSET_THREADSAFE) { 139 close(fd); 140 pollset->p = NULL; 141 return APR_ENOTIMPL; 142 
} 143#endif 144 pollset->p->epoll_fd = fd; 145 pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event)); 146 pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t)); 147 148 if (!(flags & APR_POLLSET_NOCOPY)) { 149 APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link); 150 APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link); 151 APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link); 152 } 153 return APR_SUCCESS; 154} 155 156static apr_status_t impl_pollset_add(apr_pollset_t *pollset, 157 const apr_pollfd_t *descriptor) 158{ 159 struct epoll_event ev = {0}; 160 int ret; 161 pfd_elem_t *elem = NULL; 162 apr_status_t rv = APR_SUCCESS; 163 164 ev.events = get_epoll_event(descriptor->reqevents); 165 166 if (pollset->flags & APR_POLLSET_NOCOPY) { 167 ev.data.ptr = (void *)descriptor; 168 } 169 else { 170 pollset_lock_rings(); 171 172 if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) { 173 elem = APR_RING_FIRST(&(pollset->p->free_ring)); 174 APR_RING_REMOVE(elem, link); 175 } 176 else { 177 elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t)); 178 APR_RING_ELEM_INIT(elem, link); 179 } 180 elem->pfd = *descriptor; 181 ev.data.ptr = elem; 182 } 183 if (descriptor->desc_type == APR_POLL_SOCKET) { 184 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 185 descriptor->desc.s->socketdes, &ev); 186 } 187 else { 188 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD, 189 descriptor->desc.f->filedes, &ev); 190 } 191 192 if (0 != ret) { 193 rv = apr_get_netos_error(); 194 } 195 196 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 197 if (rv != APR_SUCCESS) { 198 APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link); 199 } 200 else { 201 APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link); 202 } 203 pollset_unlock_rings(); 204 } 205 206 return rv; 207} 208 209static apr_status_t impl_pollset_remove(apr_pollset_t *pollset, 210 const apr_pollfd_t *descriptor) 211{ 212 pfd_elem_t *ep; 
213 apr_status_t rv = APR_SUCCESS; 214 struct epoll_event ev = {0}; /* ignored, but must be passed with 215 * kernel < 2.6.9 216 */ 217 int ret; 218 219 if (descriptor->desc_type == APR_POLL_SOCKET) { 220 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 221 descriptor->desc.s->socketdes, &ev); 222 } 223 else { 224 ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL, 225 descriptor->desc.f->filedes, &ev); 226 } 227 if (ret < 0) { 228 rv = APR_NOTFOUND; 229 } 230 231 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 232 pollset_lock_rings(); 233 234 for (ep = APR_RING_FIRST(&(pollset->p->query_ring)); 235 ep != APR_RING_SENTINEL(&(pollset->p->query_ring), 236 pfd_elem_t, link); 237 ep = APR_RING_NEXT(ep, link)) { 238 239 if (descriptor->desc.s == ep->pfd.desc.s) { 240 APR_RING_REMOVE(ep, link); 241 APR_RING_INSERT_TAIL(&(pollset->p->dead_ring), 242 ep, pfd_elem_t, link); 243 break; 244 } 245 } 246 247 pollset_unlock_rings(); 248 } 249 250 return rv; 251} 252 253static apr_status_t impl_pollset_poll(apr_pollset_t *pollset, 254 apr_interval_time_t timeout, 255 apr_int32_t *num, 256 const apr_pollfd_t **descriptors) 257{ 258 int ret; 259 apr_status_t rv = APR_SUCCESS; 260 261 *num = 0; 262 263 if (timeout > 0) { 264 timeout /= 1000; 265 } 266 267 ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc, 268 timeout); 269 if (ret < 0) { 270 rv = apr_get_netos_error(); 271 } 272 else if (ret == 0) { 273 rv = APR_TIMEUP; 274 } 275 else { 276 int i, j; 277 const apr_pollfd_t *fdptr; 278 279 for (i = 0, j = 0; i < ret; i++) { 280 if (pollset->flags & APR_POLLSET_NOCOPY) { 281 fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr); 282 } 283 else { 284 fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd); 285 } 286 /* Check if the polled descriptor is our 287 * wakeup pipe. In that case do not put it result set. 
288 */ 289 if ((pollset->flags & APR_POLLSET_WAKEABLE) && 290 fdptr->desc_type == APR_POLL_FILE && 291 fdptr->desc.f == pollset->wakeup_pipe[0]) { 292 apr_poll_drain_wakeup_pipe(pollset->wakeup_pipe); 293 rv = APR_EINTR; 294 } 295 else { 296 pollset->p->result_set[j] = *fdptr; 297 pollset->p->result_set[j].rtnevents = 298 get_epoll_revent(pollset->p->pollset[i].events); 299 j++; 300 } 301 } 302 if (((*num) = j)) { /* any event besides wakeup pipe? */ 303 rv = APR_SUCCESS; 304 305 if (descriptors) { 306 *descriptors = pollset->p->result_set; 307 } 308 } 309 } 310 311 if (!(pollset->flags & APR_POLLSET_NOCOPY)) { 312 pollset_lock_rings(); 313 314 /* Shift all PFDs in the Dead Ring to the Free Ring */ 315 APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link); 316 317 pollset_unlock_rings(); 318 } 319 320 return rv; 321} 322 323static const apr_pollset_provider_t impl = { 324 impl_pollset_create, 325 impl_pollset_add, 326 impl_pollset_remove, 327 impl_pollset_poll, 328 impl_pollset_cleanup, 329 "epoll" 330}; 331 332const apr_pollset_provider_t *const apr_pollset_provider_epoll = &impl; 333 334static apr_status_t impl_pollcb_cleanup(apr_pollcb_t *pollcb) 335{ 336 close(pollcb->fd); 337 return APR_SUCCESS; 338} 339 340static apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb, 341 apr_uint32_t size, 342 apr_pool_t *p, 343 apr_uint32_t flags) 344{ 345 int fd; 346 347#ifdef HAVE_EPOLL_CREATE1 348 fd = epoll_create1(EPOLL_CLOEXEC); 349#else 350 fd = epoll_create(size); 351#endif 352 353 if (fd < 0) { 354 return apr_get_netos_error(); 355 } 356 357#ifndef HAVE_EPOLL_CREATE1 358 { 359 int fd_flags; 360 apr_status_t rv; 361 362 if ((fd_flags = fcntl(fd, F_GETFD)) == -1) { 363 rv = errno; 364 close(fd); 365 pollcb->fd = -1; 366 return rv; 367 } 368 369 fd_flags |= FD_CLOEXEC; 370 if (fcntl(fd, F_SETFD, fd_flags) == -1) { 371 rv = errno; 372 close(fd); 373 pollcb->fd = -1; 374 return rv; 375 } 376 } 377#endif 378 379 pollcb->fd = fd; 380 
pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event)); 381 382 return APR_SUCCESS; 383} 384 385static apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb, 386 apr_pollfd_t *descriptor) 387{ 388 struct epoll_event ev = { 0 }; 389 int ret; 390 391 ev.events = get_epoll_event(descriptor->reqevents); 392 ev.data.ptr = (void *) descriptor; 393 394 if (descriptor->desc_type == APR_POLL_SOCKET) { 395 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 396 descriptor->desc.s->socketdes, &ev); 397 } 398 else { 399 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD, 400 descriptor->desc.f->filedes, &ev); 401 } 402 403 if (ret == -1) { 404 return apr_get_netos_error(); 405 } 406 407 return APR_SUCCESS; 408} 409 410static apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb, 411 apr_pollfd_t *descriptor) 412{ 413 apr_status_t rv = APR_SUCCESS; 414 struct epoll_event ev = {0}; /* ignored, but must be passed with 415 * kernel < 2.6.9 416 */ 417 int ret; 418 419 if (descriptor->desc_type == APR_POLL_SOCKET) { 420 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 421 descriptor->desc.s->socketdes, &ev); 422 } 423 else { 424 ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL, 425 descriptor->desc.f->filedes, &ev); 426 } 427 428 if (ret < 0) { 429 rv = APR_NOTFOUND; 430 } 431 432 return rv; 433} 434 435 436static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb, 437 apr_interval_time_t timeout, 438 apr_pollcb_cb_t func, 439 void *baton) 440{ 441 int ret, i; 442 apr_status_t rv = APR_SUCCESS; 443 444 if (timeout > 0) { 445 timeout /= 1000; 446 } 447 448 ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc, 449 timeout); 450 if (ret < 0) { 451 rv = apr_get_netos_error(); 452 } 453 else if (ret == 0) { 454 rv = APR_TIMEUP; 455 } 456 else { 457 for (i = 0; i < ret; i++) { 458 apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr); 459 460 if ((pollcb->flags & APR_POLLSET_WAKEABLE) && 461 pollfd->desc_type == APR_POLL_FILE && 462 pollfd->desc.f == 
pollcb->wakeup_pipe[0]) { 463 apr_poll_drain_wakeup_pipe(pollcb->wakeup_pipe); 464 return APR_EINTR; 465 } 466 467 pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events); 468 469 rv = func(baton, pollfd); 470 if (rv) { 471 return rv; 472 } 473 } 474 } 475 476 return rv; 477} 478 479static const apr_pollcb_provider_t impl_cb = { 480 impl_pollcb_create, 481 impl_pollcb_add, 482 impl_pollcb_remove, 483 impl_pollcb_poll, 484 impl_pollcb_cleanup, 485 "epoll" 486}; 487 488const apr_pollcb_provider_t *const apr_pollcb_provider_epoll = &impl_cb; 489 490#endif /* HAVE_EPOLL */ 491