epoll.c revision 362181
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr.h"
18#include "apr_poll.h"
19#include "apr_time.h"
20#include "apr_portable.h"
21#include "apr_arch_file_io.h"
22#include "apr_arch_networkio.h"
23#include "apr_arch_poll_private.h"
24#include "apr_arch_inherit.h"
25
26#if defined(HAVE_EPOLL)
27
28static apr_int16_t get_epoll_event(apr_int16_t event)
29{
30    apr_int16_t rv = 0;
31
32    if (event & APR_POLLIN)
33        rv |= EPOLLIN;
34    if (event & APR_POLLPRI)
35        rv |= EPOLLPRI;
36    if (event & APR_POLLOUT)
37        rv |= EPOLLOUT;
38    /* APR_POLLNVAL is not handled by epoll.  EPOLLERR and EPOLLHUP are return-only */
39
40    return rv;
41}
42
43static apr_int16_t get_epoll_revent(apr_int16_t event)
44{
45    apr_int16_t rv = 0;
46
47    if (event & EPOLLIN)
48        rv |= APR_POLLIN;
49    if (event & EPOLLPRI)
50        rv |= APR_POLLPRI;
51    if (event & EPOLLOUT)
52        rv |= APR_POLLOUT;
53    if (event & EPOLLERR)
54        rv |= APR_POLLERR;
55    if (event & EPOLLHUP)
56        rv |= APR_POLLHUP;
57    /* APR_POLLNVAL is not handled by epoll. */
58
59    return rv;
60}
61
62struct apr_pollset_private_t
63{
64    int epoll_fd;
65    struct epoll_event *pollset;
66    apr_pollfd_t *result_set;
67#if APR_HAS_THREADS
68    /* A thread mutex to protect operations on the rings */
69    apr_thread_mutex_t *ring_lock;
70#endif
71    /* A ring containing all of the pollfd_t that are active */
72    APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring;
73    /* A ring of pollfd_t that have been used, and then _remove()'d */
74    APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring;
75    /* A ring of pollfd_t where rings that have been _remove()`ed but
76        might still be inside a _poll() */
77    APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring;
78};
79
80static apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset)
81{
82    close(pollset->p->epoll_fd);
83    return APR_SUCCESS;
84}
85
86
87static apr_status_t impl_pollset_create(apr_pollset_t *pollset,
88                                        apr_uint32_t size,
89                                        apr_pool_t *p,
90                                        apr_uint32_t flags)
91{
92    apr_status_t rv;
93    int fd;
94
95#ifdef HAVE_EPOLL_CREATE1
96    fd = epoll_create1(EPOLL_CLOEXEC);
97#else
98    fd = epoll_create(size);
99#endif
100    if (fd < 0) {
101        pollset->p = NULL;
102        return apr_get_netos_error();
103    }
104
105#ifndef HAVE_EPOLL_CREATE1
106    {
107        int fd_flags;
108
109        if ((fd_flags = fcntl(fd, F_GETFD)) == -1) {
110            rv = errno;
111            close(fd);
112            pollset->p = NULL;
113            return rv;
114        }
115
116        fd_flags |= FD_CLOEXEC;
117        if (fcntl(fd, F_SETFD, fd_flags) == -1) {
118            rv = errno;
119            close(fd);
120            pollset->p = NULL;
121            return rv;
122        }
123    }
124#endif
125
126    pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t));
127#if APR_HAS_THREADS
128    if ((flags & APR_POLLSET_THREADSAFE) &&
129        !(flags & APR_POLLSET_NOCOPY) &&
130        ((rv = apr_thread_mutex_create(&pollset->p->ring_lock,
131                                       APR_THREAD_MUTEX_DEFAULT,
132                                       p)) != APR_SUCCESS)) {
133        close(fd);
134        pollset->p = NULL;
135        return rv;
136    }
137#else
138    if (flags & APR_POLLSET_THREADSAFE) {
139        close(fd);
140        pollset->p = NULL;
141        return APR_ENOTIMPL;
142    }
143#endif
144    pollset->p->epoll_fd = fd;
145    pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event));
146    pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
147
148    if (!(flags & APR_POLLSET_NOCOPY)) {
149        APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link);
150        APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link);
151        APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link);
152    }
153    return APR_SUCCESS;
154}
155
156static apr_status_t impl_pollset_add(apr_pollset_t *pollset,
157                                     const apr_pollfd_t *descriptor)
158{
159    struct epoll_event ev = {0};
160    int ret;
161    pfd_elem_t *elem = NULL;
162    apr_status_t rv = APR_SUCCESS;
163
164    ev.events = get_epoll_event(descriptor->reqevents);
165
166    if (pollset->flags & APR_POLLSET_NOCOPY) {
167        ev.data.ptr = (void *)descriptor;
168    }
169    else {
170        pollset_lock_rings();
171
172        if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) {
173            elem = APR_RING_FIRST(&(pollset->p->free_ring));
174            APR_RING_REMOVE(elem, link);
175        }
176        else {
177            elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t));
178            APR_RING_ELEM_INIT(elem, link);
179        }
180        elem->pfd = *descriptor;
181        ev.data.ptr = elem;
182    }
183    if (descriptor->desc_type == APR_POLL_SOCKET) {
184        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
185                        descriptor->desc.s->socketdes, &ev);
186    }
187    else {
188        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
189                        descriptor->desc.f->filedes, &ev);
190    }
191
192    if (0 != ret) {
193        rv = apr_get_netos_error();
194    }
195
196    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
197        if (rv != APR_SUCCESS) {
198            APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link);
199        }
200        else {
201            APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link);
202        }
203        pollset_unlock_rings();
204    }
205
206    return rv;
207}
208
209static apr_status_t impl_pollset_remove(apr_pollset_t *pollset,
210                                        const apr_pollfd_t *descriptor)
211{
212    pfd_elem_t *ep;
213    apr_status_t rv = APR_SUCCESS;
214    struct epoll_event ev = {0}; /* ignored, but must be passed with
215                                  * kernel < 2.6.9
216                                  */
217    int ret;
218
219    if (descriptor->desc_type == APR_POLL_SOCKET) {
220        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
221                        descriptor->desc.s->socketdes, &ev);
222    }
223    else {
224        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
225                        descriptor->desc.f->filedes, &ev);
226    }
227    if (ret < 0) {
228        rv = APR_NOTFOUND;
229    }
230
231    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
232        pollset_lock_rings();
233
234        for (ep = APR_RING_FIRST(&(pollset->p->query_ring));
235             ep != APR_RING_SENTINEL(&(pollset->p->query_ring),
236                                     pfd_elem_t, link);
237             ep = APR_RING_NEXT(ep, link)) {
238
239            if (descriptor->desc.s == ep->pfd.desc.s) {
240                APR_RING_REMOVE(ep, link);
241                APR_RING_INSERT_TAIL(&(pollset->p->dead_ring),
242                                     ep, pfd_elem_t, link);
243                break;
244            }
245        }
246
247        pollset_unlock_rings();
248    }
249
250    return rv;
251}
252
253static apr_status_t impl_pollset_poll(apr_pollset_t *pollset,
254                                           apr_interval_time_t timeout,
255                                           apr_int32_t *num,
256                                           const apr_pollfd_t **descriptors)
257{
258    int ret;
259    apr_status_t rv = APR_SUCCESS;
260
261    *num = 0;
262
263    if (timeout > 0) {
264        timeout /= 1000;
265    }
266
267    ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc,
268                     timeout);
269    if (ret < 0) {
270        rv = apr_get_netos_error();
271    }
272    else if (ret == 0) {
273        rv = APR_TIMEUP;
274    }
275    else {
276        int i, j;
277        const apr_pollfd_t *fdptr;
278
279        for (i = 0, j = 0; i < ret; i++) {
280            if (pollset->flags & APR_POLLSET_NOCOPY) {
281                fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr);
282            }
283            else {
284                fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd);
285            }
286            /* Check if the polled descriptor is our
287             * wakeup pipe. In that case do not put it result set.
288             */
289            if ((pollset->flags & APR_POLLSET_WAKEABLE) &&
290                fdptr->desc_type == APR_POLL_FILE &&
291                fdptr->desc.f == pollset->wakeup_pipe[0]) {
292                apr_poll_drain_wakeup_pipe(pollset->wakeup_pipe);
293                rv = APR_EINTR;
294            }
295            else {
296                pollset->p->result_set[j] = *fdptr;
297                pollset->p->result_set[j].rtnevents =
298                    get_epoll_revent(pollset->p->pollset[i].events);
299                j++;
300            }
301        }
302        if (((*num) = j)) { /* any event besides wakeup pipe? */
303            rv = APR_SUCCESS;
304
305            if (descriptors) {
306                *descriptors = pollset->p->result_set;
307            }
308        }
309    }
310
311    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
312        pollset_lock_rings();
313
314        /* Shift all PFDs in the Dead Ring to the Free Ring */
315        APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link);
316
317        pollset_unlock_rings();
318    }
319
320    return rv;
321}
322
323static const apr_pollset_provider_t impl = {
324    impl_pollset_create,
325    impl_pollset_add,
326    impl_pollset_remove,
327    impl_pollset_poll,
328    impl_pollset_cleanup,
329    "epoll"
330};
331
332const apr_pollset_provider_t *const apr_pollset_provider_epoll = &impl;
333
334static apr_status_t impl_pollcb_cleanup(apr_pollcb_t *pollcb)
335{
336    close(pollcb->fd);
337    return APR_SUCCESS;
338}
339
340static apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb,
341                                       apr_uint32_t size,
342                                       apr_pool_t *p,
343                                       apr_uint32_t flags)
344{
345    int fd;
346
347#ifdef HAVE_EPOLL_CREATE1
348    fd = epoll_create1(EPOLL_CLOEXEC);
349#else
350    fd = epoll_create(size);
351#endif
352
353    if (fd < 0) {
354        return apr_get_netos_error();
355    }
356
357#ifndef HAVE_EPOLL_CREATE1
358    {
359        int fd_flags;
360        apr_status_t rv;
361
362        if ((fd_flags = fcntl(fd, F_GETFD)) == -1) {
363            rv = errno;
364            close(fd);
365            pollcb->fd = -1;
366            return rv;
367        }
368
369        fd_flags |= FD_CLOEXEC;
370        if (fcntl(fd, F_SETFD, fd_flags) == -1) {
371            rv = errno;
372            close(fd);
373            pollcb->fd = -1;
374            return rv;
375        }
376    }
377#endif
378
379    pollcb->fd = fd;
380    pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event));
381
382    return APR_SUCCESS;
383}
384
385static apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb,
386                                    apr_pollfd_t *descriptor)
387{
388    struct epoll_event ev = { 0 };
389    int ret;
390
391    ev.events = get_epoll_event(descriptor->reqevents);
392    ev.data.ptr = (void *) descriptor;
393
394    if (descriptor->desc_type == APR_POLL_SOCKET) {
395        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
396                        descriptor->desc.s->socketdes, &ev);
397    }
398    else {
399        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
400                        descriptor->desc.f->filedes, &ev);
401    }
402
403    if (ret == -1) {
404        return apr_get_netos_error();
405    }
406
407    return APR_SUCCESS;
408}
409
410static apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb,
411                                       apr_pollfd_t *descriptor)
412{
413    apr_status_t rv = APR_SUCCESS;
414    struct epoll_event ev = {0}; /* ignored, but must be passed with
415                                  * kernel < 2.6.9
416                                  */
417    int ret;
418
419    if (descriptor->desc_type == APR_POLL_SOCKET) {
420        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
421                        descriptor->desc.s->socketdes, &ev);
422    }
423    else {
424        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
425                        descriptor->desc.f->filedes, &ev);
426    }
427
428    if (ret < 0) {
429        rv = APR_NOTFOUND;
430    }
431
432    return rv;
433}
434
435
436static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb,
437                                     apr_interval_time_t timeout,
438                                     apr_pollcb_cb_t func,
439                                     void *baton)
440{
441    int ret, i;
442    apr_status_t rv = APR_SUCCESS;
443
444    if (timeout > 0) {
445        timeout /= 1000;
446    }
447
448    ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc,
449                     timeout);
450    if (ret < 0) {
451        rv = apr_get_netos_error();
452    }
453    else if (ret == 0) {
454        rv = APR_TIMEUP;
455    }
456    else {
457        for (i = 0; i < ret; i++) {
458            apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr);
459
460            if ((pollcb->flags & APR_POLLSET_WAKEABLE) &&
461                pollfd->desc_type == APR_POLL_FILE &&
462                pollfd->desc.f == pollcb->wakeup_pipe[0]) {
463                apr_poll_drain_wakeup_pipe(pollcb->wakeup_pipe);
464                return APR_EINTR;
465            }
466
467            pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events);
468
469            rv = func(baton, pollfd);
470            if (rv) {
471                return rv;
472            }
473        }
474    }
475
476    return rv;
477}
478
479static const apr_pollcb_provider_t impl_cb = {
480    impl_pollcb_create,
481    impl_pollcb_add,
482    impl_pollcb_remove,
483    impl_pollcb_poll,
484    impl_pollcb_cleanup,
485    "epoll"
486};
487
488const apr_pollcb_provider_t *const apr_pollcb_provider_epoll = &impl_cb;
489
490#endif /* HAVE_EPOLL */
491