epoll.c revision 289166
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16178825Sdfr
17233294Sstas#include "apr.h"
18233294Sstas#include "apr_poll.h"
19233294Sstas#include "apr_time.h"
20178825Sdfr#include "apr_portable.h"
21233294Sstas#include "apr_arch_file_io.h"
22233294Sstas#include "apr_arch_networkio.h"
23233294Sstas#include "apr_arch_poll_private.h"
24233294Sstas#include "apr_arch_inherit.h"
25233294Sstas
26233294Sstas#if defined(HAVE_EPOLL)
27233294Sstas
28233294Sstasstatic apr_int16_t get_epoll_event(apr_int16_t event)
29233294Sstas{
30233294Sstas    apr_int16_t rv = 0;
31233294Sstas
32178825Sdfr    if (event & APR_POLLIN)
33178825Sdfr        rv |= EPOLLIN;
34178825Sdfr    if (event & APR_POLLPRI)
35178825Sdfr        rv |= EPOLLPRI;
36178825Sdfr    if (event & APR_POLLOUT)
37178825Sdfr        rv |= EPOLLOUT;
38178825Sdfr    /* APR_POLLNVAL is not handled by epoll.  EPOLLERR and EPOLLHUP are return-only */
39178825Sdfr
40233294Sstas    return rv;
41178825Sdfr}
42178825Sdfr
43178825Sdfrstatic apr_int16_t get_epoll_revent(apr_int16_t event)
44178825Sdfr{
45233294Sstas    apr_int16_t rv = 0;
46178825Sdfr
47178825Sdfr    if (event & EPOLLIN)
48178825Sdfr        rv |= APR_POLLIN;
49178825Sdfr    if (event & EPOLLPRI)
50233294Sstas        rv |= APR_POLLPRI;
51178825Sdfr    if (event & EPOLLOUT)
52178825Sdfr        rv |= APR_POLLOUT;
53178825Sdfr    if (event & EPOLLERR)
54233294Sstas        rv |= APR_POLLERR;
55233294Sstas    if (event & EPOLLHUP)
56233294Sstas        rv |= APR_POLLHUP;
57178825Sdfr    /* APR_POLLNVAL is not handled by epoll. */
58178825Sdfr
59178825Sdfr    return rv;
60178825Sdfr}
61178825Sdfr
62178825Sdfrstruct apr_pollset_private_t
63178825Sdfr{
64178825Sdfr    int epoll_fd;
65178825Sdfr    struct epoll_event *pollset;
66233294Sstas    apr_pollfd_t *result_set;
67178825Sdfr#if APR_HAS_THREADS
68178825Sdfr    /* A thread mutex to protect operations on the rings */
69178825Sdfr    apr_thread_mutex_t *ring_lock;
70178825Sdfr#endif
71178825Sdfr    /* A ring containing all of the pollfd_t that are active */
72233294Sstas    APR_RING_HEAD(pfd_query_ring_t, pfd_elem_t) query_ring;
73178825Sdfr    /* A ring of pollfd_t that have been used, and then _remove()'d */
74178825Sdfr    APR_RING_HEAD(pfd_free_ring_t, pfd_elem_t) free_ring;
75178825Sdfr    /* A ring of pollfd_t where rings that have been _remove()`ed but
76178825Sdfr        might still be inside a _poll() */
77178825Sdfr    APR_RING_HEAD(pfd_dead_ring_t, pfd_elem_t) dead_ring;
78178825Sdfr};
79178825Sdfr
80178825Sdfrstatic apr_status_t impl_pollset_cleanup(apr_pollset_t *pollset)
81178825Sdfr{
82178825Sdfr    close(pollset->p->epoll_fd);
83178825Sdfr    return APR_SUCCESS;
84233294Sstas}
85233294Sstas
86178825Sdfr
87178825Sdfrstatic apr_status_t impl_pollset_create(apr_pollset_t *pollset,
88178825Sdfr                                        apr_uint32_t size,
89178825Sdfr                                        apr_pool_t *p,
90178825Sdfr                                        apr_uint32_t flags)
91178825Sdfr{
92178825Sdfr    apr_status_t rv;
93178825Sdfr    int fd;
94178825Sdfr
95178825Sdfr#ifdef HAVE_EPOLL_CREATE1
96178825Sdfr    fd = epoll_create1(EPOLL_CLOEXEC);
97233294Sstas#else
98178825Sdfr    fd = epoll_create(size);
99178825Sdfr#endif
100178825Sdfr    if (fd < 0) {
101178825Sdfr        pollset->p = NULL;
102178825Sdfr        return apr_get_netos_error();
103233294Sstas    }
104178825Sdfr
105178825Sdfr#ifndef HAVE_EPOLL_CREATE1
106178825Sdfr    {
107178825Sdfr        int fd_flags;
108178825Sdfr
109178825Sdfr        if ((fd_flags = fcntl(fd, F_GETFD)) == -1) {
110178825Sdfr            rv = errno;
111178825Sdfr            close(fd);
112178825Sdfr            pollset->p = NULL;
113178825Sdfr            return rv;
114178825Sdfr        }
115233294Sstas
116178825Sdfr        fd_flags |= FD_CLOEXEC;
117178825Sdfr        if (fcntl(fd, F_SETFD, fd_flags) == -1) {
118178825Sdfr            rv = errno;
119178825Sdfr            close(fd);
120178825Sdfr            pollset->p = NULL;
121178825Sdfr            return rv;
122233294Sstas        }
123233294Sstas    }
124178825Sdfr#endif
125178825Sdfr
126178825Sdfr    pollset->p = apr_palloc(p, sizeof(apr_pollset_private_t));
127178825Sdfr#if APR_HAS_THREADS
128178825Sdfr    if ((flags & APR_POLLSET_THREADSAFE) &&
129178825Sdfr        !(flags & APR_POLLSET_NOCOPY) &&
130178825Sdfr        ((rv = apr_thread_mutex_create(&pollset->p->ring_lock,
131178825Sdfr                                       APR_THREAD_MUTEX_DEFAULT,
132178825Sdfr                                       p)) != APR_SUCCESS)) {
133178825Sdfr        close(fd);
134178825Sdfr        pollset->p = NULL;
135178825Sdfr        return rv;
136178825Sdfr    }
137178825Sdfr#else
138233294Sstas    if (flags & APR_POLLSET_THREADSAFE) {
139233294Sstas        close(fd);
140178825Sdfr        pollset->p = NULL;
141178825Sdfr        return APR_ENOTIMPL;
142178825Sdfr    }
143178825Sdfr#endif
144178825Sdfr    pollset->p->epoll_fd = fd;
145178825Sdfr    pollset->p->pollset = apr_palloc(p, size * sizeof(struct epoll_event));
146178825Sdfr    pollset->p->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
147178825Sdfr
148178825Sdfr    if (!(flags & APR_POLLSET_NOCOPY)) {
149178825Sdfr        APR_RING_INIT(&pollset->p->query_ring, pfd_elem_t, link);
150178825Sdfr        APR_RING_INIT(&pollset->p->free_ring, pfd_elem_t, link);
151178825Sdfr        APR_RING_INIT(&pollset->p->dead_ring, pfd_elem_t, link);
152178825Sdfr    }
153178825Sdfr    return APR_SUCCESS;
154178825Sdfr}
155233294Sstas
156233294Sstasstatic apr_status_t impl_pollset_add(apr_pollset_t *pollset,
157178825Sdfr                                     const apr_pollfd_t *descriptor)
158178825Sdfr{
159178825Sdfr    struct epoll_event ev = {0};
160233294Sstas    int ret = -1;
161178825Sdfr    pfd_elem_t *elem = NULL;
162178825Sdfr    apr_status_t rv = APR_SUCCESS;
163233294Sstas
164178825Sdfr    ev.events = get_epoll_event(descriptor->reqevents);
165178825Sdfr
166178825Sdfr    if (pollset->flags & APR_POLLSET_NOCOPY) {
167178825Sdfr        ev.data.ptr = (void *)descriptor;
168178825Sdfr    }
169178825Sdfr    else {
170178825Sdfr        pollset_lock_rings();
171178825Sdfr
172178825Sdfr        if (!APR_RING_EMPTY(&(pollset->p->free_ring), pfd_elem_t, link)) {
173233294Sstas            elem = APR_RING_FIRST(&(pollset->p->free_ring));
174178825Sdfr            APR_RING_REMOVE(elem, link);
175178825Sdfr        }
176178825Sdfr        else {
177178825Sdfr            elem = (pfd_elem_t *) apr_palloc(pollset->pool, sizeof(pfd_elem_t));
178178825Sdfr            APR_RING_ELEM_INIT(elem, link);
179233294Sstas        }
180233294Sstas        elem->pfd = *descriptor;
181178825Sdfr        ev.data.ptr = elem;
182178825Sdfr    }
183233294Sstas    if (descriptor->desc_type == APR_POLL_SOCKET) {
184178825Sdfr        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
185178825Sdfr                        descriptor->desc.s->socketdes, &ev);
186178825Sdfr    }
187178825Sdfr    else {
188178825Sdfr        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_ADD,
189233294Sstas                        descriptor->desc.f->filedes, &ev);
190178825Sdfr    }
191178825Sdfr
192178825Sdfr    if (0 != ret) {
193178825Sdfr        rv = apr_get_netos_error();
194178825Sdfr    }
195178825Sdfr
196178825Sdfr    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
197178825Sdfr        if (rv != APR_SUCCESS) {
198178825Sdfr            APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem, pfd_elem_t, link);
199178825Sdfr        }
200178825Sdfr        else {
201178825Sdfr            APR_RING_INSERT_TAIL(&(pollset->p->query_ring), elem, pfd_elem_t, link);
202178825Sdfr        }
203178825Sdfr        pollset_unlock_rings();
204178825Sdfr    }
205178825Sdfr
206178825Sdfr    return rv;
207178825Sdfr}
208178825Sdfr
209178825Sdfrstatic apr_status_t impl_pollset_remove(apr_pollset_t *pollset,
210178825Sdfr                                        const apr_pollfd_t *descriptor)
211178825Sdfr{
212178825Sdfr    pfd_elem_t *ep;
213178825Sdfr    apr_status_t rv = APR_SUCCESS;
214178825Sdfr    struct epoll_event ev = {0}; /* ignored, but must be passed with
215178825Sdfr                                  * kernel < 2.6.9
216178825Sdfr                                  */
217178825Sdfr    int ret = -1;
218178825Sdfr
219178825Sdfr    if (descriptor->desc_type == APR_POLL_SOCKET) {
220178825Sdfr        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
221178825Sdfr                        descriptor->desc.s->socketdes, &ev);
222178825Sdfr    }
223178825Sdfr    else {
224178825Sdfr        ret = epoll_ctl(pollset->p->epoll_fd, EPOLL_CTL_DEL,
225178825Sdfr                        descriptor->desc.f->filedes, &ev);
226178825Sdfr    }
227178825Sdfr    if (ret < 0) {
228178825Sdfr        rv = APR_NOTFOUND;
229178825Sdfr    }
230178825Sdfr
231178825Sdfr    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
232178825Sdfr        pollset_lock_rings();
233178825Sdfr
234178825Sdfr        for (ep = APR_RING_FIRST(&(pollset->p->query_ring));
235178825Sdfr             ep != APR_RING_SENTINEL(&(pollset->p->query_ring),
236233294Sstas                                     pfd_elem_t, link);
237233294Sstas             ep = APR_RING_NEXT(ep, link)) {
238233294Sstas
239233294Sstas            if (descriptor->desc.s == ep->pfd.desc.s) {
240233294Sstas                APR_RING_REMOVE(ep, link);
241233294Sstas                APR_RING_INSERT_TAIL(&(pollset->p->dead_ring),
242233294Sstas                                     ep, pfd_elem_t, link);
243233294Sstas                break;
244233294Sstas            }
245233294Sstas        }
246233294Sstas
247233294Sstas        pollset_unlock_rings();
248233294Sstas    }
249233294Sstas
250178825Sdfr    return rv;
251178825Sdfr}
252178825Sdfr
253178825Sdfrstatic apr_status_t impl_pollset_poll(apr_pollset_t *pollset,
254178825Sdfr                                           apr_interval_time_t timeout,
255178825Sdfr                                           apr_int32_t *num,
256178825Sdfr                                           const apr_pollfd_t **descriptors)
257178825Sdfr{
258178825Sdfr    int ret, i, j;
259178825Sdfr    apr_status_t rv = APR_SUCCESS;
260178825Sdfr    apr_pollfd_t *fdptr;
261178825Sdfr
262178825Sdfr    if (timeout > 0) {
263178825Sdfr        timeout /= 1000;
264233294Sstas    }
265178825Sdfr
266178825Sdfr    ret = epoll_wait(pollset->p->epoll_fd, pollset->p->pollset, pollset->nalloc,
267178825Sdfr                     timeout);
268178825Sdfr    (*num) = ret;
269178825Sdfr
270178825Sdfr    if (ret < 0) {
271178825Sdfr        rv = apr_get_netos_error();
272178825Sdfr    }
273178825Sdfr    else if (ret == 0) {
274178825Sdfr        rv = APR_TIMEUP;
275178825Sdfr    }
276178825Sdfr    else {
277178825Sdfr        for (i = 0, j = 0; i < ret; i++) {
278178825Sdfr            if (pollset->flags & APR_POLLSET_NOCOPY) {
279178825Sdfr                fdptr = (apr_pollfd_t *)(pollset->p->pollset[i].data.ptr);
280233294Sstas            }
281178825Sdfr            else {
282178825Sdfr                fdptr = &(((pfd_elem_t *) (pollset->p->pollset[i].data.ptr))->pfd);
283178825Sdfr            }
284178825Sdfr            /* Check if the polled descriptor is our
285178825Sdfr             * wakeup pipe. In that case do not put it result set.
286178825Sdfr             */
287178825Sdfr            if ((pollset->flags & APR_POLLSET_WAKEABLE) &&
288178825Sdfr                fdptr->desc_type == APR_POLL_FILE &&
289233294Sstas                fdptr->desc.f == pollset->wakeup_pipe[0]) {
290178825Sdfr                apr_pollset_drain_wakeup_pipe(pollset);
291178825Sdfr                rv = APR_EINTR;
292178825Sdfr            }
293178825Sdfr            else {
294178825Sdfr                pollset->p->result_set[j] = *fdptr;
295233294Sstas                pollset->p->result_set[j].rtnevents =
296178825Sdfr                    get_epoll_revent(pollset->p->pollset[i].events);
297178825Sdfr                j++;
298178825Sdfr            }
299233294Sstas        }
300233294Sstas        if (((*num) = j)) { /* any event besides wakeup pipe? */
301233294Sstas            rv = APR_SUCCESS;
302178825Sdfr
303178825Sdfr            if (descriptors) {
304178825Sdfr                *descriptors = pollset->p->result_set;
305178825Sdfr            }
306178825Sdfr        }
307178825Sdfr    }
308178825Sdfr
309178825Sdfr    if (!(pollset->flags & APR_POLLSET_NOCOPY)) {
310178825Sdfr        pollset_lock_rings();
311178825Sdfr
312178825Sdfr        /* Shift all PFDs in the Dead Ring to the Free Ring */
313233294Sstas        APR_RING_CONCAT(&(pollset->p->free_ring), &(pollset->p->dead_ring), pfd_elem_t, link);
314178825Sdfr
315178825Sdfr        pollset_unlock_rings();
316178825Sdfr    }
317233294Sstas
318233294Sstas    return rv;
319233294Sstas}
320178825Sdfr
321178825Sdfrstatic apr_pollset_provider_t impl = {
322178825Sdfr    impl_pollset_create,
323233294Sstas    impl_pollset_add,
324178825Sdfr    impl_pollset_remove,
325178825Sdfr    impl_pollset_poll,
326178825Sdfr    impl_pollset_cleanup,
327233294Sstas    "epoll"
328178825Sdfr};
329178825Sdfr
330178825Sdfrapr_pollset_provider_t *apr_pollset_provider_epoll = &impl;
331178825Sdfr
332178825Sdfrstatic apr_status_t cb_cleanup(void *p_)
333178825Sdfr{
334178825Sdfr    apr_pollcb_t *pollcb = (apr_pollcb_t *) p_;
335233294Sstas    close(pollcb->fd);
336233294Sstas    return APR_SUCCESS;
337178825Sdfr}
338233294Sstas
339233294Sstasstatic apr_status_t impl_pollcb_create(apr_pollcb_t *pollcb,
340178825Sdfr                                       apr_uint32_t size,
341178825Sdfr                                       apr_pool_t *p,
342178825Sdfr                                       apr_uint32_t flags)
343178825Sdfr{
344178825Sdfr    int fd;
345233294Sstas
346178825Sdfr#ifdef HAVE_EPOLL_CREATE1
347178825Sdfr    fd = epoll_create1(EPOLL_CLOEXEC);
348178825Sdfr#else
349178825Sdfr    fd = epoll_create(size);
350178825Sdfr#endif
351178825Sdfr
352178825Sdfr    if (fd < 0) {
353178825Sdfr        return apr_get_netos_error();
354178825Sdfr    }
355178825Sdfr
356178825Sdfr#ifndef HAVE_EPOLL_CREATE1
357178825Sdfr    {
358178825Sdfr        int fd_flags;
359233294Sstas        apr_status_t rv;
360233294Sstas
361233294Sstas        if ((fd_flags = fcntl(fd, F_GETFD)) == -1) {
362178825Sdfr            rv = errno;
363178825Sdfr            close(fd);
364178825Sdfr            pollcb->fd = -1;
365178825Sdfr            return rv;
366233294Sstas        }
367178825Sdfr
368178825Sdfr        fd_flags |= FD_CLOEXEC;
369178825Sdfr        if (fcntl(fd, F_SETFD, fd_flags) == -1) {
370178825Sdfr            rv = errno;
371233294Sstas            close(fd);
372178825Sdfr            pollcb->fd = -1;
373178825Sdfr            return rv;
374178825Sdfr        }
375233294Sstas    }
376178825Sdfr#endif
377178825Sdfr
378178825Sdfr    pollcb->fd = fd;
379178825Sdfr    pollcb->pollset.epoll = apr_palloc(p, size * sizeof(struct epoll_event));
380178825Sdfr    apr_pool_cleanup_register(p, pollcb, cb_cleanup, apr_pool_cleanup_null);
381178825Sdfr
382178825Sdfr    return APR_SUCCESS;
383233294Sstas}
384178825Sdfr
385178825Sdfrstatic apr_status_t impl_pollcb_add(apr_pollcb_t *pollcb,
386233294Sstas                                    apr_pollfd_t *descriptor)
387178825Sdfr{
388178825Sdfr    struct epoll_event ev;
389178825Sdfr    int ret;
390178825Sdfr
391178825Sdfr    ev.events = get_epoll_event(descriptor->reqevents);
392178825Sdfr    ev.data.ptr = (void *)descriptor;
393178825Sdfr
394178825Sdfr    if (descriptor->desc_type == APR_POLL_SOCKET) {
395178825Sdfr        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
396178825Sdfr                        descriptor->desc.s->socketdes, &ev);
397178825Sdfr    }
398178825Sdfr    else {
399178825Sdfr        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_ADD,
400178825Sdfr                        descriptor->desc.f->filedes, &ev);
401178825Sdfr    }
402233294Sstas
403178825Sdfr    if (ret == -1) {
404178825Sdfr        return apr_get_netos_error();
405178825Sdfr    }
406178825Sdfr
407233294Sstas    return APR_SUCCESS;
408178825Sdfr}
409178825Sdfr
410178825Sdfrstatic apr_status_t impl_pollcb_remove(apr_pollcb_t *pollcb,
411178825Sdfr                                       apr_pollfd_t *descriptor)
412233294Sstas{
413178825Sdfr    apr_status_t rv = APR_SUCCESS;
414178825Sdfr    struct epoll_event ev = {0}; /* ignored, but must be passed with
415178825Sdfr                                  * kernel < 2.6.9
416178825Sdfr                                  */
417178825Sdfr    int ret = -1;
418178825Sdfr
419178825Sdfr    if (descriptor->desc_type == APR_POLL_SOCKET) {
420178825Sdfr        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
421178825Sdfr                        descriptor->desc.s->socketdes, &ev);
422178825Sdfr    }
423178825Sdfr    else {
424178825Sdfr        ret = epoll_ctl(pollcb->fd, EPOLL_CTL_DEL,
425178825Sdfr                        descriptor->desc.f->filedes, &ev);
426178825Sdfr    }
427178825Sdfr
428178825Sdfr    if (ret < 0) {
429178825Sdfr        rv = APR_NOTFOUND;
430178825Sdfr    }
431178825Sdfr
432178825Sdfr    return rv;
433178825Sdfr}
434178825Sdfr
435
436static apr_status_t impl_pollcb_poll(apr_pollcb_t *pollcb,
437                                     apr_interval_time_t timeout,
438                                     apr_pollcb_cb_t func,
439                                     void *baton)
440{
441    int ret, i;
442    apr_status_t rv = APR_SUCCESS;
443
444    if (timeout > 0) {
445        timeout /= 1000;
446    }
447
448    ret = epoll_wait(pollcb->fd, pollcb->pollset.epoll, pollcb->nalloc,
449                     timeout);
450    if (ret < 0) {
451        rv = apr_get_netos_error();
452    }
453    else if (ret == 0) {
454        rv = APR_TIMEUP;
455    }
456    else {
457        for (i = 0; i < ret; i++) {
458            apr_pollfd_t *pollfd = (apr_pollfd_t *)(pollcb->pollset.epoll[i].data.ptr);
459            pollfd->rtnevents = get_epoll_revent(pollcb->pollset.epoll[i].events);
460
461            rv = func(baton, pollfd);
462            if (rv) {
463                return rv;
464            }
465        }
466    }
467
468    return rv;
469}
470
471static apr_pollcb_provider_t impl_cb = {
472    impl_pollcb_create,
473    impl_pollcb_add,
474    impl_pollcb_remove,
475    impl_pollcb_poll,
476    "epoll"
477};
478
479apr_pollcb_provider_t *apr_pollcb_provider_epoll = &impl_cb;
480
481#endif /* HAVE_EPOLL */
482