1/*
2 *  OpenVPN -- An application to securely tunnel IP networks
3 *             over a single TCP/UDP port, with support for SSL/TLS-based
4 *             session authentication and key exchange,
5 *             packet encryption, packet authentication, and
6 *             packet compression.
7 *
8 *  Copyright (C) 2002-2010 OpenVPN Technologies, Inc. <sales@openvpn.net>
9 *
10 *  This program is free software; you can redistribute it and/or modify
11 *  it under the terms of the GNU General Public License version 2
12 *  as published by the Free Software Foundation.
13 *
14 *  This program is distributed in the hope that it will be useful,
15 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 *  GNU General Public License for more details.
18 *
19 *  You should have received a copy of the GNU General Public License
20 *  along with this program (see the file COPYING included with this
21 *  distribution); if not, write to the Free Software Foundation, Inc.,
22 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_MSC_VER)
#include "config-msvc.h"
#endif

#include "syshead.h"

#include <limits.h>

#include "buffer.h"
#include "error.h"
#include "integer.h"
#include "event.h"
#include "fdmisc.h"

#include "memdbg.h"
40
41/*
42 * Some OSes will prefer select() over poll()
43 * when both are available.
44 */
45#if defined(TARGET_DARWIN)
46#define SELECT_PREFERRED_OVER_POLL
47#endif
48
49/*
50 * All non-windows OSes are assumed to have select()
51 */
52#ifdef WIN32
53#define SELECT 0
54#else
55#define SELECT 1
56#endif
57
58/*
 * This should be set to one more than the highest file
 * descriptor which can be used in one of the FD_ macros
 * (i.e. the number of descriptors an fd_set can hold).
61 */
62#ifdef FD_SETSIZE
63#define SELECT_MAX_FDS FD_SETSIZE
64#else
65#define SELECT_MAX_FDS 256
66#endif
67
/*
 * Convert a struct timeval into a millisecond timeout suitable for
 * poll(), epoll_wait() or WSAWaitForMultipleEvents().
 *
 * Returns 0 only when both tv_sec and tv_usec are zero.  Any other
 * value is rounded to the nearest millisecond and then forced into
 * the range [1, INT_MAX], so a tiny non-zero timeout never turns
 * into a non-blocking poll and a very large one never wraps around
 * when narrowed to int.
 */
static inline int
tv_to_ms_timeout (const struct timeval *tv)
{
  /* largest whole-second count whose millisecond value fits in int */
  const long long sec_limit = INT_MAX / 1000;
  long long ms;

  if (tv->tv_sec == 0 && tv->tv_usec == 0)
    return 0;

  /* saturate before multiplying so the arithmetic below cannot overflow */
  if (tv->tv_sec > sec_limit)
    return INT_MAX;
  if (tv->tv_sec < -sec_limit)
    return 1;

  ms = (long long) tv->tv_sec * 1000 + ((long long) tv->tv_usec + 500) / 1000;

  if (ms < 1)
    return 1;
  if (ms > INT_MAX)
    return INT_MAX;
  return (int) ms;
}
76
77#ifdef WIN32
78
/*
 * Event set backed by Win32 event handles.
 *
 * events[] and esr[] are parallel arrays: esr[i] carries the
 * rwflags/arg pair that we_wait() reports when events[i] is
 * signaled.  The we_* functions obtain this structure by casting
 * the struct event_set pointer they are handed.
 */
struct we_set
{
  struct event_set_functions func; /* dispatch table filled in by we_init() */
  bool fast;                       /* true when EVENT_METHOD_FAST was requested */
  HANDLE *events;                  /* handles passed to WSAWaitForMultipleEvents */
  struct event_set_return *esr;    /* per-slot rwflags/arg, parallel to events[] */
  int n_events;                    /* number of slots currently in use */
  int capacity;                    /* number of slots allocated in both arrays */
};
88
89static inline void
90we_set_event (struct we_set *wes, int i, event_t event, unsigned int rwflags, void *arg)
91{
92  ASSERT (i >= 0 && i < wes->capacity);
93
94  if (rwflags == EVENT_READ)
95    {
96      ASSERT (event->read != NULL);
97      wes->events[i] = event->read;
98    }
99  else if (rwflags == EVENT_WRITE)
100    {
101      ASSERT (event->write != NULL);
102      wes->events[i] = event->write;
103    }
104  else
105    msg (M_FATAL, "fatal error in we_set_events: rwflags=%d", rwflags);
106
107  wes->esr[i].rwflags = rwflags;
108  wes->esr[i].arg = arg;
109}
110
111static inline bool
112we_append_event (struct we_set *wes, event_t event, unsigned int rwflags, void *arg)
113{
114  if (rwflags & EVENT_WRITE)
115    {
116      if (wes->n_events < wes->capacity)
117	{
118	  we_set_event (wes, wes->n_events, event, EVENT_WRITE, arg);
119	  ++wes->n_events;
120	}
121      else
122	return false;
123    }
124  if (rwflags & EVENT_READ)
125    {
126      if (wes->n_events < wes->capacity)
127	{
128	  we_set_event (wes, wes->n_events, event, EVENT_READ, arg);
129	  ++wes->n_events;
130	}
131      else
132	return false;
133    }
134  return true;
135}
136
137static void
138we_del_event (struct we_set *wes, event_t event)
139{
140  int i, j = 0;
141  const int len = wes->n_events;
142
143  for (i = 0; i < len; ++i)
144    {
145      const HANDLE h = wes->events[i];
146      if (h == event->read || h == event->write)
147	--wes->n_events;
148      else
149	{
150	  if (i != j)
151	    {
152	      wes->events[j] = wes->events[i];
153	      wes->esr[j] = wes->esr[i];
154	    }
155	  ++j;
156	}
157    }
158}
159
160static void
161we_del_index (struct we_set *wes, int index)
162{
163  int i;
164  ASSERT (index >= 0 && index < wes->n_events);
165  for (i = index; i < wes->n_events - 1; ++i)
166    {
167      wes->events[i] = wes->events[i+1];
168      wes->esr[i] = wes->esr[i+1];
169    }
170  --wes->n_events;
171}
172
173static void
174we_get_rw_indices (struct we_set *wes, event_t event, int *ri, int *wi)
175{
176  int i;
177  *ri = *wi = -1;
178  for (i = 0; i < wes->n_events; ++i)
179    {
180      const HANDLE h = wes->events[i];
181      if (h == event->read)
182	{
183	  ASSERT (*ri == -1);
184	  *ri = i;
185	}
186      else if (h == event->write)
187	{
188	  ASSERT (*wi == -1);
189	  *wi = i;
190	}
191    }
192}
193
194static void
195we_free (struct event_set *es)
196{
197  struct we_set *wes = (struct we_set *) es;
198  free (wes->events);
199  free (wes->esr);
200  free (wes);
201}
202
203static void
204we_reset (struct event_set *es)
205{
206  struct we_set *wes = (struct we_set *) es;
207  ASSERT (wes->fast);
208  wes->n_events = 0;
209}
210
211static void
212we_del (struct event_set *es, event_t event)
213{
214  struct we_set *wes = (struct we_set *) es;
215  ASSERT (!wes->fast);
216  we_del_event (wes, event);
217}
218
/*
 * Set the directions (rwflags) that should be waited on for an event.
 *
 * In fast mode the requested directions are simply appended to the
 * set.  Otherwise the slots already held by this event are looked up
 * and reconciled with rwflags: missing directions are appended,
 * directions no longer wanted are removed, and a single existing slot
 * may be rewritten in place.
 *
 * When an append fails because the set is full, an error is logged;
 * the function returns void, so the caller gets no other indication.
 */
static void
we_ctl (struct event_set *es, event_t event, unsigned int rwflags, void *arg)
{
  struct we_set *wes = (struct we_set *) es;

  dmsg (D_EVENT_WAIT, "WE_CTL n=%d ev=%p rwflags=0x%04x arg=" ptr_format,
       wes->n_events,
       event,
       rwflags,
       (ptr_type)arg);

  if (wes->fast)
    {
      if (!we_append_event (wes, event, rwflags, arg))
        goto err;
    }
  else
    {
      int ri, wi;   /* slot of the read / write handle, or -1 if absent */
      int one = -1; /* the existing slot when exactly one is present (n == 1) */
      int n = 0;    /* how many of the two handles are already in the set */

      we_get_rw_indices (wes, event, &ri, &wi);
      if (wi >= 0)
        {
          one = wi;
          ++n;
        }
      if (ri >= 0)
        {
          one = ri;
          ++n;
        }
      switch (rwflags)
        {
        case 0:
          /* nothing wanted: remove whatever slots exist */
          switch (n)
            {
            case 0:
              break;
            case 1:
              we_del_index (wes, one);
              break;
            case 2:
              we_del_event (wes, event);
              break;
            default:
              ASSERT (0);
            }
          break;
        case EVENT_READ:
          switch (n)
            {
            case 0:
              if (!we_append_event (wes, event, EVENT_READ, arg))
                goto err;
              break;
            case 1:
              /* reuse the single slot as the read slot, whichever direction it held */
              we_set_event (wes, one, event, EVENT_READ, arg);
              break;
            case 2:
              /* both present: drop the write slot, the read slot is left as it was */
              we_del_index (wes, wi);
              break;
            default:
              ASSERT (0);
            }
          break;
        case EVENT_WRITE:
          switch (n)
            {
            case 0:
              if (!we_append_event (wes, event, EVENT_WRITE, arg))
                goto err;
              break;
            case 1:
              /* reuse the single slot as the write slot, whichever direction it held */
              we_set_event (wes, one, event, EVENT_WRITE, arg);
              break;
            case 2:
              /* both present: drop the read slot, the write slot is left as it was */
              we_del_index (wes, ri);
              break;
            default:
              ASSERT (0);
            }
          break;
        case EVENT_READ|EVENT_WRITE:
          switch (n)
            {
            case 0:
              if (!we_append_event (wes, event, EVENT_READ|EVENT_WRITE, arg))
                goto err;
              break;
            case 1:
              /* one direction present: append only the missing one */
              if (ri == -1)
                {
                  ASSERT (wi != -1);
                  if (!we_append_event (wes, event, EVENT_READ, arg))
                    goto err;
                }
              else if (wi == -1)
                {
                  if (!we_append_event (wes, event, EVENT_WRITE, arg))
                    goto err;
                }
              else
                ASSERT (0);
              break;
            case 2:
              /* both directions already present: existing slots are left untouched */
              break;
            default:
              ASSERT (0);
            }
          break;
        default:
          msg (M_FATAL, "fatal error in we_ctl: rwflags=%d", rwflags);
        }
    }
  return;

 err:
  /* reached only when we_append_event() reported that the set is full */
  msg (D_EVENT_ERRORS, "Error: Windows resource limit WSA_MAXIMUM_WAIT_EVENTS (%d) has been exceeded", WSA_MAXIMUM_WAIT_EVENTS);
}
340
/*
 * Wait for events in the set and report them through out[].
 *
 * tv is converted to milliseconds with tv_to_ms_timeout().  The set is
 * first polled with a zero timeout; if anything is already signaled,
 * every handle is polled individually and up to outlen results are
 * written.  Otherwise, when the timeout is non-zero, a blocking wait
 * is performed and at most one result is written.
 *
 * Returns the number of entries written to out[], 0 on timeout, or -1
 * for any other wait status.
 */
static int
we_wait (struct event_set *es, const struct timeval *tv, struct event_set_return *out, int outlen)
{
  struct we_set *wes = (struct we_set *) es;
  const int timeout = tv_to_ms_timeout (tv);
  DWORD status;

  dmsg (D_EVENT_WAIT, "WE_WAIT enter n=%d to=%d", wes->n_events, timeout);

#ifdef ENABLE_DEBUG
  /* dump the whole slot table when event-wait debugging is enabled */
  if (check_debug_level (D_EVENT_WAIT)) {
    int i;
    for (i = 0; i < wes->n_events; ++i)
      dmsg (D_EVENT_WAIT, "[%d] ev=%p rwflags=0x%04x arg=" ptr_format,
       i,
       wes->events[i],
       wes->esr[i].rwflags,
       (ptr_type)wes->esr[i].arg);
  }
#endif

  /*
   * First poll our event list with 0 timeout
   */
  status = WSAWaitForMultipleEvents(
    (DWORD) wes->n_events,
    wes->events,
    FALSE,
    (DWORD) 0,
    FALSE);

  /*
   * If at least one event is already set, we must
   * individually poll the whole list.
   */
  if (status >= WSA_WAIT_EVENT_0 && status < WSA_WAIT_EVENT_0 + (DWORD) wes->n_events)
    {
      int i;
      int j = 0; /* number of results written to out[] so far */
      for (i = 0; i < wes->n_events; ++i)
        {
          if (j >= outlen)
            break;
          if (WaitForSingleObject (wes->events[i], 0) == WAIT_OBJECT_0)
            {
              *out = wes->esr[i];
              dmsg (D_EVENT_WAIT, "WE_WAIT leave [%d,%d] rwflags=0x%04x arg=" ptr_format,
                   i, j, out->rwflags, (ptr_type)out->arg);
              ++j;
              ++out;
            }
        }
      return j;
    }
  else
    {
      /*
       * If caller specified timeout > 0, we know at this point
       * that no events are set, so wait only for the first event
       * (or timeout) and return at most one event_set_return object.
       *
       * If caller specified timeout == 0, the second call to
       * WSAWaitForMultipleEvents would be redundant -- just
       * return 0 indicating timeout.
       */
      if (timeout > 0)
        status = WSAWaitForMultipleEvents(
          (DWORD) wes->n_events,
          wes->events,
          FALSE,
          (DWORD) timeout,
          FALSE);

      /* with timeout == 0, status still holds the result of the first poll */
      if (outlen >= 1 && status >= WSA_WAIT_EVENT_0 && status < WSA_WAIT_EVENT_0 + (DWORD) wes->n_events)
        {
          *out = wes->esr[status - WSA_WAIT_EVENT_0];
          dmsg (D_EVENT_WAIT, "WE_WAIT leave rwflags=0x%04x arg=" ptr_format,
               out->rwflags, (ptr_type)out->arg);
          return 1;
        }
      else if (status == WSA_WAIT_TIMEOUT)
        return 0;
      else
        /* any other status, including a signaled event when outlen < 1 */
        return -1;
    }
}
427
428static struct event_set *
429we_init (int *maxevents, unsigned int flags)
430{
431  struct we_set *wes;
432
433  dmsg (D_EVENT_WAIT, "WE_INIT maxevents=%d flags=0x%08x", *maxevents, flags);
434
435  ALLOC_OBJ_CLEAR (wes, struct we_set);
436
437  /* set dispatch functions */
438  wes->func.free = we_free;
439  wes->func.reset = we_reset;
440  wes->func.del = we_del;
441  wes->func.ctl = we_ctl;
442  wes->func.wait = we_wait;
443
444  if (flags & EVENT_METHOD_FAST)
445    wes->fast = true;
446  wes->n_events = 0;
447
448  /* Figure our event capacity */
449  ASSERT (*maxevents > 0);
450  wes->capacity = min_int (*maxevents * 2, WSA_MAXIMUM_WAIT_EVENTS);
451  *maxevents = min_int (*maxevents, WSA_MAXIMUM_WAIT_EVENTS);
452
453  /* Allocate space for Win32 event handles */
454  ALLOC_ARRAY_CLEAR (wes->events, HANDLE, wes->capacity);
455
456  /* Allocate space for event_set_return objects */
457  ALLOC_ARRAY_CLEAR (wes->esr, struct event_set_return, wes->capacity);
458
459  dmsg (D_EVENT_WAIT, "WE_INIT maxevents=%d capacity=%d",
460       *maxevents, wes->capacity);
461
462  return (struct event_set *) wes;
463}
464
465#endif /* WIN32 */
466
467#if EPOLL
468
/*
 * Event set backed by an epoll descriptor.  The ep_* functions obtain
 * this structure by casting the struct event_set pointer they are
 * handed.
 */
struct ep_set
{
  struct event_set_functions func; /* dispatch table filled in by ep_init() */
  bool fast;                       /* true when EVENT_METHOD_FAST was requested */
  int epfd;                        /* descriptor returned by epoll_create() */
  int maxevents;                   /* number of entries allocated in events[] */
  struct epoll_event *events;      /* buffer that epoll_wait() fills in */
};
477
478static void
479ep_free (struct event_set *es)
480{
481  struct ep_set *eps = (struct ep_set *) es;
482  close (eps->epfd);
483  free (eps->events);
484  free (eps);
485}
486
487static void
488ep_reset (struct event_set *es)
489{
490  const struct ep_set *eps = (struct ep_set *) es;
491  ASSERT (eps->fast);
492}
493
494static void
495ep_del (struct event_set *es, event_t event)
496{
497  struct epoll_event ev;
498  struct ep_set *eps = (struct ep_set *) es;
499
500  dmsg (D_EVENT_WAIT, "EP_DEL ev=%d", (int)event);
501
502  ASSERT (!eps->fast);
503  CLEAR (ev);
504  epoll_ctl (eps->epfd, EPOLL_CTL_DEL, event, &ev);
505}
506
507static void
508ep_ctl (struct event_set *es, event_t event, unsigned int rwflags, void *arg)
509{
510  struct ep_set *eps = (struct ep_set *) es;
511  struct epoll_event ev;
512
513  CLEAR (ev);
514
515  ev.data.ptr = arg;
516  if (rwflags & EVENT_READ)
517    ev.events |= EPOLLIN;
518  if (rwflags & EVENT_WRITE)
519    ev.events |= EPOLLOUT;
520
521  dmsg (D_EVENT_WAIT, "EP_CTL fd=%d rwflags=0x%04x ev=0x%08x arg=" ptr_format,
522       (int)event,
523       rwflags,
524       (unsigned int)ev.events,
525       (ptr_type)ev.data.ptr);
526
527  if (epoll_ctl (eps->epfd, EPOLL_CTL_MOD, event, &ev) < 0)
528    {
529      if (errno == ENOENT)
530	{
531	  if (epoll_ctl (eps->epfd, EPOLL_CTL_ADD, event, &ev) < 0)
532	    msg (M_ERR, "EVENT: epoll_ctl EPOLL_CTL_ADD failed, sd=%d", (int)event);
533	}
534      else
535	msg (M_ERR, "EVENT: epoll_ctl EPOLL_CTL_MOD failed, sd=%d", (int)event);
536    }
537}
538
539static int
540ep_wait (struct event_set *es, const struct timeval *tv, struct event_set_return *out, int outlen)
541{
542  struct ep_set *eps = (struct ep_set *) es;
543  int stat;
544
545  if (outlen > eps->maxevents)
546    outlen = eps->maxevents;
547
548  stat = epoll_wait (eps->epfd, eps->events, outlen, tv_to_ms_timeout (tv));
549  ASSERT (stat <= outlen);
550
551  if (stat > 0)
552    {
553      int i;
554      const struct epoll_event *ev = eps->events;
555      struct event_set_return *esr = out;
556      for (i = 0; i < stat; ++i)
557	{
558	  esr->rwflags = 0;
559	  if (ev->events & (EPOLLIN|EPOLLPRI|EPOLLERR|EPOLLHUP))
560	    esr->rwflags |= EVENT_READ;
561	  if (ev->events & EPOLLOUT)
562	    esr->rwflags |= EVENT_WRITE;
563	  esr->arg = ev->data.ptr;
564	  dmsg (D_EVENT_WAIT, "EP_WAIT[%d] rwflags=0x%04x ev=0x%08x arg=" ptr_format,
565	       i, esr->rwflags, ev->events, (ptr_type)ev->data.ptr);
566	  ++ev;
567	  ++esr;
568	}
569    }
570  return stat;
571}
572
573static struct event_set *
574ep_init (int *maxevents, unsigned int flags)
575{
576  struct ep_set *eps;
577  int fd;
578
579  dmsg (D_EVENT_WAIT, "EP_INIT maxevents=%d flags=0x%08x", *maxevents, flags);
580
581  /* open epoll file descriptor */
582  fd = epoll_create (*maxevents);
583  if (fd < 0)
584    return NULL;
585
586  set_cloexec (fd);
587
588  ALLOC_OBJ_CLEAR (eps, struct ep_set);
589
590  /* set dispatch functions */
591  eps->func.free = ep_free;
592  eps->func.reset = ep_reset;
593  eps->func.del = ep_del;
594  eps->func.ctl = ep_ctl;
595  eps->func.wait = ep_wait;
596
597  /* fast method ("sort of") corresponds to epoll one-shot */
598  if (flags & EVENT_METHOD_FAST)
599    eps->fast = true;
600
601  /* allocate space for epoll_wait return */
602  ASSERT (*maxevents > 0);
603  eps->maxevents = *maxevents;
604  ALLOC_ARRAY_CLEAR (eps->events, struct epoll_event, eps->maxevents);
605
606  /* set epoll control fd */
607  eps->epfd = fd;
608
609  return (struct event_set *) eps;
610}
611#endif /* EPOLL */
612
613#if POLL
614
/*
 * Event set backed by poll().  events[] and args[] are parallel
 * arrays: args[i] is the caller argument reported when events[i]
 * has activity.  The po_* functions obtain this structure by casting
 * the struct event_set pointer they are handed.
 */
struct po_set
{
  struct event_set_functions func; /* dispatch table filled in by po_init() */
  bool fast;                       /* true when EVENT_METHOD_FAST was requested */
  struct pollfd *events;           /* array passed to poll() */
  void **args;                     /* per-entry caller argument, parallel to events[] */
  int n_events;                    /* number of entries currently in use */
  int capacity;                    /* number of entries allocated in both arrays */
};
624
625static void
626po_free (struct event_set *es)
627{
628  struct po_set *pos = (struct po_set *) es;
629  free (pos->events);
630  free (pos->args);
631  free (pos);
632}
633
634static void
635po_reset (struct event_set *es)
636{
637  struct po_set *pos = (struct po_set *) es;
638  ASSERT (pos->fast);
639  pos->n_events = 0;
640}
641
642static void
643po_del (struct event_set *es, event_t event)
644{
645  struct po_set *pos = (struct po_set *) es;
646  int i;
647
648  dmsg (D_EVENT_WAIT, "PO_DEL ev=%d", (int)event);
649
650  ASSERT (!pos->fast);
651  for (i = 0; i < pos->n_events; ++i)
652    {
653      if (pos->events[i].fd == event)
654	{
655	  int j;
656	  for (j = i; j < pos->n_events - 1; ++j)
657	    {
658	      pos->events[j] = pos->events[j+1];
659	      pos->args[j] = pos->args[j+1];
660	    }
661	  --pos->n_events;
662	  break;
663	}
664    }
665}
666
667static inline void
668po_set_pollfd_events (struct pollfd *pfdp, unsigned int rwflags)
669{
670  pfdp->events = 0;
671  if (rwflags & EVENT_WRITE)
672    pfdp->events |= POLLOUT;
673  if (rwflags & EVENT_READ)
674    pfdp->events |= (POLLIN|POLLPRI);
675}
676
677static inline bool
678po_append_event (struct po_set *pos, event_t event, unsigned int rwflags, void *arg)
679{
680  if (pos->n_events < pos->capacity)
681    {
682      struct pollfd *pfdp = &pos->events[pos->n_events];
683      pfdp->fd = event;
684      pos->args[pos->n_events] = arg;
685      po_set_pollfd_events (pfdp, rwflags);
686      ++pos->n_events;
687      return true;
688    }
689  else
690    return false;
691}
692
693static void
694po_ctl (struct event_set *es, event_t event, unsigned int rwflags, void *arg)
695{
696  struct po_set *pos = (struct po_set *) es;
697
698  dmsg (D_EVENT_WAIT, "PO_CTL rwflags=0x%04x ev=%d arg=" ptr_format,
699       rwflags, (int)event, (ptr_type)arg);
700
701  if (pos->fast)
702    {
703      if (!po_append_event (pos, event, rwflags, arg))
704	goto err;
705    }
706  else
707    {
708      int i;
709      for (i = 0; i < pos->n_events; ++i)
710	{
711	  struct pollfd *pfdp = &pos->events[i];
712	  if (pfdp->fd == event)
713	    {
714	      pos->args[i] = arg;
715	      po_set_pollfd_events (pfdp, rwflags);
716	      goto done;
717	    }
718	}
719      if (!po_append_event (pos, event, rwflags, arg))
720	goto err;
721    }
722
723 done:
724  return;
725
726 err:
727  msg (D_EVENT_ERRORS, "Error: poll: too many I/O wait events");
728}
729
730static int
731po_wait (struct event_set *es, const struct timeval *tv, struct event_set_return *out, int outlen)
732{
733  struct po_set *pos = (struct po_set *) es;
734  int stat;
735
736  stat = poll (pos->events, pos->n_events, tv_to_ms_timeout (tv));
737
738  ASSERT (stat <= pos->n_events);
739
740  if (stat > 0)
741    {
742      int i, j=0;
743      const struct pollfd *pfdp = pos->events;
744      for (i = 0; i < pos->n_events && j < outlen; ++i)
745	{
746	  if (pfdp->revents & (POLLIN|POLLPRI|POLLERR|POLLHUP|POLLOUT))
747	    {
748	      out->rwflags = 0;
749	      if (pfdp->revents & (POLLIN|POLLPRI|POLLERR|POLLHUP))
750		out->rwflags |= EVENT_READ;
751	      if (pfdp->revents & POLLOUT)
752		out->rwflags |= EVENT_WRITE;
753	      out->arg = pos->args[i];
754	      dmsg (D_EVENT_WAIT, "PO_WAIT[%d,%d] fd=%d rev=0x%08x rwflags=0x%04x arg=" ptr_format " %s",
755		   i, j, pfdp->fd, pfdp->revents, out->rwflags, (ptr_type)out->arg, pos->fast ? "" : "[scalable]");
756	      ++out;
757	      ++j;
758	    }
759	  else if (pfdp->revents)
760	    {
761	      msg (D_EVENT_ERRORS, "Error: poll: unknown revents=0x%04x", (unsigned int)pfdp->revents);
762	    }
763	  ++pfdp;
764	}
765      return j;
766    }
767  return stat;
768}
769
770static struct event_set *
771po_init (int *maxevents, unsigned int flags)
772{
773  struct po_set *pos;
774
775  dmsg (D_EVENT_WAIT, "PO_INIT maxevents=%d flags=0x%08x", *maxevents, flags);
776
777  ALLOC_OBJ_CLEAR (pos, struct po_set);
778
779  /* set dispatch functions */
780  pos->func.free = po_free;
781  pos->func.reset = po_reset;
782  pos->func.del = po_del;
783  pos->func.ctl = po_ctl;
784  pos->func.wait = po_wait;
785
786  if (flags & EVENT_METHOD_FAST)
787    pos->fast = true;
788
789  pos->n_events = 0;
790
791  /* Figure our event capacity */
792  ASSERT (*maxevents > 0);
793  pos->capacity = *maxevents;
794
795  /* Allocate space for pollfd structures to be passed to poll() */
796  ALLOC_ARRAY_CLEAR (pos->events, struct pollfd, pos->capacity);
797
798  /* Allocate space for event_set_return objects */
799  ALLOC_ARRAY_CLEAR (pos->args, void *, pos->capacity);
800
801  return (struct event_set *) pos;
802}
803#endif /* POLL */
804
805#if SELECT
806
/*
 * Event set backed by select().  args[] is indexed directly by file
 * descriptor.  The se_* functions obtain this structure by casting
 * the struct event_set pointer they are handed.
 */
struct se_set
{
  struct event_set_functions func; /* dispatch table filled in by se_init() */
  bool fast;                       /* true when EVENT_METHOD_FAST was requested */
  fd_set readfds;                  /* descriptors to watch for reading */
  fd_set writefds;                 /* descriptors to watch for writing */
  void **args;  /* allocated to capacity size */
  int maxfd;    /* largest fd seen so far, always < capacity; -1 when none has been added */
  int capacity; /* fixed largest fd + 1 */
};
817
818static void
819se_free (struct event_set *es)
820{
821  struct se_set *ses = (struct se_set *) es;
822  free (ses->args);
823  free (ses);
824}
825
826static void
827se_reset (struct event_set *es)
828{
829  struct se_set *ses = (struct se_set *) es;
830  int i;
831  ASSERT (ses->fast);
832
833  dmsg (D_EVENT_WAIT, "SE_RESET");
834
835  FD_ZERO (&ses->readfds);
836  FD_ZERO (&ses->writefds);
837  for (i = 0; i <= ses->maxfd; ++i)
838    ses->args[i] = NULL;
839  ses->maxfd = -1;
840}
841
842static void
843se_del (struct event_set *es, event_t event)
844{
845  struct se_set *ses = (struct se_set *) es;
846  ASSERT (!ses->fast);
847
848  dmsg (D_EVENT_WAIT, "SE_DEL ev=%d", (int)event);
849
850  if (event >= 0 && event < ses->capacity)
851    {
852      FD_CLR (event, &ses->readfds);
853      FD_CLR (event, &ses->writefds);
854      ses->args[event] = NULL;
855    }
856  else
857    msg (D_EVENT_ERRORS, "Error: select/se_del: too many I/O wait events");
858  return;
859}
860
861static void
862se_ctl (struct event_set *es, event_t event, unsigned int rwflags, void *arg)
863{
864  struct se_set *ses = (struct se_set *) es;
865
866  dmsg (D_EVENT_WAIT, "SE_CTL rwflags=0x%04x ev=%d fast=%d cap=%d maxfd=%d arg=" ptr_format,
867       rwflags, (int)event, (int)ses->fast, ses->capacity, ses->maxfd, (ptr_type)arg);
868
869  if (event >= 0 && event < ses->capacity)
870    {
871      ses->maxfd = max_int (event, ses->maxfd);
872      ses->args[event] = arg;
873      if (ses->fast)
874	{
875	  if (rwflags & EVENT_READ)
876	    FD_SET (event, &ses->readfds);
877	  if (rwflags & EVENT_WRITE)
878	    FD_SET (event, &ses->writefds);
879	}
880      else
881	{
882	  if (rwflags & EVENT_READ)
883	    FD_SET (event, &ses->readfds);
884	  else
885	    FD_CLR (event, &ses->readfds);
886	  if (rwflags & EVENT_WRITE)
887	    FD_SET (event, &ses->writefds);
888	  else
889	    FD_CLR (event, &ses->writefds);
890	}
891    }
892  else
893    {
894      msg (D_EVENT_ERRORS, "Error: select: too many I/O wait events, fd=%d cap=%d",
895	   (int) event,
896	   ses->capacity);
897    }
898}
899
900static int
901se_wait_return (struct se_set *ses,
902		fd_set *read,
903		fd_set *write,
904		struct event_set_return *out,
905		int outlen)
906{
907  int i, j = 0;
908  for (i = 0; i <= ses->maxfd && j < outlen; ++i)
909    {
910      const bool r = FD_ISSET (i, read);
911      const bool w = FD_ISSET (i, write);
912      if (r || w)
913	{
914	  out->rwflags = 0;
915	  if (r)
916	    out->rwflags |= EVENT_READ;
917	  if (w)
918	    out->rwflags |= EVENT_WRITE;
919	  out->arg = ses->args[i];
920	  dmsg (D_EVENT_WAIT, "SE_WAIT[%d,%d] rwflags=0x%04x arg=" ptr_format,
921	       i, j, out->rwflags, (ptr_type)out->arg);
922	  ++out;
923	  ++j;
924	}
925    }
926  return j;
927}
928
929static int
930se_wait_fast (struct event_set *es, const struct timeval *tv, struct event_set_return *out, int outlen)
931{
932  struct se_set *ses = (struct se_set *) es;
933  struct timeval tv_tmp = *tv;
934  int stat;
935
936  dmsg (D_EVENT_WAIT, "SE_WAIT_FAST maxfd=%d tv=%d/%d",
937	ses->maxfd,
938	(int)tv_tmp.tv_sec,
939	(int)tv_tmp.tv_usec);
940
941  stat = select (ses->maxfd + 1, &ses->readfds, &ses->writefds, NULL, &tv_tmp);
942
943  if (stat > 0)
944    stat = se_wait_return (ses, &ses->readfds, &ses->writefds, out, outlen);
945
946  return stat;
947}
948
949static int
950se_wait_scalable (struct event_set *es, const struct timeval *tv, struct event_set_return *out, int outlen)
951{
952  struct se_set *ses = (struct se_set *) es;
953  struct timeval tv_tmp = *tv;
954  fd_set read = ses->readfds;
955  fd_set write = ses->writefds;
956  int stat;
957
958  dmsg (D_EVENT_WAIT, "SE_WAIT_SCALEABLE maxfd=%d tv=%d/%d",
959	ses->maxfd, (int)tv_tmp.tv_sec, (int)tv_tmp.tv_usec);
960
961  stat = select (ses->maxfd + 1, &read, &write, NULL, &tv_tmp);
962
963  if (stat > 0)
964    stat = se_wait_return (ses, &read, &write, out, outlen);
965
966  return stat;
967}
968
969static struct event_set *
970se_init (int *maxevents, unsigned int flags)
971{
972  struct se_set *ses;
973
974  dmsg (D_EVENT_WAIT, "SE_INIT maxevents=%d flags=0x%08x", *maxevents, flags);
975
976  ALLOC_OBJ_CLEAR (ses, struct se_set);
977
978  /* set dispatch functions */
979  ses->func.free = se_free;
980  ses->func.reset = se_reset;
981  ses->func.del = se_del;
982  ses->func.ctl = se_ctl;
983  ses->func.wait = se_wait_scalable;
984
985  if (flags & EVENT_METHOD_FAST)
986    {
987      ses->fast = true;
988      ses->func.wait = se_wait_fast;
989    }
990
991  /* Select needs to be passed this value + 1 */
992  ses->maxfd = -1;
993
994  /* Set our event capacity */
995  ASSERT (*maxevents > 0);
996  *maxevents = min_int (*maxevents, SELECT_MAX_FDS);
997  ses->capacity = SELECT_MAX_FDS;
998
999  /* Allocate space for event_set_return void * args */
1000  ALLOC_ARRAY_CLEAR (ses->args, void *, ses->capacity);
1001
1002  return (struct event_set *) ses;
1003}
1004#endif /* SELECT */
1005
/*
 * Create an event set using one of the non-epoll back-ends.
 *
 * The back-end is chosen at compile time: the Win32 event back-end
 * on WIN32; otherwise poll and/or select depending on the POLL and
 * SELECT settings.  When both are available, poll is tried first
 * unless SELECT_PREFERRED_OVER_POLL is defined.  The result is
 * asserted to be non-NULL.
 */
static struct event_set *
event_set_init_simple (int *maxevents, unsigned int flags)
{
  struct event_set *ret = NULL;
#ifdef WIN32
  ret = we_init (maxevents, flags);
#elif POLL && SELECT
#if 0 /* Define to 1 if EVENT_METHOD_US_TIMEOUT should cause select to be favored over poll */
  if (flags & EVENT_METHOD_US_TIMEOUT)
    ret = se_init (maxevents, flags);
#endif
  /* each step below runs only if no earlier step produced a set */
# ifdef SELECT_PREFERRED_OVER_POLL
   if (!ret)
     ret = se_init (maxevents, flags);
   if (!ret)
     ret = po_init (maxevents, flags);
# else
   if (!ret)
     ret = po_init (maxevents, flags);
   if (!ret)
     ret = se_init (maxevents, flags);
# endif
#elif POLL
  ret = po_init (maxevents, flags);
#elif SELECT
  ret = se_init (maxevents, flags);
#else
#error At least one of poll, select, or WSAWaitForMultipleEvents must be supported by the kernel
#endif
  ASSERT (ret);
  return ret;
}
1038
/*
 * Create an event set for non-fast use.
 *
 * When epoll support is compiled in it is tried first; if ep_init()
 * returns NULL a warning is logged and a simple back-end is used
 * instead.  Without epoll support this is the same as
 * event_set_init_simple().  The result is asserted to be non-NULL.
 */
static struct event_set *
event_set_init_scalable (int *maxevents, unsigned int flags)
{
  struct event_set *set;

#if EPOLL
  set = ep_init (maxevents, flags);
  if (set == NULL)
    {
      msg (M_WARN, "Note: sys_epoll API is unavailable, falling back to poll/select API");
      set = event_set_init_simple (maxevents, flags);
    }
#else
  set = event_set_init_simple (maxevents, flags);
#endif

  ASSERT (set);
  return set;
}
1056
1057struct event_set *
1058event_set_init (int *maxevents, unsigned int flags)
1059{
1060  if (flags & EVENT_METHOD_FAST)
1061    return event_set_init_simple (maxevents, flags);
1062  else
1063    return event_set_init_scalable (maxevents, flags);
1064}
1065