1/* Thread management routine
2 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING.  If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22/* #define DEBUG */
23
24#include <zebra.h>
25
26#include "thread.h"
27#include "memory.h"
28#include "log.h"
29#include "hash.h"
30#include "pqueue.h"
31#include "command.h"
32#include "sigevent.h"
33
34#if defined HAVE_SNMP && defined SNMP_AGENTX
35#include <net-snmp/net-snmp-config.h>
36#include <net-snmp/net-snmp-includes.h>
37#include <net-snmp/agent/net-snmp-agent-includes.h>
38#include <net-snmp/agent/snmp_vars.h>
39
40extern int agentx_enabled;
41#endif
42
43#if defined(__APPLE__)
44#include <mach/mach.h>
45#include <mach/mach_time.h>
46#endif
47
48
49/* Recent absolute time of day */
50struct timeval recent_time;
51static struct timeval last_recent_time;
52/* Relative time, since startup */
53static struct timeval relative_time;
54static struct timeval relative_time_base;
55/* init flag */
56static unsigned short timers_inited;
57
58static struct hash *cpu_record = NULL;
59
/* One second expressed in microseconds (struct timeval's tv_usec unit).  */
#define TIMER_SECOND_MICRO 1000000L

/* Normalise a struct timeval so that tv_usec lies in
   [0, TIMER_SECOND_MICRO), carrying overflow/borrow into tv_sec.
   A result that would be negative overall is clamped to zero.  */
static struct timeval
timeval_adjust (struct timeval a)
{
  /* Carry whole seconds out of an oversized microsecond field.  */
  for (; a.tv_usec >= TIMER_SECOND_MICRO; a.tv_sec++)
    a.tv_usec -= TIMER_SECOND_MICRO;

  /* Borrow seconds into a negative microsecond field.  */
  for (; a.tv_usec < 0; a.tv_sec--)
    a.tv_usec += TIMER_SECOND_MICRO;

  /* Never return a negative interval; clamp to zero instead.  */
  if (a.tv_sec < 0)
    {
      a.tv_sec = 0;
      a.tv_usec = 0;
    }

  return a;
}
86
87static struct timeval
88timeval_subtract (struct timeval a, struct timeval b)
89{
90  struct timeval ret;
91
92  ret.tv_usec = a.tv_usec - b.tv_usec;
93  ret.tv_sec = a.tv_sec - b.tv_sec;
94
95  return timeval_adjust (ret);
96}
97
/* Three-way comparison of two timevals: negative if a < b, zero if
   equal, positive if a > b.  */
static long
timeval_cmp (struct timeval a, struct timeval b)
{
  if (a.tv_sec != b.tv_sec)
    return a.tv_sec - b.tv_sec;
  return a.tv_usec - b.tv_usec;
}
104
105unsigned long
106timeval_elapsed (struct timeval a, struct timeval b)
107{
108  return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
109	  + (a.tv_usec - b.tv_usec));
110}
111
112#if !defined(HAVE_CLOCK_MONOTONIC) && !defined(__APPLE__)
/* Fallback for systems without a monotonic clock: advance the relative
 * (since-startup) clock by however much the wall clock moved since the
 * last sample.  If the wall clock jumped backwards (e.g. the date was
 * set), the real interval is unknowable, so nudge the relative clock
 * forward by one second as a best-effort guess instead of letting it
 * run backwards. */
static void
quagga_gettimeofday_relative_adjust (void)
{
  struct timeval diff;
  if (timeval_cmp (recent_time, last_recent_time) < 0)
    {
      /* Wall clock went backwards: assume roughly one second passed. */
      relative_time.tv_sec++;
      relative_time.tv_usec = 0;
    }
  else
    {
      diff = timeval_subtract (recent_time, last_recent_time);
      relative_time.tv_sec += diff.tv_sec;
      relative_time.tv_usec += diff.tv_usec;
      relative_time = timeval_adjust (relative_time);
    }
  /* Remember this sample for the next delta computation. */
  last_recent_time = recent_time;
}
131#endif /* !HAVE_CLOCK_MONOTONIC && !__APPLE__ */
132
/* gettimeofday() wrapper that also refreshes the global recent_time
 * cache.  On the first successful call it records the startup
 * wall-clock time as the base for the stabilised clock.
 * Returns 0 on success, otherwise gettimeofday()'s error return. */
static int
quagga_gettimeofday (struct timeval *tv)
{
  int ret;

  assert (tv);

  if (!(ret = gettimeofday (&recent_time, NULL)))
    {
      /* init... */
      if (!timers_inited)
        {
          relative_time_base = last_recent_time = recent_time;
          timers_inited = 1;
        }
      /* avoid copy if user passed recent_time pointer.. */
      if (tv != &recent_time)
        *tv = recent_time;
      return 0;
    }
  return ret;
}
156
157static int
158quagga_get_relative (struct timeval *tv)
159{
160  int ret;
161
162#ifdef HAVE_CLOCK_MONOTONIC
163  {
164    struct timespec tp;
165    if (!(ret = clock_gettime (CLOCK_MONOTONIC, &tp)))
166      {
167        relative_time.tv_sec = tp.tv_sec;
168        relative_time.tv_usec = tp.tv_nsec / 1000;
169      }
170  }
171#elif defined(__APPLE__)
172  {
173    uint64_t ticks;
174    uint64_t useconds;
175    static mach_timebase_info_data_t timebase_info;
176
177    ticks = mach_absolute_time();
178    if (timebase_info.denom == 0)
179      mach_timebase_info(&timebase_info);
180
181    useconds = ticks * timebase_info.numer / timebase_info.denom / 1000;
182    relative_time.tv_sec = useconds / 1000000;
183    relative_time.tv_usec = useconds % 1000000;
184
185    return 0;
186  }
187#else /* !HAVE_CLOCK_MONOTONIC && !__APPLE__ */
188  if (!(ret = quagga_gettimeofday (&recent_time)))
189    quagga_gettimeofday_relative_adjust();
190#endif /* HAVE_CLOCK_MONOTONIC */
191
192  if (tv)
193    *tv = relative_time;
194
195  return ret;
196}
197
198/* Get absolute time stamp, but in terms of the internal timer
199 * Could be wrong, but at least won't go back.
200 */
201static void
202quagga_real_stabilised (struct timeval *tv)
203{
204  *tv = relative_time_base;
205  tv->tv_sec += relative_time.tv_sec;
206  tv->tv_usec += relative_time.tv_usec;
207  *tv = timeval_adjust (*tv);
208}
209
210/* Exported Quagga timestamp function.
211 * Modelled on POSIX clock_gettime.
212 */
213int
214quagga_gettime (enum quagga_clkid clkid, struct timeval *tv)
215{
216  switch (clkid)
217    {
218      case QUAGGA_CLK_REALTIME:
219        return quagga_gettimeofday (tv);
220      case QUAGGA_CLK_MONOTONIC:
221        return quagga_get_relative (tv);
222      case QUAGGA_CLK_REALTIME_STABILISED:
223        quagga_real_stabilised (tv);
224        return 0;
225      default:
226        errno = EINVAL;
227        return -1;
228    }
229}
230
/* time_t in terms of the stabilised absolute clock; a drop-in
 * replacement for POSIX time(). */
time_t
quagga_time (time_t *t)
{
  struct timeval tv;

  quagga_real_stabilised (&tv);
  if (t != NULL)
    *t = tv.tv_sec;
  return tv.tv_sec;
}
243
/* Public export of recent_relative_time by value.
 * Note: only as fresh as the most recent quagga_get_relative() call. */
struct timeval
recent_relative_time (void)
{
  return relative_time;
}
250
251static unsigned int
252cpu_record_hash_key (struct cpu_thread_history *a)
253{
254  return (uintptr_t) a->func;
255}
256
257static int
258cpu_record_hash_cmp (const struct cpu_thread_history *a,
259		     const struct cpu_thread_history *b)
260{
261  return a->func == b->func;
262}
263
264static void *
265cpu_record_hash_alloc (struct cpu_thread_history *a)
266{
267  struct cpu_thread_history *new;
268  new = XCALLOC (MTYPE_THREAD_STATS, sizeof (struct cpu_thread_history));
269  new->func = a->func;
270  new->funcname = a->funcname;
271  return new;
272}
273
274static void
275cpu_record_hash_free (void *a)
276{
277  struct cpu_thread_history *hist = a;
278
279  XFREE (MTYPE_THREAD_STATS, hist);
280}
281
/* Print one row of the thread CPU statistics table for record a.
 * NOTE(review): divides by a->total_calls; assumes every printed
 * record has at least one recorded call -- confirm callers never pass
 * a zero-call record. */
static void
vty_out_cpu_thread_history(struct vty* vty,
			   struct cpu_thread_history *a)
{
#ifdef HAVE_RUSAGE
  /* With rusage: CPU (user+system) columns, then wall-clock columns. */
  vty_out(vty, "%7ld.%03ld %9d %8ld %9ld %8ld %9ld",
	  a->cpu.total/1000, a->cpu.total%1000, a->total_calls,
	  a->cpu.total/a->total_calls, a->cpu.max,
	  a->real.total/a->total_calls, a->real.max);
#else
  /* Without rusage: wall-clock statistics only. */
  vty_out(vty, "%7ld.%03ld %9d %8ld %9ld",
	  a->real.total/1000, a->real.total%1000, a->total_calls,
	  a->real.total/a->total_calls, a->real.max);
#endif
  /* One flag letter per thread type this function has run as. */
  vty_out(vty, " %c%c%c%c%c%c %s%s",
	  a->types & (1 << THREAD_READ) ? 'R':' ',
	  a->types & (1 << THREAD_WRITE) ? 'W':' ',
	  a->types & (1 << THREAD_TIMER) ? 'T':' ',
	  a->types & (1 << THREAD_EVENT) ? 'E':' ',
	  a->types & (1 << THREAD_EXECUTE) ? 'X':' ',
	  a->types & (1 << THREAD_BACKGROUND) ? 'B' : ' ',
	  a->funcname, VTY_NEWLINE);
}
305
306static void
307cpu_record_hash_print(struct hash_backet *bucket,
308		      void *args[])
309{
310  struct cpu_thread_history *totals = args[0];
311  struct vty *vty = args[1];
312  thread_type *filter = args[2];
313  struct cpu_thread_history *a = bucket->data;
314
315  a = bucket->data;
316  if ( !(a->types & *filter) )
317       return;
318  vty_out_cpu_thread_history(vty,a);
319  totals->total_calls += a->total_calls;
320  totals->real.total += a->real.total;
321  if (totals->real.max < a->real.max)
322    totals->real.max = a->real.max;
323#ifdef HAVE_RUSAGE
324  totals->cpu.total += a->cpu.total;
325  if (totals->cpu.max < a->cpu.max)
326    totals->cpu.max = a->cpu.max;
327#endif
328}
329
330static void
331cpu_record_print(struct vty *vty, thread_type filter)
332{
333  struct cpu_thread_history tmp;
334  void *args[3] = {&tmp, vty, &filter};
335
336  memset(&tmp, 0, sizeof tmp);
337  tmp.funcname = "TOTAL";
338  tmp.types = filter;
339
340#ifdef HAVE_RUSAGE
341  vty_out(vty, "%21s %18s %18s%s",
342  	  "", "CPU (user+system):", "Real (wall-clock):", VTY_NEWLINE);
343#endif
344  vty_out(vty, "Runtime(ms)   Invoked Avg uSec Max uSecs");
345#ifdef HAVE_RUSAGE
346  vty_out(vty, " Avg uSec Max uSecs");
347#endif
348  vty_out(vty, "  Type  Thread%s", VTY_NEWLINE);
349  hash_iterate(cpu_record,
350	       (void(*)(struct hash_backet*,void*))cpu_record_hash_print,
351	       args);
352
353  if (tmp.total_calls > 0)
354    vty_out_cpu_thread_history(vty, &tmp);
355}
356
357DEFUN(show_thread_cpu,
358      show_thread_cpu_cmd,
359      "show thread cpu [FILTER]",
360      SHOW_STR
361      "Thread information\n"
362      "Thread CPU usage\n"
363      "Display filter (rwtexb)\n")
364{
365  int i = 0;
366  thread_type filter = (thread_type) -1U;
367
368  if (argc > 0)
369    {
370      filter = 0;
371      while (argv[0][i] != '\0')
372	{
373	  switch ( argv[0][i] )
374	    {
375	    case 'r':
376	    case 'R':
377	      filter |= (1 << THREAD_READ);
378	      break;
379	    case 'w':
380	    case 'W':
381	      filter |= (1 << THREAD_WRITE);
382	      break;
383	    case 't':
384	    case 'T':
385	      filter |= (1 << THREAD_TIMER);
386	      break;
387	    case 'e':
388	    case 'E':
389	      filter |= (1 << THREAD_EVENT);
390	      break;
391	    case 'x':
392	    case 'X':
393	      filter |= (1 << THREAD_EXECUTE);
394	      break;
395	    case 'b':
396	    case 'B':
397	      filter |= (1 << THREAD_BACKGROUND);
398	      break;
399	    default:
400	      break;
401	    }
402	  ++i;
403	}
404      if (filter == 0)
405	{
406	  vty_out(vty, "Invalid filter \"%s\" specified,"
407                  " must contain at least one of 'RWTEXB'%s",
408		  argv[0], VTY_NEWLINE);
409	  return CMD_WARNING;
410	}
411    }
412
413  cpu_record_print(vty, filter);
414  return CMD_SUCCESS;
415}
416
417static void
418cpu_record_hash_clear (struct hash_backet *bucket,
419		      void *args)
420{
421  thread_type *filter = args;
422  struct cpu_thread_history *a = bucket->data;
423
424  a = bucket->data;
425  if ( !(a->types & *filter) )
426       return;
427
428  hash_release (cpu_record, bucket->data);
429}
430
431static void
432cpu_record_clear (thread_type filter)
433{
434  thread_type *tmp = &filter;
435  hash_iterate (cpu_record,
436	        (void (*) (struct hash_backet*,void*)) cpu_record_hash_clear,
437	        tmp);
438}
439
440DEFUN(clear_thread_cpu,
441      clear_thread_cpu_cmd,
442      "clear thread cpu [FILTER]",
443      "Clear stored data\n"
444      "Thread information\n"
445      "Thread CPU usage\n"
446      "Display filter (rwtexb)\n")
447{
448  int i = 0;
449  thread_type filter = (thread_type) -1U;
450
451  if (argc > 0)
452    {
453      filter = 0;
454      while (argv[0][i] != '\0')
455	{
456	  switch ( argv[0][i] )
457	    {
458	    case 'r':
459	    case 'R':
460	      filter |= (1 << THREAD_READ);
461	      break;
462	    case 'w':
463	    case 'W':
464	      filter |= (1 << THREAD_WRITE);
465	      break;
466	    case 't':
467	    case 'T':
468	      filter |= (1 << THREAD_TIMER);
469	      break;
470	    case 'e':
471	    case 'E':
472	      filter |= (1 << THREAD_EVENT);
473	      break;
474	    case 'x':
475	    case 'X':
476	      filter |= (1 << THREAD_EXECUTE);
477	      break;
478	    case 'b':
479	    case 'B':
480	      filter |= (1 << THREAD_BACKGROUND);
481	      break;
482	    default:
483	      break;
484	    }
485	  ++i;
486	}
487      if (filter == 0)
488	{
489	  vty_out(vty, "Invalid filter \"%s\" specified,"
490                  " must contain at least one of 'RWTEXB'%s",
491		  argv[0], VTY_NEWLINE);
492	  return CMD_WARNING;
493	}
494    }
495
496  cpu_record_clear (filter);
497  return CMD_SUCCESS;
498}
499
500static int
501thread_timer_cmp(void *a, void *b)
502{
503  struct thread *thread_a = a;
504  struct thread *thread_b = b;
505
506  long cmp = timeval_cmp(thread_a->u.sands, thread_b->u.sands);
507
508  if (cmp < 0)
509    return -1;
510  if (cmp > 0)
511    return 1;
512  return 0;
513}
514
515static void
516thread_timer_update(void *node, int actual_position)
517{
518  struct thread *thread = node;
519
520  thread->index = actual_position;
521}
522
523/* Allocate new thread master.  */
524struct thread_master *
525thread_master_create ()
526{
527  struct thread_master *rv;
528
529  if (cpu_record == NULL)
530    cpu_record
531      = hash_create ((unsigned int (*) (void *))cpu_record_hash_key,
532		     (int (*) (const void *, const void *))cpu_record_hash_cmp);
533
534  rv = XCALLOC (MTYPE_THREAD_MASTER, sizeof (struct thread_master));
535
536  /* Initialize the timer queues */
537  rv->timer = pqueue_create();
538  rv->background = pqueue_create();
539  rv->timer->cmp = rv->background->cmp = thread_timer_cmp;
540  rv->timer->update = rv->background->update = thread_timer_update;
541
542  return rv;
543}
544
545/* Add a new thread to the list.  */
546static void
547thread_list_add (struct thread_list *list, struct thread *thread)
548{
549  thread->next = NULL;
550  thread->prev = list->tail;
551  if (list->tail)
552    list->tail->next = thread;
553  else
554    list->head = thread;
555  list->tail = thread;
556  list->count++;
557}
558
559/* Delete a thread from the list. */
560static struct thread *
561thread_list_delete (struct thread_list *list, struct thread *thread)
562{
563  if (thread->next)
564    thread->next->prev = thread->prev;
565  else
566    list->tail = thread->prev;
567  if (thread->prev)
568    thread->prev->next = thread->next;
569  else
570    list->head = thread->next;
571  thread->next = thread->prev = NULL;
572  list->count--;
573  return thread;
574}
575
576/* Move thread to unuse list. */
577static void
578thread_add_unuse (struct thread_master *m, struct thread *thread)
579{
580  assert (m != NULL && thread != NULL);
581  assert (thread->next == NULL);
582  assert (thread->prev == NULL);
583  assert (thread->type == THREAD_UNUSED);
584  thread_list_add (&m->unuse, thread);
585}
586
587/* Free all unused thread. */
588static void
589thread_list_free (struct thread_master *m, struct thread_list *list)
590{
591  struct thread *t;
592  struct thread *next;
593
594  for (t = list->head; t; t = next)
595    {
596      next = t->next;
597      XFREE (MTYPE_THREAD, t);
598      list->count--;
599      m->alloc--;
600    }
601}
602
603static void
604thread_queue_free (struct thread_master *m, struct pqueue *queue)
605{
606  int i;
607
608  for (i = 0; i < queue->size; i++)
609    XFREE(MTYPE_THREAD, queue->array[i]);
610
611  m->alloc -= queue->size;
612  pqueue_delete(queue);
613}
614
/* Stop thread scheduler: free every thread still held by the master
 * (scheduled, ready or parked for reuse), then the master itself, and
 * finally tear down the global CPU-statistics hash. */
void
thread_master_free (struct thread_master *m)
{
  thread_list_free (m, &m->read);
  thread_list_free (m, &m->write);
  thread_queue_free (m, m->timer);
  thread_list_free (m, &m->event);
  thread_list_free (m, &m->ready);
  thread_list_free (m, &m->unuse);
  thread_queue_free (m, m->background);

  XFREE (MTYPE_THREAD_MASTER, m);

  /* The statistics hash is file-global (shared across masters); it is
   * destroyed whenever a master is freed. */
  if (cpu_record)
    {
      hash_clean (cpu_record, cpu_record_hash_free);
      hash_free (cpu_record);
      cpu_record = NULL;
    }
}
636
637/* Thread list is empty or not.  */
638static int
639thread_empty (struct thread_list *list)
640{
641  return  list->head ? 0 : 1;
642}
643
644/* Delete top of the list and return it. */
645static struct thread *
646thread_trim_head (struct thread_list *list)
647{
648  if (!thread_empty (list))
649    return thread_list_delete (list, list->head);
650  return NULL;
651}
652
653/* Return remain time in second. */
654unsigned long
655thread_timer_remain_second (struct thread *thread)
656{
657  quagga_get_relative (NULL);
658
659  if (thread->u.sands.tv_sec - relative_time.tv_sec > 0)
660    return thread->u.sands.tv_sec - relative_time.tv_sec;
661  else
662    return 0;
663}
664
665#define debugargdef  const char *funcname, const char *schedfrom, int fromln
666#define debugargpass funcname, schedfrom, fromln
667
668/* Get new thread.  */
669static struct thread *
670thread_get (struct thread_master *m, u_char type,
671	    int (*func) (struct thread *), void *arg, debugargdef)
672{
673  struct thread *thread = thread_trim_head (&m->unuse);
674
675  if (! thread)
676    {
677      thread = XCALLOC (MTYPE_THREAD, sizeof (struct thread));
678      m->alloc++;
679    }
680  thread->type = type;
681  thread->add_type = type;
682  thread->master = m;
683  thread->func = func;
684  thread->arg = arg;
685  thread->index = -1;
686
687  thread->funcname = funcname;
688  thread->schedfrom = schedfrom;
689  thread->schedfrom_line = fromln;
690
691  return thread;
692}
693
694/* Add new read thread. */
695struct thread *
696funcname_thread_add_read (struct thread_master *m,
697		 int (*func) (struct thread *), void *arg, int fd,
698		 debugargdef)
699{
700  struct thread *thread;
701
702  assert (m != NULL);
703
704  if (FD_ISSET (fd, &m->readfd))
705    {
706      zlog (NULL, LOG_WARNING, "There is already read fd [%d]", fd);
707      return NULL;
708    }
709
710  thread = thread_get (m, THREAD_READ, func, arg, debugargpass);
711  FD_SET (fd, &m->readfd);
712  thread->u.fd = fd;
713  thread_list_add (&m->read, thread);
714
715  return thread;
716}
717
718/* Add new write thread. */
719struct thread *
720funcname_thread_add_write (struct thread_master *m,
721		 int (*func) (struct thread *), void *arg, int fd,
722		 debugargdef)
723{
724  struct thread *thread;
725
726  assert (m != NULL);
727
728  if (FD_ISSET (fd, &m->writefd))
729    {
730      zlog (NULL, LOG_WARNING, "There is already write fd [%d]", fd);
731      return NULL;
732    }
733
734  thread = thread_get (m, THREAD_WRITE, func, arg, debugargpass);
735  FD_SET (fd, &m->writefd);
736  thread->u.fd = fd;
737  thread_list_add (&m->write, thread);
738
739  return thread;
740}
741
742static struct thread *
743funcname_thread_add_timer_timeval (struct thread_master *m,
744                                   int (*func) (struct thread *),
745                                  int type,
746                                  void *arg,
747                                  struct timeval *time_relative,
748				  debugargdef)
749{
750  struct thread *thread;
751  struct pqueue *queue;
752  struct timeval alarm_time;
753
754  assert (m != NULL);
755
756  assert (type == THREAD_TIMER || type == THREAD_BACKGROUND);
757  assert (time_relative);
758
759  queue = ((type == THREAD_TIMER) ? m->timer : m->background);
760  thread = thread_get (m, type, func, arg, debugargpass);
761
762  /* Do we need jitter here? */
763  quagga_get_relative (NULL);
764  alarm_time.tv_sec = relative_time.tv_sec + time_relative->tv_sec;
765  alarm_time.tv_usec = relative_time.tv_usec + time_relative->tv_usec;
766  thread->u.sands = timeval_adjust(alarm_time);
767
768  pqueue_enqueue(thread, queue);
769  return thread;
770}
771
772
773/* Add timer event thread. */
774struct thread *
775funcname_thread_add_timer (struct thread_master *m,
776		           int (*func) (struct thread *),
777		           void *arg, long timer,
778			   debugargdef)
779{
780  struct timeval trel;
781
782  assert (m != NULL);
783
784  trel.tv_sec = timer;
785  trel.tv_usec = 0;
786
787  return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER, arg,
788                                            &trel, debugargpass);
789}
790
791/* Add timer event thread with "millisecond" resolution */
792struct thread *
793funcname_thread_add_timer_msec (struct thread_master *m,
794                                int (*func) (struct thread *),
795                                void *arg, long timer,
796				debugargdef)
797{
798  struct timeval trel;
799
800  assert (m != NULL);
801
802  trel.tv_sec = timer / 1000;
803  trel.tv_usec = 1000*(timer % 1000);
804
805  return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER,
806                                            arg, &trel, debugargpass);
807}
808
809/* Add a background thread, with an optional millisec delay */
810struct thread *
811funcname_thread_add_background (struct thread_master *m,
812                                int (*func) (struct thread *),
813                                void *arg, long delay,
814				debugargdef)
815{
816  struct timeval trel;
817
818  assert (m != NULL);
819
820  if (delay)
821    {
822      trel.tv_sec = delay / 1000;
823      trel.tv_usec = 1000*(delay % 1000);
824    }
825  else
826    {
827      trel.tv_sec = 0;
828      trel.tv_usec = 0;
829    }
830
831  return funcname_thread_add_timer_timeval (m, func, THREAD_BACKGROUND,
832                                            arg, &trel, debugargpass);
833}
834
835/* Add simple event thread. */
836struct thread *
837funcname_thread_add_event (struct thread_master *m,
838		  int (*func) (struct thread *), void *arg, int val,
839		  debugargdef)
840{
841  struct thread *thread;
842
843  assert (m != NULL);
844
845  thread = thread_get (m, THREAD_EVENT, func, arg, debugargpass);
846  thread->u.val = val;
847  thread_list_add (&m->event, thread);
848
849  return thread;
850}
851
/* Cancel a scheduled thread: remove it from whichever scheduler
 * structure currently holds it (fd list, event/ready list, or
 * timer/background pqueue) and recycle its control block.  Only valid
 * for threads that are still scheduled. */
void
thread_cancel (struct thread *thread)
{
  struct thread_list *list = NULL;
  struct pqueue *queue = NULL;

  /* Work out which container this thread lives in, and for fd threads
   * also clear the fd from the master's select set. */
  switch (thread->type)
    {
    case THREAD_READ:
      assert (FD_ISSET (thread->u.fd, &thread->master->readfd));
      FD_CLR (thread->u.fd, &thread->master->readfd);
      list = &thread->master->read;
      break;
    case THREAD_WRITE:
      assert (FD_ISSET (thread->u.fd, &thread->master->writefd));
      FD_CLR (thread->u.fd, &thread->master->writefd);
      list = &thread->master->write;
      break;
    case THREAD_TIMER:
      queue = thread->master->timer;
      break;
    case THREAD_EVENT:
      list = &thread->master->event;
      break;
    case THREAD_READY:
      list = &thread->master->ready;
      break;
    case THREAD_BACKGROUND:
      queue = thread->master->background;
      break;
    default:
      /* Not held in any scheduler structure: nothing to cancel. */
      return;
      break; /* unreachable */
    }

  if (queue)
    {
      /* Timer threads know their heap slot (maintained by
       * thread_timer_update), so removal is direct. */
      assert(thread->index >= 0);
      assert(thread == queue->array[thread->index]);
      pqueue_remove_at(thread->index, queue);
    }
  else if (list)
    {
      thread_list_delete (list, thread);
    }
  else
    {
      assert(!"Thread should be either in queue or list!");
    }

  thread->type = THREAD_UNUSED;
  thread_add_unuse (thread->master, thread);
}
906
907/* Delete all events which has argument value arg. */
908unsigned int
909thread_cancel_event (struct thread_master *m, void *arg)
910{
911  unsigned int ret = 0;
912  struct thread *thread;
913
914  thread = m->event.head;
915  while (thread)
916    {
917      struct thread *t;
918
919      t = thread;
920      thread = t->next;
921
922      if (t->arg == arg)
923        {
924          ret++;
925          thread_list_delete (&m->event, t);
926          t->type = THREAD_UNUSED;
927          thread_add_unuse (m, t);
928        }
929    }
930
931  /* thread can be on the ready list too */
932  thread = m->ready.head;
933  while (thread)
934    {
935      struct thread *t;
936
937      t = thread;
938      thread = t->next;
939
940      if (t->arg == arg)
941        {
942          ret++;
943          thread_list_delete (&m->ready, t);
944          t->type = THREAD_UNUSED;
945          thread_add_unuse (m, t);
946        }
947    }
948  return ret;
949}
950
951static struct timeval *
952thread_timer_wait (struct pqueue *queue, struct timeval *timer_val)
953{
954  if (queue->size)
955    {
956      struct thread *next_timer = queue->array[0];
957      *timer_val = timeval_subtract (next_timer->u.sands, relative_time);
958      return timer_val;
959    }
960  return NULL;
961}
962
/* Hand a thread back to the caller by copying it into *fetch, then
 * recycle the original control block onto the unuse list.  Returns
 * fetch. */
static struct thread *
thread_run (struct thread_master *m, struct thread *thread,
	    struct thread *fetch)
{
  *fetch = *thread;
  thread->type = THREAD_UNUSED;
  thread_add_unuse (m, thread);
  return fetch;
}
972
973static int
974thread_process_fd (struct thread_list *list, fd_set *fdset, fd_set *mfdset)
975{
976  struct thread *thread;
977  struct thread *next;
978  int ready = 0;
979
980  assert (list);
981
982  for (thread = list->head; thread; thread = next)
983    {
984      next = thread->next;
985
986      if (FD_ISSET (THREAD_FD (thread), fdset))
987        {
988          assert (FD_ISSET (THREAD_FD (thread), mfdset));
989          FD_CLR(THREAD_FD (thread), mfdset);
990          thread_list_delete (list, thread);
991          thread_list_add (&thread->master->ready, thread);
992          thread->type = THREAD_READY;
993          ready++;
994        }
995    }
996  return ready;
997}
998
999/* Add all timers that have popped to the ready list. */
1000static unsigned int
1001thread_timer_process (struct pqueue *queue, struct timeval *timenow)
1002{
1003  struct thread *thread;
1004  unsigned int ready = 0;
1005
1006  while (queue->size)
1007    {
1008      thread = queue->array[0];
1009      if (timeval_cmp (*timenow, thread->u.sands) < 0)
1010        return ready;
1011      pqueue_dequeue(queue);
1012      thread->type = THREAD_READY;
1013      thread_list_add (&thread->master->ready, thread);
1014      ready++;
1015    }
1016  return ready;
1017}
1018
1019/* process a list en masse, e.g. for event thread lists */
1020static unsigned int
1021thread_process (struct thread_list *list)
1022{
1023  struct thread *thread;
1024  struct thread *next;
1025  unsigned int ready = 0;
1026
1027  for (thread = list->head; thread; thread = next)
1028    {
1029      next = thread->next;
1030      thread_list_delete (list, thread);
1031      thread->type = THREAD_READY;
1032      thread_list_add (&thread->master->ready, thread);
1033      ready++;
1034    }
1035  return ready;
1036}
1037
1038
/* Fetch the next ready thread into *fetch and return it, blocking in
 * select() until something becomes runnable.  Per pass, scheduling
 * order is: already-ready jobs, then events, then (after select)
 * foreground timers, I/O threads, and finally background timers.
 * Returns NULL only on a fatal select() error. */
struct thread *
thread_fetch (struct thread_master *m, struct thread *fetch)
{
  struct thread *thread;
  fd_set readfd;
  fd_set writefd;
  fd_set exceptfd;
  struct timeval timer_val = { .tv_sec = 0, .tv_usec = 0 };
  struct timeval timer_val_bg;
  struct timeval *timer_wait = &timer_val;
  struct timeval *timer_wait_bg;

  while (1)
    {
      int num = 0;
#if defined HAVE_SNMP && defined SNMP_AGENTX
      struct timeval snmp_timer_wait;
      int snmpblock = 0;
      int fdsetsize;
#endif

      /* Signals pre-empt everything */
      quagga_sigevent_process ();

      /* Drain the ready queue of already scheduled jobs, before scheduling
       * more.
       */
      if ((thread = thread_trim_head (&m->ready)) != NULL)
        return thread_run (m, thread, fetch);

      /* To be fair to all kinds of threads, and avoid starvation, we
       * need to be careful to consider all thread types for scheduling
       * in each quanta. I.e. we should not return early from here on.
       */

      /* Normal event are the next highest priority.  */
      thread_process (&m->event);

      /* Structure copy.  */
      readfd = m->readfd;
      writefd = m->writefd;
      exceptfd = m->exceptfd;

      /* Calculate select wait timer if nothing else to do */
      if (m->ready.count == 0)
        {
          quagga_get_relative (NULL);
          timer_wait = thread_timer_wait (m->timer, &timer_val);
          timer_wait_bg = thread_timer_wait (m->background, &timer_val_bg);

          /* Sleep only until the earlier of the two timer queues pops. */
          if (timer_wait_bg &&
              (!timer_wait || (timeval_cmp (*timer_wait, *timer_wait_bg) > 0)))
            timer_wait = timer_wait_bg;
        }

#if defined HAVE_SNMP && defined SNMP_AGENTX
      /* When SNMP is enabled, we may have to select() on additional
	 FD. snmp_select_info() will add them to `readfd'. The trick
	 with this function is its last argument. We need to set it to
	 0 if timer_wait is not NULL and we need to use the provided
	 new timer only if it is still set to 0. */
      if (agentx_enabled)
        {
          fdsetsize = FD_SETSIZE;
          snmpblock = 1;
          if (timer_wait)
            {
              snmpblock = 0;
              memcpy(&snmp_timer_wait, timer_wait, sizeof(struct timeval));
            }
          snmp_select_info(&fdsetsize, &readfd, &snmp_timer_wait, &snmpblock);
          if (snmpblock == 0)
            timer_wait = &snmp_timer_wait;
        }
#endif
      num = select (FD_SETSIZE, &readfd, &writefd, &exceptfd, timer_wait);

      /* Signals should get quick treatment */
      if (num < 0)
        {
          if (errno == EINTR)
            continue; /* signal received - process it */
          zlog_warn ("select() error: %s", safe_strerror (errno));
            return NULL;
        }

#if defined HAVE_SNMP && defined SNMP_AGENTX
      if (agentx_enabled)
        {
          if (num > 0)
            snmp_read(&readfd);
          else if (num == 0)
            {
              snmp_timeout();
              run_alarms();
            }
          netsnmp_check_outstanding_agent_requests();
        }
#endif

      /* Check foreground timers.  Historically, they have had higher
         priority than I/O threads, so let's push them onto the ready
	 list in front of the I/O threads. */
      quagga_get_relative (NULL);
      thread_timer_process (m->timer, &relative_time);

      /* Got IO, process it */
      if (num > 0)
        {
          /* Normal priority read thead. */
          thread_process_fd (&m->read, &readfd, &m->readfd);
          /* Write thead. */
          thread_process_fd (&m->write, &writefd, &m->writefd);
        }

#if 0
      /* If any threads were made ready above (I/O or foreground timer),
         perhaps we should avoid adding background timers to the ready
	 list at this time.  If this is code is uncommented, then background
	 timer threads will not run unless there is nothing else to do. */
      if ((thread = thread_trim_head (&m->ready)) != NULL)
        return thread_run (m, thread, fetch);
#endif

      /* Background timer/events, lowest priority */
      thread_timer_process (m->background, &relative_time);

      if ((thread = thread_trim_head (&m->ready)) != NULL)
        return thread_run (m, thread, fetch);
    }
}
1171
/* Compute elapsed wall-clock time (return value, microseconds) and CPU
 * time (*cputime, user+system, microseconds) between the two usage
 * snapshots start and now. */
unsigned long
thread_consumed_time (RUSAGE_T *now, RUSAGE_T *start, unsigned long *cputime)
{
#ifdef HAVE_RUSAGE
  /* This is 'user + sys' time.  */
  *cputime = timeval_elapsed (now->cpu.ru_utime, start->cpu.ru_utime) +
	     timeval_elapsed (now->cpu.ru_stime, start->cpu.ru_stime);
#else
  /* No getrusage(): CPU time is unavailable, report zero. */
  *cputime = 0;
#endif /* HAVE_RUSAGE */
  return timeval_elapsed (now->real, start->real);
}
1184
1185/* We should aim to yield after THREAD_YIELD_TIME_SLOT milliseconds.
1186   Note: we are using real (wall clock) time for this calculation.
1187   It could be argued that CPU time may make more sense in certain
1188   contexts.  The things to consider are whether the thread may have
1189   blocked (in which case wall time increases, but CPU time does not),
1190   or whether the system is heavily loaded with other processes competing
1191   for CPU time.  On balance, wall clock time seems to make sense.
1192   Plus it has the added benefit that gettimeofday should be faster
1193   than calling getrusage. */
1194int
1195thread_should_yield (struct thread *thread)
1196{
1197  quagga_get_relative (NULL);
1198  return (timeval_elapsed(relative_time, thread->real) >
1199  	  THREAD_YIELD_TIME_SLOT);
1200}
1201
1202void
1203thread_getrusage (RUSAGE_T *r)
1204{
1205  quagga_get_relative (NULL);
1206#ifdef HAVE_RUSAGE
1207  getrusage(RUSAGE_SELF, &(r->cpu));
1208#endif
1209  r->real = relative_time;
1210
1211#ifdef HAVE_CLOCK_MONOTONIC
1212  /* quagga_get_relative() only updates recent_time if gettimeofday
1213   * based, not when using CLOCK_MONOTONIC. As we export recent_time
1214   * and guarantee to update it before threads are run...
1215   */
1216  quagga_gettimeofday(&recent_time);
1217#endif /* HAVE_CLOCK_MONOTONIC */
1218}
1219
1220struct thread *thread_current = NULL;
1221
1222/* We check thread consumed time. If the system has getrusage, we'll
1223   use that to get in-depth stats on the performance of the thread in addition
1224   to wall clock time stats from gettimeofday. */
/* Run the thread's callback while collecting per-function CPU/wall
   statistics into the global cpu_record hash, and warn about
   callbacks that hog the CPU. */
void
thread_call (struct thread *thread)
{
  unsigned long realtime, cputime;
  RUSAGE_T before, after;

 /* Cache a pointer to the relevant cpu history record, if the thread
  * does not have it yet.
  *
  * Callers submitting 'dummy threads' hence must take care that
  * thread->hist is NULL (e.g. by zeroing the struct first).
  */
  if (!thread->hist)
    {
      struct cpu_thread_history tmp;

      /* Only func/funcname are used as the hash key. */
      tmp.func = thread->func;
      tmp.funcname = thread->funcname;

      /* hash_get with an alloc callback inserts a fresh record on miss. */
      thread->hist = hash_get (cpu_record, &tmp,
                    (void * (*) (void *))cpu_record_hash_alloc);
    }

  /* Timestamp the start of execution; thread->real is also what
     thread_should_yield() measures against. */
  GETRUSAGE (&before);
  thread->real = before.real;

  /* Publish the running thread for the duration of the callback. */
  thread_current = thread;
  (*thread->func) (thread);
  thread_current = NULL;

  GETRUSAGE (&after);

  /* Accumulate wall-clock (and, if available, CPU) totals and maxima. */
  realtime = thread_consumed_time (&after, &before, &cputime);
  thread->hist->real.total += realtime;
  if (thread->hist->real.max < realtime)
    thread->hist->real.max = realtime;
#ifdef HAVE_RUSAGE
  thread->hist->cpu.total += cputime;
  if (thread->hist->cpu.max < cputime)
    thread->hist->cpu.max = cputime;
#endif

  ++(thread->hist->total_calls);
  /* Remember every add_type this function has ever been scheduled as. */
  thread->hist->types |= (1 << thread->add_type);

#ifdef CONSUMED_TIME_CHECK
  if (realtime > CONSUMED_TIME_CHECK)
    {
      /*
       * We have a CPU Hog on our hands.
       * Whinge about it now, so we're aware this is yet another task
       * to fix.
       */
      zlog_warn ("SLOW THREAD: task %s (%lx) ran for %lums (cpu time %lums)",
		 thread->funcname,
		 (unsigned long) thread->func,
		 realtime/1000, cputime/1000);
    }
#endif /* CONSUMED_TIME_CHECK */
}
1285
1286/* Execute thread */
1287struct thread *
1288funcname_thread_execute (struct thread_master *m,
1289                int (*func)(struct thread *),
1290                void *arg,
1291                int val,
1292		debugargdef)
1293{
1294  struct thread dummy;
1295
1296  memset (&dummy, 0, sizeof (struct thread));
1297
1298  dummy.type = THREAD_EVENT;
1299  dummy.add_type = THREAD_EXECUTE;
1300  dummy.master = NULL;
1301  dummy.func = func;
1302  dummy.arg = arg;
1303  dummy.u.val = val;
1304
1305  dummy.funcname = funcname;
1306  dummy.schedfrom = schedfrom;
1307  dummy.schedfrom_line = fromln;
1308
1309  thread_call (&dummy);
1310
1311  return NULL;
1312}
1313