1/*
2    Monitor status of quagga daemons and restart if necessary.
3
4    Copyright (C) 2004  Andrew J. Schorr
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 */
20
21#include <zebra.h>
22#include <thread.h>
23#include <log.h>
24#include <network.h>
25#include <sigevent.h>
26#include <lib/version.h>
27#include <getopt.h>
28#include <sys/un.h>
29#include <sys/wait.h>
30#include <memory.h>
31
/* Smaller of two values.  Each argument may be evaluated twice, so avoid
   passing expressions with side effects. */
#ifndef MIN
#define MIN(A,B) (((A) <= (B)) ? (A) : (B))
#endif

/* Macros to help randomize timers.
   JITTER(N) yields a pseudo-random offset between -(N)/2 and (N)-(N)/2.
   FUZZY(N) is N shifted by a jitter computed over one twentieth of N. */
#define JITTER(N) ((random() % ((N)+1))-((N)/2))
#define FUZZY(N) ((N)+JITTER((N)/20))
39
/* Built-in defaults for the command-line tunables.  The interval and
   timeout values are expressed in seconds. */
#define DEFAULT_PERIOD		5
#define DEFAULT_TIMEOUT		10
#define DEFAULT_RESTART_TIMEOUT	20
#define DEFAULT_LOGLEVEL	LOG_INFO
#define DEFAULT_MIN_RESTART	60
#define DEFAULT_MAX_RESTART	600

/* Pid file location: prefer the build-time path when one is configured. */
#if defined(PATH_WATCHQUAGGA_PID)
#define DEFAULT_PIDFILE		PATH_WATCHQUAGGA_PID
#else
#define DEFAULT_PIDFILE		STATEDIR "/watchquagga.pid"
#endif

/* Directory holding the daemons' vty sockets. */
#if defined(DAEMON_VTY_DIR)
#define VTYDIR			DAEMON_VTY_DIR
#else
#define VTYDIR			STATEDIR
#endif

/* Token sent with the echo command and expected back in the reply. */
#define PING_TOKEN	"PING"

/* Event-loop master used for every timer and I/O callback in this file.
   Needs to be global, referenced somewhere inside libzebra. */
struct thread_master *master;
61
/* Operating mode of the watchdog.  The numeric values double as indices
   into mode_str[]. */
typedef enum
{
  MODE_MONITOR = 0,
  MODE_GLOBAL_RESTART = 1,
  MODE_SEPARATE_RESTART = 2,
  MODE_PHASED_ZEBRA_RESTART = 3,
  MODE_PHASED_ALL_RESTART = 4
} watch_mode_t;

/* Human-readable name of each watch_mode_t value. */
static const char *mode_str[] =
{
  [MODE_MONITOR] = "monitor",
  [MODE_GLOBAL_RESTART] = "global restart",
  [MODE_SEPARATE_RESTART] = "individual daemon restart",
  [MODE_PHASED_ZEBRA_RESTART] = "phased zebra restart",
  [MODE_PHASED_ALL_RESTART] = "phased global restart for any failure",
};
79
/* Step reached by a phased restart; PHASE_NONE means none is in progress.
   The numeric values double as indices into phase_str[]. */
typedef enum
{
  PHASE_NONE = 0,
  PHASE_STOPS_PENDING = 1,
  PHASE_WAITING_DOWN = 2,
  PHASE_ZEBRA_RESTART_PENDING = 3,
  PHASE_WAITING_ZEBRA_UP = 4
} restart_phase_t;

/* Human-readable description of each phase.  The table has one more entry
   than restart_phase_t has values; the last string has no enumerator. */
static const char *phase_str[] =
{
  [PHASE_NONE] = "None",
  [PHASE_STOPS_PENDING] = "Stop jobs running",
  [PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
  [PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
  [PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
  [PHASE_WAITING_ZEBRA_UP + 1] = "Start jobs running",
};

/* Longest time a single phase may last before the phased restart is
   abandoned: three times the restart (kill) timeout. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
100
/* Bookkeeping for one background shell job (restart, start or stop). */
struct restart_info
{
  const char *name;		/* daemon name substituted into the command */
  const char *what;		/* kind of job that was launched, for logging */
  pid_t pid;			/* pid of the running job, or 0 if none */
  struct timeval time;		/* when the job was last started or finished */
  long interval;		/* minimum seconds between job launches */
  struct thread *t_kill;	/* timer that signals an overdue job */
  int kills;			/* signals already sent to the current job */
};
111
112static struct global_state
113{
114  watch_mode_t mode;
115  restart_phase_t phase;
116  struct thread *t_phase_hanging;
117  const char *vtydir;
118  long period;
119  long timeout;
120  long restart_timeout;
121  long min_restart_interval;
122  long max_restart_interval;
123  int do_ping;
124  struct daemon *daemons;
125  const char *restart_command;
126  const char *start_command;
127  const char *stop_command;
128  struct restart_info restart;
129  int unresponsive_restart;
130  int loglevel;
131  struct daemon *special;	/* points to zebra when doing phased restart */
132  int numdaemons;
133  int numpids;
134  int numdown;		/* # of daemons that are not UP or UNRESPONSIVE */
135} gs = {
136  .mode = MODE_MONITOR,
137  .phase = PHASE_NONE,
138  .vtydir = VTYDIR,
139  .period = 1000*DEFAULT_PERIOD,
140  .timeout = DEFAULT_TIMEOUT,
141  .restart_timeout = DEFAULT_RESTART_TIMEOUT,
142  .loglevel = DEFAULT_LOGLEVEL,
143  .min_restart_interval = DEFAULT_MIN_RESTART,
144  .max_restart_interval = DEFAULT_MAX_RESTART,
145  .do_ping = 1,
146};
147
/* Connection state of a monitored daemon.  The numeric values double as
   indices into state_str[]. */
typedef enum
{
  DAEMON_INIT = 0,
  DAEMON_DOWN = 1,
  DAEMON_CONNECTING = 2,
  DAEMON_UP = 3,
  DAEMON_UNRESPONSIVE = 4
} daemon_state_t;

/* True when the daemon is connected, whether or not it currently answers
   echo requests. */
#define IS_UP(D) \
  (((D)->state == DAEMON_UP) || ((D)->state == DAEMON_UNRESPONSIVE))

/* Human-readable name of each daemon_state_t value. */
static const char *state_str[] =
{
  [DAEMON_INIT] = "Init",
  [DAEMON_DOWN] = "Down",
  [DAEMON_CONNECTING] = "Connecting",
  [DAEMON_UP] = "Up",
  [DAEMON_UNRESPONSIVE] = "Unresponsive",
};
168
169struct daemon {
170  const char *name;
171  daemon_state_t state;
172  int fd;
173  struct timeval echo_sent;
174  u_int connect_tries;
175  struct thread *t_wakeup;
176  struct thread *t_read;
177  struct thread *t_write;
178  struct daemon *next;
179  struct restart_info restart;
180};
181
/* Long option table for getopt_long(); every entry maps to the short
   option letter in its last field. */
static const struct option longopts[] =
{
  { "daemon",               no_argument,       NULL, 'd' },
  { "statedir",             required_argument, NULL, 'S' },
  { "no-echo",              no_argument,       NULL, 'e' },
  { "loglevel",             required_argument, NULL, 'l' },
  { "interval",             required_argument, NULL, 'i' },
  { "timeout",              required_argument, NULL, 't' },
  { "restart-timeout",      required_argument, NULL, 'T' },
  { "restart",              required_argument, NULL, 'r' },
  { "start-command",        required_argument, NULL, 's' },
  { "kill-command",         required_argument, NULL, 'k' },
  { "restart-all",          required_argument, NULL, 'R' },
  { "all-restart",          no_argument,       NULL, 'a' },
  { "always-all-restart",   no_argument,       NULL, 'A' },
  { "unresponsive-restart", no_argument,       NULL, 'z' },
  { "min-restart-interval", required_argument, NULL, 'm' },
  { "max-restart-interval", required_argument, NULL, 'M' },
  { "pid-file",             required_argument, NULL, 'p' },
  { "blank-string",         required_argument, NULL, 'b' },
  { "help",                 no_argument,       NULL, 'h' },
  { "version",              no_argument,       NULL, 'v' },
  { NULL,                   0,                 NULL, 0   }
};
206
/* Forward declarations for functions that are used before their
   definitions appear. */
static void phase_check(void);
static void try_restart(struct daemon *dmn);
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
211
212static int
213usage(const char *progname, int status)
214{
215  if (status != 0)
216    fprintf(stderr, "Try `%s --help' for more information.\n", progname);
217  else
218    printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
219Watchdog program to monitor status of quagga daemons and try to restart\n\
220them if they are down or unresponsive.  It determines whether a daemon is\n\
221up based on whether it can connect to the daemon's vty unix stream socket.\n\
222It then repeatedly sends echo commands over that socket to determine whether\n\
223the daemon is responsive.  If the daemon crashes, we will receive an EOF\n\
224on the socket connection and know immediately that the daemon is down.\n\n\
225The daemons to be monitored should be listed on the command line.\n\n\
226This program can run in one of 5 modes:\n\n\
2270. Mode: %s.\n\
228  Just monitor and report on status changes.  Example:\n\
229    %s -d zebra ospfd bgpd\n\n\
2301. Mode: %s.\n\
231  Whenever any daemon hangs or crashes, use the given command to restart\n\
232  them all.  Example:\n\
233    %s -dz \\\n\
234      -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
235      zebra ospfd\n\n\
2362. Mode: %s.\n\
237  When any single daemon hangs or crashes, restart only the daemon that's\n\
238  in trouble using the supplied restart command.  Example:\n\
239    %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
2403. Mode: %s.\n\
241  The same as the previous mode, except that there is special treatment when\n\
242  the zebra daemon is in trouble.  In that case, a phased restart approach\n\
243  is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
244  daemons.  Example:\n\
245    %s -adz -r '/sbin/service %%s restart' \\\n\
246      -s '/sbin/service %%s start' \\\n\
247      -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
2484. Mode: %s.\n\
249  This is the same as the previous mode, except that the phased restart\n\
250  procedure is used whenever any of the daemons hangs or crashes.  Example:\n\
251    %s -Adz -r '/sbin/service %%s restart' \\\n\
252      -s '/sbin/service %%s start' \\\n\
253      -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
254As of this writing, it is believed that mode 2 [%s]\n\
255is not safe, and mode 3 [%s] may not be safe with some of the\n\
256routing daemons.\n\n\
257In order to avoid attempting to restart the daemons in a fast loop,\n\
258the -m and -M options allow you to control the minimum delay between\n\
259restart commands.  The minimum restart delay is recalculated each time\n\
260a restart is attempted: if the time since the last restart attempt exceeds\n\
261twice the -M value, then the restart delay is set to the -m value.\n\
262Otherwise, the interval is doubled (but capped at the -M value).\n\n\
263Options:\n\
264-d, --daemon	Run in daemon mode.  In this mode, error messages are sent\n\
265		to syslog instead of stdout.\n\
266-S, --statedir	Set the vty socket directory (default is %s)\n\
267-e, --no-echo	Do not ping the daemons to test responsiveness (this\n\
268		option is necessary if the daemons do not support the\n\
269		echo command)\n\
270-l, --loglevel	Set the logging level (default is %d).\n\
271		The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
272		but it can be set higher than %d if extra-verbose debugging\n\
273		messages are desired.\n\
274-m, --min-restart-interval\n\
275		Set the minimum seconds to wait between invocations of daemon\n\
276		restart commands (default is %d).\n\
277-M, --max-restart-interval\n\
278		Set the maximum seconds to wait between invocations of daemon\n\
279		restart commands (default is %d).\n\
280-i, --interval	Set the status polling interval in seconds (default is %d)\n\
281-t, --timeout	Set the unresponsiveness timeout in seconds (default is %d)\n\
282-T, --restart-timeout\n\
283		Set the restart (kill) timeout in seconds (default is %d).\n\
284		If any background jobs are still running after this much\n\
285		time has elapsed, they will be killed.\n\
286-r, --restart	Supply a Bourne shell command to use to restart a single\n\
287		daemon.  The command string should include '%%s' where the\n\
288		name of the daemon should be substituted.\n\
289		Note that -r and -R are incompatible.\n\
290-s, --start-command\n\
291		Supply a Bourne shell to command to use to start a single\n\
292		daemon.  The command string should include '%%s' where the\n\
293		name of the daemon should be substituted.\n\
294-k, --kill-command\n\
295		Supply a Bourne shell to command to use to stop a single\n\
296		daemon.  The command string should include '%%s' where the\n\
297		name of the daemon should be substituted.\n\
298-R, --restart-all\n\
299		When one or more daemons is down, try to restart everything\n\
300		using the Bourne shell command supplied as the argument.\n\
301		Note that -r and -R are incompatible.\n\
302-z, --unresponsive-restart\n\
303		When a daemon is unresponsive, treat it as being down for\n\
304		restart purposes.\n\
305-a, --all-restart\n\
306		When zebra hangs or crashes, restart all daemons using\n\
307		this phased approach: 1. stop all other daemons; 2. restart\n\
308		zebra; 3. start other daemons.  Requires -r, -s, and -k.\n\
309-A, --always-all-restart\n\
310		When any daemon (not just zebra) hangs or crashes, use the\n\
311		same phased restart mechanism described above for -a.\n\
312		Requires -r, -s, and -k.\n\
313-p, --pid-file	Set process identifier file name\n\
314		(default is %s).\n\
315-b, --blank-string\n\
316		When the supplied argument string is found in any of the\n\
317		various shell command arguments (-r, -s, -k, or -R), replace\n\
318		it with a space.  This is an ugly hack to circumvent problems\n\
319		passing command-line arguments with embedded spaces.\n\
320-v, --version	Print program version\n\
321-h, --help	Display this help and exit\n\
322", progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
323progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],mode_str[3],
324VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
325DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
326DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,DEFAULT_PIDFILE);
327
328  return status;
329}
330
/* Run SHELL_CMD through "/bin/sh -c" in a forked child process.

   Returns the child's pid, or -1 when fork fails.  The child is not waited
   for here; the caller is expected to reap it later. */
static pid_t
run_background(const char *shell_cmd)
{
  pid_t pid = fork();

  if (pid == -1)
    {
      zlog_err("fork failed, cannot run command [%s]: %s",
               shell_cmd,safe_strerror(errno));
      return -1;
    }

  if (pid == 0)
    {
      /* Child process. */
      const char *sh_argv[4];

      /* Use separate process group so child processes can be killed
         easily. */
      if (setpgid(0,0) < 0)
        zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));

      sh_argv[0] = "sh";
      sh_argv[1] = "-c";
      sh_argv[2] = shell_cmd;
      sh_argv[3] = NULL;
      execv("/bin/sh",(char *const *)sh_argv);

      /* Only reached when execv itself failed. */
      zlog_err("execv(/bin/sh -c '%s') failed: %s",
               shell_cmd,safe_strerror(errno));
      _exit(127);
    }

  /* Parent process: we will reap the child later. */
  zlog_err("Forked background command [pid %d]: %s",(int)pid,shell_cmd);
  return pid;
}
360
/* Store in RESULT the time that has passed since START_TIME, normalised so
   that tv_usec is not negative, and return RESULT. */
static struct timeval *
time_elapsed(struct timeval *result, const struct timeval *start_time)
{
  gettimeofday(result,NULL);
  result->tv_sec -= start_time->tv_sec;
  result->tv_usec -= start_time->tv_usec;

  /* Borrow whole seconds until the microsecond part is non-negative. */
  for (; result->tv_usec < 0; result->tv_sec--)
    result->tv_usec += 1000000L;

  return result;
}
374
375static int
376restart_kill(struct thread *t_kill)
377{
378  struct restart_info *restart = THREAD_ARG(t_kill);
379  struct timeval delay;
380
381  time_elapsed(&delay,&restart->time);
382  zlog_warn("Warning: %s %s child process %d still running after "
383	    "%ld seconds, sending signal %d",
384	    restart->what,restart->name,(int)restart->pid,delay.tv_sec,
385	    (restart->kills ? SIGKILL : SIGTERM));
386  kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM));
387  restart->kills++;
388  restart->t_kill = thread_add_timer(master,restart_kill,restart,
389				     gs.restart_timeout);
390  return 0;
391}
392
393static struct restart_info *
394find_child(pid_t child)
395{
396  if (gs.mode == MODE_GLOBAL_RESTART)
397    {
398      if (gs.restart.pid == child)
399        return &gs.restart;
400    }
401  else
402    {
403      struct daemon *dmn;
404      for (dmn = gs.daemons; dmn; dmn = dmn->next)
405        {
406	  if (dmn->restart.pid == child)
407	    return &dmn->restart;
408        }
409    }
410  return NULL;
411}
412
413static void
414sigchild(void)
415{
416  pid_t child;
417  int status;
418  const char *name;
419  const char *what;
420  struct restart_info *restart;
421
422  switch (child = waitpid(-1,&status,WNOHANG))
423    {
424    case -1:
425      zlog_err("waitpid failed: %s",safe_strerror(errno));
426      return;
427    case 0:
428      zlog_warn("SIGCHLD received, but waitpid did not reap a child");
429      return;
430    }
431
432  if ((restart = find_child(child)) != NULL)
433    {
434      name = restart->name;
435      what = restart->what;
436      restart->pid = 0;
437      gs.numpids--;
438      thread_cancel(restart->t_kill);
439      restart->t_kill = NULL;
440      /* Update restart time to reflect the time the command completed. */
441      gettimeofday(&restart->time,NULL);
442    }
443  else
444    {
445      zlog_err("waitpid returned status for an unknown child process %d",
446	       (int)child);
447      name = "(unknown)";
448      what = "background";
449    }
450  if (WIFSTOPPED(status))
451      zlog_warn("warning: %s %s process %d is stopped",
452		what,name,(int)child);
453  else if (WIFSIGNALED(status))
454    zlog_warn("%s %s process %d terminated due to signal %d",
455	      what,name,(int)child,WTERMSIG(status));
456  else if (WIFEXITED(status))
457    {
458      if (WEXITSTATUS(status) != 0)
459	zlog_warn("%s %s process %d exited with non-zero status %d",
460		  what,name,(int)child,WEXITSTATUS(status));
461      else
462	zlog_debug("%s %s process %d exited normally",what,name,(int)child);
463    }
464  else
465    zlog_err("cannot interpret %s %s process %d wait status 0x%x",
466	     what,name,(int)child,status);
467  phase_check();
468}
469
470static int
471run_job(struct restart_info *restart, const char *cmdtype, const char *command,
472	int force, int update_interval)
473{
474  struct timeval delay;
475
476  if (gs.loglevel > LOG_DEBUG+1)
477    zlog_debug("attempting to %s %s",cmdtype,restart->name);
478
479  if (restart->pid)
480    {
481      if (gs.loglevel > LOG_DEBUG+1)
482        zlog_debug("cannot %s %s, previous pid %d still running",
483		   cmdtype,restart->name,(int)restart->pid);
484      return -1;
485    }
486
487  /* Note: time_elapsed test must come before the force test, since we need
488     to make sure that delay is initialized for use below in updating the
489     restart interval. */
490  if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
491      !force)
492    {
493      if (gs.loglevel > LOG_DEBUG+1)
494        zlog_debug("postponing %s %s: "
495		   "elapsed time %ld < retry interval %ld",
496		   cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
497      return -1;
498    }
499
500  gettimeofday(&restart->time,NULL);
501  restart->kills = 0;
502  {
503    char cmd[strlen(command)+strlen(restart->name)+1];
504    snprintf(cmd,sizeof(cmd),command,restart->name);
505    if ((restart->pid = run_background(cmd)) > 0)
506      {
507	restart->t_kill = thread_add_timer(master,restart_kill,restart,
508					   gs.restart_timeout);
509	restart->what = cmdtype;
510	gs.numpids++;
511      }
512    else
513      restart->pid = 0;
514  }
515
516  /* Calculate the new restart interval. */
517  if (update_interval)
518    {
519      if (delay.tv_sec > 2*gs.max_restart_interval)
520	restart->interval = gs.min_restart_interval;
521      else if ((restart->interval *= 2) > gs.max_restart_interval)
522	restart->interval = gs.max_restart_interval;
523      if (gs.loglevel > LOG_DEBUG+1)
524	zlog_debug("restart %s interval is now %ld",
525		   restart->name,restart->interval);
526    }
527  return restart->pid;
528}
529
/* Register handle_read to run when the daemon's socket becomes readable. */
#define SET_READ_HANDLER(D) \
  (D)->t_read = thread_add_read(master,handle_read,(D),(D)->fd)

/* The three wakeup macros below arm the daemon's wakeup timer with a
   randomised delay around gs.period milliseconds; they differ only in the
   callback that is scheduled. */
#define SET_WAKEUP_DOWN(D) \
  (D)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(D), \
                                        FUZZY(gs.period))

#define SET_WAKEUP_UNRESPONSIVE(D) \
  (D)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(D), \
                                        FUZZY(gs.period))

#define SET_WAKEUP_ECHO(D) \
  (D)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(D), \
                                        FUZZY(gs.period))
544
545static int
546wakeup_down(struct thread *t_wakeup)
547{
548  struct daemon *dmn = THREAD_ARG(t_wakeup);
549
550  dmn->t_wakeup = NULL;
551  if (try_connect(dmn) < 0)
552    SET_WAKEUP_DOWN(dmn);
553  if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
554    try_restart(dmn);
555  return 0;
556}
557
558static int
559wakeup_init(struct thread *t_wakeup)
560{
561  struct daemon *dmn = THREAD_ARG(t_wakeup);
562
563  dmn->t_wakeup = NULL;
564  if (try_connect(dmn) < 0)
565    {
566      SET_WAKEUP_DOWN(dmn);
567      zlog_err("%s state -> down : initial connection attempt failed",
568	       dmn->name);
569      dmn->state = DAEMON_DOWN;
570    }
571  return 0;
572}
573
574static void
575daemon_down(struct daemon *dmn, const char *why)
576{
577  if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
578    zlog_err("%s state -> down : %s",dmn->name,why);
579  else if (gs.loglevel > LOG_DEBUG)
580    zlog_debug("%s still down : %s",dmn->name,why);
581  if (IS_UP(dmn))
582    gs.numdown++;
583  dmn->state = DAEMON_DOWN;
584  if (dmn->fd >= 0)
585    {
586      close(dmn->fd);
587      dmn->fd = -1;
588    }
589  THREAD_OFF(dmn->t_read);
590  THREAD_OFF(dmn->t_write);
591  THREAD_OFF(dmn->t_wakeup);
592  if (try_connect(dmn) < 0)
593    SET_WAKEUP_DOWN(dmn);
594  phase_check();
595}
596
597static int
598handle_read(struct thread *t_read)
599{
600  struct daemon *dmn = THREAD_ARG(t_read);
601  static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
602  char buf[sizeof(resp)+100];
603  ssize_t rc;
604  struct timeval delay;
605
606  dmn->t_read = NULL;
607  if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
608    {
609      char why[100];
610
611      if (ERRNO_IO_RETRY(errno))
612	{
613	  /* Pretend it never happened. */
614	  SET_READ_HANDLER(dmn);
615	  return 0;
616	}
617      snprintf(why,sizeof(why),"unexpected read error: %s",
618	       safe_strerror(errno));
619      daemon_down(dmn,why);
620      return 0;
621    }
622  if (rc == 0)
623    {
624      daemon_down(dmn,"read returned EOF");
625      return 0;
626    }
627  if (!dmn->echo_sent.tv_sec)
628    {
629      char why[sizeof(buf)+100];
630      snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
631	       (int)rc,(int)rc,buf);
632      daemon_down(dmn,why);
633      return 0;
634    }
635
636  /* We are expecting an echo response: is there any chance that the
637     response would not be returned entirely in the first read?  That
638     seems inconceivable... */
639  if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
640    {
641      char why[100+sizeof(buf)];
642      snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
643			       "(expecting %u): %.*s",
644	       (int)rc,(u_int)sizeof(resp),(int)rc,buf);
645      daemon_down(dmn,why);
646      return 0;
647    }
648
649  time_elapsed(&delay,&dmn->echo_sent);
650  dmn->echo_sent.tv_sec = 0;
651  if (dmn->state == DAEMON_UNRESPONSIVE)
652    {
653      if (delay.tv_sec < gs.timeout)
654	{
655	  dmn->state = DAEMON_UP;
656	  zlog_warn("%s state -> up : echo response received after %ld.%06ld "
657		    "seconds", dmn->name,delay.tv_sec,delay.tv_usec);
658	}
659      else
660	zlog_warn("%s: slow echo response finally received after %ld.%06ld "
661		  "seconds", dmn->name,delay.tv_sec,delay.tv_usec);
662    }
663  else if (gs.loglevel > LOG_DEBUG+1)
664    zlog_debug("%s: echo response received after %ld.%06ld seconds",
665	       dmn->name,delay.tv_sec,delay.tv_usec);
666
667  SET_READ_HANDLER(dmn);
668  if (dmn->t_wakeup)
669    thread_cancel(dmn->t_wakeup);
670  SET_WAKEUP_ECHO(dmn);
671
672  return 0;
673}
674
675static void
676daemon_up(struct daemon *dmn, const char *why)
677{
678  dmn->state = DAEMON_UP;
679  gs.numdown--;
680  dmn->connect_tries = 0;
681  zlog_notice("%s state -> up : %s",dmn->name,why);
682  if (gs.do_ping)
683    SET_WAKEUP_ECHO(dmn);
684  phase_check();
685}
686
687static int
688check_connect(struct thread *t_write)
689{
690  struct daemon *dmn = THREAD_ARG(t_write);
691  int sockerr;
692  socklen_t reslen = sizeof(sockerr);
693
694  dmn->t_write = NULL;
695  if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0)
696    {
697      zlog_warn("%s: check_connect: getsockopt failed: %s",
698	        dmn->name,safe_strerror(errno));
699      daemon_down(dmn,"getsockopt failed checking connection success");
700      return 0;
701    }
702  if ((reslen == sizeof(sockerr)) && sockerr)
703    {
704      char why[100];
705      snprintf(why,sizeof(why),
706	       "getsockopt reports that connection attempt failed: %s",
707	       safe_strerror(sockerr));
708      daemon_down(dmn,why);
709      return 0;
710    }
711
712  daemon_up(dmn,"delayed connect succeeded");
713  return 0;
714}
715
716static int
717wakeup_connect_hanging(struct thread *t_wakeup)
718{
719  struct daemon *dmn = THREAD_ARG(t_wakeup);
720  char why[100];
721
722  dmn->t_wakeup = NULL;
723  snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds",
724	   gs.timeout);
725  daemon_down(dmn,why);
726  return 0;
727}
728
729/* Making connection to protocol daemon. */
730static int
731try_connect(struct daemon *dmn)
732{
733  int sock;
734  struct sockaddr_un addr;
735  socklen_t len;
736
737  if (gs.loglevel > LOG_DEBUG+1)
738    zlog_debug("%s: attempting to connect",dmn->name);
739  dmn->connect_tries++;
740
741  memset (&addr, 0, sizeof (struct sockaddr_un));
742  addr.sun_family = AF_UNIX;
743  snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
744	   gs.vtydir,dmn->name);
745#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
746  len = addr.sun_len = SUN_LEN(&addr);
747#else
748  len = sizeof (addr.sun_family) + strlen (addr.sun_path);
749#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
750
751  /* Quick check to see if we might succeed before we go to the trouble
752     of creating a socket. */
753  if (access(addr.sun_path, W_OK) < 0)
754    {
755      if (errno != ENOENT)
756        zlog_err("%s: access to socket %s denied: %s",
757		dmn->name,addr.sun_path,safe_strerror(errno));
758      return -1;
759    }
760
761  if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
762    {
763      zlog_err("%s(%s): cannot make socket: %s",
764	       __func__,addr.sun_path, safe_strerror(errno));
765      return -1;
766    }
767
768  if (set_nonblocking(sock) < 0)
769    {
770      zlog_err("%s(%s): set_nonblocking(%d) failed",
771	       __func__, addr.sun_path, sock);
772      close(sock);
773      return -1;
774    }
775
776  if (connect (sock, (struct sockaddr *) &addr, len) < 0)
777    {
778      if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
779	{
780	  if (gs.loglevel > LOG_DEBUG)
781	    zlog_debug("%s(%s): connect failed: %s",
782		       __func__,addr.sun_path, safe_strerror(errno));
783	  close (sock);
784	  return -1;
785	}
786      if (gs.loglevel > LOG_DEBUG)
787	zlog_debug("%s: connection in progress",dmn->name);
788      dmn->state = DAEMON_CONNECTING;
789      dmn->fd = sock;
790      dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
791      dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
792				       gs.timeout);
793      SET_READ_HANDLER(dmn);
794      return 0;
795    }
796
797  dmn->fd = sock;
798  SET_READ_HANDLER(dmn);
799  daemon_up(dmn,"connect succeeded");
800  return 1;
801}
802
803static int
804phase_hanging(struct thread *t_hanging)
805{
806  gs.t_phase_hanging = NULL;
807  zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
808           phase_str[gs.phase],PHASE_TIMEOUT);
809  gs.phase = PHASE_NONE;
810  return 0;
811}
812
813static void
814set_phase(restart_phase_t new_phase)
815{
816  gs.phase = new_phase;
817  if (gs.t_phase_hanging)
818    thread_cancel(gs.t_phase_hanging);
819  gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
820  					PHASE_TIMEOUT);
821}
822
823static void
824phase_check(void)
825{
826  switch (gs.phase)
827    {
828    case PHASE_NONE:
829      break;
830    case PHASE_STOPS_PENDING:
831      if (gs.numpids)
832	break;
833      zlog_info("Phased restart: all routing daemon stop jobs have completed.");
834      set_phase(PHASE_WAITING_DOWN);
835      /*FALLTHRU*/
836    case PHASE_WAITING_DOWN:
837      if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
838        break;
839      zlog_info("Phased restart: all routing daemons now down.");
840      run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
841      set_phase(PHASE_ZEBRA_RESTART_PENDING);
842      /*FALLTHRU*/
843    case PHASE_ZEBRA_RESTART_PENDING:
844      if (gs.special->restart.pid)
845	break;
846      zlog_info("Phased restart: %s restart job completed.",gs.special->name);
847      set_phase(PHASE_WAITING_ZEBRA_UP);
848      /*FALLTHRU*/
849    case PHASE_WAITING_ZEBRA_UP:
850      if (!IS_UP(gs.special))
851        break;
852      zlog_info("Phased restart: %s is now up.",gs.special->name);
853      {
854        struct daemon *dmn;
855	for (dmn = gs.daemons; dmn; dmn = dmn->next)
856	  {
857	    if (dmn != gs.special)
858	      run_job(&dmn->restart,"start",gs.start_command,1,0);
859	  }
860      }
861      gs.phase = PHASE_NONE;
862      THREAD_OFF(gs.t_phase_hanging);
863      zlog_notice("Phased global restart has completed.");
864      break;
865    }
866}
867
868static void
869try_restart(struct daemon *dmn)
870{
871  switch (gs.mode)
872  {
873  case MODE_MONITOR:
874    return;
875  case MODE_GLOBAL_RESTART:
876    run_job(&gs.restart,"restart",gs.restart_command,0,1);
877    break;
878  case MODE_SEPARATE_RESTART:
879    run_job(&dmn->restart,"restart",gs.restart_command,0,1);
880    break;
881  case MODE_PHASED_ZEBRA_RESTART:
882    if (dmn != gs.special)
883      {
884        if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
885	  run_job(&dmn->restart,"restart",gs.restart_command,0,1);
886	else
887	  zlog_debug("%s: postponing restart attempt because master %s daemon "
888		     "not up [%s], or phased restart in progress",
889		     dmn->name,gs.special->name,state_str[gs.special->state]);
890	break;
891      }
892    /*FALLTHRU*/
893  case MODE_PHASED_ALL_RESTART:
894    if ((gs.phase != PHASE_NONE) || gs.numpids)
895      {
896	if (gs.loglevel > LOG_DEBUG+1)
897	  zlog_debug("postponing phased global restart: restart already in "
898		     "progress [%s], or outstanding child processes [%d]",
899		     phase_str[gs.phase],gs.numpids);
900        break;
901      }
902    /* Is it too soon for a restart? */
903    {
904      struct timeval delay;
905      if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
906      	  gs.special->restart.interval)
907	{
908	  if (gs.loglevel > LOG_DEBUG+1)
909	    zlog_debug("postponing phased global restart: "
910		       "elapsed time %ld < retry interval %ld",
911		       (long)delay.tv_sec,gs.special->restart.interval);
912	  break;
913	}
914    }
915    zlog_info("Phased restart: stopping all routing daemons.");
916    /* First step: stop all other daemons. */
917    for (dmn = gs.daemons; dmn; dmn = dmn->next)
918      {
919        if (dmn != gs.special)
920	  run_job(&dmn->restart,"stop",gs.stop_command,1,1);
921      }
922    set_phase(PHASE_STOPS_PENDING);
923    break;
924  default:
925    zlog_err("error: unknown restart mode %d",gs.mode);
926    break;
927  }
928}
929
930static int
931wakeup_unresponsive(struct thread *t_wakeup)
932{
933  struct daemon *dmn = THREAD_ARG(t_wakeup);
934
935  dmn->t_wakeup = NULL;
936  if (dmn->state != DAEMON_UNRESPONSIVE)
937    zlog_err("%s: no longer unresponsive (now %s), "
938	     "wakeup should have been cancelled!",
939	     dmn->name,state_str[dmn->state]);
940  else
941    {
942      SET_WAKEUP_UNRESPONSIVE(dmn);
943      try_restart(dmn);
944    }
945  return 0;
946}
947
948static int
949wakeup_no_answer(struct thread *t_wakeup)
950{
951  struct daemon *dmn = THREAD_ARG(t_wakeup);
952
953  dmn->t_wakeup = NULL;
954  dmn->state = DAEMON_UNRESPONSIVE;
955  zlog_err("%s state -> unresponsive : no response yet to ping "
956	   "sent %ld seconds ago",dmn->name,gs.timeout);
957  if (gs.unresponsive_restart)
958    {
959      SET_WAKEUP_UNRESPONSIVE(dmn);
960      try_restart(dmn);
961    }
962  return 0;
963}
964
965static int
966wakeup_send_echo(struct thread *t_wakeup)
967{
968  static const char echocmd[] = "echo " PING_TOKEN;
969  ssize_t rc;
970  struct daemon *dmn = THREAD_ARG(t_wakeup);
971
972  dmn->t_wakeup = NULL;
973  if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) ||
974      ((size_t)rc != sizeof(echocmd)))
975    {
976      char why[100+sizeof(echocmd)];
977      snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
978               echocmd,(int)rc,(u_int)sizeof(echocmd));
979      daemon_down(dmn,why);
980    }
981  else
982    {
983      gettimeofday(&dmn->echo_sent,NULL);
984      dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
985    }
986  return 0;
987}
988
/* Termination handler (registered for SIGINT and SIGTERM): log the event
   and exit with a success status. */
static void
sigint(void)
{
  zlog_notice("Terminating on signal");
  exit(EXIT_SUCCESS);
}
995
/* Check that CMD is usable as a command template: it must contain exactly
   one '%' character, and that character must introduce "%s".
   Returns 1 when the template is valid and 0 otherwise. */
static int
valid_command(const char *cmd)
{
  const char *pct = strchr(cmd,'%');

  if (pct == NULL)
    return 0;
  if (pct[1] != 's')
    return 0;
  return strchr(pct+1,'%') == NULL;
}
1003
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Returns a newly allocated copy of CMD in which every occurrence of
   BLANKSTR has been replaced with a single space.  The process exits if
   the copy cannot be allocated.

   The scan resumes just after each inserted space, so the function always
   terminates, even when BLANKSTR is itself a space.  An empty BLANKSTR
   matches nothing and yields an unmodified copy. */
static char *
translate_blanks(const char *cmd, const char *blankstr)
{
  char *res;
  char *p;
  size_t bslen = strlen(blankstr);

  if (!(res = strdup(cmd)))
    {
      perror("strdup");
      exit(1);
    }

  /* strstr matches an empty needle at every position; replacing such a
     match would grow the string past its allocation. */
  if (bslen == 0)
    return res;

  for (p = res; (p = strstr(p,blankstr)) != NULL; p++)
    {
      *p = ' ';
      /* Close the gap left by the rest of the marker, moving the
         terminating NUL as well. */
      if (bslen != 1)
        memmove(p+1,p+bslen,strlen(p+bslen)+1);
    }
  return res;
}
1026
1027int
1028main(int argc, char **argv)
1029{
1030  const char *progname;
1031  int opt;
1032  int daemon_mode = 0;
1033  const char *pidfile = DEFAULT_PIDFILE;
1034  const char *special = "zebra";
1035  const char *blankstr = NULL;
1036  static struct quagga_signal_t my_signals[] =
1037  {
1038    {
1039      .signal = SIGINT,
1040      .handler = sigint,
1041    },
1042    {
1043      .signal = SIGTERM,
1044      .handler = sigint,
1045    },
1046    {
1047      .signal = SIGCHLD,
1048      .handler = sigchild,
1049    },
1050  };
1051
1052  if ((progname = strrchr (argv[0], '/')) != NULL)
1053    progname++;
1054  else
1055    progname = argv[0];
1056
1057  gs.restart.name = "all";
1058  while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
1059			    longopts, 0)) != EOF)
1060    {
1061      switch (opt)
1062        {
1063	case 0:
1064	  break;
1065        case 'a':
1066	  if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1067	    {
1068	      fputs("Ambiguous operating mode selected.\n",stderr);
1069	      return usage(progname,1);
1070	    }
1071	  gs.mode = MODE_PHASED_ZEBRA_RESTART;
1072	  break;
1073        case 'A':
1074	  if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1075	    {
1076	      fputs("Ambiguous operating mode selected.\n",stderr);
1077	      return usage(progname,1);
1078	    }
1079	  gs.mode = MODE_PHASED_ALL_RESTART;
1080	  break;
1081	case 'b':
1082	  blankstr = optarg;
1083	  break;
1084        case 'd':
1085	  daemon_mode = 1;
1086	  break;
1087        case 'e':
1088	  gs.do_ping = 0;
1089	  break;
1090        case 'k':
1091	  if (!valid_command(optarg))
1092	  {
1093	    fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
1094		    optarg);
1095	    return usage(progname,1);
1096	  }
1097	  gs.stop_command = optarg;
1098	  break;
1099	case 'l':
1100	  {
1101	    char garbage[3];
1102	    if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
1103	        (gs.loglevel < LOG_EMERG))
1104	      {
1105	        fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
1106		return usage(progname,1);
1107	      }
1108	  }
1109	  break;
1110	case 'm':
1111	  {
1112	    char garbage[3];
1113	    if ((sscanf(optarg,"%ld%1s",
1114	    		&gs.min_restart_interval,garbage) != 1) ||
1115	        (gs.min_restart_interval < 0))
1116	      {
1117	        fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
1118		        optarg);
1119		return usage(progname,1);
1120	      }
1121	  }
1122	  break;
1123	case 'M':
1124	  {
1125	    char garbage[3];
1126	    if ((sscanf(optarg,"%ld%1s",
1127	    		&gs.max_restart_interval,garbage) != 1) ||
1128	        (gs.max_restart_interval < 0))
1129	      {
1130	        fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
1131		        optarg);
1132		return usage(progname,1);
1133	      }
1134	  }
1135	  break;
1136	case 'i':
1137	  {
1138	    char garbage[3];
1139	    int period;
1140	    if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
1141	        (gs.period < 1))
1142	      {
1143	        fprintf(stderr,"Invalid interval argument: %s\n",optarg);
1144		return usage(progname,1);
1145	      }
1146	    gs.period = 1000*period;
1147	  }
1148	  break;
1149        case 'p':
1150	  pidfile = optarg;
1151	  break;
1152        case 'r':
1153	  if ((gs.mode == MODE_GLOBAL_RESTART) ||
1154	      (gs.mode == MODE_SEPARATE_RESTART))
1155	    {
1156	      fputs("Ambiguous operating mode selected.\n",stderr);
1157	      return usage(progname,1);
1158	    }
1159	  if (!valid_command(optarg))
1160	  {
1161	    fprintf(stderr,
1162		    "Invalid restart command, must contain '%%s': %s\n",
1163		    optarg);
1164	    return usage(progname,1);
1165	  }
1166	  gs.restart_command = optarg;
1167	  if (gs.mode == MODE_MONITOR)
1168	    gs.mode = MODE_SEPARATE_RESTART;
1169	  break;
1170        case 'R':
1171	  if (gs.mode != MODE_MONITOR)
1172	    {
1173	      fputs("Ambiguous operating mode selected.\n",stderr);
1174	      return usage(progname,1);
1175	    }
1176	  if (strchr(optarg,'%'))
1177	    {
1178	      fprintf(stderr,
1179		      "Invalid restart-all arg, must not contain '%%s': %s\n",
1180		      optarg);
1181	      return usage(progname,1);
1182	    }
1183	  gs.restart_command = optarg;
1184	  gs.mode = MODE_GLOBAL_RESTART;
1185	  break;
1186        case 's':
1187	  if (!valid_command(optarg))
1188	  {
1189	    fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
1190		    optarg);
1191	    return usage(progname,1);
1192	  }
1193	  gs.start_command = optarg;
1194	  break;
1195	case 'S':
1196	  gs.vtydir = optarg;
1197	  break;
1198	case 't':
1199	  {
1200	    char garbage[3];
1201	    if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
1202	        (gs.timeout < 1))
1203	      {
1204	        fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
1205		return usage(progname,1);
1206	      }
1207	  }
1208	  break;
1209	case 'T':
1210	  {
1211	    char garbage[3];
1212	    if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
1213	        (gs.restart_timeout < 1))
1214	      {
1215	        fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
1216		return usage(progname,1);
1217	      }
1218	  }
1219	  break;
1220        case 'z':
1221	  gs.unresponsive_restart = 1;
1222	  break;
1223	case 'v':
1224	  printf ("%s version %s\n", progname, QUAGGA_VERSION);
1225	  puts("Copyright 2004 Andrew J. Schorr");
1226	  return 0;
1227        case 'h':
1228	  return usage(progname,0);
1229        default:
1230	  fputs("Invalid option.\n",stderr);
1231	  return usage(progname,1);
1232        }
1233    }
1234
1235  if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
1236    {
1237      fputs("Option -z requires a -r or -R restart option.\n",stderr);
1238      return usage(progname,1);
1239    }
1240  switch (gs.mode)
1241    {
1242    case MODE_MONITOR:
1243      if (gs.restart_command || gs.start_command || gs.stop_command)
1244        {
1245	  fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
1246		  mode_str[gs.mode]);
1247	  return usage(progname,1);
1248	}
1249      break;
1250    case MODE_GLOBAL_RESTART:
1251    case MODE_SEPARATE_RESTART:
1252      if (!gs.restart_command || gs.start_command || gs.stop_command)
1253        {
1254	  fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
1255		  mode_str[gs.mode]);
1256	  return usage(progname,1);
1257	}
1258      break;
1259    case MODE_PHASED_ZEBRA_RESTART:
1260    case MODE_PHASED_ALL_RESTART:
1261      if (!gs.restart_command || !gs.start_command || !gs.stop_command)
1262        {
1263	  fprintf(stderr,
1264	  	  "Need start, kill, and restart commands in [%s] mode.\n",
1265		  mode_str[gs.mode]);
1266	  return usage(progname,1);
1267	}
1268      break;
1269    }
1270
1271  if (blankstr)
1272    {
1273      if (gs.restart_command)
1274        gs.restart_command = translate_blanks(gs.restart_command,blankstr);
1275      if (gs.start_command)
1276        gs.start_command = translate_blanks(gs.start_command,blankstr);
1277      if (gs.stop_command)
1278        gs.stop_command = translate_blanks(gs.stop_command,blankstr);
1279    }
1280
1281  gs.restart.interval = gs.min_restart_interval;
1282  master = thread_master_create();
1283  signal_init (master, array_size(my_signals), my_signals);
1284  srandom(time(NULL));
1285
1286  {
1287    int i;
1288    struct daemon *tail = NULL;
1289
1290    for (i = optind; i < argc; i++)
1291      {
1292	struct daemon *dmn;
1293
1294	if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
1295	  {
1296	    fprintf(stderr,"calloc(1,%u) failed: %s\n",
1297		    (u_int)sizeof(*dmn), safe_strerror(errno));
1298	    return 1;
1299	  }
1300	dmn->name = dmn->restart.name = argv[i];
1301	dmn->state = DAEMON_INIT;
1302	gs.numdaemons++;
1303	gs.numdown++;
1304	dmn->fd = -1;
1305	dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
1306					      100+(random() % 900));
1307	dmn->restart.interval = gs.min_restart_interval;
1308	if (tail)
1309	  tail->next = dmn;
1310	else
1311	  gs.daemons = dmn;
1312	tail = dmn;
1313
1314	if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1315	     (gs.mode == MODE_PHASED_ALL_RESTART)) &&
1316	    !strcmp(dmn->name,special))
1317	  gs.special = dmn;
1318      }
1319  }
1320  if (!gs.daemons)
1321    {
1322      fputs("Must specify one or more daemons to monitor.\n",stderr);
1323      return usage(progname,1);
1324    }
1325  if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1326      (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
1327    {
1328      fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
1329	      mode_str[gs.mode],special);
1330      return usage(progname,1);
1331    }
1332  if (gs.special && (gs.numdaemons < 2))
1333    {
1334      fprintf(stderr,"Mode [%s] does not make sense with only 1 daemon "
1335		     "to watch.\n",mode_str[gs.mode]);
1336      return usage(progname,1);
1337    }
1338
1339  zlog_default = openzlog(progname, ZLOG_NONE,
1340			  LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
1341  zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1342  if (daemon_mode)
1343    {
1344      zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
1345      if (daemon (0, 0) < 0)
1346	{
1347	  fprintf(stderr, "Watchquagga daemon failed: %s", strerror(errno));
1348	  exit (1);
1349	}
1350    }
1351  else
1352    zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
1353
1354  /* Make sure we're not already running. */
1355  pid_output (pidfile);
1356
1357  /* Announce which daemons are being monitored. */
1358  {
1359    struct daemon *dmn;
1360    size_t len = 0;
1361
1362    for (dmn = gs.daemons; dmn; dmn = dmn->next)
1363      len += strlen(dmn->name)+1;
1364
1365    {
1366      char buf[len+1];
1367      char *p = buf;
1368
1369      for (dmn = gs.daemons; dmn; dmn = dmn->next)
1370	{
1371	  if (p != buf)
1372	    *p++ = ' ';
1373	  strcpy(p,dmn->name);
1374	  p += strlen(p);
1375	}
1376      zlog_notice("%s %s watching [%s], mode [%s]",
1377      		  progname, QUAGGA_VERSION, buf, mode_str[gs.mode]);
1378    }
1379  }
1380
1381  {
1382    struct thread thread;
1383
1384    while (thread_fetch (master, &thread))
1385      thread_call (&thread);
1386  }
1387
1388  /* Not reached. */
1389  return 0;
1390}
1391