1/*++ 2/* NAME 3/* watchdog 3 4/* SUMMARY 5/* watchdog timer 6/* SYNOPSIS 7/* #include <watchdog.h> 8/* 9/* WATCHDOG *watchdog_create(timeout, action, context) 10/* unsigned timeout; 11/* void (*action)(WATCHDOG *watchdog, char *context); 12/* char *context; 13/* 14/* void watchdog_start(watchdog) 15/* WATCHDOG *watchdog; 16/* 17/* void watchdog_stop(watchdog) 18/* WATCHDOG *watchdog; 19/* 20/* void watchdog_destroy(watchdog) 21/* WATCHDOG *watchdog; 22/* 23/* void watchdog_pat() 24/* DESCRIPTION 25/* This module implements watchdog timers that are based on ugly 26/* UNIX alarm timers. The module is designed to survive systems 27/* with clocks that jump occasionally. 28/* 29/* Watchdog timers can be stacked. Only one watchdog timer can be 30/* active at a time. Only the last created watchdog timer can be 31/* manipulated. Watchdog timers must be destroyed in reverse order 32/* of creation. 33/* 34/* watchdog_create() suspends the current watchdog timer, if any, 35/* and instantiates a new watchdog timer. 36/* 37/* watchdog_start() starts or restarts the watchdog timer. 38/* 39/* watchdog_stop() stops the watchdog timer. 40/* 41/* watchdog_destroy() stops the watchdog timer, and resumes the 42/* watchdog timer instance that was suspended by watchdog_create(). 43/* 44/* watchdog_pat() pats the watchdog, so it stays quiet. 45/* 46/* Arguments: 47/* .IP timeout 48/* The watchdog time limit. When the watchdog timer runs, the 49/* process must invoke watchdog_start(), watchdog_stop() or 50/* watchdog_destroy() before the time limit is reached. 51/* .IP action 52/* A null pointer, or pointer to function that is called when the 53/* watchdog alarm goes off. The default action is to terminate 54/* the process with a fatal error. 55/* .IP context 56/* Application context that is passed to the action routine. 57/* .IP watchdog 58/* Must be a pointer to the most recently created watchdog instance. 59/* This argument is checked upon each call. 60/* BUGS 61/* UNIX alarm timers are not stackable, so there can be at most one 62/* watchdog instance active at any given time. 63/* SEE ALSO 64/* msg(3) diagnostics interface 65/* DIAGNOSTICS 66/* Fatal errors: memory allocation problem, system call failure. 67/* Panics: interface violations. 68/* LICENSE 69/* .ad 70/* .fi 71/* The Secure Mailer license must be distributed with this software. 72/* AUTHOR(S) 73/* Wietse Venema 74/* IBM T.J. Watson Research 75/* P.O. Box 704 76/* Yorktown Heights, NY 10598, USA 77/*--*/ 78 79/* System library. */ 80 81#include <sys_defs.h> 82#include <unistd.h> 83#include <signal.h> 84#include <posix_signals.h> 85 86/* Utility library. */ 87 88#include <msg.h> 89#include <mymalloc.h> 90#include <killme_after.h> 91#include <watchdog.h> 92 93/* Application-specific. */ 94 95 /* 96 * Rather than having one timer that goes off when it is too late, we break 97 * up the time limit into smaller intervals so that we can deal with clocks 98 * that jump occasionally. 99 */ 100#define WATCHDOG_STEPS 3 101 102 /* 103 * UNIX alarms are not stackable, but we can save and restore state, so that 104 * watchdogs can at least be nested, sort of. 105 */ 106struct WATCHDOG { 107 unsigned timeout; /* our time resolution */ 108 WATCHDOG_FN action; /* application routine */ 109 char *context; /* application context */ 110 int trip_run; /* number of successive timeouts */ 111 WATCHDOG *saved_watchdog; /* saved state */ 112 struct sigaction saved_action; /* saved state */ 113 unsigned saved_time; /* saved state */ 114}; 115 116 /* 117 * However, only one watchdog instance can be current, and the caller has to 118 * restore state before a prior watchdog instance can be manipulated. 119 */ 120static WATCHDOG *watchdog_curr; 121 122 /* 123 * Workaround for systems where the alarm signal does not wakeup the event 124 * machinery, and therefore does not restart the watchdog timer in the 125 * single_server etc. skeletons. The symptom is that programs abort when the 126 * watchdog timeout is less than the max_idle time. 127 */ 128#ifdef USE_WATCHDOG_PIPE 129#include <errno.h> 130#include <iostuff.h> 131#include <events.h> 132 133static int watchdog_pipe[2]; 134 135/* watchdog_read - read event pipe */ 136 137static void watchdog_read(int unused_event, char *unused_context) 138{ 139 char ch; 140 141 while (read(watchdog_pipe[0], &ch, 1) > 0) 142 /* void */ ; 143} 144 145#endif /* USE_WATCHDOG_PIPE */ 146 147/* watchdog_event - handle timeout event */ 148 149static void watchdog_event(int unused_sig) 150{ 151 const char *myname = "watchdog_event"; 152 WATCHDOG *wp; 153 154 /* 155 * This routine runs as a signal handler. We should not do anything that 156 * could involve memory allocation/deallocation, but exiting without 157 * proper explanation would be unacceptable. For this reason, msg(3) was 158 * made safe for usage by signal handlers that terminate the process. 159 */ 160 if ((wp = watchdog_curr) == 0) 161 msg_panic("%s: no instance", myname); 162 if (msg_verbose > 1) 163 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run); 164 if (++(wp->trip_run) < WATCHDOG_STEPS) { 165#ifdef USE_WATCHDOG_PIPE 166 int saved_errno = errno; 167 168 /* Wake up the events(3) engine. */ 169 if (write(watchdog_pipe[1], "", 1) != 1) 170 msg_warn("%s: write watchdog_pipe: %m", myname); 171 errno = saved_errno; 172#endif 173 alarm(wp->timeout); 174 } else { 175 if (wp->action) 176 wp->action(wp, wp->context); 177 else { 178 killme_after(5); 179#ifdef TEST 180 pause(); 181#endif 182 msg_fatal("watchdog timeout"); 183 } 184 } 185} 186 187/* watchdog_create - create watchdog instance */ 188 189WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context) 190{ 191 const char *myname = "watchdog_create"; 192 struct sigaction sig_action; 193 WATCHDOG *wp; 194 195 wp = (WATCHDOG *) mymalloc(sizeof(*wp)); 196 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0) 197 msg_panic("%s: timeout %d is too small", myname, timeout); 198 wp->action = action; 199 wp->context = context; 200 wp->saved_watchdog = watchdog_curr; 201 wp->saved_time = alarm(0); 202 sigemptyset(&sig_action.sa_mask); 203#ifdef SA_RESTART 204 sig_action.sa_flags = SA_RESTART; 205#else 206 sig_action.sa_flags = 0; 207#endif 208 sig_action.sa_handler = watchdog_event; 209 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0) 210 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 211 if (msg_verbose > 1) 212 msg_info("%s: %p %d", myname, (void *) wp, timeout); 213#ifdef USE_WATCHDOG_PIPE 214 if (watchdog_curr == 0) { 215 if (pipe(watchdog_pipe) < 0) 216 msg_fatal("%s: pipe: %m", myname); 217 non_blocking(watchdog_pipe[0], NON_BLOCKING); 218 non_blocking(watchdog_pipe[1], NON_BLOCKING); 219 event_enable_read(watchdog_pipe[0], watchdog_read, (char *) 0); 220 } 221#endif 222 return (watchdog_curr = wp); 223} 224 225/* watchdog_destroy - destroy watchdog instance, restore state */ 226 227void watchdog_destroy(WATCHDOG *wp) 228{ 229 const char *myname = "watchdog_destroy"; 230 231 watchdog_stop(wp); 232 watchdog_curr = wp->saved_watchdog; 233 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0) 234 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 235 if (wp->saved_time) 236 alarm(wp->saved_time); 237 myfree((char *) wp); 238#ifdef USE_WATCHDOG_PIPE 239 if (watchdog_curr == 0) { 240 event_disable_readwrite(watchdog_pipe[0]); 241 (void) close(watchdog_pipe[0]); 242 (void) close(watchdog_pipe[1]); 243 } 244#endif 245 if (msg_verbose > 1) 246 msg_info("%s: %p", myname, (void *) wp); 247} 248 249/* watchdog_start - enable watchdog timer */ 250 251void watchdog_start(WATCHDOG *wp) 252{ 253 const char *myname = "watchdog_start"; 254 255 if (wp != watchdog_curr) 256 msg_panic("%s: wrong watchdog instance", myname); 257 wp->trip_run = 0; 258 alarm(wp->timeout); 259 if (msg_verbose > 1) 260 msg_info("%s: %p", myname, (void *) wp); 261} 262 263/* watchdog_stop - disable watchdog timer */ 264 265void watchdog_stop(WATCHDOG *wp) 266{ 267 const char *myname = "watchdog_stop"; 268 269 if (wp != watchdog_curr) 270 msg_panic("%s: wrong watchdog instance", myname); 271 alarm(0); 272 if (msg_verbose > 1) 273 msg_info("%s: %p", myname, (void *) wp); 274} 275 276/* watchdog_pat - pat the dog so it stays quiet */ 277 278void watchdog_pat(void) 279{ 280 const char *myname = "watchdog_pat"; 281 282 if (watchdog_curr) 283 watchdog_curr->trip_run = 0; 284 if (msg_verbose > 1) 285 msg_info("%s: %p", myname, (void *) watchdog_curr); 286} 287 288#ifdef TEST 289 290#include <vstream.h> 291 292int main(int unused_argc, char **unused_argv) 293{ 294 WATCHDOG *wp; 295 296 msg_verbose = 2; 297 298 wp = watchdog_create(10, (WATCHDOG_FN) 0, (char *) 0); 299 watchdog_start(wp); 300 do { 301 watchdog_pat(); 302 } while (VSTREAM_GETCHAR() != VSTREAM_EOF); 303 watchdog_destroy(wp); 304 return (0); 305} 306 307#endif 308