1/*++
2/* NAME
3/*	watchdog 3
4/* SUMMARY
5/*	watchdog timer
6/* SYNOPSIS
7/*	#include <watchdog.h>
8/*
9/*	WATCHDOG *watchdog_create(timeout, action, context)
10/*	unsigned timeout;
11/*	void	(*action)(WATCHDOG *watchdog, char *context);
12/*	char	*context;
13/*
14/*	void	watchdog_start(watchdog)
15/*	WATCHDOG *watchdog;
16/*
17/*	void	watchdog_stop(watchdog)
18/*	WATCHDOG *watchdog;
19/*
20/*	void	watchdog_destroy(watchdog)
21/*	WATCHDOG *watchdog;
22/*
23/*	void	watchdog_pat()
24/* DESCRIPTION
25/*	This module implements watchdog timers that are based on ugly
26/*	UNIX alarm timers. The module is designed to survive systems
27/*	with clocks that jump occasionally.
28/*
29/*	Watchdog timers can be stacked. Only one watchdog timer can be
30/*	active at a time. Only the last created watchdog timer can be
31/*	manipulated. Watchdog timers must be destroyed in reverse order
32/*	of creation.
33/*
34/*	watchdog_create() suspends the current watchdog timer, if any,
35/*	and instantiates a new watchdog timer.
36/*
37/*	watchdog_start() starts or restarts the watchdog timer.
38/*
39/*	watchdog_stop() stops the watchdog timer.
40/*
41/*	watchdog_destroy() stops the watchdog timer, and resumes the
42/*	watchdog timer instance that was suspended by watchdog_create().
43/*
44/*	watchdog_pat() pats the watchdog, so it stays quiet.
45/*
46/*	Arguments:
47/* .IP timeout
48/*	The watchdog time limit. When the watchdog timer runs, the
49/*	process must invoke watchdog_start(), watchdog_stop() or
50/*	watchdog_destroy() before the time limit is reached.
51/* .IP action
52/*	A null pointer, or pointer to function that is called when the
53/*	watchdog alarm goes off. The default action is to terminate
54/*	the process with a fatal error.
55/* .IP context
56/*	Application context that is passed to the action routine.
57/* .IP watchdog
58/*	Must be a pointer to the most recently created watchdog instance.
59/*	This argument is checked upon each call.
60/* BUGS
61/*	UNIX alarm timers are not stackable, so there can be at most one
62/*	watchdog instance active at any given time.
63/* SEE ALSO
64/*	msg(3) diagnostics interface
65/* DIAGNOSTICS
66/*	Fatal errors: memory allocation problem, system call failure.
67/*	Panics: interface violations.
68/* LICENSE
69/* .ad
70/* .fi
71/*	The Secure Mailer license must be distributed with this software.
72/* AUTHOR(S)
73/*	Wietse Venema
74/*	IBM T.J. Watson Research
75/*	P.O. Box 704
76/*	Yorktown Heights, NY 10598, USA
77/*--*/
78
79/* System library. */
80
81#include <sys_defs.h>
82#include <unistd.h>
83#include <signal.h>
84#include <posix_signals.h>
85
86/* Utility library. */
87
88#include <msg.h>
89#include <mymalloc.h>
90#include <killme_after.h>
91#include <watchdog.h>
92
93/* Application-specific. */
94
95 /*
96  * Rather than having one timer that goes off when it is too late, we break
97  * up the time limit into smaller intervals so that we can deal with clocks
98  * that jump occasionally.
99  */
#define WATCHDOG_STEPS	3		/* alarm intervals per time limit */
101
102 /*
103  * UNIX alarms are not stackable, but we can save and restore state, so that
104  * watchdogs can at least be nested, sort of.
105  */
struct WATCHDOG {
    unsigned timeout;			/* alarm interval: limit / WATCHDOG_STEPS */
    WATCHDOG_FN action;			/* application routine, null for default */
    char   *context;			/* application context, passed to action */
    int     trip_run;			/* successive alarms since start/pat */
    WATCHDOG *saved_watchdog;		/* instance that was current at create */
    struct sigaction saved_action;	/* SIGALRM disposition before create */
    unsigned saved_time;		/* alarm(0) result at create time */
};
115
116 /*
117  * However, only one watchdog instance can be current, and the caller has to
118  * restore state before a prior watchdog instance can be manipulated.
119  */
/* The instance last created and not yet destroyed; null when none exists. */
static WATCHDOG *watchdog_curr;
121
122 /*
  * Workaround for systems where the alarm signal does not wake up the event
124  * machinery, and therefore does not restart the watchdog timer in the
125  * single_server etc. skeletons. The symptom is that programs abort when the
126  * watchdog timeout is less than the max_idle time.
127  */
128#ifdef USE_WATCHDOG_PIPE
129#include <errno.h>
130#include <iostuff.h>
131#include <events.h>
132
/* Wakeup pipe: [0] is drained by watchdog_read(), [1] is written by watchdog_event(). */
static int watchdog_pipe[2];
134
135/* watchdog_read - read event pipe */
136
137static void watchdog_read(int unused_event, char *unused_context)
138{
139    char    ch;
140
141    while (read(watchdog_pipe[0], &ch, 1) > 0)
142	 /* void */ ;
143}
144
145#endif					/* USE_WATCHDOG_PIPE */
146
147/* watchdog_event - handle timeout event */
148
149static void watchdog_event(int unused_sig)
150{
151    const char *myname = "watchdog_event";
152    WATCHDOG *wp;
153
154    /*
155     * This routine runs as a signal handler. We should not do anything that
156     * could involve memory allocation/deallocation, but exiting without
157     * proper explanation would be unacceptable. For this reason, msg(3) was
158     * made safe for usage by signal handlers that terminate the process.
159     */
160    if ((wp = watchdog_curr) == 0)
161	msg_panic("%s: no instance", myname);
162    if (msg_verbose > 1)
163	msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
164    if (++(wp->trip_run) < WATCHDOG_STEPS) {
165#ifdef USE_WATCHDOG_PIPE
166	int     saved_errno = errno;
167
168	/* Wake up the events(3) engine. */
169	if (write(watchdog_pipe[1], "", 1) != 1)
170	    msg_warn("%s: write watchdog_pipe: %m", myname);
171	errno = saved_errno;
172#endif
173	alarm(wp->timeout);
174    } else {
175	if (wp->action)
176	    wp->action(wp, wp->context);
177	else {
178	    killme_after(5);
179#ifdef TEST
180	    pause();
181#endif
182	    msg_fatal("watchdog timeout");
183	}
184    }
185}
186
187/* watchdog_create - create watchdog instance */
188
189WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
190{
191    const char *myname = "watchdog_create";
192    struct sigaction sig_action;
193    WATCHDOG *wp;
194
195    wp = (WATCHDOG *) mymalloc(sizeof(*wp));
196    if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
197	msg_panic("%s: timeout %d is too small", myname, timeout);
198    wp->action = action;
199    wp->context = context;
200    wp->saved_watchdog = watchdog_curr;
201    wp->saved_time = alarm(0);
202    sigemptyset(&sig_action.sa_mask);
203#ifdef SA_RESTART
204    sig_action.sa_flags = SA_RESTART;
205#else
206    sig_action.sa_flags = 0;
207#endif
208    sig_action.sa_handler = watchdog_event;
209    if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
210	msg_fatal("%s: sigaction(SIGALRM): %m", myname);
211    if (msg_verbose > 1)
212	msg_info("%s: %p %d", myname, (void *) wp, timeout);
213#ifdef USE_WATCHDOG_PIPE
214    if (watchdog_curr == 0) {
215	if (pipe(watchdog_pipe) < 0)
216	    msg_fatal("%s: pipe: %m", myname);
217	non_blocking(watchdog_pipe[0], NON_BLOCKING);
218	non_blocking(watchdog_pipe[1], NON_BLOCKING);
219	event_enable_read(watchdog_pipe[0], watchdog_read, (char *) 0);
220    }
221#endif
222    return (watchdog_curr = wp);
223}
224
225/* watchdog_destroy - destroy watchdog instance, restore state */
226
227void    watchdog_destroy(WATCHDOG *wp)
228{
229    const char *myname = "watchdog_destroy";
230
231    watchdog_stop(wp);
232    watchdog_curr = wp->saved_watchdog;
233    if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
234	msg_fatal("%s: sigaction(SIGALRM): %m", myname);
235    if (wp->saved_time)
236	alarm(wp->saved_time);
237    myfree((char *) wp);
238#ifdef USE_WATCHDOG_PIPE
239    if (watchdog_curr == 0) {
240	event_disable_readwrite(watchdog_pipe[0]);
241	(void) close(watchdog_pipe[0]);
242	(void) close(watchdog_pipe[1]);
243    }
244#endif
245    if (msg_verbose > 1)
246	msg_info("%s: %p", myname, (void *) wp);
247}
248
249/* watchdog_start - enable watchdog timer */
250
251void    watchdog_start(WATCHDOG *wp)
252{
253    const char *myname = "watchdog_start";
254
255    if (wp != watchdog_curr)
256	msg_panic("%s: wrong watchdog instance", myname);
257    wp->trip_run = 0;
258    alarm(wp->timeout);
259    if (msg_verbose > 1)
260	msg_info("%s: %p", myname, (void *) wp);
261}
262
263/* watchdog_stop - disable watchdog timer */
264
265void    watchdog_stop(WATCHDOG *wp)
266{
267    const char *myname = "watchdog_stop";
268
269    if (wp != watchdog_curr)
270	msg_panic("%s: wrong watchdog instance", myname);
271    alarm(0);
272    if (msg_verbose > 1)
273	msg_info("%s: %p", myname, (void *) wp);
274}
275
276/* watchdog_pat - pat the dog so it stays quiet */
277
278void    watchdog_pat(void)
279{
280    const char *myname = "watchdog_pat";
281
282    if (watchdog_curr)
283	watchdog_curr->trip_run = 0;
284    if (msg_verbose > 1)
285	msg_info("%s: %p", myname, (void *) watchdog_curr);
286}
287
288#ifdef TEST
289
290#include <vstream.h>
291
292int     main(int unused_argc, char **unused_argv)
293{
294    WATCHDOG *wp;
295
296    msg_verbose = 2;
297
298    wp = watchdog_create(10, (WATCHDOG_FN) 0, (char *) 0);
299    watchdog_start(wp);
300    do {
301	watchdog_pat();
302    } while (VSTREAM_GETCHAR() != VSTREAM_EOF);
303    watchdog_destroy(wp);
304    return (0);
305}
306
307#endif
308