watchdogd.c revision 165263
1116874Ssmkelly/*
2128898Ssmkelly * Copyright (c) 2003-2004  Sean M. Kelly <smkelly@FreeBSD.org>
3116874Ssmkelly * All rights reserved.
4116874Ssmkelly *
5116874Ssmkelly * Redistribution and use in source and binary forms, with or without
6116874Ssmkelly * modification, are permitted provided that the following conditions
7116874Ssmkelly * are met:
8116874Ssmkelly * 1. Redistributions of source code must retain the above copyright
9116874Ssmkelly *    notice, this list of conditions and the following disclaimer.
10116874Ssmkelly * 2. Redistributions in binary form must reproduce the above copyright
11116874Ssmkelly *    notice, this list of conditions and the following disclaimer in the
12116874Ssmkelly *    documentation and/or other materials provided with the distribution.
13116874Ssmkelly *
14116874Ssmkelly * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15116874Ssmkelly * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16116874Ssmkelly * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17116874Ssmkelly * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18116874Ssmkelly * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19116874Ssmkelly * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20116874Ssmkelly * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21116874Ssmkelly * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22116874Ssmkelly * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23116874Ssmkelly * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24116874Ssmkelly * SUCH DAMAGE.
25116874Ssmkelly */
26116874Ssmkelly
27116874Ssmkelly/*
28116874Ssmkelly * Software watchdog daemon.
29116874Ssmkelly */
30116874Ssmkelly
31117185Ssmkelly#include <sys/types.h>
32116874Ssmkelly__FBSDID("$FreeBSD: head/usr.sbin/watchdogd/watchdogd.c 165263 2006-12-15 22:47:36Z n_hibma $");
33116874Ssmkelly
34149434Spjd#include <sys/param.h>
35117185Ssmkelly#include <sys/rtprio.h>
36117185Ssmkelly#include <sys/stat.h>
37116874Ssmkelly#include <sys/time.h>
38126383Sphk#include <sys/watchdog.h>
39116874Ssmkelly
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <libutil.h>
#include <limits.h>
#include <math.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
52116874Ssmkelly
53116874Ssmkellystatic void	parseargs(int, char *[]);
54116874Ssmkellystatic void	sighandler(int);
55116874Ssmkellystatic void	watchdog_loop(void);
56116874Ssmkellystatic int	watchdog_init(void);
57116874Ssmkellystatic int	watchdog_onoff(int onoff);
58165263Sn_hibmastatic int	watchdog_patpat(u_int timeout);
59116874Ssmkellystatic void	usage(void);
60116874Ssmkelly
61116874Ssmkellyint debugging = 0;
62116874Ssmkellyint end_program = 0;
63116874Ssmkellyconst char *pidfile = _PATH_VARRUN "watchdogd.pid";
64116874Ssmkellyint reset_mib[3];
65116888Ssmkellysize_t reset_miblen = 3;
66126383Sphku_int timeout = WD_TO_16SEC;
67126383Sphku_int passive = 0;
68126383Sphkint is_daemon = 0;
69126383Sphkint fd = -1;
70126383Sphkint nap = 1;
71126383Sphkchar *test_cmd = NULL;
72116874Ssmkelly
73116874Ssmkelly/*
74128705Ssmkelly * Periodically pat the watchdog, preventing it from firing.
75116874Ssmkelly */
76116874Ssmkellyint
77116874Ssmkellymain(int argc, char *argv[])
78116874Ssmkelly{
79116874Ssmkelly	struct rtprio rtp;
80149434Spjd	struct pidfh *pfh;
81149434Spjd	pid_t otherpid;
82116874Ssmkelly
83116874Ssmkelly	if (getuid() != 0)
84116874Ssmkelly		errx(EX_SOFTWARE, "not super user");
85116874Ssmkelly
86116874Ssmkelly	parseargs(argc, argv);
87116874Ssmkelly
88116874Ssmkelly	rtp.type = RTP_PRIO_REALTIME;
89116874Ssmkelly	rtp.prio = 0;
90116874Ssmkelly	if (rtprio(RTP_SET, 0, &rtp) == -1)
91116874Ssmkelly		err(EX_OSERR, "rtprio");
92116874Ssmkelly
93116874Ssmkelly	if (watchdog_init() == -1)
94117185Ssmkelly		errx(EX_SOFTWARE, "unable to initialize watchdog");
95116874Ssmkelly
96126383Sphk	if (is_daemon) {
97126383Sphk		if (watchdog_onoff(1) == -1)
98126383Sphk			exit(EX_SOFTWARE);
99116874Ssmkelly
100150214Spjd		pfh = pidfile_open(pidfile, 0600, &otherpid);
101149434Spjd		if (pfh == NULL) {
102149434Spjd			if (errno == EEXIST) {
103149434Spjd				errx(EX_SOFTWARE, "%s already running, pid: %d",
104149434Spjd				    getprogname(), otherpid);
105149434Spjd			}
106149434Spjd			warn("Cannot open or create pidfile");
107149434Spjd		}
108149434Spjd
109126383Sphk		if (debugging == 0 && daemon(0, 0) == -1) {
110126383Sphk			watchdog_onoff(0);
111149434Spjd			pidfile_remove(pfh);
112126383Sphk			err(EX_OSERR, "daemon");
113126383Sphk		}
114116874Ssmkelly
115126383Sphk		signal(SIGHUP, SIG_IGN);
116126383Sphk		signal(SIGINT, sighandler);
117126383Sphk		signal(SIGTERM, sighandler);
118116874Ssmkelly
119149434Spjd		pidfile_write(pfh);
120116874Ssmkelly
121126383Sphk		watchdog_loop();
122116874Ssmkelly
123126383Sphk		/* exiting */
124149434Spjd		pidfile_remove(pfh);
125126383Sphk		return (EX_OK);
126126383Sphk	} else {
127126383Sphk		if (passive)
128126383Sphk			timeout |= WD_PASSIVE;
129126383Sphk		else
130126383Sphk			timeout |= WD_ACTIVE;
131165263Sn_hibma		if (watchdog_patpat(timeout) < 0)
132126383Sphk			err(EX_OSERR, "patting the dog");
133126383Sphk		return (EX_OK);
134126383Sphk	}
135116874Ssmkelly}
136116874Ssmkelly
137116874Ssmkelly/*
138116874Ssmkelly * Catch signals and begin shutdown process.
139116874Ssmkelly */
140116874Ssmkellystatic void
141116874Ssmkellysighandler(int signum)
142116874Ssmkelly{
143116874Ssmkelly
144116874Ssmkelly	if (signum == SIGINT || signum == SIGTERM)
145116874Ssmkelly		end_program = 1;
146116874Ssmkelly}
147116874Ssmkelly
148116874Ssmkelly/*
149128705Ssmkelly * Open the watchdog device.
150116874Ssmkelly */
151116874Ssmkellystatic int
152116874Ssmkellywatchdog_init()
153116874Ssmkelly{
154116874Ssmkelly
155126383Sphk	fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
156126383Sphk	if (fd >= 0)
157126383Sphk		return (0);
158126383Sphk	warn("Could not open watchdog device");
159126383Sphk	return (-1);
160116874Ssmkelly}
161116874Ssmkelly
162116874Ssmkelly/*
163116874Ssmkelly * Main program loop which is iterated every second.
164116874Ssmkelly */
165116874Ssmkellystatic void
166116874Ssmkellywatchdog_loop(void)
167116874Ssmkelly{
168116874Ssmkelly	struct stat sb;
169116874Ssmkelly	int failed;
170116874Ssmkelly
171165263Sn_hibma	while (end_program != 2) {
172116874Ssmkelly		failed = 0;
173116874Ssmkelly
174126383Sphk		if (test_cmd != NULL)
175126383Sphk			failed = system(test_cmd);
176126383Sphk		else
177126383Sphk			failed = stat("/etc", &sb);
178116874Ssmkelly
179116874Ssmkelly		if (failed == 0)
180165263Sn_hibma			watchdog_patpat(timeout|WD_ACTIVE);
181126383Sphk		sleep(nap);
182165263Sn_hibma
183165263Sn_hibma		if (end_program != 0) {
184165263Sn_hibma			if (watchdog_onoff(0) == 0) {
185165263Sn_hibma				end_program = 2;
186165263Sn_hibma			} else {
187165263Sn_hibma				warnx("Could not stop the watchdog, not exitting");
188165263Sn_hibma				end_program = 0;
189165263Sn_hibma			}
190165263Sn_hibma		}
191116874Ssmkelly	}
192116874Ssmkelly}
193116874Ssmkelly
194116874Ssmkelly/*
195116874Ssmkelly * Reset the watchdog timer. This function must be called periodically
196116874Ssmkelly * to keep the watchdog from firing.
197116874Ssmkelly */
198116874Ssmkellyint
199165263Sn_hibmawatchdog_patpat(u_int t)
200116874Ssmkelly{
201116874Ssmkelly
202165263Sn_hibma	return ioctl(fd, WDIOCPATPAT, &t);
203116874Ssmkelly}
204116874Ssmkelly
205116874Ssmkelly/*
206116874Ssmkelly * Toggle the kernel's watchdog. This routine is used to enable and
207116874Ssmkelly * disable the watchdog.
208116874Ssmkelly */
209116874Ssmkellystatic int
210116874Ssmkellywatchdog_onoff(int onoff)
211116874Ssmkelly{
212116874Ssmkelly
213126383Sphk	if (onoff)
214165263Sn_hibma		return watchdog_patpat((timeout|WD_ACTIVE));
215126383Sphk	else
216165263Sn_hibma		return watchdog_patpat(0);
217116874Ssmkelly}
218116874Ssmkelly
219116874Ssmkelly/*
220116874Ssmkelly * Tell user how to use the program.
221116874Ssmkelly */
222116874Ssmkellystatic void
223116874Ssmkellyusage()
224116874Ssmkelly{
225126383Sphk	if (is_daemon)
226165263Sn_hibma		fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n");
227126383Sphk	else
228156334Sphk		fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n");
229116874Ssmkelly	exit(EX_USAGE);
230116874Ssmkelly}
231116874Ssmkelly
232116874Ssmkelly/*
233116874Ssmkelly * Handle the few command line arguments supported.
234116874Ssmkelly */
235116874Ssmkellystatic void
236116874Ssmkellyparseargs(int argc, char *argv[])
237116874Ssmkelly{
238116874Ssmkelly	int c;
239126383Sphk	char *p;
240126383Sphk	double a;
241116874Ssmkelly
242126383Sphk	c = strlen(argv[0]);
243126383Sphk	if (argv[0][c - 1] == 'd')
244126383Sphk		is_daemon = 1;
245126383Sphk	while ((c = getopt(argc, argv,
246126383Sphk	    is_daemon ? "I:de:s:t:?" : "dt:?")) != -1) {
247116874Ssmkelly		switch (c) {
248116874Ssmkelly		case 'I':
249116874Ssmkelly			pidfile = optarg;
250116874Ssmkelly			break;
251116874Ssmkelly		case 'd':
252116874Ssmkelly			debugging = 1;
253116874Ssmkelly			break;
254126383Sphk		case 'e':
255126383Sphk			test_cmd = strdup(optarg);
256126383Sphk			break;
257126383Sphk#ifdef notyet
258126383Sphk		case 'p':
259126383Sphk			passive = 1;
260126383Sphk			break;
261126383Sphk#endif
262126383Sphk		case 's':
263126383Sphk			p = NULL;
264126383Sphk			errno = 0;
265126383Sphk			nap = strtol(optarg, &p, 0);
266126383Sphk			if ((p != NULL && *p != '\0') || errno != 0)
267126383Sphk				errx(EX_USAGE, "-s argument is not a number");
268126383Sphk			break;
269126383Sphk		case 't':
270126383Sphk			p = NULL;
271126383Sphk			errno = 0;
272126383Sphk			a = strtod(optarg, &p);
273126383Sphk			if ((p != NULL && *p != '\0') || errno != 0)
274126383Sphk				errx(EX_USAGE, "-t argument is not a number");
275126383Sphk			if (a < 0)
276126383Sphk				errx(EX_USAGE, "-t argument must be positive");
277126383Sphk			if (a == 0)
278126383Sphk				timeout = WD_TO_NEVER;
279126383Sphk			else
280126383Sphk				timeout = 1.0 + log(a * 1e9) / log(2.0);
281126383Sphk			if (debugging)
282126383Sphk				printf("Timeout is 2^%d nanoseconds\n",
283126383Sphk				    timeout);
284126383Sphk			break;
285116874Ssmkelly		case '?':
286116874Ssmkelly		default:
287116874Ssmkelly			usage();
288116874Ssmkelly			/* NOTREACHED */
289116874Ssmkelly		}
290116874Ssmkelly	}
291150747Sphk	if (argc != optind)
292150747Sphk		errx(EX_USAGE, "extra arguments.");
293126383Sphk	if (is_daemon && timeout < WD_TO_1SEC)
294126383Sphk		errx(EX_USAGE, "-t argument is less than one second.");
295116874Ssmkelly}
296