watchdogd.c revision 156334
1116874Ssmkelly/*
2128898Ssmkelly * Copyright (c) 2003-2004  Sean M. Kelly <smkelly@FreeBSD.org>
3116874Ssmkelly * All rights reserved.
4116874Ssmkelly *
5116874Ssmkelly * Redistribution and use in source and binary forms, with or without
6116874Ssmkelly * modification, are permitted provided that the following conditions
7116874Ssmkelly * are met:
8116874Ssmkelly * 1. Redistributions of source code must retain the above copyright
9116874Ssmkelly *    notice, this list of conditions and the following disclaimer.
10116874Ssmkelly * 2. Redistributions in binary form must reproduce the above copyright
11116874Ssmkelly *    notice, this list of conditions and the following disclaimer in the
12116874Ssmkelly *    documentation and/or other materials provided with the distribution.
13116874Ssmkelly *
14116874Ssmkelly * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15116874Ssmkelly * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16116874Ssmkelly * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17116874Ssmkelly * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18116874Ssmkelly * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19116874Ssmkelly * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20116874Ssmkelly * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21116874Ssmkelly * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22116874Ssmkelly * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23116874Ssmkelly * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24116874Ssmkelly * SUCH DAMAGE.
25116874Ssmkelly */
26116874Ssmkelly
27116874Ssmkelly/*
28116874Ssmkelly * Software watchdog daemon.
29116874Ssmkelly */
30116874Ssmkelly
#include <sys/types.h>
__FBSDID("$FreeBSD: head/usr.sbin/watchdogd/watchdogd.c 156334 2006-03-06 07:42:52Z phk $");

#include <sys/param.h>
#include <sys/rtprio.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/watchdog.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <libutil.h>
#include <limits.h>
#include <math.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
52116874Ssmkelly
/* Forward declarations of the file-local helpers defined further down. */
static void	usage(void);
static void	parseargs(int argc, char *argv[]);
static void	sighandler(int signum);
static int	watchdog_init(void);
static int	watchdog_onoff(int onoff);
static int	watchdog_patpat(void);
static void	watchdog_loop(void);
60116874Ssmkelly
/* Set by -d: main() skips daemon() and parseargs() prints the timeout. */
int debugging = 0;
/*
 * Raised by sighandler() and polled by watchdog_loop().  It is written
 * from a signal handler, hence volatile sig_atomic_t rather than plain int.
 */
volatile sig_atomic_t end_program = 0;
/* Path handed to pidfile_open(); -I replaces the default. */
const char *pidfile = _PATH_VARRUN "watchdogd.pid";
/* Not referenced by any code visible in this file. */
int reset_mib[3];
size_t reset_miblen = 3;
/* Argument of the WDIOCPATPAT ioctl: interval value plus WD_* flag bits. */
u_int timeout = WD_TO_16SEC;
/* Selects WD_PASSIVE over WD_ACTIVE in one-shot mode (only set by -p). */
u_int passive = 0;
/* Nonzero when the program name ends in 'd' (see parseargs()). */
int is_daemon = 0;
/* Descriptor of the watchdog device, opened by watchdog_init(). */
int fd = -1;
/* Seconds slept between iterations of watchdog_loop(); set by -s. */
int nap = 1;
/* Health-check command run through system(); NULL means stat("/etc"). */
char *test_cmd = NULL;
72116874Ssmkelly
/*
 * Program entry point.
 *
 * After checking for uid 0 and parsing the command line, the process
 * requests realtime scheduling and opens the watchdog device.
 *
 * One-shot mode (is_daemon == 0): OR either WD_PASSIVE or WD_ACTIVE into
 * the timeout, pat the watchdog once and exit.
 *
 * Daemon mode (is_daemon != 0): enable the watchdog, take the pidfile,
 * detach unless -d was given, then pat the watchdog from watchdog_loop()
 * until SIGINT or SIGTERM arrives.  watchdog_onoff(0) is called and the
 * pidfile removed on the way out.
 *
 * Returns EX_OK; every failure path terminates through err()/errx().
 */
int
main(int argc, char *argv[])
{
	struct rtprio rtp;
	struct pidfh *pfh;
	pid_t otherpid;

	if (getuid() != 0)
		errx(EX_SOFTWARE, "not super user");

	parseargs(argc, argv);

	/* Ask for the realtime scheduling class at priority 0. */
	rtp.type = RTP_PRIO_REALTIME;
	rtp.prio = 0;
	if (rtprio(RTP_SET, 0, &rtp) == -1)
		err(EX_OSERR, "rtprio");

	if (watchdog_init() == -1)
		errx(EX_SOFTWARE, "unable to initialize watchdog");

	if (!is_daemon) {
		/* One-shot mode: a single pat carrying the chosen flag. */
		if (passive)
			timeout |= WD_PASSIVE;
		else
			timeout |= WD_ACTIVE;
		if (watchdog_patpat() < 0)
			err(EX_OSERR, "patting the dog");
		return (EX_OK);
	}

	/*
	 * Previously this failure exited silently.  errno still holds the
	 * ioctl error here, so report it; the exit status is unchanged.
	 */
	if (watchdog_onoff(1) == -1)
		err(EX_SOFTWARE, "unable to enable watchdog");

	pfh = pidfile_open(pidfile, 0600, &otherpid);
	if (pfh == NULL) {
		if (errno == EEXIST) {
			errx(EX_SOFTWARE, "%s already running, pid: %d",
			    getprogname(), (int)otherpid);
		}
		/*
		 * Any other pidfile failure is only a warning.  pfh stays
		 * NULL; the later pidfile_write()/pidfile_remove() calls
		 * are expected to tolerate that -- see pidfile(3).
		 */
		warn("Cannot open or create pidfile");
	}

	if (debugging == 0 && daemon(0, 0) == -1) {
		/* Undo what was set up above before bailing out. */
		watchdog_onoff(0);
		pidfile_remove(pfh);
		err(EX_OSERR, "daemon");
	}

	signal(SIGHUP, SIG_IGN);
	signal(SIGINT, sighandler);
	signal(SIGTERM, sighandler);

	/* Written after daemon() so the file records the final pid. */
	pidfile_write(pfh);

	watchdog_loop();

	/* exiting */
	watchdog_onoff(0);
	pidfile_remove(pfh);
	return (EX_OK);
}
137116874Ssmkelly
138116874Ssmkelly/*
139116874Ssmkelly * Catch signals and begin shutdown process.
140116874Ssmkelly */
141116874Ssmkellystatic void
142116874Ssmkellysighandler(int signum)
143116874Ssmkelly{
144116874Ssmkelly
145116874Ssmkelly	if (signum == SIGINT || signum == SIGTERM)
146116874Ssmkelly		end_program = 1;
147116874Ssmkelly}
148116874Ssmkelly
/*
 * Open the watchdog device read/write and store the descriptor in the
 * global fd.
 *
 * Returns 0 on success.  On failure a warning is printed and -1 is
 * returned; fd then holds the failed open() result.
 */
static int
watchdog_init(void)
{

	fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
	if (fd < 0) {
		warn("Could not open watchdog device");
		return (-1);
	}
	return (0);
}
162116874Ssmkelly
163116874Ssmkelly/*
164116874Ssmkelly * Main program loop which is iterated every second.
165116874Ssmkelly */
166116874Ssmkellystatic void
167116874Ssmkellywatchdog_loop(void)
168116874Ssmkelly{
169116874Ssmkelly	struct stat sb;
170116874Ssmkelly	int failed;
171116874Ssmkelly
172116874Ssmkelly	while (end_program == 0) {
173116874Ssmkelly		failed = 0;
174116874Ssmkelly
175126383Sphk		if (test_cmd != NULL)
176126383Sphk			failed = system(test_cmd);
177126383Sphk		else
178126383Sphk			failed = stat("/etc", &sb);
179116874Ssmkelly
180116874Ssmkelly		if (failed == 0)
181126383Sphk			watchdog_patpat();
182126383Sphk		sleep(nap);
183116874Ssmkelly	}
184116874Ssmkelly}
185116874Ssmkelly
/*
 * Reset the watchdog timer by issuing WDIOCPATPAT on the watchdog
 * descriptor with the current global timeout.  This function must be
 * called periodically to keep the watchdog from firing.
 *
 * Returns the result of ioctl().
 */
static int
watchdog_patpat(void)
{

	return (ioctl(fd, WDIOCPATPAT, &timeout));
}
196116874Ssmkelly
/*
 * Switch the kernel watchdog on or off.  A nonzero onoff sets WD_ACTIVE
 * in the global timeout, zero clears it; the updated value is then sent
 * with watchdog_patpat(), whose result is returned.
 */
static int
watchdog_onoff(int onoff)
{

	timeout = onoff ? (timeout | WD_ACTIVE) : (timeout & ~WD_ACTIVE);
	return (watchdog_patpat());
}
211116874Ssmkelly
212116874Ssmkelly/*
213116874Ssmkelly * Tell user how to use the program.
214116874Ssmkelly */
215116874Ssmkellystatic void
216116874Ssmkellyusage()
217116874Ssmkelly{
218126383Sphk	if (is_daemon)
219126383Sphk		fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file]\n");
220126383Sphk	else
221156334Sphk		fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n");
222116874Ssmkelly	exit(EX_USAGE);
223116874Ssmkelly}
224116874Ssmkelly
/*
 * Parse the command line into the file-level settings.
 *
 * The personality is taken from argv[0]: a name ending in 'd' selects
 * daemon mode, which accepts -I, -d, -e, -s and -t; otherwise only -d
 * and -t are accepted.
 *
 *   -I file     pidfile path
 *   -d          debugging (must precede -t for the timeout to be printed)
 *   -e cmd      command used as the health check
 *   -s seconds  sleep between checks; non-negative integer
 *   -t seconds  timeout; 0 selects WD_TO_NEVER, otherwise the value is
 *               converted to the power-of-two nanosecond exponent
 *
 * Any invalid option or value terminates the program with EX_USAGE
 * (EX_OSERR if the -e string cannot be copied).
 */
static void
parseargs(int argc, char *argv[])
{
	const char *optstr;
	char *p;
	size_t len;
	double a, exponent;
	long val;
	int c;

	/* argv[0] may be missing or empty; never index before its start. */
	if (argc > 0 && argv[0] != NULL) {
		len = strlen(argv[0]);
		if (len > 0 && argv[0][len - 1] == 'd')
			is_daemon = 1;
	}
	optstr = is_daemon ? "I:de:s:t:?" : "dt:?";

	while ((c = getopt(argc, argv, optstr)) != -1) {
		switch (c) {
		case 'I':
			pidfile = optarg;
			break;
		case 'd':
			debugging = 1;
			break;
		case 'e':
			/*
			 * A failed copy would otherwise silently fall back
			 * to the default stat("/etc") check.
			 */
			test_cmd = strdup(optarg);
			if (test_cmd == NULL)
				err(EX_OSERR, "strdup");
			break;
#ifdef notyet
		case 'p':
			passive = 1;
			break;
#endif
		case 's':
			p = NULL;
			errno = 0;
			val = strtol(optarg, &p, 0);
			/* p == optarg catches an empty or digit-less string. */
			if (p == optarg || *p != '\0' || errno != 0)
				errx(EX_USAGE, "-s argument is not a number");
			/* nap is an int handed to sleep(); keep it sane. */
			if (val < 0 || val > INT_MAX)
				errx(EX_USAGE, "-s argument is out of range");
			nap = (int)val;
			break;
		case 't':
			p = NULL;
			errno = 0;
			a = strtod(optarg, &p);
			/* strtod() also parses "nan" and "inf"; reject them. */
			if (p == optarg || *p != '\0' || errno != 0 ||
			    !isfinite(a))
				errx(EX_USAGE, "-t argument is not a number");
			if (a < 0)
				errx(EX_USAGE, "-t argument must be positive");
			if (a == 0)
				timeout = WD_TO_NEVER;
			else {
				/*
				 * Range-check as a double first: converting
				 * an out-of-range double to u_int is
				 * undefined.  WD_INTERVAL is taken to be
				 * the mask of the interval field from
				 * <sys/watchdog.h>.
				 */
				exponent = 1.0 + log(a * 1e9) / log(2.0);
				if (exponent < 1.0 || exponent > WD_INTERVAL)
					errx(EX_USAGE,
					    "-t argument is out of range");
				timeout = (u_int)exponent;
			}
			if (debugging)
				printf("Timeout is 2^%u nanoseconds\n",
				    timeout);
			break;
		case '?':
		default:
			usage();
			/* NOTREACHED */
		}
	}
	if (argc != optind)
		errx(EX_USAGE, "extra arguments.");
	if (is_daemon && timeout < WD_TO_1SEC)
		errx(EX_USAGE, "-t argument is less than one second.");
}
289