/* watchdogd.c revision 165263 */
1116874Ssmkelly/* 2128898Ssmkelly * Copyright (c) 2003-2004 Sean M. Kelly <smkelly@FreeBSD.org> 3116874Ssmkelly * All rights reserved. 4116874Ssmkelly * 5116874Ssmkelly * Redistribution and use in source and binary forms, with or without 6116874Ssmkelly * modification, are permitted provided that the following conditions 7116874Ssmkelly * are met: 8116874Ssmkelly * 1. Redistributions of source code must retain the above copyright 9116874Ssmkelly * notice, this list of conditions and the following disclaimer. 10116874Ssmkelly * 2. Redistributions in binary form must reproduce the above copyright 11116874Ssmkelly * notice, this list of conditions and the following disclaimer in the 12116874Ssmkelly * documentation and/or other materials provided with the distribution. 13116874Ssmkelly * 14116874Ssmkelly * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15116874Ssmkelly * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16116874Ssmkelly * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17116874Ssmkelly * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18116874Ssmkelly * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19116874Ssmkelly * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20116874Ssmkelly * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21116874Ssmkelly * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22116874Ssmkelly * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23116874Ssmkelly * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24116874Ssmkelly * SUCH DAMAGE. 25116874Ssmkelly */ 26116874Ssmkelly 27116874Ssmkelly/* 28116874Ssmkelly * Software watchdog daemon. 
29116874Ssmkelly */ 30116874Ssmkelly 31117185Ssmkelly#include <sys/types.h> 32116874Ssmkelly__FBSDID("$FreeBSD: head/usr.sbin/watchdogd/watchdogd.c 165263 2006-12-15 22:47:36Z n_hibma $"); 33116874Ssmkelly 34149434Spjd#include <sys/param.h> 35117185Ssmkelly#include <sys/rtprio.h> 36117185Ssmkelly#include <sys/stat.h> 37116874Ssmkelly#include <sys/time.h> 38126383Sphk#include <sys/watchdog.h> 39116874Ssmkelly 40116874Ssmkelly#include <err.h> 41126383Sphk#include <errno.h> 42126383Sphk#include <fcntl.h> 43149434Spjd#include <libutil.h> 44126383Sphk#include <math.h> 45116874Ssmkelly#include <paths.h> 46117185Ssmkelly#include <signal.h> 47116874Ssmkelly#include <stdio.h> 48116874Ssmkelly#include <stdlib.h> 49126383Sphk#include <string.h> 50116874Ssmkelly#include <sysexits.h> 51116874Ssmkelly#include <unistd.h> 52116874Ssmkelly 53116874Ssmkellystatic void parseargs(int, char *[]); 54116874Ssmkellystatic void sighandler(int); 55116874Ssmkellystatic void watchdog_loop(void); 56116874Ssmkellystatic int watchdog_init(void); 57116874Ssmkellystatic int watchdog_onoff(int onoff); 58165263Sn_hibmastatic int watchdog_patpat(u_int timeout); 59116874Ssmkellystatic void usage(void); 60116874Ssmkelly 61116874Ssmkellyint debugging = 0; 62116874Ssmkellyint end_program = 0; 63116874Ssmkellyconst char *pidfile = _PATH_VARRUN "watchdogd.pid"; 64116874Ssmkellyint reset_mib[3]; 65116888Ssmkellysize_t reset_miblen = 3; 66126383Sphku_int timeout = WD_TO_16SEC; 67126383Sphku_int passive = 0; 68126383Sphkint is_daemon = 0; 69126383Sphkint fd = -1; 70126383Sphkint nap = 1; 71126383Sphkchar *test_cmd = NULL; 72116874Ssmkelly 73116874Ssmkelly/* 74128705Ssmkelly * Periodically pat the watchdog, preventing it from firing. 
75116874Ssmkelly */ 76116874Ssmkellyint 77116874Ssmkellymain(int argc, char *argv[]) 78116874Ssmkelly{ 79116874Ssmkelly struct rtprio rtp; 80149434Spjd struct pidfh *pfh; 81149434Spjd pid_t otherpid; 82116874Ssmkelly 83116874Ssmkelly if (getuid() != 0) 84116874Ssmkelly errx(EX_SOFTWARE, "not super user"); 85116874Ssmkelly 86116874Ssmkelly parseargs(argc, argv); 87116874Ssmkelly 88116874Ssmkelly rtp.type = RTP_PRIO_REALTIME; 89116874Ssmkelly rtp.prio = 0; 90116874Ssmkelly if (rtprio(RTP_SET, 0, &rtp) == -1) 91116874Ssmkelly err(EX_OSERR, "rtprio"); 92116874Ssmkelly 93116874Ssmkelly if (watchdog_init() == -1) 94117185Ssmkelly errx(EX_SOFTWARE, "unable to initialize watchdog"); 95116874Ssmkelly 96126383Sphk if (is_daemon) { 97126383Sphk if (watchdog_onoff(1) == -1) 98126383Sphk exit(EX_SOFTWARE); 99116874Ssmkelly 100150214Spjd pfh = pidfile_open(pidfile, 0600, &otherpid); 101149434Spjd if (pfh == NULL) { 102149434Spjd if (errno == EEXIST) { 103149434Spjd errx(EX_SOFTWARE, "%s already running, pid: %d", 104149434Spjd getprogname(), otherpid); 105149434Spjd } 106149434Spjd warn("Cannot open or create pidfile"); 107149434Spjd } 108149434Spjd 109126383Sphk if (debugging == 0 && daemon(0, 0) == -1) { 110126383Sphk watchdog_onoff(0); 111149434Spjd pidfile_remove(pfh); 112126383Sphk err(EX_OSERR, "daemon"); 113126383Sphk } 114116874Ssmkelly 115126383Sphk signal(SIGHUP, SIG_IGN); 116126383Sphk signal(SIGINT, sighandler); 117126383Sphk signal(SIGTERM, sighandler); 118116874Ssmkelly 119149434Spjd pidfile_write(pfh); 120116874Ssmkelly 121126383Sphk watchdog_loop(); 122116874Ssmkelly 123126383Sphk /* exiting */ 124149434Spjd pidfile_remove(pfh); 125126383Sphk return (EX_OK); 126126383Sphk } else { 127126383Sphk if (passive) 128126383Sphk timeout |= WD_PASSIVE; 129126383Sphk else 130126383Sphk timeout |= WD_ACTIVE; 131165263Sn_hibma if (watchdog_patpat(timeout) < 0) 132126383Sphk err(EX_OSERR, "patting the dog"); 133126383Sphk return (EX_OK); 134126383Sphk } 135116874Ssmkelly} 
136116874Ssmkelly 137116874Ssmkelly/* 138116874Ssmkelly * Catch signals and begin shutdown process. 139116874Ssmkelly */ 140116874Ssmkellystatic void 141116874Ssmkellysighandler(int signum) 142116874Ssmkelly{ 143116874Ssmkelly 144116874Ssmkelly if (signum == SIGINT || signum == SIGTERM) 145116874Ssmkelly end_program = 1; 146116874Ssmkelly} 147116874Ssmkelly 148116874Ssmkelly/* 149128705Ssmkelly * Open the watchdog device. 150116874Ssmkelly */ 151116874Ssmkellystatic int 152116874Ssmkellywatchdog_init() 153116874Ssmkelly{ 154116874Ssmkelly 155126383Sphk fd = open("/dev/" _PATH_WATCHDOG, O_RDWR); 156126383Sphk if (fd >= 0) 157126383Sphk return (0); 158126383Sphk warn("Could not open watchdog device"); 159126383Sphk return (-1); 160116874Ssmkelly} 161116874Ssmkelly 162116874Ssmkelly/* 163116874Ssmkelly * Main program loop which is iterated every second. 164116874Ssmkelly */ 165116874Ssmkellystatic void 166116874Ssmkellywatchdog_loop(void) 167116874Ssmkelly{ 168116874Ssmkelly struct stat sb; 169116874Ssmkelly int failed; 170116874Ssmkelly 171165263Sn_hibma while (end_program != 2) { 172116874Ssmkelly failed = 0; 173116874Ssmkelly 174126383Sphk if (test_cmd != NULL) 175126383Sphk failed = system(test_cmd); 176126383Sphk else 177126383Sphk failed = stat("/etc", &sb); 178116874Ssmkelly 179116874Ssmkelly if (failed == 0) 180165263Sn_hibma watchdog_patpat(timeout|WD_ACTIVE); 181126383Sphk sleep(nap); 182165263Sn_hibma 183165263Sn_hibma if (end_program != 0) { 184165263Sn_hibma if (watchdog_onoff(0) == 0) { 185165263Sn_hibma end_program = 2; 186165263Sn_hibma } else { 187165263Sn_hibma warnx("Could not stop the watchdog, not exitting"); 188165263Sn_hibma end_program = 0; 189165263Sn_hibma } 190165263Sn_hibma } 191116874Ssmkelly } 192116874Ssmkelly} 193116874Ssmkelly 194116874Ssmkelly/* 195116874Ssmkelly * Reset the watchdog timer. This function must be called periodically 196116874Ssmkelly * to keep the watchdog from firing. 
197116874Ssmkelly */ 198116874Ssmkellyint 199165263Sn_hibmawatchdog_patpat(u_int t) 200116874Ssmkelly{ 201116874Ssmkelly 202165263Sn_hibma return ioctl(fd, WDIOCPATPAT, &t); 203116874Ssmkelly} 204116874Ssmkelly 205116874Ssmkelly/* 206116874Ssmkelly * Toggle the kernel's watchdog. This routine is used to enable and 207116874Ssmkelly * disable the watchdog. 208116874Ssmkelly */ 209116874Ssmkellystatic int 210116874Ssmkellywatchdog_onoff(int onoff) 211116874Ssmkelly{ 212116874Ssmkelly 213126383Sphk if (onoff) 214165263Sn_hibma return watchdog_patpat((timeout|WD_ACTIVE)); 215126383Sphk else 216165263Sn_hibma return watchdog_patpat(0); 217116874Ssmkelly} 218116874Ssmkelly 219116874Ssmkelly/* 220116874Ssmkelly * Tell user how to use the program. 221116874Ssmkelly */ 222116874Ssmkellystatic void 223116874Ssmkellyusage() 224116874Ssmkelly{ 225126383Sphk if (is_daemon) 226165263Sn_hibma fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n"); 227126383Sphk else 228156334Sphk fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n"); 229116874Ssmkelly exit(EX_USAGE); 230116874Ssmkelly} 231116874Ssmkelly 232116874Ssmkelly/* 233116874Ssmkelly * Handle the few command line arguments supported. 234116874Ssmkelly */ 235116874Ssmkellystatic void 236116874Ssmkellyparseargs(int argc, char *argv[]) 237116874Ssmkelly{ 238116874Ssmkelly int c; 239126383Sphk char *p; 240126383Sphk double a; 241116874Ssmkelly 242126383Sphk c = strlen(argv[0]); 243126383Sphk if (argv[0][c - 1] == 'd') 244126383Sphk is_daemon = 1; 245126383Sphk while ((c = getopt(argc, argv, 246126383Sphk is_daemon ? "I:de:s:t:?" 
: "dt:?")) != -1) { 247116874Ssmkelly switch (c) { 248116874Ssmkelly case 'I': 249116874Ssmkelly pidfile = optarg; 250116874Ssmkelly break; 251116874Ssmkelly case 'd': 252116874Ssmkelly debugging = 1; 253116874Ssmkelly break; 254126383Sphk case 'e': 255126383Sphk test_cmd = strdup(optarg); 256126383Sphk break; 257126383Sphk#ifdef notyet 258126383Sphk case 'p': 259126383Sphk passive = 1; 260126383Sphk break; 261126383Sphk#endif 262126383Sphk case 's': 263126383Sphk p = NULL; 264126383Sphk errno = 0; 265126383Sphk nap = strtol(optarg, &p, 0); 266126383Sphk if ((p != NULL && *p != '\0') || errno != 0) 267126383Sphk errx(EX_USAGE, "-s argument is not a number"); 268126383Sphk break; 269126383Sphk case 't': 270126383Sphk p = NULL; 271126383Sphk errno = 0; 272126383Sphk a = strtod(optarg, &p); 273126383Sphk if ((p != NULL && *p != '\0') || errno != 0) 274126383Sphk errx(EX_USAGE, "-t argument is not a number"); 275126383Sphk if (a < 0) 276126383Sphk errx(EX_USAGE, "-t argument must be positive"); 277126383Sphk if (a == 0) 278126383Sphk timeout = WD_TO_NEVER; 279126383Sphk else 280126383Sphk timeout = 1.0 + log(a * 1e9) / log(2.0); 281126383Sphk if (debugging) 282126383Sphk printf("Timeout is 2^%d nanoseconds\n", 283126383Sphk timeout); 284126383Sphk break; 285116874Ssmkelly case '?': 286116874Ssmkelly default: 287116874Ssmkelly usage(); 288116874Ssmkelly /* NOTREACHED */ 289116874Ssmkelly } 290116874Ssmkelly } 291150747Sphk if (argc != optind) 292150747Sphk errx(EX_USAGE, "extra arguments."); 293126383Sphk if (is_daemon && timeout < WD_TO_1SEC) 294126383Sphk errx(EX_USAGE, "-t argument is less than one second."); 295116874Ssmkelly} 296