kern_shutdown.c revision 49627
1/*- 2 * Copyright (c) 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 39 * $Id: kern_shutdown.c,v 1.58 1999/08/09 10:34:57 phk Exp $ 40 */ 41 42#include "opt_ddb.h" 43#include "opt_hw_wdog.h" 44#include "opt_panic.h" 45#include "opt_show_busybufs.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/buf.h> 50#include <sys/reboot.h> 51#include <sys/proc.h> 52#include <sys/vnode.h> 53#include <sys/malloc.h> 54#include <sys/kernel.h> 55#include <sys/mount.h> 56#include <sys/queue.h> 57#include <sys/sysctl.h> 58#include <sys/conf.h> 59#include <sys/sysproto.h> 60#include <sys/cons.h> 61 62#include <machine/pcb.h> 63#include <machine/clock.h> 64#include <machine/md_var.h> 65#ifdef SMP 66#include <machine/smp.h> /* smp_active, cpuid */ 67#endif 68 69#include <sys/signalvar.h> 70 71#ifndef PANIC_REBOOT_WAIT_TIME 72#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 73#endif 74 75/* 76 * Note that stdarg.h and the ANSI style va_start macro is used for both 77 * ANSI and traditional C compilers. 78 */ 79#include <machine/stdarg.h> 80 81#ifdef DDB 82#ifdef DDB_UNATTENDED 83int debugger_on_panic = 0; 84#else 85int debugger_on_panic = 1; 86#endif 87SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, 88 &debugger_on_panic, 0, "Run debugger on kernel panic"); 89#endif 90 91SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); 92 93#ifdef HW_WDOG 94/* 95 * If there is a hardware watchdog, point this at the function needed to 96 * hold it off. 97 * It's needed when the kernel needs to do some lengthy operations. 98 * e.g. in wd.c when dumping core.. It's most annoying to have 99 * your precious core-dump only half written because the wdog kicked in. 100 */ 101watchdog_tickle_fn wdog_tickler = NULL; 102#endif /* HW_WDOG */ 103 104/* 105 * Variable panicstr contains argument to first call to panic; used as flag 106 * to indicate that the kernel has already called panic. 107 */ 108const char *panicstr; 109 110/* 111 * callout list for things to do a shutdown 112 */ 113typedef struct shutdown_list_element { 114 LIST_ENTRY(shutdown_list_element) links; 115 bootlist_fn function; 116 void *arg; 117 int priority; 118} *sle_p; 119 120/* 121 * There are three shutdown lists. Some things need to be shut down 122 * earlier than others. 123 */ 124LIST_HEAD(shutdown_list, shutdown_list_element); 125 126static struct shutdown_list shutdown_lists[SHUTDOWN_FINAL + 1]; 127 128static void boot __P((int)) __dead2; 129static void dumpsys __P((void)); 130static int setdumpdev __P((dev_t dev)); 131 132 133#ifndef _SYS_SYSPROTO_H_ 134struct reboot_args { 135 int opt; 136}; 137#endif 138/* ARGSUSED */ 139 140/* 141 * The system call that results in a reboot 142 */ 143int 144reboot(p, uap) 145 struct proc *p; 146 struct reboot_args *uap; 147{ 148 int error; 149 150 if ((error = suser(p))) 151 return (error); 152 153 boot(uap->opt); 154 return (0); 155} 156 157/* 158 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 159 */ 160void 161shutdown_nice() 162{ 163 /* Send a signal to init(8) and have it shutdown the world */ 164 if (initproc != NULL) { 165 psignal(initproc, SIGINT); 166 } else { 167 /* No init(8) running, so simply reboot */ 168 boot(RB_NOSYNC); 169 } 170 return; 171} 172static int waittime = -1; 173static struct pcb dumppcb; 174 175/* 176 * Go through the rigmarole of shutting down.. 177 * this used to be in machdep.c but I'll be dammned if I could see 178 * anything machine dependant in it. 179 */ 180static void 181boot(howto) 182 int howto; 183{ 184 sle_p ep; 185 186#ifdef SMP 187 if (smp_active) { 188 printf("boot() called on cpu#%d\n", cpuid); 189 } 190#endif 191 /* 192 * Do any callouts that should be done BEFORE syncing the filesystems. 193 */ 194 LIST_FOREACH(ep, &shutdown_lists[SHUTDOWN_PRE_SYNC], links) 195 (*ep->function)(howto, ep->arg); 196 197 /* 198 * Now sync filesystems 199 */ 200 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { 201 register struct buf *bp; 202 int iter, nbusy; 203 204 waittime = 0; 205 printf("\nsyncing disks... "); 206 207 sync(&proc0, NULL); 208 209 /* 210 * With soft updates, some buffers that are 211 * written will be remarked as dirty until other 212 * buffers are written. 213 */ 214 for (iter = 0; iter < 20; iter++) { 215 nbusy = 0; 216 for (bp = &buf[nbuf]; --bp >= buf; ) { 217 if ((bp->b_flags & B_INVAL) == 0 && 218 BUF_REFCNT(bp) > 0) { 219 nbusy++; 220 } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) 221 == B_DELWRI) { 222 /* bawrite(bp);*/ 223 nbusy++; 224 } 225 } 226 if (nbusy == 0) 227 break; 228 printf("%d ", nbusy); 229 sync(&proc0, NULL); 230 DELAY(50000 * iter); 231 } 232 /* 233 * Count only busy local buffers to prevent forcing 234 * a fsck if we're just a client of a wedged NFS server 235 */ 236 nbusy = 0; 237 for (bp = &buf[nbuf]; --bp >= buf; ) { 238 if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) || 239 ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) { 240 if (bp->b_dev == NODEV) 241 CIRCLEQ_REMOVE(&mountlist, 242 bp->b_vp->v_mount, mnt_list); 243 else 244 nbusy++; 245 } 246 247 248 } 249 if (nbusy) { 250 /* 251 * Failed to sync all blocks. Indicate this and don't 252 * unmount filesystems (thus forcing an fsck on reboot). 253 */ 254 printf("giving up\n"); 255#ifdef SHOW_BUSYBUFS 256 nbusy = 0; 257 for (bp = &buf[nbuf]; --bp >= buf; ) { 258 if ((bp->b_flags & B_INVAL) == 0 && 259 BUF_REFCNT(bp) > 0) { 260 nbusy++; 261 printf( 262 "%d: dev:%08lx, flags:%08lx, blkno:%ld, lblkno:%ld\n", 263 nbusy, (u_long)bp->b_dev, 264 bp->b_flags, (long)bp->b_blkno, 265 (long)bp->b_lblkno); 266 } 267 } 268 DELAY(5000000); /* 5 seconds */ 269#endif 270 } else { 271 printf("done\n"); 272 /* 273 * Unmount filesystems 274 */ 275 if (panicstr == 0) 276 vfs_unmountall(); 277 } 278 DELAY(100000); /* wait for console output to finish */ 279 } 280 281 /* 282 * Ok, now do things that assume all filesystem activity has 283 * been completed. 284 */ 285 LIST_FOREACH(ep, &shutdown_lists[SHUTDOWN_POST_SYNC], links) 286 (*ep->function)(howto, ep->arg); 287 splhigh(); 288 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) { 289 savectx(&dumppcb); 290#ifdef __i386__ 291 dumppcb.pcb_cr3 = rcr3(); 292#endif 293 dumpsys(); 294 } 295 296 /* Now that we're going to really halt the system... */ 297 LIST_FOREACH(ep, &shutdown_lists[SHUTDOWN_FINAL], links) 298 (*ep->function)(howto, ep->arg); 299 300 if (howto & RB_HALT) { 301 printf("\n"); 302 printf("The operating system has halted.\n"); 303 printf("Please press any key to reboot.\n\n"); 304 switch (cngetc()) { 305 case -1: /* No console, just die */ 306 cpu_halt(); 307 /* NOTREACHED */ 308 default: 309 howto &= ~RB_HALT; 310 break; 311 } 312 } else if (howto & RB_DUMP) { 313 /* System Paniced */ 314 315 if (PANIC_REBOOT_WAIT_TIME != 0) { 316 if (PANIC_REBOOT_WAIT_TIME != -1) { 317 int loop; 318 printf("Automatic reboot in %d seconds - " 319 "press a key on the console to abort\n", 320 PANIC_REBOOT_WAIT_TIME); 321 for (loop = PANIC_REBOOT_WAIT_TIME * 10; 322 loop > 0; --loop) { 323 DELAY(1000 * 100); /* 1/10th second */ 324 /* Did user type a key? */ 325 if (cncheckc() != -1) 326 break; 327 } 328 if (!loop) 329 goto die; 330 } 331 } else { /* zero time specified - reboot NOW */ 332 goto die; 333 } 334 printf("--> Press a key on the console to reboot <--\n"); 335 cngetc(); 336 } 337die: 338 printf("Rebooting...\n"); 339 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 340 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 341 cpu_reset(); 342 for(;;) ; 343 /* NOTREACHED */ 344} 345 346/* 347 * Magic number for savecore 348 * 349 * exported (symorder) and used at least by savecore(8) 350 * 351 */ 352static u_long const dumpmag = 0x8fca0101UL; 353 354static int dumpsize = 0; /* also for savecore */ 355 356static int dodump = 1; 357 358SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, 359 "Try to perform coredump on kernel panic"); 360 361static int 362setdumpdev(dev) 363 dev_t dev; 364{ 365 int maj, psize; 366 long newdumplo; 367 368 if (dev == NODEV) { 369 dumpdev = dev; 370 return (0); 371 } 372 maj = major(dev); 373 if (bdevsw(dev) == NULL) 374 return (ENXIO); /* XXX is this right? */ 375 if (bdevsw(dev)->d_psize == NULL) 376 return (ENXIO); /* XXX should be ENODEV ? */ 377 psize = bdevsw(dev)->d_psize(dev); 378 if (psize == -1) 379 return (ENXIO); /* XXX should be ENODEV ? */ 380 /* 381 * XXX should clean up checking in dumpsys() to be more like this, 382 * and nuke dodump sysctl (too many knobs). 383 */ 384 newdumplo = psize - Maxmem * PAGE_SIZE / DEV_BSIZE; 385 if (newdumplo < 0) 386 return (ENOSPC); 387 dumpdev = dev; 388 dumplo = newdumplo; 389 return (0); 390} 391 392 393/* ARGSUSED */ 394static void dump_conf __P((void *dummy)); 395static void 396dump_conf(dummy) 397 void *dummy; 398{ 399 if (setdumpdev(dumpdev) != 0) 400 dumpdev = NODEV; 401} 402 403SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL) 404 405static int 406sysctl_kern_dumpdev SYSCTL_HANDLER_ARGS 407{ 408 int error; 409 udev_t ndumpdev; 410 411 ndumpdev = dev2budev(dumpdev); 412 error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req); 413 if (error == 0 && req->newptr != NULL) 414 error = setdumpdev(udev2dev(ndumpdev, 1)); 415 return (error); 416} 417 418SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW, 419 0, sizeof dumpdev, sysctl_kern_dumpdev, "T,dev_t", ""); 420 421/* 422 * Doadump comes here after turning off memory management and 423 * getting on the dump stack, either when called above, or by 424 * the auto-restart code. 425 */ 426static void 427dumpsys(void) 428{ 429 int error; 430 431 if (!dodump) 432 return; 433 if (dumpdev == NODEV) 434 return; 435 if (!(bdevsw(dumpdev))) 436 return; 437 if (!(bdevsw(dumpdev)->d_dump)) 438 return; 439 dumpsize = Maxmem; 440 printf("\ndumping to dev (%d,%d), offset %ld\n", 441 major(dumpdev), minor(dumpdev), dumplo); 442 printf("dump "); 443 error = (*bdevsw(dumpdev)->d_dump)(dumpdev); 444 if (error == 0) { 445 printf("succeeded\n"); 446 return; 447 } 448 printf("failed, reason: "); 449 switch (error) { 450 case ENODEV: 451 printf("device doesn't support a dump routine\n"); 452 break; 453 454 case ENXIO: 455 printf("device bad\n"); 456 break; 457 458 case EFAULT: 459 printf("device not ready\n"); 460 break; 461 462 case EINVAL: 463 printf("area improper\n"); 464 break; 465 466 case EIO: 467 printf("i/o error\n"); 468 break; 469 470 case EINTR: 471 printf("aborted from console\n"); 472 break; 473 474 default: 475 printf("unknown, error = %d\n", error); 476 break; 477 } 478} 479 480/* 481 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 482 * and then reboots. If we are called twice, then we avoid trying to sync 483 * the disks as this often leads to recursive panics. 484 */ 485void 486panic(const char *fmt, ...) 487{ 488 int bootopt; 489 va_list ap; 490 static char buf[256]; 491 492 bootopt = RB_AUTOBOOT | RB_DUMP; 493 if (panicstr) 494 bootopt |= RB_NOSYNC; 495 else 496 panicstr = fmt; 497 498 va_start(ap, fmt); 499 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 500 if (panicstr == fmt) 501 panicstr = buf; 502 va_end(ap); 503 printf("panic: %s\n", buf); 504#ifdef SMP 505 /* three seperate prints in case of an unmapped page and trap */ 506 printf("mp_lock = %08x; ", mp_lock); 507 printf("cpuid = %d; ", cpuid); 508 printf("lapic.id = %08x\n", lapic.id); 509#endif 510 511#if defined(DDB) 512 if (debugger_on_panic) 513 Debugger ("panic"); 514#endif 515 boot(bootopt); 516} 517 518/* 519 * Three routines to handle adding/deleting items on the 520 * shutdown callout lists 521 * 522 * at_shutdown(): 523 * Take the arguments given and put them onto the shutdown callout list. 524 * However first make sure that it's not already there. 525 * returns 0 on success. 526 */ 527int 528at_shutdown(bootlist_fn function, void *arg, int queue) 529{ 530 return(at_shutdown_pri(function, arg, queue, SHUTDOWN_PRI_DEFAULT)); 531} 532 533/* 534 * at_shutdown_pri(): 535 * Take the arguments given and put them onto the shutdown callout list 536 * with the given execution priority. 537 * returns 0 on success. 538 */ 539int 540at_shutdown_pri(bootlist_fn function, void *arg, int queue, int pri) 541{ 542 sle_p op, ep, ip; 543 544 op = NULL; /* shut up gcc */ 545 if (queue < SHUTDOWN_PRE_SYNC 546 || queue > SHUTDOWN_FINAL) { 547 printf("at_shutdown: bad exit callout queue %d specified\n", 548 queue); 549 return (EINVAL); 550 } 551 if (rm_at_shutdown(function, arg)) 552 printf("at_shutdown: exit callout entry was already present\n"); 553 ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); 554 if (ep == NULL) 555 return (ENOMEM); 556 ep->function = function; 557 ep->arg = arg; 558 ep->priority = pri; 559 560 /* Sort into list of items on this queue */ 561 ip = LIST_FIRST(&shutdown_lists[queue]); 562 if (ip == NULL) { 563 LIST_INSERT_HEAD(&shutdown_lists[queue], ep, links); 564 } else { 565 for (; ip != NULL; op = ip, ip = LIST_NEXT(ip, links)) { 566 if (ep->priority < ip->priority) { 567 LIST_INSERT_BEFORE(ip, ep, links); 568 ep = NULL; 569 break; 570 } 571 } 572 if (ep != NULL) 573 LIST_INSERT_AFTER(op, ep, links); 574 } 575 return (0); 576} 577 578/* 579 * Scan the exit callout lists for the given items and remove them. 580 * Returns the number of items removed. 581 */ 582int 583rm_at_shutdown(bootlist_fn function, void *arg) 584{ 585 sle_p ep; 586 int count; 587 int queue; 588 589 count = 0; 590 for (queue = SHUTDOWN_PRE_SYNC; queue < SHUTDOWN_FINAL; queue++) { 591 LIST_FOREACH(ep, &shutdown_lists[queue], links) { 592 if ((ep->function == function) && (ep->arg == arg)) { 593 LIST_REMOVE(ep, links); 594 free(ep, M_TEMP); 595 count++; 596 } 597 } 598 } 599 return (count); 600} 601 602/* 603 * Support for poweroff delay. 604 */ 605static int poweroff_delay = 0; 606SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 607 &poweroff_delay, 0, ""); 608 609static void poweroff_wait(int howto, void *unused) 610{ 611 if(!(howto & RB_POWEROFF) || poweroff_delay <= 0) 612 return; 613 DELAY(poweroff_delay * 1000); 614} 615 616/* 617 * XXX OK? This implies I know SHUTDOWN_PRI_LAST > SHUTDOWN_PRI_FIRST 618 */ 619static void poweroff_conf(void *unused) 620{ 621 at_shutdown_pri(poweroff_wait, NULL, SHUTDOWN_FINAL, SHUTDOWN_PRI_FIRST); 622} 623 624SYSINIT(poweroff_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, poweroff_conf, NULL) 625