kern_shutdown.c revision 55862
1/*-
2 * Copyright (c) 1986, 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/kern_shutdown.c 55862 2000-01-12 17:26:42Z luoqi $
40 */
41
42#include "opt_ddb.h"
43#include "opt_hw_wdog.h"
44#include "opt_panic.h"
45#include "opt_show_busybufs.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/eventhandler.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/proc.h>
53#include <sys/vnode.h>
54#include <sys/kernel.h>
55#include <sys/kthread.h>
56#include <sys/mount.h>
57#include <sys/queue.h>
58#include <sys/sysctl.h>
59#include <sys/conf.h>
60#include <sys/sysproto.h>
61#include <sys/cons.h>
62
63#include <machine/pcb.h>
64#include <machine/clock.h>
65#include <machine/md_var.h>
66#ifdef SMP
67#include <machine/smp.h>		/* smp_active, cpuid */
68#endif
69
70#include <sys/signalvar.h>
71
72#ifndef PANIC_REBOOT_WAIT_TIME
73#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
74#endif
75
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
80#include <machine/stdarg.h>
81
82#ifdef DDB
83#ifdef DDB_UNATTENDED
84int debugger_on_panic = 0;
85#else
86int debugger_on_panic = 1;
87#endif
88SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
89	&debugger_on_panic, 0, "Run debugger on kernel panic");
90#endif
91
92SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
93
94#ifdef	HW_WDOG
95/*
96 * If there is a hardware watchdog, point this at the function needed to
97 * hold it off.
98 * It's needed when the kernel needs to do some lengthy operations.
99 * e.g. in wd.c when dumping core.. It's most annoying to have
100 * your precious core-dump only half written because the wdog kicked in.
101 */
102watchdog_tickle_fn wdog_tickler = NULL;
103#endif	/* HW_WDOG */
104
105/*
106 * Variable panicstr contains argument to first call to panic; used as flag
107 * to indicate that the kernel has already called panic.
108 */
109const char *panicstr;
110
111static void boot __P((int)) __dead2;
112static void dumpsys __P((void));
113static int setdumpdev __P((dev_t dev));
114static void poweroff_wait __P((void *, int));
115static void print_uptime __P((void));
116static void shutdown_halt __P((void *junk, int howto));
117static void shutdown_panic __P((void *junk, int howto));
118static void shutdown_reset __P((void *junk, int howto));
119
120/* register various local shutdown events */
121static void
122shutdown_conf(void *unused)
123{
124	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
125	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
126	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
127	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
128}
129
130SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
131
132/* ARGSUSED */
133
134/*
135 * The system call that results in a reboot
136 */
137int
138reboot(p, uap)
139	struct proc *p;
140	struct reboot_args *uap;
141{
142	int error;
143
144	if ((error = suser(p)))
145		return (error);
146
147	boot(uap->opt);
148	return (0);
149}
150
151/*
152 * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
153 */
154void
155shutdown_nice()
156{
157	/* Send a signal to init(8) and have it shutdown the world */
158	if (initproc != NULL) {
159		psignal(initproc, SIGINT);
160	} else {
161		/* No init(8) running, so simply reboot */
162		boot(RB_NOSYNC);
163	}
164	return;
165}
166static int	waittime = -1;
167static struct pcb dumppcb;
168
/*
 * Print the time since boot on the console as "Uptime: [Nd][Nh][Nm]Ns".
 *
 * Leading units are omitted while they are zero; once a larger unit has
 * been printed every smaller unit is printed too, even when it is zero.
 *
 * The seconds count is converted to long once, up front, so that the
 * value handed to each "%ld" conversion really is a long regardless of
 * how time_t is defined on the platform.
 */
static void
print_uptime(void)
{
	struct timespec ts;
	long secs;
	int shown;		/* non-zero once any unit has been printed */

	getnanouptime(&ts);
	secs = (long)ts.tv_sec;

	printf("Uptime: ");
	shown = 0;
	if (secs >= 86400) {
		printf("%ldd", secs / 86400);
		secs %= 86400;
		shown = 1;
	}
	if (shown || secs >= 3600) {
		printf("%ldh", secs / 3600);
		secs %= 3600;
		shown = 1;
	}
	if (shown || secs >= 60) {
		printf("%ldm", secs / 60);
		secs %= 60;
		shown = 1;
	}
	printf("%lds\n", secs);
}
195
196/*
197 *  Go through the rigmarole of shutting down..
198 * this used to be in machdep.c but I'll be dammned if I could see
199 * anything machine dependant in it.
200 */
201static void
202boot(howto)
203	int howto;
204{
205
206#ifdef SMP
207	if (smp_active) {
208		printf("boot() called on cpu#%d\n", cpuid);
209	}
210#endif
211	/*
212	 * Do any callouts that should be done BEFORE syncing the filesystems.
213	 */
214	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
215
216	/*
217	 * Now sync filesystems
218	 */
219	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
220		register struct buf *bp;
221		int iter, nbusy;
222
223		waittime = 0;
224		printf("\nsyncing disks... ");
225
226		sync(&proc0, NULL);
227
228		/*
229		 * With soft updates, some buffers that are
230		 * written will be remarked as dirty until other
231		 * buffers are written.
232		 */
233		for (iter = 0; iter < 20; iter++) {
234			nbusy = 0;
235			for (bp = &buf[nbuf]; --bp >= buf; ) {
236				if ((bp->b_flags & B_INVAL) == 0 &&
237				    BUF_REFCNT(bp) > 0) {
238					nbusy++;
239				} else if ((bp->b_flags & (B_DELWRI | B_INVAL))
240						== B_DELWRI) {
241					/* bawrite(bp);*/
242					nbusy++;
243				}
244			}
245			if (nbusy == 0)
246				break;
247			printf("%d ", nbusy);
248			sync(&proc0, NULL);
249			DELAY(50000 * iter);
250		}
251		printf("\n");
252		/*
253		 * Count only busy local buffers to prevent forcing
254		 * a fsck if we're just a client of a wedged NFS server
255		 */
256		nbusy = 0;
257		for (bp = &buf[nbuf]; --bp >= buf; ) {
258			if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
259			    ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
260				if (bp->b_dev == NODEV) {
261					TAILQ_REMOVE(&mountlist,
262					    bp->b_vp->v_mount, mnt_list);
263					continue;
264				}
265				nbusy++;
266#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
267				printf(
268			    "%d: dev:%s, flags:%08lx, blkno:%ld, lblkno:%ld\n",
269				    nbusy, devtoname(bp->b_dev),
270				    bp->b_flags, (long)bp->b_blkno,
271				    (long)bp->b_lblkno);
272#endif
273			}
274		}
275		if (nbusy) {
276			/*
277			 * Failed to sync all blocks. Indicate this and don't
278			 * unmount filesystems (thus forcing an fsck on reboot).
279			 */
280			printf("giving up on %d buffers\n", nbusy);
281			DELAY(5000000);	/* 5 seconds */
282		} else {
283			printf("done\n");
284			/*
285			 * Unmount filesystems
286			 */
287			if (panicstr == 0)
288				vfs_unmountall();
289		}
290		DELAY(100000);		/* wait for console output to finish */
291	}
292
293	print_uptime();
294
295	/*
296	 * Ok, now do things that assume all filesystem activity has
297	 * been completed.
298	 */
299	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
300	splhigh();
301	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
302		savectx(&dumppcb);
303#ifdef __i386__
304		dumppcb.pcb_cr3 = rcr3();
305#endif
306		dumpsys();
307	}
308
309	/* Now that we're going to really halt the system... */
310	EVENTHANDLER_INVOKE(shutdown_final, howto);
311
312	for(;;) ;	/* safety against shutdown_reset not working */
313	/* NOTREACHED */
314}
315
316/*
317 * If the shutdown was a clean halt, behave accordingly.
318 */
319static void
320shutdown_halt(void *junk, int howto)
321{
322	if (howto & RB_HALT) {
323		printf("\n");
324		printf("The operating system has halted.\n");
325		printf("Please press any key to reboot.\n\n");
326		switch (cngetc()) {
327		case -1:		/* No console, just die */
328			cpu_halt();
329			/* NOTREACHED */
330		default:
331			howto &= ~RB_HALT;
332			break;
333		}
334	}
335}
336
337/*
338 * Check to see if the system paniced, pause and then reboot
339 * according to the specified delay.
340 */
341static void
342shutdown_panic(void *junk, int howto)
343{
344	int loop;
345
346	if (howto & RB_DUMP) {
347		if (PANIC_REBOOT_WAIT_TIME != 0) {
348			if (PANIC_REBOOT_WAIT_TIME != -1) {
349				printf("Automatic reboot in %d seconds - "
350				       "press a key on the console to abort\n",
351					PANIC_REBOOT_WAIT_TIME);
352				for (loop = PANIC_REBOOT_WAIT_TIME * 10;
353				     loop > 0; --loop) {
354					DELAY(1000 * 100); /* 1/10th second */
355					/* Did user type a key? */
356					if (cncheckc() != -1)
357						break;
358				}
359				if (!loop)
360					return;
361			}
362		} else { /* zero time specified - reboot NOW */
363			return;
364		}
365		printf("--> Press a key on the console to reboot <--\n");
366		cngetc();
367	}
368}
369
/*
 * shutdown_final handler: everything is done, so reset the machine.
 * Neither argument is used.
 */
static void
shutdown_reset(void *junk, int howto)
{
	printf("Rebooting...\n");

	/* Wait 1 second so the printf output can complete and be read. */
	DELAY(1000000);

	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}
382
383/*
384 * Magic number for savecore
385 *
386 * exported (symorder) and used at least by savecore(8)
387 *
388 */
389static u_long const	dumpmag = 0x8fca0101UL;
390
391static int	dumpsize = 0;		/* also for savecore */
392
393static int	dodump = 1;
394
395SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
396    "Try to perform coredump on kernel panic");
397
398static int
399setdumpdev(dev)
400	dev_t dev;
401{
402	int psize;
403	long newdumplo;
404
405	if (dev == NODEV) {
406		dumpdev = dev;
407		return (0);
408	}
409	if (devsw(dev) == NULL)
410		return (ENXIO);		/* XXX is this right? */
411	if (devsw(dev)->d_psize == NULL)
412		return (ENXIO);		/* XXX should be ENODEV ? */
413	psize = devsw(dev)->d_psize(dev);
414	if (psize == -1)
415		return (ENXIO);		/* XXX should be ENODEV ? */
416	/*
417	 * XXX should clean up checking in dumpsys() to be more like this.
418	 */
419	newdumplo = psize - Maxmem * PAGE_SIZE / DEV_BSIZE;
420	if (newdumplo < 0)
421		return (ENOSPC);
422	dumpdev = dev;
423	dumplo = newdumplo;
424	return (0);
425}
426
427
428/* ARGSUSED */
429static void dump_conf __P((void *dummy));
430static void
431dump_conf(dummy)
432	void *dummy;
433{
434	if (setdumpdev(dumpdev) != 0)
435		dumpdev = NODEV;
436}
437
438SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
439
440static int
441sysctl_kern_dumpdev SYSCTL_HANDLER_ARGS
442{
443	int error;
444	udev_t ndumpdev;
445
446	ndumpdev = dev2udev(dumpdev);
447	error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
448	if (error == 0 && req->newptr != NULL)
449		error = setdumpdev(udev2dev(ndumpdev, 0));
450	return (error);
451}
452
453SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
454	0, sizeof dumpdev, sysctl_kern_dumpdev, "T,dev_t", "");
455
456/*
457 * Doadump comes here after turning off memory management and
458 * getting on the dump stack, either when called above, or by
459 * the auto-restart code.
460 */
461static void
462dumpsys(void)
463{
464	int	error;
465	static int dumping;
466
467	if (dumping++) {
468		printf("Dump already in progress, bailing...\n");
469		return;
470	}
471	if (!dodump)
472		return;
473	if (dumpdev == NODEV)
474		return;
475	if (!(devsw(dumpdev)))
476		return;
477	if (!(devsw(dumpdev)->d_dump))
478		return;
479	dumpsize = Maxmem;
480	printf("\ndumping to dev %s, offset %ld\n", devtoname(dumpdev), dumplo);
481	printf("dump ");
482	error = (*devsw(dumpdev)->d_dump)(dumpdev);
483	if (error == 0) {
484		printf("succeeded\n");
485		return;
486	}
487	printf("failed, reason: ");
488	switch (error) {
489	case ENODEV:
490		printf("device doesn't support a dump routine\n");
491		break;
492
493	case ENXIO:
494		printf("device bad\n");
495		break;
496
497	case EFAULT:
498		printf("device not ready\n");
499		break;
500
501	case EINVAL:
502		printf("area improper\n");
503		break;
504
505	case EIO:
506		printf("i/o error\n");
507		break;
508
509	case EINTR:
510		printf("aborted from console\n");
511		break;
512
513	default:
514		printf("unknown, error = %d\n", error);
515		break;
516	}
517}
518
519/*
520 * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
521 * and then reboots.  If we are called twice, then we avoid trying to sync
522 * the disks as this often leads to recursive panics.
523 */
524void
525panic(const char *fmt, ...)
526{
527	int bootopt;
528	va_list ap;
529	static char buf[256];
530
531	bootopt = RB_AUTOBOOT | RB_DUMP;
532	if (panicstr)
533		bootopt |= RB_NOSYNC;
534	else
535		panicstr = fmt;
536
537	va_start(ap, fmt);
538	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
539	if (panicstr == fmt)
540		panicstr = buf;
541	va_end(ap);
542	printf("panic: %s\n", buf);
543#ifdef SMP
544	/* three seperate prints in case of an unmapped page and trap */
545	printf("mp_lock = %08x; ", mp_lock);
546	printf("cpuid = %d; ", cpuid);
547	printf("lapic.id = %08x\n", lapic.id);
548#endif
549
550#if defined(DDB)
551	if (debugger_on_panic)
552		Debugger ("panic");
553#endif
554	boot(bootopt);
555}
556
557/*
558 * Support for poweroff delay.
559 */
560#ifndef POWEROFF_DELAY
561# define POWEROFF_DELAY 5000
562#endif
563static int poweroff_delay = POWEROFF_DELAY;
564
565SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
566	&poweroff_delay, 0, "");
567
568static void
569poweroff_wait(void *junk, int howto)
570{
571	if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
572		return;
573	DELAY(poweroff_delay * 1000);
574}
575
576/*
577 * Some system processes (e.g. syncer) need to be stopped at appropriate
578 * points in their main loops prior to a system shutdown, so that they
579 * won't interfere with the shutdown process (e.g. by holding a disk buf
580 * to cause sync to fail).  For each of these system processes, register
581 * shutdown_kproc() as a handler for one of shutdown events.
582 */
583static int kproc_shutdown_wait = 60;
584SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
585    &kproc_shutdown_wait, 0, "");
586
587void
588shutdown_kproc(void *arg, int howto)
589{
590	struct proc *p;
591	int error;
592
593	if (panicstr)
594		return;
595
596	p = (struct proc *)arg;
597	printf("Waiting (max %d seconds) for system process `%s' to stop...",
598	    kproc_shutdown_wait, p->p_comm);
599	error = suspend_kproc(p, kproc_shutdown_wait * hz);
600
601	if (error == EWOULDBLOCK)
602		printf("timed out\n");
603	else
604		printf("stopped\n");
605}
606