1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 *	File:	bsd/kern/kern_shutdown.c
30 *
31 *	Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/vm.h>
39#include <sys/proc_internal.h>
40#include <sys/user.h>
41#include <sys/reboot.h>
42#include <sys/conf.h>
43#include <sys/vnode_internal.h>
44#include <sys/file_internal.h>
45#include <sys/clist.h>
46#include <sys/callout.h>
47#include <sys/mbuf.h>
48#include <sys/msgbuf.h>
49#include <sys/ioctl.h>
50#include <sys/signal.h>
51#include <sys/tty.h>
52#include <kern/task.h>
53#include <sys/quota.h>
54#include <vm/vm_kern.h>
55#include <mach/vm_param.h>
56#include <sys/filedesc.h>
57#include <mach/host_priv.h>
58#include <mach/host_reboot.h>
59
60#include <security/audit/audit.h>
61
62#include <kern/sched_prim.h>		/* for thread_block() */
63#include <kern/host.h>			/* for host_priv_self() */
64#include <net/if_var.h>			/* for if_down_all() */
65#include <sys/buf_internal.h>		/* for count_busy_buffers() */
66#include <sys/mount_internal.h>		/* for vfs_unmountall() */
67#include <mach/task.h>			/* for task_suspend() */
68#include <sys/sysproto.h>		/* abused for sync() */
69#include <kern/clock.h>			/* for delay_for_interval() */
70#include <libkern/OSAtomic.h>
71
72#include <sys/kdebug.h>
73
74uint32_t system_inshutdown = 0;
75
76/* XXX should be in a header file somewhere, but isn't */
77extern void md_prepare_for_shutdown(int, int, char *);
78extern void (*unmountroot_pre_hook)(void);
79
80unsigned int proc_shutdown_exitcount = 0;
81
82static int  sd_openlog(vfs_context_t);
83static int  sd_closelog(vfs_context_t);
84static void sd_log(vfs_context_t, const char *, ...);
85static void proc_shutdown(void);
86
87extern void IOSystemShutdownNotification(void);
88
/*
 * Selection criteria handed to the sd_filt1()/sd_filt2() process filters.
 */
struct sd_filterargs {
	int delayterm;		/* 0: skip processes marked P_LDELAYTERM */
	int shutdownstate;	/* value compared against p_shutdownstate */
};
93
94
/*
 * Per-scan instructions and results shared with the sd_callback*() routines.
 */
struct sd_iterargs {
	int signo;		/* signal to post to each selected process */
	int setsdstate;		/* value stored into p_shutdownstate */
	int countproc;		/* non-zero: count the processes acted upon */
	int activecount;	/* out: number of processes acted upon */
};
101
102static vnode_t sd_logvp = NULLVP;
103static off_t sd_log_offset = 0;
104
105
106static int sd_filt1(proc_t, void *);
107static int sd_filt2(proc_t, void *);
108static int  sd_callback1(proc_t p, void * arg);
109static int  sd_callback2(proc_t p, void * arg);
110static int  sd_callback3(proc_t p, void * arg);
111
112int
113boot(int paniced, int howto, char *command)
114{
115	struct proc *p = current_proc();	/* XXX */
116	int hostboot_option=0;
117
118	if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
119		if ( (howto&RB_QUICK) == RB_QUICK)
120			goto force_reboot;
121		return (EBUSY);
122	}
123	/*
124	 * Temporary hack to notify the power management root domain
125	 * that the system will shut down.
126	 */
127	IOSystemShutdownNotification();
128
129	md_prepare_for_shutdown(paniced, howto, command);
130
131	if ((howto&RB_QUICK)==RB_QUICK) {
132		printf("Quick reboot...\n");
133		if ((howto&RB_NOSYNC)==0) {
134			sync(p, (void *)NULL, (int *)NULL);
135		}
136	}
137	else if ((howto&RB_NOSYNC)==0) {
138		int iter, nbusy;
139
140		printf("syncing disks... ");
141
142		/*
143		 * Release vnodes held by texts before sync.
144		 */
145
146		/* handle live procs (deallocate their root and current directories). */
147		proc_shutdown();
148
149#if CONFIG_AUDIT
150		audit_shutdown();
151#endif
152
153		if (unmountroot_pre_hook != NULL)
154			unmountroot_pre_hook();
155
156		sync(p, (void *)NULL, (int *)NULL);
157
158		/*
159		 * Now that all processes have been terminated and system is
160		 * sync'ed up, suspend init
161		 */
162
163		if (initproc && p != initproc)
164			task_suspend(initproc->task);
165
166		if (kdebug_enable)
167			kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
168
169		/*
170		 * Unmount filesystems
171		 */
172		vfs_unmountall();
173
174		/* Wait for the buffer cache to clean remaining dirty buffers */
175		for (iter = 0; iter < 100; iter++) {
176			nbusy = count_busy_buffers();
177			if (nbusy == 0)
178				break;
179			printf("%d ", nbusy);
180			delay_for_interval( 1 * nbusy, 1000 * 1000);
181		}
182		if (nbusy)
183			printf("giving up\n");
184		else
185			printf("done\n");
186	}
187#if NETWORKING
188	/*
189	 * Can't just use an splnet() here to disable the network
190	 * because that will lock out softints which the disk
191	 * drivers depend on to finish DMAs.
192	 */
193	if_down_all();
194#endif /* NETWORKING */
195
196force_reboot:
197	if (howto & RB_POWERDOWN)
198		hostboot_option = HOST_REBOOT_HALT;
199	if (howto & RB_HALT)
200		hostboot_option = HOST_REBOOT_HALT;
201	if (paniced == RB_PANIC)
202		hostboot_option = HOST_REBOOT_HALT;
203
204	if (howto & RB_UPSDELAY) {
205		hostboot_option = HOST_REBOOT_UPSDELAY;
206	}
207
208	host_reboot(host_priv_self(), hostboot_option);
209	/*
210	 * should not be reached
211	 */
212	return (0);
213}
214
215static int
216sd_openlog(vfs_context_t ctx)
217{
218	int error = 0;
219	struct timeval tv;
220
221	/* Open shutdown log */
222	if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
223		printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
224		sd_logvp = NULLVP;
225		return error;
226	}
227
228	vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
229
230	/* Write a little header */
231	microtime(&tv);
232	sd_log(ctx, "Process shutdown log.  Current time is %lu (in seconds).\n\n", tv.tv_sec);
233
234	return 0;
235}
236
237static int
238sd_closelog(vfs_context_t ctx)
239{
240	int error = 0;
241	if (sd_logvp != NULLVP) {
242		VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
243		error = vnode_close(sd_logvp, FWRITE, ctx);
244	}
245
246	return error;
247}
248
249static void
250sd_log(vfs_context_t ctx, const char *fmt, ...)
251{
252	int resid, log_error, len;
253	char logbuf[100];
254	va_list arglist;
255
256	/* If the log isn't open yet, open it */
257	if (sd_logvp == NULLVP) {
258		if (sd_openlog(ctx) != 0) {
259			/* Couldn't open, we fail out */
260			return;
261		}
262	}
263
264	va_start(arglist, fmt);
265	len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
266	log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
267			UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
268	if (log_error == EIO || log_error == 0) {
269		sd_log_offset += (len - resid);
270	}
271
272	va_end(arglist);
273
274}
275
276static int
277sd_filt1(proc_t p, void * args)
278{
279	proc_t self = current_proc();
280	struct sd_filterargs * sf = (struct sd_filterargs *)args;
281	int delayterm = sf-> delayterm;
282	int shutdownstate = sf->shutdownstate;
283
284	if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
285		||(p == self) || (p->p_stat == SZOMB)
286		|| (p->p_shutdownstate != shutdownstate)
287		||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))
288		|| ((p->p_sigcatch & sigmask(SIGTERM))== 0)) {
289			return(0);
290		}
291        else
292                return(1);
293}
294
295
296static int
297sd_callback1(proc_t p, void * args)
298{
299	struct sd_iterargs * sd = (struct sd_iterargs *)args;
300	int signo = sd->signo;
301	int setsdstate = sd->setsdstate;
302	int countproc = sd->countproc;
303
304	proc_lock(p);
305	p->p_shutdownstate = setsdstate;
306	if (p->p_stat != SZOMB) {
307		proc_unlock(p);
308		if (countproc != 0) {
309			proc_list_lock();
310			p->p_listflag |= P_LIST_EXITCOUNT;
311			proc_shutdown_exitcount++;
312			proc_list_unlock();
313		}
314
315		psignal(p, signo);
316		if (countproc !=  0)
317			sd->activecount++;
318	} else
319		proc_unlock(p);
320	return(PROC_RETURNED);
321}
322
323static int
324sd_filt2(proc_t p, void * args)
325{
326	proc_t self = current_proc();
327	struct sd_filterargs * sf = (struct sd_filterargs *)args;
328	int delayterm = sf-> delayterm;
329	int shutdownstate = sf->shutdownstate;
330
331	if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
332		||(p == self) || (p->p_stat == SZOMB)
333		|| (p->p_shutdownstate == shutdownstate)
334		||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) {
335			return(0);
336		}
337        else
338                return(1);
339}
340
341static int
342sd_callback2(proc_t p, void * args)
343{
344	struct sd_iterargs * sd = (struct sd_iterargs *)args;
345	int signo = sd->signo;
346	int setsdstate = sd->setsdstate;
347	int countproc = sd->countproc;
348
349	proc_lock(p);
350	p->p_shutdownstate = setsdstate;
351	if (p->p_stat != SZOMB) {
352		proc_unlock(p);
353		if (countproc !=  0) {
354			proc_list_lock();
355			p->p_listflag |= P_LIST_EXITCOUNT;
356			proc_shutdown_exitcount++;
357			proc_list_unlock();
358		}
359		psignal(p, signo);
360		if (countproc !=  0)
361			sd->activecount++;
362	} else
363		proc_unlock(p);
364
365	return(PROC_RETURNED);
366
367}
368
369static int
370sd_callback3(proc_t p, void * args)
371{
372	struct sd_iterargs * sd = (struct sd_iterargs *)args;
373	vfs_context_t ctx = vfs_context_current();
374
375	int setsdstate = sd->setsdstate;
376
377	proc_lock(p);
378	p->p_shutdownstate = setsdstate;
379	if (p->p_stat != SZOMB) {
380	       /*
381		* NOTE: following code ignores sig_lock and plays
382		* with exit_thread correctly.  This is OK unless we
383		* are a multiprocessor, in which case I do not
384		* understand the sig_lock.  This needs to be fixed.
385		* XXX
386		*/
387		if (p->exit_thread) {	/* someone already doing it */
388			proc_unlock(p);
389			/* give him a chance */
390			thread_block(THREAD_CONTINUE_NULL);
391		} else {
392			p->exit_thread = current_thread();
393			printf(".");
394
395			sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
396
397			proc_unlock(p);
398			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
399					      p->p_pid, 0, 1, 0, 0);
400			sd->activecount++;
401			exit1(p, 1, (int *)NULL);
402		}
403	} else
404		proc_unlock(p);
405
406	return(PROC_RETURNED);
407}
408
409
410/*
411 * proc_shutdown()
412 *
413 *	Shutdown down proc system (release references to current and root
414 *	dirs for each process).
415 *
416 * POSIX modifications:
417 *
418 *	For POSIX fcntl() file locking call vno_lockrelease() on
419 *	the file to release all of its record locks, if any.
420 */
421
422static void
423proc_shutdown(void)
424{
425	vfs_context_t ctx = vfs_context_current();
426	struct proc *p, *self;
427	int delayterm = 0;
428	struct sd_filterargs sfargs;
429	struct sd_iterargs sdargs;
430	int error = 0;
431	struct timespec ts;
432
433	/*
434	 *	Kill as many procs as we can.  (Except ourself...)
435	 */
436	self = (struct proc *)current_proc();
437
438	/*
439	 * Signal the init with SIGTERM so that he does not launch
440	 * new processes
441	 */
442	p = proc_find(1);
443	if (p && p != self) {
444		psignal(p, SIGTERM);
445	}
446	proc_rele(p);
447
448	printf("Killing all processes ");
449
450sigterm_loop:
451	/*
452	 * send SIGTERM to those procs interested in catching one
453	 */
454	sfargs.delayterm = delayterm;
455	sfargs.shutdownstate = 0;
456	sdargs.signo = SIGTERM;
457	sdargs.setsdstate = 1;
458	sdargs.countproc = 1;
459	sdargs.activecount = 0;
460
461	error = 0;
462	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
463	proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
464
465	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
466		proc_list_lock();
467		if (proc_shutdown_exitcount != 0) {
468			/*
469	 		* now wait for up to 30 seconds to allow those procs catching SIGTERM
470	 		* to digest it
471	 		* as soon as these procs have exited, we'll continue on to the next step
472	 		*/
473			ts.tv_sec = 30;
474			ts.tv_nsec = 0;
475			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
476			if (error != 0) {
477				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
478					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
479						p->p_listflag &= ~P_LIST_EXITCOUNT;
480				}
481				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
482					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
483						p->p_listflag &= ~P_LIST_EXITCOUNT;
484				}
485			}
486
487		}
488		proc_list_unlock();
489	}
490	if (error == ETIMEDOUT) {
491		/*
492		 * log the names of the unresponsive tasks
493		 */
494
495
496		proc_list_lock();
497
498		for (p = allproc.lh_first; p; p = p->p_list.le_next) {
499			if (p->p_shutdownstate == 1) {
500				printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
501				sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
502			}
503		}
504
505		proc_list_unlock();
506
507		delay_for_interval(1000 * 5, 1000 * 1000);
508	}
509
510	/*
511	 * send a SIGKILL to all the procs still hanging around
512	 */
513	sfargs.delayterm = delayterm;
514	sfargs.shutdownstate = 2;
515	sdargs.signo = SIGKILL;
516	sdargs.setsdstate = 2;
517	sdargs.countproc = 1;
518	sdargs.activecount = 0;
519
520	/* post a SIGKILL to all that catch SIGTERM and not marked for delay */
521	proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
522
523	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
524		proc_list_lock();
525		if (proc_shutdown_exitcount != 0) {
526			/*
527	 		* wait for up to 60 seconds to allow these procs to exit normally
528	 		*
529	 		* History:	The delay interval was changed from 100 to 200
530	 		*		for NFS requests in particular.
531	 		*/
532			ts.tv_sec = 60;
533			ts.tv_nsec = 0;
534			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
535			if (error != 0) {
536				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
537					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
538						p->p_listflag &= ~P_LIST_EXITCOUNT;
539				}
540				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
541					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
542						p->p_listflag &= ~P_LIST_EXITCOUNT;
543				}
544			}
545		}
546		proc_list_unlock();
547	}
548
549	/*
550	 * if we still have procs that haven't exited, then brute force 'em
551	 */
552	sfargs.delayterm = delayterm;
553	sfargs.shutdownstate = 3;
554	sdargs.signo = 0;
555	sdargs.setsdstate = 3;
556	sdargs.countproc = 0;
557	sdargs.activecount = 0;
558
559	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
560	proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
561	printf("\n");
562
563	/* Now start the termination of processes that are marked for delayed termn */
564	if (delayterm == 0) {
565		delayterm = 1;
566		goto  sigterm_loop;
567	}
568
569	sd_closelog(ctx);
570
571	/* drop the ref on initproc */
572	proc_rele(initproc);
573	printf("continuing\n");
574}
575
576