1/* 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * File: bsd/kern/kern_shutdown.c 30 * 31 * Copyright (C) 1989, NeXT, Inc. 32 * 33 */ 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/vm.h> 39#include <sys/proc_internal.h> 40#include <sys/user.h> 41#include <sys/reboot.h> 42#include <sys/conf.h> 43#include <sys/vnode_internal.h> 44#include <sys/file_internal.h> 45#include <sys/clist.h> 46#include <sys/callout.h> 47#include <sys/mbuf.h> 48#include <sys/msgbuf.h> 49#include <sys/ioctl.h> 50#include <sys/signal.h> 51#include <sys/tty.h> 52#include <kern/task.h> 53#include <sys/quota.h> 54#include <vm/vm_kern.h> 55#include <mach/vm_param.h> 56#include <sys/filedesc.h> 57#include <mach/host_priv.h> 58#include <mach/host_reboot.h> 59 60#include <security/audit/audit.h> 61 62#include <kern/sched_prim.h> /* for thread_block() */ 63#include <kern/host.h> /* for host_priv_self() */ 64#include <net/if_var.h> /* for if_down_all() */ 65#include <sys/buf_internal.h> /* for count_busy_buffers() */ 66#include <sys/mount_internal.h> /* for vfs_unmountall() */ 67#include <mach/task.h> /* for task_suspend() */ 68#include <sys/sysproto.h> /* abused for sync() */ 69#include <kern/clock.h> /* for delay_for_interval() */ 70#include <libkern/OSAtomic.h> 71 72#include <sys/kdebug.h> 73 74uint32_t system_inshutdown = 0; 75 76/* XXX should be in a header file somewhere, but isn't */ 77extern void md_prepare_for_shutdown(int, int, char *); 78extern void (*unmountroot_pre_hook)(void); 79 80unsigned int proc_shutdown_exitcount = 0; 81 82static int sd_openlog(vfs_context_t); 83static int sd_closelog(vfs_context_t); 84static void sd_log(vfs_context_t, const char *, ...); 85static void proc_shutdown(void); 86 87extern void IOSystemShutdownNotification(void); 88 89struct sd_filterargs{ 90 int delayterm; 91 int shutdownstate; 92}; 93 94 95struct sd_iterargs { 96 int signo; /* the signal to be posted */ 97 int setsdstate; /* shutdown state to be set */ 98 int countproc; /* count processes on action */ 99 int activecount; /* number of processes on which action was done */ 100}; 101 102static vnode_t sd_logvp = NULLVP; 103static off_t sd_log_offset = 0; 104 105 106static int sd_filt1(proc_t, void *); 107static int sd_filt2(proc_t, void *); 108static int sd_callback1(proc_t p, void * arg); 109static int sd_callback2(proc_t p, void * arg); 110static int sd_callback3(proc_t p, void * arg); 111 112int 113boot(int paniced, int howto, char *command) 114{ 115 struct proc *p = current_proc(); /* XXX */ 116 int hostboot_option=0; 117 118 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) { 119 if ( (howto&RB_QUICK) == RB_QUICK) 120 goto force_reboot; 121 return (EBUSY); 122 } 123 /* 124 * Temporary hack to notify the power management root domain 125 * that the system will shut down. 126 */ 127 IOSystemShutdownNotification(); 128 129 md_prepare_for_shutdown(paniced, howto, command); 130 131 if ((howto&RB_QUICK)==RB_QUICK) { 132 printf("Quick reboot...\n"); 133 if ((howto&RB_NOSYNC)==0) { 134 sync(p, (void *)NULL, (int *)NULL); 135 } 136 } 137 else if ((howto&RB_NOSYNC)==0) { 138 int iter, nbusy; 139 140 printf("syncing disks... "); 141 142 /* 143 * Release vnodes held by texts before sync. 144 */ 145 146 /* handle live procs (deallocate their root and current directories). */ 147 proc_shutdown(); 148 149#if CONFIG_AUDIT 150 audit_shutdown(); 151#endif 152 153 if (unmountroot_pre_hook != NULL) 154 unmountroot_pre_hook(); 155 156 sync(p, (void *)NULL, (int *)NULL); 157 158 /* 159 * Now that all processes have been terminated and system is 160 * sync'ed up, suspend init 161 */ 162 163 if (initproc && p != initproc) 164 task_suspend(initproc->task); 165 166 if (kdebug_enable) 167 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace"); 168 169 /* 170 * Unmount filesystems 171 */ 172 vfs_unmountall(); 173 174 /* Wait for the buffer cache to clean remaining dirty buffers */ 175 for (iter = 0; iter < 100; iter++) { 176 nbusy = count_busy_buffers(); 177 if (nbusy == 0) 178 break; 179 printf("%d ", nbusy); 180 delay_for_interval( 1 * nbusy, 1000 * 1000); 181 } 182 if (nbusy) 183 printf("giving up\n"); 184 else 185 printf("done\n"); 186 } 187#if NETWORKING 188 /* 189 * Can't just use an splnet() here to disable the network 190 * because that will lock out softints which the disk 191 * drivers depend on to finish DMAs. 192 */ 193 if_down_all(); 194#endif /* NETWORKING */ 195 196force_reboot: 197 if (howto & RB_POWERDOWN) 198 hostboot_option = HOST_REBOOT_HALT; 199 if (howto & RB_HALT) 200 hostboot_option = HOST_REBOOT_HALT; 201 if (paniced == RB_PANIC) 202 hostboot_option = HOST_REBOOT_HALT; 203 204 if (howto & RB_UPSDELAY) { 205 hostboot_option = HOST_REBOOT_UPSDELAY; 206 } 207 208 host_reboot(host_priv_self(), hostboot_option); 209 /* 210 * should not be reached 211 */ 212 return (0); 213} 214 215static int 216sd_openlog(vfs_context_t ctx) 217{ 218 int error = 0; 219 struct timeval tv; 220 221 /* Open shutdown log */ 222 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) { 223 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error); 224 sd_logvp = NULLVP; 225 return error; 226 } 227 228 vnode_setsize(sd_logvp, (off_t)0, 0, ctx); 229 230 /* Write a little header */ 231 microtime(&tv); 232 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec); 233 234 return 0; 235} 236 237static int 238sd_closelog(vfs_context_t ctx) 239{ 240 int error = 0; 241 if (sd_logvp != NULLVP) { 242 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx); 243 error = vnode_close(sd_logvp, FWRITE, ctx); 244 } 245 246 return error; 247} 248 249static void 250sd_log(vfs_context_t ctx, const char *fmt, ...) 251{ 252 int resid, log_error, len; 253 char logbuf[100]; 254 va_list arglist; 255 256 /* If the log isn't open yet, open it */ 257 if (sd_logvp == NULLVP) { 258 if (sd_openlog(ctx) != 0) { 259 /* Couldn't open, we fail out */ 260 return; 261 } 262 } 263 264 va_start(arglist, fmt); 265 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist); 266 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset, 267 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx)); 268 if (log_error == EIO || log_error == 0) { 269 sd_log_offset += (len - resid); 270 } 271 272 va_end(arglist); 273 274} 275 276static int 277sd_filt1(proc_t p, void * args) 278{ 279 proc_t self = current_proc(); 280 struct sd_filterargs * sf = (struct sd_filterargs *)args; 281 int delayterm = sf-> delayterm; 282 int shutdownstate = sf->shutdownstate; 283 284 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0) 285 ||(p == self) || (p->p_stat == SZOMB) 286 || (p->p_shutdownstate != shutdownstate) 287 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM)) 288 || ((p->p_sigcatch & sigmask(SIGTERM))== 0)) { 289 return(0); 290 } 291 else 292 return(1); 293} 294 295 296static int 297sd_callback1(proc_t p, void * args) 298{ 299 struct sd_iterargs * sd = (struct sd_iterargs *)args; 300 int signo = sd->signo; 301 int setsdstate = sd->setsdstate; 302 int countproc = sd->countproc; 303 304 proc_lock(p); 305 p->p_shutdownstate = setsdstate; 306 if (p->p_stat != SZOMB) { 307 proc_unlock(p); 308 if (countproc != 0) { 309 proc_list_lock(); 310 p->p_listflag |= P_LIST_EXITCOUNT; 311 proc_shutdown_exitcount++; 312 proc_list_unlock(); 313 } 314 315 psignal(p, signo); 316 if (countproc != 0) 317 sd->activecount++; 318 } else 319 proc_unlock(p); 320 return(PROC_RETURNED); 321} 322 323static int 324sd_filt2(proc_t p, void * args) 325{ 326 proc_t self = current_proc(); 327 struct sd_filterargs * sf = (struct sd_filterargs *)args; 328 int delayterm = sf-> delayterm; 329 int shutdownstate = sf->shutdownstate; 330 331 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0) 332 ||(p == self) || (p->p_stat == SZOMB) 333 || (p->p_shutdownstate == shutdownstate) 334 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) { 335 return(0); 336 } 337 else 338 return(1); 339} 340 341static int 342sd_callback2(proc_t p, void * args) 343{ 344 struct sd_iterargs * sd = (struct sd_iterargs *)args; 345 int signo = sd->signo; 346 int setsdstate = sd->setsdstate; 347 int countproc = sd->countproc; 348 349 proc_lock(p); 350 p->p_shutdownstate = setsdstate; 351 if (p->p_stat != SZOMB) { 352 proc_unlock(p); 353 if (countproc != 0) { 354 proc_list_lock(); 355 p->p_listflag |= P_LIST_EXITCOUNT; 356 proc_shutdown_exitcount++; 357 proc_list_unlock(); 358 } 359 psignal(p, signo); 360 if (countproc != 0) 361 sd->activecount++; 362 } else 363 proc_unlock(p); 364 365 return(PROC_RETURNED); 366 367} 368 369static int 370sd_callback3(proc_t p, void * args) 371{ 372 struct sd_iterargs * sd = (struct sd_iterargs *)args; 373 vfs_context_t ctx = vfs_context_current(); 374 375 int setsdstate = sd->setsdstate; 376 377 proc_lock(p); 378 p->p_shutdownstate = setsdstate; 379 if (p->p_stat != SZOMB) { 380 /* 381 * NOTE: following code ignores sig_lock and plays 382 * with exit_thread correctly. This is OK unless we 383 * are a multiprocessor, in which case I do not 384 * understand the sig_lock. This needs to be fixed. 385 * XXX 386 */ 387 if (p->exit_thread) { /* someone already doing it */ 388 proc_unlock(p); 389 /* give him a chance */ 390 thread_block(THREAD_CONTINUE_NULL); 391 } else { 392 p->exit_thread = current_thread(); 393 printf("."); 394 395 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid); 396 397 proc_unlock(p); 398 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, 399 p->p_pid, 0, 1, 0, 0); 400 sd->activecount++; 401 exit1(p, 1, (int *)NULL); 402 } 403 } else 404 proc_unlock(p); 405 406 return(PROC_RETURNED); 407} 408 409 410/* 411 * proc_shutdown() 412 * 413 * Shutdown down proc system (release references to current and root 414 * dirs for each process). 415 * 416 * POSIX modifications: 417 * 418 * For POSIX fcntl() file locking call vno_lockrelease() on 419 * the file to release all of its record locks, if any. 420 */ 421 422static void 423proc_shutdown(void) 424{ 425 vfs_context_t ctx = vfs_context_current(); 426 struct proc *p, *self; 427 int delayterm = 0; 428 struct sd_filterargs sfargs; 429 struct sd_iterargs sdargs; 430 int error = 0; 431 struct timespec ts; 432 433 /* 434 * Kill as many procs as we can. (Except ourself...) 435 */ 436 self = (struct proc *)current_proc(); 437 438 /* 439 * Signal the init with SIGTERM so that he does not launch 440 * new processes 441 */ 442 p = proc_find(1); 443 if (p && p != self) { 444 psignal(p, SIGTERM); 445 } 446 proc_rele(p); 447 448 printf("Killing all processes "); 449 450sigterm_loop: 451 /* 452 * send SIGTERM to those procs interested in catching one 453 */ 454 sfargs.delayterm = delayterm; 455 sfargs.shutdownstate = 0; 456 sdargs.signo = SIGTERM; 457 sdargs.setsdstate = 1; 458 sdargs.countproc = 1; 459 sdargs.activecount = 0; 460 461 error = 0; 462 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ 463 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs); 464 465 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) { 466 proc_list_lock(); 467 if (proc_shutdown_exitcount != 0) { 468 /* 469 * now wait for up to 30 seconds to allow those procs catching SIGTERM 470 * to digest it 471 * as soon as these procs have exited, we'll continue on to the next step 472 */ 473 ts.tv_sec = 30; 474 ts.tv_nsec = 0; 475 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts); 476 if (error != 0) { 477 for (p = allproc.lh_first; p; p = p->p_list.le_next) { 478 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) 479 p->p_listflag &= ~P_LIST_EXITCOUNT; 480 } 481 for (p = zombproc.lh_first; p; p = p->p_list.le_next) { 482 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) 483 p->p_listflag &= ~P_LIST_EXITCOUNT; 484 } 485 } 486 487 } 488 proc_list_unlock(); 489 } 490 if (error == ETIMEDOUT) { 491 /* 492 * log the names of the unresponsive tasks 493 */ 494 495 496 proc_list_lock(); 497 498 for (p = allproc.lh_first; p; p = p->p_list.le_next) { 499 if (p->p_shutdownstate == 1) { 500 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); 501 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); 502 } 503 } 504 505 proc_list_unlock(); 506 507 delay_for_interval(1000 * 5, 1000 * 1000); 508 } 509 510 /* 511 * send a SIGKILL to all the procs still hanging around 512 */ 513 sfargs.delayterm = delayterm; 514 sfargs.shutdownstate = 2; 515 sdargs.signo = SIGKILL; 516 sdargs.setsdstate = 2; 517 sdargs.countproc = 1; 518 sdargs.activecount = 0; 519 520 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */ 521 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs); 522 523 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) { 524 proc_list_lock(); 525 if (proc_shutdown_exitcount != 0) { 526 /* 527 * wait for up to 60 seconds to allow these procs to exit normally 528 * 529 * History: The delay interval was changed from 100 to 200 530 * for NFS requests in particular. 531 */ 532 ts.tv_sec = 60; 533 ts.tv_nsec = 0; 534 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts); 535 if (error != 0) { 536 for (p = allproc.lh_first; p; p = p->p_list.le_next) { 537 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) 538 p->p_listflag &= ~P_LIST_EXITCOUNT; 539 } 540 for (p = zombproc.lh_first; p; p = p->p_list.le_next) { 541 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) 542 p->p_listflag &= ~P_LIST_EXITCOUNT; 543 } 544 } 545 } 546 proc_list_unlock(); 547 } 548 549 /* 550 * if we still have procs that haven't exited, then brute force 'em 551 */ 552 sfargs.delayterm = delayterm; 553 sfargs.shutdownstate = 3; 554 sdargs.signo = 0; 555 sdargs.setsdstate = 3; 556 sdargs.countproc = 0; 557 sdargs.activecount = 0; 558 559 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ 560 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs); 561 printf("\n"); 562 563 /* Now start the termination of processes that are marked for delayed termn */ 564 if (delayterm == 0) { 565 delayterm = 1; 566 goto sigterm_loop; 567 } 568 569 sd_closelog(ctx); 570 571 /* drop the ref on initproc */ 572 proc_rele(initproc); 573 printf("continuing\n"); 574} 575 576