/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/thread.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *	Date:	1986
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 *
 */

#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/telemetry.h>
#if KPC
#include <kern/kpc.h>
#endif

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include <mach/sdt.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>

static struct zone		*thread_zone;
static lck_grp_attr_t	thread_lck_grp_attr;
lck_attr_t				thread_lck_attr;
lck_grp_t				thread_lck_grp;

decl_simple_lock_data(static,thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static,thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static struct thread	thread_template, init_thread;

static void		sched_call_null(
					int			type,
					thread_t	thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* MACH_BSD */

extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 0;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;
void init_thread_ledgers(void);
int task_disable_cpumon(task_t task);

/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void);

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1

void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
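	 *	New threads are initialized largely by structure assignment from
	 *	this template (see thread_create_internal() below), rather than
	 *	by setting every field individually.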
	 */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.wait_queue = WAIT_QUEUE_NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;

	thread_template.priority = 0;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.pending_promoter_index = 0;
	thread_template.pending_promoter[0] =
	thread_template.pending_promoter[1] = NULL;
	thread_template.rwlock_count = 0;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.current_quantum = 0;
	thread_template.last_run_time = 0;
	thread_template.last_quantum_refill_time = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TRADITIONAL)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = sched_call_null;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.special_handler.handler = special_handler;
	thread_template.special_handler.next = NULL;

	thread_template.funnel_lock = THR_FUNNEL_NULL;
	thread_template.funnel_state = 0;
	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPC
	thread_template.kpc_buf = NULL;
#endif

	thread_template.t_chud = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;

	thread_template.requested_policy = default_task_requested_policy;
	thread_template.effective_policy = default_task_effective_policy;
	thread_template.pended_policy = default_task_pended_policy;

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;

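	/*
	 * Wakeup accounting counters start at zero; the timer-wakeup bins
	 * are rolled up into the owning task by thread_terminate_daemon().
	 */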
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
		sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	init_thread_ledgers();
}

static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}

/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t		thread = current_thread();

	task_t			task;
	spl_t			s;
	int				threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel priority depression, wait for concurrent expirations
	 *	on other processors.
	 */
	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	thread_sched_call(thread, NULL);

	thread_unlock(thread);
	splx(s);

	thread_policy_reset(thread);


	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL)
		proc_exit(task->bsd_info);

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
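	 *	Final reaping is handled by thread_terminate_daemon() once this
	 *	thread has switched away for the last time.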
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);
	assert(thread->promotions == 0);
	assert(thread->rwlock_count == 0);
	thread_unlock(thread);
	/* splsched */

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}

void
thread_deallocate(
	thread_t			thread)
{
	task_t				task;

	if (thread == THREAD_NULL)
		return;

	if (thread_deallocate_internal(thread) > 0)
		return;

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

#if KPC
	kpc_thread_destroy(thread);
#endif


	ipc_thread_terminate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

	zfree(thread_zone, thread);
}

/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

	while ((thread = (thread_t)dequeue_head(&thread_terminate_queue)) != THREAD_NULL) {
		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 * If the task is being halted, and there is only one thread
		 * left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}

/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
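 *	The wakeup kicks thread_terminate_daemon(), which dequeues and
 *	reaps the thread.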
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, (queue_entry_t)thread);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}

/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	invoke failures.
 */
static void
thread_stack_daemon(void)
{
	thread_t		thread;
	spl_t			s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) {
		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
	/*NOTREACHED*/
}

/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, (queue_entry_t)thread);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}

void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t		thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);
}

/*
 *	Create a new thread.
 *	Doesn't start the thread running.
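 *	On success, returns with the task locked and the tasks_threads_lock
 *	held; callers are responsible for dropping both.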
 */
static kern_return_t
thread_create_internal(
	task_t					parent_task,
	integer_t				priority,
	thread_continue_t		continuation,
	int						options,
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02
	thread_t				*out_thread)
{
	thread_t				new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	if (	!parent_task->active || parent_task->halting ||
			((options & TH_OPTION_NOSUSP) != 0 &&
			 	parent_task->suspend_count > 0) ||
			(parent_task->thread_count >= task_threadmax &&
				parent_task != kernel_task)	) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}
	new_thread->t_threadledger = LEDGER_NULL;	/* per thread ledger is not inherited */
	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if CONFIG_COUNTERS
	/*
	 * If parent task has any reservations, they need to be propagated to this
	 * thread.
	 */
	new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ?
		THREAD_PMC_FLAG : 0U;
#endif
#if KPC
	kpc_thread_create(new_thread);
#endif

	/* Only need to update policies pushed from task to thread */
	new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier;
	new_thread->requested_policy.terminated = parent_task->effective_policy.terminated;

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->sched_flags = 0;
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;
	new_thread->priority = (priority < 0)? parent_task->priority: priority;
	if (new_thread->priority > new_thread->max_priority)
		new_thread->priority = new_thread->max_priority;
	new_thread->importance = new_thread->priority - new_thread->task_priority;
	new_thread->saved_importance = new_thread->importance;

#if defined(CONFIG_SCHED_TRADITIONAL)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shift;
#endif
	SCHED(compute_priority)(new_thread, FALSE);

	new_thread->active = TRUE;

	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
			(vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
			&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
			dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}

static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE);
}

static kern_return_t
thread_create_running_internal2(
	register task_t			task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread,
	boolean_t				from_user)
{
	register kern_return_t	result;
	thread_t				thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	result = machine_thread_set_state(
					thread, flavor, new_state, new_state_count);
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start_internal(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (result);
}

/* Prototype, see justification above */
kern_return_t
thread_create_running(
	register task_t			task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread);

kern_return_t
thread_create_running(
	register task_t			task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
		task, flavor, new_state, new_state_count,
		new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	register task_t			task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
		task, flavor, new_state, new_state_count,
		new_thread, TRUE);
}

kern_return_t
thread_create_workq(
	task_t				task,
	thread_continue_t	thread_return,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;
	task_t				task = kernel_task;

	result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
	thread->reserved_stack = thread->kernel_stack;

	thread->parameter = parameter;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}

kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t	result;
	thread_t		thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start_internal(thread);
	thread_mtx_unlock(thread);

	return (result);
}

kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void				*parameter,
	thread_t			*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}


kern_return_t
thread_info_internal(
	register thread_t		thread,
	thread_flavor_t			flavor,
	thread_info_t			thread_info_out,	/* ptr to OUT array */
	mach_msg_type_number_t	*thread_info_count)	/*IN/OUT*/
{
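	/*
	 * Dispatch on flavor below; each case validates *thread_info_count,
	 * then fills in the caller's buffer under the thread lock at splsched.
	 */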
	int						state, flags;
	spl_t					s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (flavor == THREAD_BASIC_INFO) {
		register thread_basic_info_t	basic_info;

		if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		basic_info = (thread_basic_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		/* fill in info */

		thread_read_times(thread, &basic_info->user_time,
									&basic_info->system_time);

		/*
		 *	Update lazy-evaluated scheduler info because someone wants it.
		 */
		if (SCHED(can_update_priority)(thread))
			SCHED(update_priority)(thread);

		basic_info->sleep_time = 0;

		/*
		 *	To calculate cpu_usage, first correct for timer rate,
		 *	then for 5/8 ageing.  The correction factor [3/5] is
		 *	(1/(5/8) - 1).
		 */
		basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TRADITIONAL)
		if (sched_tick_interval) {
			basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
									* TH_USAGE_SCALE) / sched_tick_interval);
			basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
		}
#endif

		if (basic_info->cpu_usage > TH_USAGE_SCALE)
			basic_info->cpu_usage = TH_USAGE_SCALE;

		basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
												POLICY_TIMESHARE: POLICY_RR);

		flags = 0;
		if (thread->options & TH_OPT_IDLE_THREAD)
			flags |= TH_FLAGS_IDLE;

		if (!thread->kernel_stack)
			flags |= TH_FLAGS_SWAPPED;

		state = 0;
		if (thread->state & TH_TERMINATE)
			state = TH_STATE_HALTED;
		else
		if (thread->state & TH_RUN)
			state = TH_STATE_RUNNING;
		else
		if (thread->state & TH_UNINT)
			state = TH_STATE_UNINTERRUPTIBLE;
		else
		if (thread->state & TH_SUSP)
			state = TH_STATE_STOPPED;
		else
		if (thread->state & TH_WAIT)
			state = TH_STATE_WAITING;

		basic_info->run_state = state;
		basic_info->flags = flags;

		basic_info->suspend_count = thread->user_stop_count;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_IDENTIFIER_INFO) {
		register thread_identifier_info_t	identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		identifier_info = (thread_identifier_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		if (thread->task->bsd_info) {
			identifier_info->dispatch_qaddr = identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
		} else {
			thread_unlock(thread);
			splx(s);
			return KERN_INVALID_ARGUMENT;
		}

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	}
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t		ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

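		/*
		 * While a depression is in effect, report DEPRESSPRI as the base
		 * priority and surface the undepressed priority separately.
		 */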
		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->priority;
		}
		else {
			ts_info->base_priority = thread->priority;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t			quantum_time;
		uint64_t			quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->priority;
		}
		else {
			rr_info->base_priority = thread->priority;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}

	return (KERN_INVALID_ARGUMENT);
}

void
thread_read_times(
	thread_t		thread,
	time_value_t	*user_time,
	time_value_t	*system_time)
{
	clock_sec_t		secs;
	clock_usec_t	usecs;
	uint64_t		tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}
}

kern_return_t
thread_assign(
	__unused thread_t			thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
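 *	Since thread_assign() is unsupported, this fails as well.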
 */
kern_return_t
thread_assign_default(
	thread_t		thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t		thread,
	processor_set_t	*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}

/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t		host_priv,
	thread_t		thread,
	boolean_t		wired,
	boolean_t		*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	}
	else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}


/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}


/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread, unsigned type)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace
	 */
	thread->guard_exc_info.type = type;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread->ast);

	splx(s);
}

/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread. This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception. Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t thread)
{
	if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
		mach_port_guard_ast(thread);
	else
		fd_guard_ast(thread);
}

static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor.  Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered.  We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
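	 * A warning value of 0 means the limit itself was crossed; in that
	 * case we also send EXC_RESOURCE below.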
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE();
	}
}

void __attribute__((noinline))
THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void)
{
	int				pid = 0;
	task_t			task = current_task();
	thread_t		thread = current_thread();
	uint64_t		tid = thread->thread_id;
	char			*procname = (char *) "unknown";
	time_value_t	thread_total_time = {0, 0};
	time_value_t	thread_system_time;
	time_value_t	thread_user_time;
	int				action;
	uint8_t			percentage;
	uint32_t		limit_percent;
	uint32_t		usage_percent;
	uint32_t		interval_sec;
	uint64_t		interval_ns;
	uint64_t		balance_ns;
	boolean_t		fatal = FALSE;

	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
	struct ledger_entry_info	lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Now that a thread has tripped the monitor, disable it for the entire task.
	 */
	task_lock(task);
	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
		/*
		 * The CPU usage monitor has been disabled on our task, so some other
		 * thread must have gotten here first. We only send one exception per
		 * task lifetime, so there's nothing left for us to do here.
		 */
		task_unlock(task);
		return;
	}
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
	}
	task_disable_cpumon(task);
	task_unlock(task);

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL)
		procname = proc_name_address(task->bsd_info);
#endif

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);

	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill);

	/* Show refill period in the same units as balance, limit, etc */
	nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period);

	limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period);

	/* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */

	if (disable_exc_resource) {
		printf("process %s[%d] thread %llu caught burning CPU! EXC_RESOURCE "
			"suppressed by a boot-arg\n", procname, pid, tid);
		return;
	}

	if (audio_active) {
		printf("process %s[%d] thread %llu caught burning CPU! EXC_RESOURCE "
			"suppressed due to audio playback\n", procname, pid, tid);
		return;
	}
	printf("process %s[%d] thread %llu caught burning CPU! "
		"It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. "
" 1595 "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) " 1596 "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) " 1597 "period: %llu time since last refill (ns): %llu \n", 1598 procname, pid, tid, 1599 percentage, usage_percent, interval_sec, 1600 thread_total_time.seconds, thread_total_time.microseconds, 1601 thread_user_time.seconds, thread_user_time.microseconds, 1602 thread_system_time.seconds, thread_system_time.microseconds, 1603 lei.lei_balance, 1604 lei.lei_credit, lei.lei_debit, 1605 lei.lei_limit, limit_percent, 1606 lei.lei_refill_period, lei.lei_last_refill); 1607 1608 1609 code[0] = code[1] = 0; 1610 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU); 1611 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR); 1612 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec); 1613 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent); 1614 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent); 1615 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); 1616 1617 if (fatal) { 1618 task_terminate_internal(task); 1619 } 1620} 1621 1622void 1623init_thread_ledgers(void) { 1624 ledger_template_t t; 1625 int idx; 1626 1627 assert(thread_ledger_template == NULL); 1628 1629 if ((t = ledger_template_create("Per-thread ledger")) == NULL) 1630 panic("couldn't create thread ledger template"); 1631 1632 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) { 1633 panic("couldn't create cpu_time entry for thread ledger template"); 1634 } 1635 1636 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) { 1637 panic("couldn't set thread ledger callback for cpu_time entry"); 1638 } 1639 1640 thread_ledgers.cpu_time = idx; 1641 thread_ledger_template = t; 1642} 1643 1644/* 1645 * Returns currently applied CPU usage limit, or 0/0 if none is applied. 1646 */ 1647int 1648thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns) 1649{ 1650 int64_t abstime = 0; 1651 uint64_t limittime = 0; 1652 thread_t thread = current_thread(); 1653 1654 *percentage = 0; 1655 *interval_ns = 0; 1656 *action = 0; 1657 1658 if (thread->t_threadledger == LEDGER_NULL) { 1659 /* 1660 * This thread has no per-thread ledger, so it can't possibly 1661 * have a CPU limit applied. 1662 */ 1663 return (KERN_SUCCESS); 1664 } 1665 1666 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns); 1667 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime); 1668 1669 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) { 1670 /* 1671 * This thread's CPU time ledger has no period or limit; so it 1672 * doesn't have a CPU limit applied. 1673 */ 1674 return (KERN_SUCCESS); 1675 } 1676 1677 /* 1678 * This calculation is the converse to the one in thread_set_cpulimit(). 1679 */ 1680 absolutetime_to_nanoseconds(abstime, &limittime); 1681 *percentage = (limittime * 100ULL) / *interval_ns; 1682 assert(*percentage <= 100); 1683 1684 if (thread->options & TH_OPT_PROC_CPULIMIT) { 1685 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0); 1686 1687 *action = THREAD_CPULIMIT_BLOCK; 1688 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) { 1689 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0); 1690 1691 *action = THREAD_CPULIMIT_EXCEPTION; 1692 } else { 1693 *action = THREAD_CPULIMIT_DISABLE; 1694 } 1695 1696 return (KERN_SUCCESS); 1697} 1698 1699/* 1700 * Set CPU usage limit on a thread. 
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t	thread = current_thread();
	ledger_t	l;
	uint64_t	limittime = 0;
	uint64_t	abstime = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			/*
			 * The only way to get a per-thread ledger is via CPU limits.
			 */
			assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT));
			thread->t_threadledger = NULL;
			ledger_dereference(l);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor. We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
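		 * The two modes are mutually exclusive per thread; a later
		 * THREAD_CPULIMIT_EXCEPTION request would hit the panic above.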
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}

int				split_funnel_off = 0;
lck_grp_t		*funnel_lck_grp = LCK_GRP_NULL;
lck_grp_attr_t	*funnel_lck_grp_attr;
lck_attr_t		*funnel_lck_attr;

funnel_t *
funnel_alloc(
	int type)
{
	lck_mtx_t	*m;
	funnel_t	*fnl;

	if (funnel_lck_grp == LCK_GRP_NULL) {
		funnel_lck_grp_attr = lck_grp_attr_alloc_init();

		funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr);

		funnel_lck_attr = lck_attr_alloc_init();
	}
	if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0) {
		bzero((void *)fnl, sizeof(funnel_t));
		if ((m = lck_mtx_alloc_init(funnel_lck_grp, funnel_lck_attr)) == (lck_mtx_t *)NULL) {
			kfree(fnl, sizeof(funnel_t));
			return(THR_FUNNEL_NULL);
		}
		fnl->fnl_mutex = m;
		fnl->fnl_type = type;
	}
	return(fnl);
}

void
funnel_free(
	funnel_t * fnl)
{
	lck_mtx_free(fnl->fnl_mutex, funnel_lck_grp);
	if (fnl->fnl_oldmutex)
		lck_mtx_free(fnl->fnl_oldmutex, funnel_lck_grp);
	kfree(fnl, sizeof(funnel_t));
}

void
funnel_lock(
	funnel_t * fnl)
{
	lck_mtx_lock(fnl->fnl_mutex);
	fnl->fnl_mtxholder = current_thread();
}

void
funnel_unlock(
	funnel_t * fnl)
{
	lck_mtx_unlock(fnl->fnl_mutex);
	fnl->fnl_mtxholder = NULL;
	fnl->fnl_mtxrelease = current_thread();
}

funnel_t *
thread_funnel_get(
	void)
{
	thread_t th = current_thread();

	if (th->funnel_state & TH_FN_OWNED) {
		return(th->funnel_lock);
	}
	return(THR_FUNNEL_NULL);
}

boolean_t
thread_funnel_set(
	funnel_t *	fnl,
	boolean_t	funneled)
{
	thread_t	cur_thread;
	boolean_t	funnel_state_prev;
	boolean_t	intr;

	cur_thread = current_thread();
	funnel_state_prev = ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED);

	if (funnel_state_prev != funneled) {
		intr = ml_set_interrupts_enabled(FALSE);

		if (funneled == TRUE) {
			if (cur_thread->funnel_lock)
				panic("Funnel lock called when holding one %p", cur_thread->funnel_lock);
			KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,
										fnl, 1, 0, 0, 0);
			funnel_lock(fnl);
			KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE,
										fnl, 1, 0, 0, 0);
			cur_thread->funnel_state |= TH_FN_OWNED;
			cur_thread->funnel_lock = fnl;
		} else {
			if (cur_thread->funnel_lock->fnl_mutex != fnl->fnl_mutex)
				panic("Funnel unlock when not holding funnel");
			cur_thread->funnel_state &= ~TH_FN_OWNED;
			KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,
										fnl, 1, 0, 0, 0);

			cur_thread->funnel_lock = THR_FUNNEL_NULL;
			funnel_unlock(fnl);
		}
		(void)ml_set_interrupts_enabled(intr);
	} else {
		/* if we are trying to acquire funnel recursively
		 * check for funnel to be held already
		 */
		if (funneled && (fnl->fnl_mutex != cur_thread->funnel_lock->fnl_mutex)) {
			panic("thread_funnel_set: already holding a different funnel");
		}
	}
	return(funnel_state_prev);
}

static void
sched_call_null(
__unused	int			type,
__unused	thread_t	thread)
{
	return;
}

void
thread_sched_call(
	thread_t		thread,
	sched_call_t	call)
{
	thread->sched_call = (call != NULL)? call: sched_call_null;
}

void
thread_static_param(
	thread_t		thread,
	boolean_t		state)
{
	thread_mtx_lock(thread);
	thread->static_param = state;
	thread_mtx_unlock(thread);
}

uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL? thread->thread_id: 0);
}

uint16_t
thread_set_tag(thread_t th, uint16_t tag)
{
	return thread_set_tag_internal(th, tag);
}

uint16_t
thread_get_tag(thread_t th)
{
	return thread_get_tag_internal(th);
}

uint64_t
thread_dispatchqaddr(
	thread_t		thread)
{
	uint64_t	dispatchqueue_addr = 0;
	uint64_t	thread_handle = 0;

	if (thread != THREAD_NULL) {
		thread_handle = thread->machine.cthread_self;

		if (thread->task->bsd_info)
			dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	}

	return (dispatchqueue_addr);
}

/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t		th)
{
	return (thread_should_halt_fast(th));
}

#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}

boolean_t dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return 0;
}

vm_offset_t dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}

int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t	processor = current_processor();
		uint64_t	abstime = mach_absolute_time();
		timer_t		timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
				(abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}

void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}

void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */