/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	sched_prim.c
 *	Author:	Avadis Tevanian, Jr.
 *	Date:	1986
 *
 *	Scheduling primitives
 *
 */

#include <debug.h>

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <mach/policy.h>
#include <mach/sync_policy.h>
#include <mach/thread_act.h>

#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <machine/machlimits.h>

#include <kern/kern_types.h>
#include <kern/clock.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/macro_help.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/wait_queue.h>
#include <kern/ledger.h>
#include <kern/timer_queue.h>

#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#include <mach/sdt.h>

#include <sys/kdebug.h>

#include <kern/pms.h>

struct rt_queue	rt_runq;
#define RT_RUNQ		((processor_t)-1)
decl_simple_lock_data(static,rt_lock);

#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
static struct fairshare_queue	fs_runq;
#define FS_RUNQ		((processor_t)-2)
decl_simple_lock_data(static,fs_lock);
#endif

#define	DEFAULT_PREEMPTION_RATE		100		/* (1/s) */
int	default_preemption_rate = DEFAULT_PREEMPTION_RATE;

#define	DEFAULT_BG_PREEMPTION_RATE	400		/* (1/s) */
int	default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;

#define	MAX_UNSAFE_QUANTA		800
int	max_unsafe_quanta = MAX_UNSAFE_QUANTA;

#define	MAX_POLL_QUANTA			2
int	max_poll_quanta = MAX_POLL_QUANTA;

#define	SCHED_POLL_YIELD_SHIFT		4		/* 1/16 */
int	sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;

uint64_t	max_poll_computation;

uint64_t	max_unsafe_computation;
uint64_t	sched_safe_duration;

#if defined(CONFIG_SCHED_TRADITIONAL)

uint32_t	std_quantum;
uint32_t	min_std_quantum;
uint32_t	bg_quantum;

uint32_t	std_quantum_us;
uint32_t	bg_quantum_us;

#endif /* CONFIG_SCHED_TRADITIONAL */

uint32_t	thread_depress_time;
uint32_t	default_timeshare_computation;
uint32_t	default_timeshare_constraint;

uint32_t	max_rt_quantum;
uint32_t	min_rt_quantum;

#if defined(CONFIG_SCHED_TRADITIONAL)

unsigned	sched_tick;
uint32_t	sched_tick_interval;

uint32_t	sched_pri_shift = INT8_MAX;
uint32_t	sched_background_pri_shift = INT8_MAX;
uint32_t	sched_combined_fgbg_pri_shift = INT8_MAX;
uint32_t	sched_fixed_shift;
uint32_t	sched_use_combined_fgbg_decay = 0;

uint32_t	sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */

static boolean_t sched_traditional_use_pset_runqueue = FALSE;

/* Defaults for timer deadline profiling */
#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT	2000000	/* Timers with deadlines <=
							 * 2ms */
#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT	5000000	/* Timers with deadlines
							   <= 5ms */

uint64_t timer_deadline_tracking_bin_1;
uint64_t timer_deadline_tracking_bin_2;

thread_t sched_maintenance_thread;

__attribute__((always_inline))
static inline run_queue_t runq_for_processor(processor_t processor)
{
	if
(sched_traditional_use_pset_runqueue) 189 return &processor->processor_set->pset_runq; 190 else 191 return &processor->runq; 192} 193 194__attribute__((always_inline)) 195static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread) 196{ 197 if (thread->bound_processor == PROCESSOR_NULL) 198 return; 199 200 assert(thread->bound_processor == processor); 201 202 if (sched_traditional_use_pset_runqueue) 203 processor->processor_set->pset_runq_bound_count++; 204 205 processor->runq_bound_count++; 206} 207 208__attribute__((always_inline)) 209static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread) 210{ 211 if (thread->bound_processor == PROCESSOR_NULL) 212 return; 213 214 assert(thread->bound_processor == processor); 215 216 if (sched_traditional_use_pset_runqueue) 217 processor->processor_set->pset_runq_bound_count--; 218 219 processor->runq_bound_count--; 220} 221 222#endif /* CONFIG_SCHED_TRADITIONAL */ 223 224uint64_t sched_one_second_interval; 225 226uint32_t sched_run_count, sched_share_count, sched_background_count; 227uint32_t sched_load_average, sched_mach_factor; 228 229/* Forwards */ 230 231#if defined(CONFIG_SCHED_TRADITIONAL) 232 233static void load_shift_init(void); 234static void preempt_pri_init(void); 235 236#endif /* CONFIG_SCHED_TRADITIONAL */ 237 238static thread_t thread_select( 239 thread_t thread, 240 processor_t processor); 241 242#if CONFIG_SCHED_IDLE_IN_PLACE 243static thread_t thread_select_idle( 244 thread_t thread, 245 processor_t processor); 246#endif 247 248thread_t processor_idle( 249 thread_t thread, 250 processor_t processor); 251 252ast_t 253csw_check_locked( processor_t processor, 254 processor_set_t pset); 255 256#if defined(CONFIG_SCHED_TRADITIONAL) 257 258static thread_t steal_thread( 259 processor_set_t pset); 260 261static thread_t steal_thread_disabled( 262 processor_set_t pset) __attribute__((unused)); 263 264 265static thread_t steal_processor_thread( 266 processor_t processor); 267 268static void thread_update_scan(void); 269 270static void processor_setrun( 271 processor_t processor, 272 thread_t thread, 273 integer_t options); 274 275static boolean_t 276processor_enqueue( 277 processor_t processor, 278 thread_t thread, 279 integer_t options); 280 281static boolean_t 282processor_queue_remove( 283 processor_t processor, 284 thread_t thread); 285 286static boolean_t processor_queue_empty(processor_t processor); 287 288static boolean_t priority_is_urgent(int priority); 289 290static ast_t processor_csw_check(processor_t processor); 291 292static boolean_t processor_queue_has_priority(processor_t processor, 293 int priority, 294 boolean_t gte); 295 296static boolean_t should_current_thread_rechoose_processor(processor_t processor); 297 298static int sched_traditional_processor_runq_count(processor_t processor); 299 300static boolean_t sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor); 301 302static uint64_t sched_traditional_processor_runq_stats_count_sum(processor_t processor); 303 304static uint64_t sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor); 305#endif 306 307 308#if defined(CONFIG_SCHED_TRADITIONAL) 309 310static void 311sched_traditional_init(void); 312 313static void 314sched_traditional_timebase_init(void); 315 316static void 317sched_traditional_processor_init(processor_t processor); 318 319static void 320sched_traditional_pset_init(processor_set_t pset); 321 322static void 
323sched_traditional_with_pset_runqueue_init(void); 324 325#endif 326 327static void 328sched_realtime_init(void); 329 330static void 331sched_realtime_timebase_init(void); 332 333static void 334sched_timer_deadline_tracking_init(void); 335 336#if defined(CONFIG_SCHED_TRADITIONAL) 337static void 338sched_traditional_maintenance_continue(void); 339 340static uint32_t 341sched_traditional_initial_quantum_size(thread_t thread); 342 343static sched_mode_t 344sched_traditional_initial_thread_sched_mode(task_t parent_task); 345 346static boolean_t 347sched_traditional_supports_timeshare_mode(void); 348 349static thread_t 350sched_traditional_choose_thread( 351 processor_t processor, 352 int priority); 353 354#endif 355 356#if DEBUG 357extern int debug_task; 358#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args) 359#else 360#define TLOG(a, fmt, args...) do {} while (0) 361#endif 362 363#if DEBUG 364static 365boolean_t thread_runnable( 366 thread_t thread); 367 368#endif /*DEBUG*/ 369 370/* 371 * State machine 372 * 373 * states are combinations of: 374 * R running 375 * W waiting (or on wait queue) 376 * N non-interruptible 377 * O swapped out 378 * I being swapped in 379 * 380 * init action 381 * assert_wait thread_block clear_wait swapout swapin 382 * 383 * R RW, RWN R; setrun - - 384 * RN RWN RN; setrun - - 385 * 386 * RW W R - 387 * RWN WN RN - 388 * 389 * W R; setrun WO 390 * WN RN; setrun - 391 * 392 * RO - - R 393 * 394 */ 395 396#if defined(CONFIG_SCHED_TRADITIONAL) 397int8_t sched_load_shifts[NRQS]; 398int sched_preempt_pri[NRQBM]; 399#endif 400 401 402#if defined(CONFIG_SCHED_TRADITIONAL) 403 404const struct sched_dispatch_table sched_traditional_dispatch = { 405 sched_traditional_init, 406 sched_traditional_timebase_init, 407 sched_traditional_processor_init, 408 sched_traditional_pset_init, 409 sched_traditional_maintenance_continue, 410 sched_traditional_choose_thread, 411 steal_thread, 412 compute_priority, 413 choose_processor, 414 processor_enqueue, 415 processor_queue_shutdown, 416 processor_queue_remove, 417 processor_queue_empty, 418 priority_is_urgent, 419 processor_csw_check, 420 processor_queue_has_priority, 421 sched_traditional_initial_quantum_size, 422 sched_traditional_initial_thread_sched_mode, 423 sched_traditional_supports_timeshare_mode, 424 can_update_priority, 425 update_priority, 426 lightweight_update_priority, 427 sched_traditional_quantum_expire, 428 should_current_thread_rechoose_processor, 429 sched_traditional_processor_runq_count, 430 sched_traditional_processor_runq_stats_count_sum, 431 sched_traditional_fairshare_init, 432 sched_traditional_fairshare_runq_count, 433 sched_traditional_fairshare_runq_stats_count_sum, 434 sched_traditional_fairshare_enqueue, 435 sched_traditional_fairshare_dequeue, 436 sched_traditional_fairshare_queue_remove, 437 TRUE /* direct_dispatch_to_idle_processors */ 438}; 439 440const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = { 441 sched_traditional_with_pset_runqueue_init, 442 sched_traditional_timebase_init, 443 sched_traditional_processor_init, 444 sched_traditional_pset_init, 445 sched_traditional_maintenance_continue, 446 sched_traditional_choose_thread, 447 steal_thread, 448 compute_priority, 449 choose_processor, 450 processor_enqueue, 451 processor_queue_shutdown, 452 processor_queue_remove, 453 sched_traditional_with_pset_runqueue_processor_queue_empty, 454 priority_is_urgent, 455 processor_csw_check, 456 processor_queue_has_priority, 457 
sched_traditional_initial_quantum_size, 458 sched_traditional_initial_thread_sched_mode, 459 sched_traditional_supports_timeshare_mode, 460 can_update_priority, 461 update_priority, 462 lightweight_update_priority, 463 sched_traditional_quantum_expire, 464 should_current_thread_rechoose_processor, 465 sched_traditional_processor_runq_count, 466 sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum, 467 sched_traditional_fairshare_init, 468 sched_traditional_fairshare_runq_count, 469 sched_traditional_fairshare_runq_stats_count_sum, 470 sched_traditional_fairshare_enqueue, 471 sched_traditional_fairshare_dequeue, 472 sched_traditional_fairshare_queue_remove, 473 FALSE /* direct_dispatch_to_idle_processors */ 474}; 475 476#endif 477 478const struct sched_dispatch_table *sched_current_dispatch = NULL; 479 480/* 481 * Statically allocate a buffer to hold the longest possible 482 * scheduler description string, as currently implemented. 483 * bsd/kern/kern_sysctl.c has a corresponding definition in bsd/ 484 * to export to userspace via sysctl(3). If either version 485 * changes, update the other. 486 * 487 * Note that in addition to being an upper bound on the strings 488 * in the kernel, it's also an exact parameter to PE_get_default(), 489 * which interrogates the device tree on some platforms. That 490 * API requires the caller know the exact size of the device tree 491 * property, so we need both a legacy size (32) and the current size 492 * (48) to deal with old and new device trees. The device tree property 493 * is similarly padded to a fixed size so that the same kernel image 494 * can run on multiple devices with different schedulers configured 495 * in the device tree. 496 */ 497#define SCHED_STRING_MAX_LENGTH (48) 498 499char sched_string[SCHED_STRING_MAX_LENGTH]; 500static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown; 501 502/* Global flag which indicates whether Background Stepper Context is enabled */ 503static int cpu_throttle_enabled = 1; 504 505void 506sched_init(void) 507{ 508 char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' }; 509 510 /* Check for runtime selection of the scheduler algorithm */ 511 if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) { 512 /* If no boot-args override, look in device tree */ 513 if (!PE_get_default("kern.sched", sched_arg, 514 SCHED_STRING_MAX_LENGTH)) { 515 sched_arg[0] = '\0'; 516 } 517 } 518 519 if (strlen(sched_arg) > 0) { 520 if (0) { 521 /* Allow pattern below */ 522#if defined(CONFIG_SCHED_TRADITIONAL) 523 } else if (0 == strcmp(sched_arg, kSchedTraditionalString)) { 524 sched_current_dispatch = &sched_traditional_dispatch; 525 _sched_enum = sched_enum_traditional; 526 strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string)); 527 kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalString); 528 } else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) { 529 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; 530 _sched_enum = sched_enum_traditional_with_pset_runqueue; 531 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); 532 kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalWithPsetRunqueueString); 533#endif 534#if defined(CONFIG_SCHED_PROTO) 535 } else if (0 == strcmp(sched_arg, kSchedProtoString)) { 536 sched_current_dispatch = &sched_proto_dispatch; 537 _sched_enum = sched_enum_proto; 538 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); 539 
kprintf("Scheduler: Runtime selection of %s\n", kSchedProtoString); 540#endif 541#if defined(CONFIG_SCHED_GRRR) 542 } else if (0 == strcmp(sched_arg, kSchedGRRRString)) { 543 sched_current_dispatch = &sched_grrr_dispatch; 544 _sched_enum = sched_enum_grrr; 545 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); 546 kprintf("Scheduler: Runtime selection of %s\n", kSchedGRRRString); 547#endif 548#if defined(CONFIG_SCHED_FIXEDPRIORITY) 549 } else if (0 == strcmp(sched_arg, kSchedFixedPriorityString)) { 550 sched_current_dispatch = &sched_fixedpriority_dispatch; 551 _sched_enum = sched_enum_fixedpriority; 552 strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string)); 553 kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityString); 554 } else if (0 == strcmp(sched_arg, kSchedFixedPriorityWithPsetRunqueueString)) { 555 sched_current_dispatch = &sched_fixedpriority_with_pset_runqueue_dispatch; 556 _sched_enum = sched_enum_fixedpriority_with_pset_runqueue; 557 strlcpy(sched_string, kSchedFixedPriorityWithPsetRunqueueString, sizeof(sched_string)); 558 kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityWithPsetRunqueueString); 559#endif 560 } else { 561 panic("Unrecognized scheduler algorithm: %s", sched_arg); 562 } 563 } else { 564#if defined(CONFIG_SCHED_TRADITIONAL) 565 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; 566 _sched_enum = sched_enum_traditional_with_pset_runqueue; 567 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); 568 kprintf("Scheduler: Default of %s\n", kSchedTraditionalWithPsetRunqueueString); 569#elif defined(CONFIG_SCHED_PROTO) 570 sched_current_dispatch = &sched_proto_dispatch; 571 _sched_enum = sched_enum_proto; 572 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); 573 kprintf("Scheduler: Default of %s\n", kSchedProtoString); 574#elif defined(CONFIG_SCHED_GRRR) 575 sched_current_dispatch = &sched_grrr_dispatch; 576 _sched_enum = sched_enum_grrr; 577 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); 578 kprintf("Scheduler: Default of %s\n", kSchedGRRRString); 579#elif defined(CONFIG_SCHED_FIXEDPRIORITY) 580 sched_current_dispatch = &sched_fixedpriority_dispatch; 581 _sched_enum = sched_enum_fixedpriority; 582 strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string)); 583 kprintf("Scheduler: Default of %s\n", kSchedFixedPriorityString); 584#else 585#error No default scheduler implementation 586#endif 587 } 588 589 SCHED(init)(); 590 SCHED(fairshare_init)(); 591 sched_realtime_init(); 592 ast_init(); 593 sched_timer_deadline_tracking_init(); 594 595 SCHED(pset_init)(&pset0); 596 SCHED(processor_init)(master_processor); 597} 598 599void 600sched_timebase_init(void) 601{ 602 uint64_t abstime; 603 604 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime); 605 sched_one_second_interval = abstime; 606 607 SCHED(timebase_init)(); 608 sched_realtime_timebase_init(); 609} 610 611#if defined(CONFIG_SCHED_TRADITIONAL) 612 613static void 614sched_traditional_init(void) 615{ 616 /* 617 * Calculate the timeslicing quantum 618 * in us. 
	 */
	if (default_preemption_rate < 1)
		default_preemption_rate = DEFAULT_PREEMPTION_RATE;
	std_quantum_us = (1000 * 1000) / default_preemption_rate;

	printf("standard timeslicing quantum is %d us\n", std_quantum_us);

	if (default_bg_preemption_rate < 1)
		default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
	bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;

	printf("standard background quantum is %d us\n", bg_quantum_us);

	load_shift_init();
	preempt_pri_init();
	sched_tick = 0;
}

static void
sched_traditional_timebase_init(void)
{
	uint64_t	abstime;
	uint32_t	shift;

	/* standard timeslicing quantum */
	clock_interval_to_absolutetime_interval(
				std_quantum_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	std_quantum = (uint32_t)abstime;

	/* smallest remaining quantum (250 us) */
	clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	min_std_quantum = (uint32_t)abstime;

	/* quantum for background tasks */
	clock_interval_to_absolutetime_interval(
				bg_quantum_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	bg_quantum = (uint32_t)abstime;

	/* scheduler tick interval */
	clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
				NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	sched_tick_interval = (uint32_t)abstime;

	/*
	 * Compute conversion factor from usage to
	 * timesharing priorities with 5/8 ** n aging.
	 */
	abstime = (abstime * 5) / 3;
	for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
		abstime >>= 1;
	sched_fixed_shift = shift;

	max_unsafe_computation = max_unsafe_quanta * std_quantum;
	sched_safe_duration = 2 * max_unsafe_quanta * std_quantum;

	max_poll_computation = max_poll_quanta * std_quantum;
	thread_depress_time = 1 * std_quantum;
	default_timeshare_computation = std_quantum / 2;
	default_timeshare_constraint = std_quantum;

}

static void
sched_traditional_processor_init(processor_t processor)
{
	if (!sched_traditional_use_pset_runqueue) {
		run_queue_init(&processor->runq);
	}
	processor->runq_bound_count = 0;
}

static void
sched_traditional_pset_init(processor_set_t pset)
{
	if (sched_traditional_use_pset_runqueue) {
		run_queue_init(&pset->pset_runq);
	}
	pset->pset_runq_bound_count = 0;
}

static void
sched_traditional_with_pset_runqueue_init(void)
{
	sched_traditional_init();
	sched_traditional_use_pset_runqueue = TRUE;
}

#endif /* CONFIG_SCHED_TRADITIONAL */

#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
void
sched_traditional_fairshare_init(void)
{
	simple_lock_init(&fs_lock, 0);

	fs_runq.count = 0;
	queue_init(&fs_runq.queue);
}
#endif

static void
sched_realtime_init(void)
{
	simple_lock_init(&rt_lock, 0);

	rt_runq.count = 0;
	queue_init(&rt_runq.queue);
}

static void
sched_realtime_timebase_init(void)
{
	uint64_t abstime;

	/* smallest rt computation (50 us) */
	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	min_rt_quantum = (uint32_t)abstime;

	/* maximum rt computation (50 ms) */
	clock_interval_to_absolutetime_interval(
				50, 1000*NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	max_rt_quantum = (uint32_t)abstime;

}

#if defined(CONFIG_SCHED_TRADITIONAL)

/*
 * Set up values for timeshare
 * loading factors.
 */
static void
load_shift_init(void)
{
	int8_t		k, *p = sched_load_shifts;
	uint32_t	i, j;

	uint32_t	sched_decay_penalty = 1;

	if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) {
		kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty);
	}

	if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) {
		kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
	}

	if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) {
		kprintf("Overriding scheduler fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay);
	}

	if (sched_decay_penalty == 0) {
		/*
		 * There is no penalty for timeshare threads for using too much
		 * CPU, so set all load shifts to INT8_MIN. Even under high load,
		 * sched_pri_shift will be >INT8_MAX, and there will be no
		 * penalty applied to threads (nor will sched_usage be updated per
		 * thread).
		 */
		for (i = 0; i < NRQS; i++) {
			sched_load_shifts[i] = INT8_MIN;
		}

		return;
	}

	*p++ = INT8_MIN; *p++ = 0;

	/*
	 * For a given system load "i", the per-thread priority
	 * penalty per quantum of CPU usage is ~2^k priority
	 * levels. "sched_decay_penalty" can cause more
	 * array entries to be filled with smaller "k" values.
	 * With the default sched_decay_penalty of 1, loads 2-3 map to
	 * k = 1, loads 4-7 to k = 2, loads 8-15 to k = 3, and so on.
	 */
	for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
		for (j <<= 1; (i < j) && (i < NRQS); ++i)
			*p++ = k;
	}
}

static void
preempt_pri_init(void)
{
	int		i, *p = sched_preempt_pri;

	for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
		setbit(i, p);

	for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
		setbit(i, p);
}

#endif /* CONFIG_SCHED_TRADITIONAL */

/*
 * Thread wait timer expiration.
 */
void
thread_timer_expire(
	void			*p0,
	__unused void	*p1)
{
	thread_t		thread = p0;
	spl_t			s;

	s = splsched();
	thread_lock(thread);
	if (--thread->wait_timer_active == 0) {
		if (thread->wait_timer_is_set) {
			thread->wait_timer_is_set = FALSE;
			clear_wait_internal(thread, THREAD_TIMED_OUT);
		}
	}
	thread_unlock(thread);
	splx(s);
}

/*
 *	thread_unblock:
 *
 *	Unblock thread on wake up.
 *
 *	Returns TRUE if the thread is still running.
 *
 *	Thread must be locked.
 */
boolean_t
thread_unblock(
	thread_t		thread,
	wait_result_t	wresult)
{
	boolean_t		result = FALSE;
	thread_t		cthread = current_thread();

	/*
	 * Set wait_result.
	 */
	thread->wait_result = wresult;

	/*
	 * Cancel pending wait timer.
	 */
	if (thread->wait_timer_is_set) {
		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
		thread->wait_timer_is_set = FALSE;
	}

	/*
	 * Update scheduling state: not waiting,
	 * set running.
876 */ 877 thread->state &= ~(TH_WAIT|TH_UNINT); 878 879 if (!(thread->state & TH_RUN)) { 880 thread->state |= TH_RUN; 881 882 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread); 883 884 /* 885 * Update run counts. 886 */ 887 sched_run_incr(); 888 if (thread->sched_mode == TH_MODE_TIMESHARE) { 889 sched_share_incr(); 890 891 if (thread->max_priority <= MAXPRI_THROTTLE) 892 sched_background_incr(); 893 } 894 } 895 else { 896 /* 897 * Signal if idling on another processor. 898 */ 899#if CONFIG_SCHED_IDLE_IN_PLACE 900 if (thread->state & TH_IDLE) { 901 processor_t processor = thread->last_processor; 902 903 if (processor != current_processor()) 904 machine_signal_idle(processor); 905 } 906#else 907 assert((thread->state & TH_IDLE) == 0); 908#endif 909 910 result = TRUE; 911 } 912 913 /* 914 * Calculate deadline for real-time threads. 915 */ 916 if (thread->sched_mode == TH_MODE_REALTIME) { 917 thread->realtime.deadline = thread->realtime.constraint + mach_absolute_time(); 918 } 919 920 /* 921 * Clear old quantum, fail-safe computation, etc. 922 */ 923 thread->current_quantum = 0; 924 thread->computation_metered = 0; 925 thread->reason = AST_NONE; 926 927 /* Obtain power-relevant interrupt and "platform-idle exit" statistics. 928 * We also account for "double hop" thread signaling via 929 * the thread callout infrastructure. 930 * DRK: consider removing the callout wakeup counters in the future 931 * they're present for verification at the moment. 932 */ 933 boolean_t aticontext, pidle; 934 ml_get_power_state(&aticontext, &pidle); 935 936 if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) { 937 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); 938 DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); 939 940 uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd); 941 942 if (ttd) { 943 if (ttd <= timer_deadline_tracking_bin_1) 944 thread->thread_timer_wakeups_bin_1++; 945 else 946 if (ttd <= timer_deadline_tracking_bin_2) 947 thread->thread_timer_wakeups_bin_2++; 948 } 949 950 if (pidle) { 951 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); 952 } 953 954 } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) { 955 if (cthread->callout_woken_from_icontext) { 956 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); 957 thread->thread_callout_interrupt_wakeups++; 958 if (cthread->callout_woken_from_platform_idle) { 959 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); 960 thread->thread_callout_platform_idle_wakeups++; 961 } 962 963 cthread->callout_woke_thread = TRUE; 964 } 965 } 966 967 if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) { 968 thread->callout_woken_from_icontext = aticontext; 969 thread->callout_woken_from_platform_idle = pidle; 970 thread->callout_woke_thread = FALSE; 971 } 972 973 /* Event should only be triggered if thread is not already running */ 974 if (result == FALSE) { 975 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 976 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, 977 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, 0, 0); 978 } 979 980 DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); 981 982 return (result); 983} 984 985/* 986 * Routine: thread_go 987 * Purpose: 988 * Unblock and dispatch thread. 989 * Conditions: 990 * thread lock held, IPC locks may be held. 
 *		thread must have been pulled from wait queue under same lock hold.
 *	Returns:
 *		KERN_SUCCESS - Thread was set running
 *		KERN_NOT_WAITING - Thread was not waiting
 */
kern_return_t
thread_go(
	thread_t		thread,
	wait_result_t	wresult)
{
	assert(thread->at_safe_point == FALSE);
	assert(thread->wait_event == NO_EVENT64);
	assert(thread->wait_queue == WAIT_QUEUE_NULL);

	if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
		if (!thread_unblock(thread, wresult))
			thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);

		return (KERN_SUCCESS);
	}

	return (KERN_NOT_WAITING);
}

/*
 *	Routine:	thread_mark_wait_locked
 *	Purpose:
 *		Mark a thread as waiting.  If, given the circumstances,
 *		it doesn't want to wait (i.e. already aborted), then
 *		indicate that in the return value.
 *	Conditions:
 *		at splsched() and thread is locked.
 */
__private_extern__
wait_result_t
thread_mark_wait_locked(
	thread_t			thread,
	wait_interrupt_t	interruptible)
{
	boolean_t		at_safe_point;

	assert(thread == current_thread());

	/*
	 *	The thread may have certain types of interrupts/aborts masked
	 *	off.  Even if the wait location says these types of interrupts
	 *	are OK, we have to honor mask settings (outer-scoped code may
	 *	not be able to handle aborts at the moment).
	 */
	if (interruptible > (thread->options & TH_OPT_INTMASK))
		interruptible = thread->options & TH_OPT_INTMASK;

	at_safe_point = (interruptible == THREAD_ABORTSAFE);

	if (	interruptible == THREAD_UNINT			||
		!(thread->sched_flags & TH_SFLAG_ABORT)	||
		(!at_safe_point &&
		 (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {

		if ( !(thread->state & TH_TERMINATE))
			DTRACE_SCHED(sleep);

		thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
		thread->at_safe_point = at_safe_point;
		return (thread->wait_result = THREAD_WAITING);
	}
	else
	if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
		thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;

	return (thread->wait_result = THREAD_INTERRUPTED);
}

/*
 *	Routine:	thread_interrupt_level
 *	Purpose:
 *		Set the maximum interruptible state for the
 *		current thread.  The effective value of any
 *		interruptible flag passed into assert_wait
 *		will never exceed this.
 *
 *		Useful for code that must not be interrupted,
 *		but which calls code that doesn't know that.
 *	Returns:
 *		The old interrupt level for the thread.
 */
__private_extern__
wait_interrupt_t
thread_interrupt_level(
	wait_interrupt_t new_level)
{
	thread_t thread = current_thread();
	wait_interrupt_t result = thread->options & TH_OPT_INTMASK;

	thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);

	return result;
}

/*
 * Check to see if an assert wait is possible, without actually doing one.
 * This is used by debug code in locks and elsewhere to verify that it is
 * always OK to block when trying to take a blocking lock (since waiting
 * for the actual assert_wait to catch the case may make it hard to detect
 * this case).
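 *
 * A hypothetical caller in lock debugging code might simply do
 *	assert(assert_wait_possible());
 * before attempting to block on the lock.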
1096 */ 1097boolean_t 1098assert_wait_possible(void) 1099{ 1100 1101 thread_t thread; 1102 1103#if DEBUG 1104 if(debug_mode) return TRUE; /* Always succeed in debug mode */ 1105#endif 1106 1107 thread = current_thread(); 1108 1109 return (thread == NULL || wait_queue_assert_possible(thread)); 1110} 1111 1112/* 1113 * assert_wait: 1114 * 1115 * Assert that the current thread is about to go to 1116 * sleep until the specified event occurs. 1117 */ 1118wait_result_t 1119assert_wait( 1120 event_t event, 1121 wait_interrupt_t interruptible) 1122{ 1123 register wait_queue_t wq; 1124 register int index; 1125 1126 assert(event != NO_EVENT); 1127 1128 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1129 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1130 VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0); 1131 1132 index = wait_hash(event); 1133 wq = &wait_queues[index]; 1134 return wait_queue_assert_wait(wq, event, interruptible, 0); 1135} 1136 1137wait_result_t 1138assert_wait_timeout( 1139 event_t event, 1140 wait_interrupt_t interruptible, 1141 uint32_t interval, 1142 uint32_t scale_factor) 1143{ 1144 thread_t thread = current_thread(); 1145 wait_result_t wresult; 1146 wait_queue_t wqueue; 1147 uint64_t deadline; 1148 spl_t s; 1149 1150 assert(event != NO_EVENT); 1151 wqueue = &wait_queues[wait_hash(event)]; 1152 1153 s = splsched(); 1154 wait_queue_lock(wqueue); 1155 thread_lock(thread); 1156 1157 clock_interval_to_deadline(interval, scale_factor, &deadline); 1158 1159 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1160 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1161 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1162 1163 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), 1164 interruptible, 1165 TIMEOUT_URGENCY_SYS_NORMAL, 1166 deadline, 0, 1167 thread); 1168 1169 thread_unlock(thread); 1170 wait_queue_unlock(wqueue); 1171 splx(s); 1172 1173 return (wresult); 1174} 1175 1176wait_result_t 1177assert_wait_timeout_with_leeway( 1178 event_t event, 1179 wait_interrupt_t interruptible, 1180 wait_timeout_urgency_t urgency, 1181 uint32_t interval, 1182 uint32_t leeway, 1183 uint32_t scale_factor) 1184{ 1185 thread_t thread = current_thread(); 1186 wait_result_t wresult; 1187 wait_queue_t wqueue; 1188 uint64_t deadline; 1189 uint64_t abstime; 1190 uint64_t slop; 1191 uint64_t now; 1192 spl_t s; 1193 1194 now = mach_absolute_time(); 1195 clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); 1196 deadline = now + abstime; 1197 1198 clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop); 1199 1200 assert(event != NO_EVENT); 1201 wqueue = &wait_queues[wait_hash(event)]; 1202 1203 s = splsched(); 1204 wait_queue_lock(wqueue); 1205 thread_lock(thread); 1206 1207 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1208 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1209 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1210 1211 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), 1212 interruptible, 1213 urgency, deadline, slop, 1214 thread); 1215 1216 thread_unlock(thread); 1217 wait_queue_unlock(wqueue); 1218 splx(s); 1219 1220 return (wresult); 1221} 1222 1223wait_result_t 1224assert_wait_deadline( 1225 event_t event, 1226 wait_interrupt_t interruptible, 1227 uint64_t deadline) 1228{ 1229 thread_t thread = current_thread(); 1230 wait_result_t wresult; 1231 wait_queue_t wqueue; 1232 spl_t s; 1233 1234 assert(event != NO_EVENT); 1235 wqueue = &wait_queues[wait_hash(event)]; 1236 1237 s = splsched(); 1238 
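	/* Interrupts are now disabled; take the wait-queue and thread locks
	 * before asserting the wait against the caller-supplied deadline. */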
wait_queue_lock(wqueue); 1239 thread_lock(thread); 1240 1241 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1242 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1243 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1244 1245 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), 1246 interruptible, 1247 TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0, 1248 thread); 1249 1250 thread_unlock(thread); 1251 wait_queue_unlock(wqueue); 1252 splx(s); 1253 1254 return (wresult); 1255} 1256 1257wait_result_t 1258assert_wait_deadline_with_leeway( 1259 event_t event, 1260 wait_interrupt_t interruptible, 1261 wait_timeout_urgency_t urgency, 1262 uint64_t deadline, 1263 uint64_t leeway) 1264{ 1265 thread_t thread = current_thread(); 1266 wait_result_t wresult; 1267 wait_queue_t wqueue; 1268 spl_t s; 1269 1270 assert(event != NO_EVENT); 1271 wqueue = &wait_queues[wait_hash(event)]; 1272 1273 s = splsched(); 1274 wait_queue_lock(wqueue); 1275 thread_lock(thread); 1276 1277 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1278 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1279 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1280 1281 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), 1282 interruptible, 1283 urgency, deadline, leeway, 1284 thread); 1285 1286 thread_unlock(thread); 1287 wait_queue_unlock(wqueue); 1288 splx(s); 1289 1290 return (wresult); 1291} 1292 1293/* 1294 * thread_sleep_fast_usimple_lock: 1295 * 1296 * Cause the current thread to wait until the specified event 1297 * occurs. The specified simple_lock is unlocked before releasing 1298 * the cpu and re-acquired as part of waking up. 1299 * 1300 * This is the simple lock sleep interface for components that use a 1301 * faster version of simple_lock() than is provided by usimple_lock(). 1302 */ 1303__private_extern__ wait_result_t 1304thread_sleep_fast_usimple_lock( 1305 event_t event, 1306 simple_lock_t lock, 1307 wait_interrupt_t interruptible) 1308{ 1309 wait_result_t res; 1310 1311 res = assert_wait(event, interruptible); 1312 if (res == THREAD_WAITING) { 1313 simple_unlock(lock); 1314 res = thread_block(THREAD_CONTINUE_NULL); 1315 simple_lock(lock); 1316 } 1317 return res; 1318} 1319 1320 1321/* 1322 * thread_sleep_usimple_lock: 1323 * 1324 * Cause the current thread to wait until the specified event 1325 * occurs. The specified usimple_lock is unlocked before releasing 1326 * the cpu and re-acquired as part of waking up. 1327 * 1328 * This is the simple lock sleep interface for components where 1329 * simple_lock() is defined in terms of usimple_lock(). 1330 */ 1331wait_result_t 1332thread_sleep_usimple_lock( 1333 event_t event, 1334 usimple_lock_t lock, 1335 wait_interrupt_t interruptible) 1336{ 1337 wait_result_t res; 1338 1339 res = assert_wait(event, interruptible); 1340 if (res == THREAD_WAITING) { 1341 usimple_unlock(lock); 1342 res = thread_block(THREAD_CONTINUE_NULL); 1343 usimple_lock(lock); 1344 } 1345 return res; 1346} 1347 1348/* 1349 * thread_sleep_lock_write: 1350 * 1351 * Cause the current thread to wait until the specified event 1352 * occurs. The specified (write) lock is unlocked before releasing 1353 * the cpu. The (write) lock will be re-acquired before returning. 
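 *
 * A hypothetical caller holds the write lock on entry, e.g.:
 *
 *	lock_write(&lck);
 *	while (!condition)
 *		(void) thread_sleep_lock_write(&condition, &lck, THREAD_UNINT);
 *	lock_write_done(&lck);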
1354 */ 1355wait_result_t 1356thread_sleep_lock_write( 1357 event_t event, 1358 lock_t *lock, 1359 wait_interrupt_t interruptible) 1360{ 1361 wait_result_t res; 1362 1363 res = assert_wait(event, interruptible); 1364 if (res == THREAD_WAITING) { 1365 lock_write_done(lock); 1366 res = thread_block(THREAD_CONTINUE_NULL); 1367 lock_write(lock); 1368 } 1369 return res; 1370} 1371 1372/* 1373 * thread_isoncpu: 1374 * 1375 * Return TRUE if a thread is running on a processor such that an AST 1376 * is needed to pull it out of userspace execution, or if executing in 1377 * the kernel, bring to a context switch boundary that would cause 1378 * thread state to be serialized in the thread PCB. 1379 * 1380 * Thread locked, returns the same way. While locked, fields 1381 * like "state" and "runq" cannot change. 1382 */ 1383static inline boolean_t 1384thread_isoncpu(thread_t thread) 1385{ 1386 /* Not running or runnable */ 1387 if (!(thread->state & TH_RUN)) 1388 return (FALSE); 1389 1390 /* Waiting on a runqueue, not currently running */ 1391 if (thread->runq != PROCESSOR_NULL) 1392 return (FALSE); 1393 1394 /* 1395 * Thread must be running on a processor, or 1396 * about to run, or just did run. In all these 1397 * cases, an AST to the processor is needed 1398 * to guarantee that the thread is kicked out 1399 * of userspace and the processor has 1400 * context switched (and saved register state). 1401 */ 1402 return (TRUE); 1403} 1404 1405/* 1406 * thread_stop: 1407 * 1408 * Force a preemption point for a thread and wait 1409 * for it to stop running on a CPU. If a stronger 1410 * guarantee is requested, wait until no longer 1411 * runnable. Arbitrates access among 1412 * multiple stop requests. (released by unstop) 1413 * 1414 * The thread must enter a wait state and stop via a 1415 * separate means. 1416 * 1417 * Returns FALSE if interrupted. 1418 */ 1419boolean_t 1420thread_stop( 1421 thread_t thread, 1422 boolean_t until_not_runnable) 1423{ 1424 wait_result_t wresult; 1425 spl_t s = splsched(); 1426 boolean_t oncpu; 1427 1428 wake_lock(thread); 1429 thread_lock(thread); 1430 1431 while (thread->state & TH_SUSP) { 1432 thread->wake_active = TRUE; 1433 thread_unlock(thread); 1434 1435 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); 1436 wake_unlock(thread); 1437 splx(s); 1438 1439 if (wresult == THREAD_WAITING) 1440 wresult = thread_block(THREAD_CONTINUE_NULL); 1441 1442 if (wresult != THREAD_AWAKENED) 1443 return (FALSE); 1444 1445 s = splsched(); 1446 wake_lock(thread); 1447 thread_lock(thread); 1448 } 1449 1450 thread->state |= TH_SUSP; 1451 1452 while ((oncpu = thread_isoncpu(thread)) || 1453 (until_not_runnable && (thread->state & TH_RUN))) { 1454 processor_t processor; 1455 1456 if (oncpu) { 1457 assert(thread->state & TH_RUN); 1458 processor = thread->chosen_processor; 1459 cause_ast_check(processor); 1460 } 1461 1462 thread->wake_active = TRUE; 1463 thread_unlock(thread); 1464 1465 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); 1466 wake_unlock(thread); 1467 splx(s); 1468 1469 if (wresult == THREAD_WAITING) 1470 wresult = thread_block(THREAD_CONTINUE_NULL); 1471 1472 if (wresult != THREAD_AWAKENED) { 1473 thread_unstop(thread); 1474 return (FALSE); 1475 } 1476 1477 s = splsched(); 1478 wake_lock(thread); 1479 thread_lock(thread); 1480 } 1481 1482 thread_unlock(thread); 1483 wake_unlock(thread); 1484 splx(s); 1485 1486 /* 1487 * We return with the thread unlocked. 
To prevent it from 1488 * transitioning to a runnable state (or from TH_RUN to 1489 * being on the CPU), the caller must ensure the thread 1490 * is stopped via an external means (such as an AST) 1491 */ 1492 1493 return (TRUE); 1494} 1495 1496/* 1497 * thread_unstop: 1498 * 1499 * Release a previous stop request and set 1500 * the thread running if appropriate. 1501 * 1502 * Use only after a successful stop operation. 1503 */ 1504void 1505thread_unstop( 1506 thread_t thread) 1507{ 1508 spl_t s = splsched(); 1509 1510 wake_lock(thread); 1511 thread_lock(thread); 1512 1513 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) { 1514 thread->state &= ~TH_SUSP; 1515 thread_unblock(thread, THREAD_AWAKENED); 1516 1517 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 1518 } 1519 else 1520 if (thread->state & TH_SUSP) { 1521 thread->state &= ~TH_SUSP; 1522 1523 if (thread->wake_active) { 1524 thread->wake_active = FALSE; 1525 thread_unlock(thread); 1526 1527 thread_wakeup(&thread->wake_active); 1528 wake_unlock(thread); 1529 splx(s); 1530 1531 return; 1532 } 1533 } 1534 1535 thread_unlock(thread); 1536 wake_unlock(thread); 1537 splx(s); 1538} 1539 1540/* 1541 * thread_wait: 1542 * 1543 * Wait for a thread to stop running. (non-interruptible) 1544 * 1545 */ 1546void 1547thread_wait( 1548 thread_t thread, 1549 boolean_t until_not_runnable) 1550{ 1551 wait_result_t wresult; 1552 boolean_t oncpu; 1553 processor_t processor; 1554 spl_t s = splsched(); 1555 1556 wake_lock(thread); 1557 thread_lock(thread); 1558 1559 /* 1560 * Wait until not running on a CPU. If stronger requirement 1561 * desired, wait until not runnable. Assumption: if thread is 1562 * on CPU, then TH_RUN is set, so we're not waiting in any case 1563 * where the original, pure "TH_RUN" check would have let us 1564 * finish. 1565 */ 1566 while ((oncpu = thread_isoncpu(thread)) || 1567 (until_not_runnable && (thread->state & TH_RUN))) { 1568 1569 if (oncpu) { 1570 assert(thread->state & TH_RUN); 1571 processor = thread->chosen_processor; 1572 cause_ast_check(processor); 1573 } 1574 1575 thread->wake_active = TRUE; 1576 thread_unlock(thread); 1577 1578 wresult = assert_wait(&thread->wake_active, THREAD_UNINT); 1579 wake_unlock(thread); 1580 splx(s); 1581 1582 if (wresult == THREAD_WAITING) 1583 thread_block(THREAD_CONTINUE_NULL); 1584 1585 s = splsched(); 1586 wake_lock(thread); 1587 thread_lock(thread); 1588 } 1589 1590 thread_unlock(thread); 1591 wake_unlock(thread); 1592 splx(s); 1593} 1594 1595/* 1596 * Routine: clear_wait_internal 1597 * 1598 * Clear the wait condition for the specified thread. 1599 * Start the thread executing if that is appropriate. 1600 * Arguments: 1601 * thread thread to awaken 1602 * result Wakeup result the thread should see 1603 * Conditions: 1604 * At splsched 1605 * the thread is locked. 
1606 * Returns: 1607 * KERN_SUCCESS thread was rousted out a wait 1608 * KERN_FAILURE thread was waiting but could not be rousted 1609 * KERN_NOT_WAITING thread was not waiting 1610 */ 1611__private_extern__ kern_return_t 1612clear_wait_internal( 1613 thread_t thread, 1614 wait_result_t wresult) 1615{ 1616 wait_queue_t wq = thread->wait_queue; 1617 uint32_t i = LockTimeOut; 1618 1619 do { 1620 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT)) 1621 return (KERN_FAILURE); 1622 1623 if (wq != WAIT_QUEUE_NULL) { 1624 if (wait_queue_lock_try(wq)) { 1625 wait_queue_pull_thread_locked(wq, thread, TRUE); 1626 /* wait queue unlocked, thread still locked */ 1627 } 1628 else { 1629 thread_unlock(thread); 1630 delay(1); 1631 1632 thread_lock(thread); 1633 if (wq != thread->wait_queue) 1634 return (KERN_NOT_WAITING); 1635 1636 continue; 1637 } 1638 } 1639 1640 return (thread_go(thread, wresult)); 1641 } while ((--i > 0) || machine_timeout_suspended()); 1642 1643 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n", 1644 thread, wq, cpu_number()); 1645 1646 return (KERN_FAILURE); 1647} 1648 1649 1650/* 1651 * clear_wait: 1652 * 1653 * Clear the wait condition for the specified thread. Start the thread 1654 * executing if that is appropriate. 1655 * 1656 * parameters: 1657 * thread thread to awaken 1658 * result Wakeup result the thread should see 1659 */ 1660kern_return_t 1661clear_wait( 1662 thread_t thread, 1663 wait_result_t result) 1664{ 1665 kern_return_t ret; 1666 spl_t s; 1667 1668 s = splsched(); 1669 thread_lock(thread); 1670 ret = clear_wait_internal(thread, result); 1671 thread_unlock(thread); 1672 splx(s); 1673 return ret; 1674} 1675 1676 1677/* 1678 * thread_wakeup_prim: 1679 * 1680 * Common routine for thread_wakeup, thread_wakeup_with_result, 1681 * and thread_wakeup_one. 1682 * 1683 */ 1684kern_return_t 1685thread_wakeup_prim( 1686 event_t event, 1687 boolean_t one_thread, 1688 wait_result_t result) 1689{ 1690 return (thread_wakeup_prim_internal(event, one_thread, result, -1)); 1691} 1692 1693 1694kern_return_t 1695thread_wakeup_prim_internal( 1696 event_t event, 1697 boolean_t one_thread, 1698 wait_result_t result, 1699 int priority) 1700{ 1701 register wait_queue_t wq; 1702 register int index; 1703 1704 index = wait_hash(event); 1705 wq = &wait_queues[index]; 1706 if (one_thread) 1707 return (wait_queue_wakeup_one(wq, event, result, priority)); 1708 else 1709 return (wait_queue_wakeup_all(wq, event, result)); 1710} 1711 1712/* 1713 * thread_bind: 1714 * 1715 * Force the current thread to execute on the specified processor. 1716 * 1717 * Returns the previous binding. PROCESSOR_NULL means 1718 * not bound. 1719 * 1720 * XXX - DO NOT export this to users - XXX 1721 */ 1722processor_t 1723thread_bind( 1724 processor_t processor) 1725{ 1726 thread_t self = current_thread(); 1727 processor_t prev; 1728 spl_t s; 1729 1730 s = splsched(); 1731 thread_lock(self); 1732 1733 prev = self->bound_processor; 1734 self->bound_processor = processor; 1735 1736 thread_unlock(self); 1737 splx(s); 1738 1739 return (prev); 1740} 1741 1742/* 1743 * thread_select: 1744 * 1745 * Select a new thread for the current processor to execute. 1746 * 1747 * May select the current thread, which must be locked. 
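 *
 * In outline, the search order is roughly: keep the current thread if it
 * is still eligible and the best choice, then the real-time queue, the
 * processor (or pset) run queue, the global fairshare queue, work stolen
 * from other processors, and finally the idle thread.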
1748 */ 1749static thread_t 1750thread_select( 1751 thread_t thread, 1752 processor_t processor) 1753{ 1754 processor_set_t pset = processor->processor_set; 1755 thread_t new_thread = THREAD_NULL; 1756 boolean_t inactive_state; 1757 1758 assert(processor == current_processor()); 1759 1760 do { 1761 /* 1762 * Update the priority. 1763 */ 1764 if (SCHED(can_update_priority)(thread)) 1765 SCHED(update_priority)(thread); 1766 1767 processor->current_pri = thread->sched_pri; 1768 processor->current_thmode = thread->sched_mode; 1769 1770 pset_lock(pset); 1771 1772 assert(pset->low_count); 1773 assert(pset->low_pri); 1774 1775 if (processor->processor_meta != PROCESSOR_META_NULL && processor->processor_meta->primary != processor) { 1776 /* 1777 * Should this secondary SMT processor attempt to find work? For pset runqueue systems, 1778 * we should look for work only under the same conditions that choose_processor() 1779 * would have assigned work, which is when all primary processors have been assigned work. 1780 * 1781 * An exception is that bound threads are dispatched to a processor without going through 1782 * choose_processor(), so in those cases we should continue trying to dequeue work. 1783 */ 1784 if (!processor->runq_bound_count && !queue_empty(&pset->idle_queue) && !rt_runq.count) { 1785 goto idle; 1786 } 1787 } 1788 1789 inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor); 1790 1791 simple_lock(&rt_lock); 1792 1793 /* 1794 * Test to see if the current thread should continue 1795 * to run on this processor. Must be runnable, and not 1796 * bound to a different processor, nor be in the wrong 1797 * processor set. 1798 */ 1799 if ( ((thread->state & ~TH_SUSP) == TH_RUN) && 1800 (thread->sched_pri >= BASEPRI_RTQUEUES || 1801 processor->processor_meta == PROCESSOR_META_NULL || 1802 processor->processor_meta->primary == processor) && 1803 (thread->bound_processor == PROCESSOR_NULL || 1804 thread->bound_processor == processor) && 1805 (thread->affinity_set == AFFINITY_SET_NULL || 1806 thread->affinity_set->aset_pset == pset)) { 1807 if (thread->sched_pri >= BASEPRI_RTQUEUES && 1808 first_timeslice(processor)) { 1809 if (rt_runq.count > 0) { 1810 register queue_t q; 1811 1812 q = &rt_runq.queue; 1813 if (((thread_t)q->next)->realtime.deadline < 1814 processor->deadline) { 1815 if ((((thread_t)q->next)->bound_processor == PROCESSOR_NULL) || (((thread_t)q->next)->bound_processor == processor)) { 1816 thread = (thread_t)dequeue_head(q); 1817 thread->runq = PROCESSOR_NULL; 1818 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 1819 rt_runq.count--; 1820 } 1821 } 1822 } 1823 1824 simple_unlock(&rt_lock); 1825 1826 processor->deadline = thread->realtime.deadline; 1827 1828 pset_unlock(pset); 1829 1830 return (thread); 1831 } 1832 1833 if (!inactive_state && (thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && 1834 (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? 
MINPRI : thread->sched_pri)) == THREAD_NULL) { 1835 1836 simple_unlock(&rt_lock); 1837 1838 /* I am the highest priority runnable (non-idle) thread */ 1839 1840 pset_pri_hint(pset, processor, processor->current_pri); 1841 1842 pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor)); 1843 1844 processor->deadline = UINT64_MAX; 1845 1846 pset_unlock(pset); 1847 1848 return (thread); 1849 } 1850 } 1851 1852 if (new_thread != THREAD_NULL || 1853 (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) && 1854 (new_thread = SCHED(choose_thread)(processor, MINPRI)) != THREAD_NULL)) { 1855 simple_unlock(&rt_lock); 1856 1857 if (!inactive_state) { 1858 pset_pri_hint(pset, processor, new_thread->sched_pri); 1859 1860 pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor)); 1861 } 1862 1863 processor->deadline = UINT64_MAX; 1864 pset_unlock(pset); 1865 1866 return (new_thread); 1867 } 1868 1869 if (rt_runq.count > 0) { 1870 thread = (thread_t)dequeue_head(&rt_runq.queue); 1871 1872 if (__probable((thread->bound_processor == NULL || (thread->bound_processor == processor)))) { 1873 thread->runq = PROCESSOR_NULL; 1874 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 1875 rt_runq.count--; 1876 1877 simple_unlock(&rt_lock); 1878 1879 processor->deadline = thread->realtime.deadline; 1880 pset_unlock(pset); 1881 1882 return (thread); 1883 } else { 1884 enqueue_head(&rt_runq.queue, (queue_entry_t)thread); 1885 } 1886 } 1887 1888 simple_unlock(&rt_lock); 1889 1890 /* No realtime threads and no normal threads on the per-processor 1891 * runqueue. Finally check for global fairshare threads. 1892 */ 1893 if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) { 1894 1895 processor->deadline = UINT64_MAX; 1896 pset_unlock(pset); 1897 1898 return (new_thread); 1899 } 1900 1901 processor->deadline = UINT64_MAX; 1902 1903 /* 1904 * Set processor inactive based on 1905 * indication from the platform code. 1906 */ 1907 if (inactive_state) { 1908 if (processor->state == PROCESSOR_RUNNING) 1909 remqueue((queue_entry_t)processor); 1910 else 1911 if (processor->state == PROCESSOR_IDLE) 1912 remqueue((queue_entry_t)processor); 1913 1914 processor->state = PROCESSOR_INACTIVE; 1915 1916 pset_unlock(pset); 1917 1918 return (processor->idle_thread); 1919 } 1920 1921 /* 1922 * No runnable threads, attempt to steal 1923 * from other processors. 1924 */ 1925 new_thread = SCHED(steal_thread)(pset); 1926 if (new_thread != THREAD_NULL) { 1927 return (new_thread); 1928 } 1929 1930 /* 1931 * If other threads have appeared, shortcut 1932 * around again. 1933 */ 1934 if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0) 1935 continue; 1936 1937 pset_lock(pset); 1938 1939 idle: 1940 /* 1941 * Nothing is runnable, so set this processor idle if it 1942 * was running. 
1943 */ 1944 if (processor->state == PROCESSOR_RUNNING) { 1945 remqueue((queue_entry_t)processor); 1946 processor->state = PROCESSOR_IDLE; 1947 1948 if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) { 1949 enqueue_head(&pset->idle_queue, (queue_entry_t)processor); 1950 pset_pri_init_hint(pset, processor); 1951 pset_count_init_hint(pset, processor); 1952 } 1953 else { 1954 enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor); 1955 pset_unlock(pset); 1956 return (processor->idle_thread); 1957 } 1958 } 1959 1960 pset_unlock(pset); 1961 1962#if CONFIG_SCHED_IDLE_IN_PLACE 1963 /* 1964 * Choose idle thread if fast idle is not possible. 1965 */ 1966 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES) 1967 return (processor->idle_thread); 1968 1969 /* 1970 * Perform idling activities directly without a 1971 * context switch. Return dispatched thread, 1972 * else check again for a runnable thread. 1973 */ 1974 new_thread = thread_select_idle(thread, processor); 1975 1976#else /* !CONFIG_SCHED_IDLE_IN_PLACE */ 1977 1978 /* 1979 * Do a full context switch to idle so that the current 1980 * thread can start running on another processor without 1981 * waiting for the fast-idled processor to wake up. 1982 */ 1983 return (processor->idle_thread); 1984 1985#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */ 1986 1987 } while (new_thread == THREAD_NULL); 1988 1989 return (new_thread); 1990} 1991 1992#if CONFIG_SCHED_IDLE_IN_PLACE 1993/* 1994 * thread_select_idle: 1995 * 1996 * Idle the processor using the current thread context. 1997 * 1998 * Called with thread locked, then dropped and relocked. 1999 */ 2000static thread_t 2001thread_select_idle( 2002 thread_t thread, 2003 processor_t processor) 2004{ 2005 thread_t new_thread; 2006 uint64_t arg1, arg2; 2007 int urgency; 2008 2009 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2010 if (thread->max_priority <= MAXPRI_THROTTLE) 2011 sched_background_decr(); 2012 2013 sched_share_decr(); 2014 } 2015 sched_run_decr(); 2016 2017 thread->state |= TH_IDLE; 2018 processor->current_pri = IDLEPRI; 2019 processor->current_thmode = TH_MODE_NONE; 2020 2021 /* Reload precise timing global policy to thread-local policy */ 2022 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); 2023 2024 thread_unlock(thread); 2025 2026 /* 2027 * Switch execution timing to processor idle thread. 2028 */ 2029 processor->last_dispatch = mach_absolute_time(); 2030 thread->last_run_time = processor->last_dispatch; 2031 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer); 2032 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer; 2033 2034 /* 2035 * Cancel the quantum timer while idling. 2036 */ 2037 timer_call_cancel(&processor->quantum_timer); 2038 processor->timeslice = 0; 2039 2040 (*thread->sched_call)(SCHED_CALL_BLOCK, thread); 2041 2042 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); 2043 2044 /* 2045 * Enable interrupts and perform idling activities. No 2046 * preemption due to TH_IDLE being set. 2047 */ 2048 spllo(); new_thread = processor_idle(thread, processor); 2049 2050 /* 2051 * Return at splsched. 2052 */ 2053 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread); 2054 2055 thread_lock(thread); 2056 2057 /* 2058 * If awakened, switch to thread timer and start a new quantum. 
2059 * Otherwise skip; we will context switch to another thread or return here. 2060 */ 2061 if (!(thread->state & TH_WAIT)) { 2062 processor->last_dispatch = mach_absolute_time(); 2063 thread_timer_event(processor->last_dispatch, &thread->system_timer); 2064 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2065 2066 thread_quantum_init(thread); 2067 thread->last_quantum_refill_time = processor->last_dispatch; 2068 2069 processor->quantum_end = processor->last_dispatch + thread->current_quantum; 2070 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); 2071 processor->timeslice = 1; 2072 2073 thread->computation_epoch = processor->last_dispatch; 2074 } 2075 2076 thread->state &= ~TH_IDLE; 2077 2078 /* 2079 * If we idled in place, simulate a context switch back 2080 * to the original priority of the thread so that the 2081 * platform layer cannot distinguish this from a true 2082 * switch to the idle thread. 2083 */ 2084 2085 urgency = thread_get_urgency(thread, &arg1, &arg2); 2086 2087 thread_tell_urgency(urgency, arg1, arg2, new_thread); 2088 2089 sched_run_incr(); 2090 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2091 sched_share_incr(); 2092 2093 if (thread->max_priority <= MAXPRI_THROTTLE) 2094 sched_background_incr(); 2095 } 2096 2097 return (new_thread); 2098} 2099#endif /* CONFIG_SCHED_IDLE_IN_PLACE */ 2100 2101#if defined(CONFIG_SCHED_TRADITIONAL) 2102static thread_t 2103sched_traditional_choose_thread( 2104 processor_t processor, 2105 int priority) 2106{ 2107 thread_t thread; 2108 2109 thread = choose_thread(processor, runq_for_processor(processor), priority); 2110 if (thread != THREAD_NULL) { 2111 runq_consider_decr_bound_count(processor, thread); 2112 } 2113 2114 return thread; 2115} 2116 2117#endif /* defined(CONFIG_SCHED_TRADITIONAL) */ 2118 2119#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) 2120 2121/* 2122 * choose_thread: 2123 * 2124 * Locate a thread to execute from the processor run queue 2125 * and return it. Only choose a thread with greater or equal 2126 * priority. 2127 * 2128 * Associated pset must be locked. Returns THREAD_NULL 2129 * on failure. 2130 */ 2131thread_t 2132choose_thread( 2133 processor_t processor, 2134 run_queue_t rq, 2135 int priority) 2136{ 2137 queue_t queue = rq->queues + rq->highq; 2138 int pri = rq->highq, count = rq->count; 2139 thread_t thread; 2140 2141 while (count > 0 && pri >= priority) { 2142 thread = (thread_t)queue_first(queue); 2143 while (!queue_end(queue, (queue_entry_t)thread)) { 2144 if (thread->bound_processor == PROCESSOR_NULL || 2145 thread->bound_processor == processor) { 2146 remqueue((queue_entry_t)thread); 2147 2148 thread->runq = PROCESSOR_NULL; 2149 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 2150 rq->count--; 2151 if (SCHED(priority_is_urgent)(pri)) { 2152 rq->urgency--; assert(rq->urgency >= 0); 2153 } 2154 if (queue_empty(queue)) { 2155 if (pri != IDLEPRI) 2156 clrbit(MAXPRI - pri, rq->bitmap); 2157 rq->highq = MAXPRI - ffsbit(rq->bitmap); 2158 } 2159 2160 return (thread); 2161 } 2162 count--; 2163 2164 thread = (thread_t)queue_next((queue_entry_t)thread); 2165 } 2166 2167 queue--; pri--; 2168 } 2169 2170 return (THREAD_NULL); 2171} 2172 2173#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ 2174 2175/* 2176 * Perform a context switch and start executing the new thread. 2177 * 2178 * Returns FALSE on failure, and the thread is re-dispatched. 2179 * 2180 * Called at splsched. 
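 * The only failure case is a target thread without a kernel stack for
 * which stack_alloc_try() cannot immediately provide one; the thread is
 * queued for the stack allocator and the caller must select again.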
2181 */ 2182 2183#define funnel_release_check(thread, debug) \ 2184MACRO_BEGIN \ 2185 if ((thread)->funnel_state & TH_FN_OWNED) { \ 2186 (thread)->funnel_state = TH_FN_REFUNNEL; \ 2187 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \ 2188 (thread)->funnel_lock, (debug), 0, 0, 0); \ 2189 funnel_unlock((thread)->funnel_lock); \ 2190 } \ 2191MACRO_END 2192 2193#define funnel_refunnel_check(thread, debug) \ 2194MACRO_BEGIN \ 2195 if ((thread)->funnel_state & TH_FN_REFUNNEL) { \ 2196 kern_return_t result = (thread)->wait_result; \ 2197 \ 2198 (thread)->funnel_state = 0; \ 2199 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \ 2200 (thread)->funnel_lock, (debug), 0, 0, 0); \ 2201 funnel_lock((thread)->funnel_lock); \ 2202 KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \ 2203 (thread)->funnel_lock, (debug), 0, 0, 0); \ 2204 (thread)->funnel_state = TH_FN_OWNED; \ 2205 (thread)->wait_result = result; \ 2206 } \ 2207MACRO_END 2208 2209/* 2210 * thread_invoke 2211 * 2212 * "self" is what is currently running on the processor, 2213 * "thread" is the new thread to context switch to 2214 * (which may be the same thread in some cases) 2215 */ 2216static boolean_t 2217thread_invoke( 2218 thread_t self, 2219 thread_t thread, 2220 ast_t reason) 2221{ 2222 thread_continue_t continuation = self->continuation; 2223 void *parameter = self->parameter; 2224 processor_t processor; 2225 uint64_t ctime = mach_absolute_time(); 2226 2227 if (__improbable(get_preemption_level() != 0)) { 2228 int pl = get_preemption_level(); 2229 panic("thread_invoke: preemption_level %d, possible cause: %s", 2230 pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" : 2231 "blocking while holding a spinlock, or within interrupt context")); 2232 } 2233 2234 assert(self == current_thread()); 2235 2236#if defined(CONFIG_SCHED_TRADITIONAL) 2237 sched_traditional_consider_maintenance(ctime); 2238#endif /* CONFIG_SCHED_TRADITIONAL */ 2239 2240 /* 2241 * Mark thread interruptible. 2242 */ 2243 thread_lock(thread); 2244 thread->state &= ~TH_UNINT; 2245 2246#if DEBUG 2247 assert(thread_runnable(thread)); 2248#endif 2249 2250 /* Reload precise timing global policy to thread-local policy */ 2251 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); 2252 2253 /* 2254 * Allow time constraint threads to hang onto 2255 * a stack. 2256 */ 2257 if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack) 2258 self->reserved_stack = self->kernel_stack; 2259 2260 if (continuation != NULL) { 2261 if (!thread->kernel_stack) { 2262 /* 2263 * If we are using a privileged stack, 2264 * check to see whether we can exchange it with 2265 * that of the other thread. 2266 */ 2267 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack) 2268 goto need_stack; 2269 2270 /* 2271 * Context switch by performing a stack handoff. 
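 * The incoming thread has no kernel stack and was blocked with a
 * continuation, so instead of saving full register state we donate the
 * current stack via stack_handoff() and call the target's continuation
 * directly.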
2272 */ 2273 continuation = thread->continuation; 2274 parameter = thread->parameter; 2275 2276 processor = current_processor(); 2277 processor->active_thread = thread; 2278 processor->current_pri = thread->sched_pri; 2279 processor->current_thmode = thread->sched_mode; 2280 if (thread->last_processor != processor && thread->last_processor != NULL) { 2281 if (thread->last_processor->processor_set != processor->processor_set) 2282 thread->ps_switch++; 2283 thread->p_switch++; 2284 } 2285 thread->last_processor = processor; 2286 thread->c_switch++; 2287 ast_context(thread); 2288 thread_unlock(thread); 2289 2290 self->reason = reason; 2291 2292 processor->last_dispatch = ctime; 2293 self->last_run_time = ctime; 2294 thread_timer_event(ctime, &thread->system_timer); 2295 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2296 2297 /* 2298 * Since non-precise user/kernel time doesn't update the state timer 2299 * during privilege transitions, synthesize an event now. 2300 */ 2301 if (!thread->precise_user_kernel_time) { 2302 timer_switch(PROCESSOR_DATA(processor, current_state), 2303 ctime, 2304 PROCESSOR_DATA(processor, current_state)); 2305 } 2306 2307 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2308 MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE, 2309 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2310 2311 if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) { 2312 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE, 2313 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0); 2314 } 2315 2316 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); 2317 2318 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri); 2319 2320 TLOG(1, "thread_invoke: calling stack_handoff\n"); 2321 stack_handoff(self, thread); 2322 2323 DTRACE_SCHED(on__cpu); 2324 2325 thread_dispatch(self, thread); 2326 2327 thread->continuation = thread->parameter = NULL; 2328 2329 counter(c_thread_invoke_hits++); 2330 2331 funnel_refunnel_check(thread, 2); 2332 (void) spllo(); 2333 2334 assert(continuation); 2335 call_continuation(continuation, parameter, thread->wait_result); 2336 /*NOTREACHED*/ 2337 } 2338 else if (thread == self) { 2339 /* same thread but with continuation */ 2340 ast_context(self); 2341 counter(++c_thread_invoke_same); 2342 thread_unlock(self); 2343 2344 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2345 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2346 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2347 2348 self->continuation = self->parameter = NULL; 2349 2350 funnel_refunnel_check(self, 3); 2351 (void) spllo(); 2352 2353 call_continuation(continuation, parameter, self->wait_result); 2354 /*NOTREACHED*/ 2355 } 2356 } 2357 else { 2358 /* 2359 * Check that the other thread has a stack 2360 */ 2361 if (!thread->kernel_stack) { 2362need_stack: 2363 if (!stack_alloc_try(thread)) { 2364 counter(c_thread_invoke_misses++); 2365 thread_unlock(thread); 2366 thread_stack_enqueue(thread); 2367 return (FALSE); 2368 } 2369 } 2370 else if (thread == self) { 2371 ast_context(self); 2372 counter(++c_thread_invoke_same); 2373 thread_unlock(self); 2374 2375 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2376 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2377 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2378 2379 return 
(TRUE); 2380 } 2381 } 2382 2383 /* 2384 * Context switch by full context save. 2385 */ 2386 processor = current_processor(); 2387 processor->active_thread = thread; 2388 processor->current_pri = thread->sched_pri; 2389 processor->current_thmode = thread->sched_mode; 2390 if (thread->last_processor != processor && thread->last_processor != NULL) { 2391 if (thread->last_processor->processor_set != processor->processor_set) 2392 thread->ps_switch++; 2393 thread->p_switch++; 2394 } 2395 thread->last_processor = processor; 2396 thread->c_switch++; 2397 ast_context(thread); 2398 thread_unlock(thread); 2399 2400 counter(c_thread_invoke_csw++); 2401 2402 assert(self->runq == PROCESSOR_NULL); 2403 self->reason = reason; 2404 2405 processor->last_dispatch = ctime; 2406 self->last_run_time = ctime; 2407 thread_timer_event(ctime, &thread->system_timer); 2408 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2409 2410 /* 2411 * Since non-precise user/kernel time doesn't update the state timer 2412 * during privilege transitions, synthesize an event now. 2413 */ 2414 if (!thread->precise_user_kernel_time) { 2415 timer_switch(PROCESSOR_DATA(processor, current_state), 2416 ctime, 2417 PROCESSOR_DATA(processor, current_state)); 2418 } 2419 2420 2421 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2422 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2423 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2424 2425 if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) { 2426 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE, 2427 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0); 2428 } 2429 2430 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); 2431 2432 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri); 2433 2434 /* 2435 * This is where we actually switch register context, 2436 * and address space if required. We will next run 2437 * as a result of a subsequent context switch. 2438 */ 2439 assert(continuation == self->continuation); 2440 thread = machine_switch_context(self, continuation, thread); 2441 assert(self == current_thread()); 2442 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread); 2443 2444 DTRACE_SCHED(on__cpu); 2445 2446 /* 2447 * We have been resumed and are set to run. 2448 */ 2449 thread_dispatch(thread, self); 2450 2451 if (continuation) { 2452 self->continuation = self->parameter = NULL; 2453 2454 funnel_refunnel_check(self, 3); 2455 (void) spllo(); 2456 2457 call_continuation(continuation, parameter, self->wait_result); 2458 /*NOTREACHED*/ 2459 } 2460 2461 return (TRUE); 2462} 2463 2464/* 2465 * thread_dispatch: 2466 * 2467 * Handle threads at context switch. Re-dispatch other thread 2468 * if still running, otherwise update run state and perform 2469 * special actions. Update quantum for other thread and begin 2470 * the quantum for ourselves. 2471 * 2472 * "self" is our new current thread that we have context switched 2473 * to, "thread" is the old thread that we have switched away from. 2474 * 2475 * Called at splsched. 2476 */ 2477void 2478thread_dispatch( 2479 thread_t thread, 2480 thread_t self) 2481{ 2482 processor_t processor = self->last_processor; 2483 2484 if (thread != THREAD_NULL) { 2485 /* 2486 * If blocked at a continuation, discard 2487 * the stack. 
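 * The stack can be reclaimed because the thread will resume in its
 * continuation on a fresh kernel stack when it next runs.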
2488 */ 2489 if (thread->continuation != NULL && thread->kernel_stack != 0) 2490 stack_free(thread); 2491 2492 if (!(thread->state & TH_IDLE)) { 2493 int64_t consumed; 2494 int64_t remainder = 0; 2495 2496 if (processor->quantum_end > processor->last_dispatch) 2497 remainder = processor->quantum_end - 2498 processor->last_dispatch; 2499 2500 consumed = thread->current_quantum - remainder; 2501 2502 if ((thread->reason & AST_LEDGER) == 0) { 2503 /* 2504 * Bill CPU time to both the task and 2505 * the individual thread. 2506 */ 2507 ledger_credit(thread->t_ledger, 2508 task_ledgers.cpu_time, consumed); 2509 ledger_credit(thread->t_threadledger, 2510 thread_ledgers.cpu_time, consumed); 2511 } 2512 2513 wake_lock(thread); 2514 thread_lock(thread); 2515 2516 /* 2517 * Compute remainder of current quantum. 2518 */ 2519 if (first_timeslice(processor) && 2520 processor->quantum_end > processor->last_dispatch) 2521 thread->current_quantum = (uint32_t)remainder; 2522 else 2523 thread->current_quantum = 0; 2524 2525 if (thread->sched_mode == TH_MODE_REALTIME) { 2526 /* 2527 * Cancel the deadline if the thread has 2528 * consumed the entire quantum. 2529 */ 2530 if (thread->current_quantum == 0) { 2531 thread->realtime.deadline = UINT64_MAX; 2532 thread->reason |= AST_QUANTUM; 2533 } 2534 } else { 2535#if defined(CONFIG_SCHED_TRADITIONAL) 2536 /* 2537 * For non-realtime threads treat a tiny 2538 * remaining quantum as an expired quantum 2539 * but include what's left next time. 2540 */ 2541 if (thread->current_quantum < min_std_quantum) { 2542 thread->reason |= AST_QUANTUM; 2543 thread->current_quantum += SCHED(initial_quantum_size)(thread); 2544 } 2545#endif 2546 } 2547 2548 /* 2549 * If we are doing a direct handoff then 2550 * take the remainder of the quantum. 2551 */ 2552 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) { 2553 self->current_quantum = thread->current_quantum; 2554 thread->reason |= AST_QUANTUM; 2555 thread->current_quantum = 0; 2556 } 2557 2558 thread->computation_metered += (processor->last_dispatch - thread->computation_epoch); 2559 2560 if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) { 2561 integer_t priority; 2562 2563 priority = thread->sched_pri; 2564 2565 if (priority < thread->priority) 2566 priority = thread->priority; 2567 if (priority < BASEPRI_BACKGROUND) 2568 priority = BASEPRI_BACKGROUND; 2569 2570 if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { 2571 KERNEL_DEBUG_CONSTANT( 2572 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE, 2573 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0); 2574 2575 thread->sched_flags |= TH_SFLAG_RW_PROMOTED; 2576 2577 if (thread->sched_pri < priority) 2578 set_sched_pri(thread, priority); 2579 } 2580 } 2581 2582 if (!(thread->state & TH_WAIT)) { 2583 /* 2584 * Still running. 2585 */ 2586 if (thread->reason & AST_QUANTUM) 2587 thread_setrun(thread, SCHED_TAILQ); 2588 else 2589 if (thread->reason & AST_PREEMPT) 2590 thread_setrun(thread, SCHED_HEADQ); 2591 else 2592 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 2593 2594 thread->reason = AST_NONE; 2595 2596 if (thread->wake_active) { 2597 thread->wake_active = FALSE; 2598 thread_unlock(thread); 2599 2600 thread_wakeup(&thread->wake_active); 2601 } 2602 else 2603 thread_unlock(thread); 2604 2605 wake_unlock(thread); 2606 } 2607 else { 2608 /* 2609 * Waiting. 
2610 */ 2611 boolean_t should_terminate = FALSE; 2612 2613 /* Only the first call to thread_dispatch 2614 * after explicit termination should add 2615 * the thread to the termination queue 2616 */ 2617 if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) { 2618 should_terminate = TRUE; 2619 thread->state |= TH_TERMINATE2; 2620 } 2621 2622 thread->state &= ~TH_RUN; 2623 thread->chosen_processor = PROCESSOR_NULL; 2624 2625 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2626 if (thread->max_priority <= MAXPRI_THROTTLE) 2627 sched_background_decr(); 2628 2629 sched_share_decr(); 2630 } 2631 sched_run_decr(); 2632 2633 (*thread->sched_call)(SCHED_CALL_BLOCK, thread); 2634 2635 if (thread->wake_active) { 2636 thread->wake_active = FALSE; 2637 thread_unlock(thread); 2638 2639 thread_wakeup(&thread->wake_active); 2640 } 2641 else 2642 thread_unlock(thread); 2643 2644 wake_unlock(thread); 2645 2646 if (should_terminate) 2647 thread_terminate_enqueue(thread); 2648 } 2649 } 2650 } 2651 2652 if (!(self->state & TH_IDLE)) { 2653 uint64_t arg1, arg2; 2654 int urgency; 2655 2656 urgency = thread_get_urgency(self, &arg1, &arg2); 2657 2658 thread_tell_urgency(urgency, arg1, arg2, self); 2659 2660 /* 2661 * Get a new quantum if none remaining. 2662 */ 2663 if (self->current_quantum == 0) { 2664 thread_quantum_init(self); 2665 self->last_quantum_refill_time = processor->last_dispatch; 2666 } 2667 2668 /* 2669 * Set up quantum timer and timeslice. 2670 */ 2671 processor->quantum_end = (processor->last_dispatch + self->current_quantum); 2672 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); 2673 2674 processor->timeslice = 1; 2675 2676 self->computation_epoch = processor->last_dispatch; 2677 } 2678 else { 2679 timer_call_cancel(&processor->quantum_timer); 2680 processor->timeslice = 0; 2681 2682 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); 2683 } 2684} 2685 2686#include <libkern/OSDebug.h> 2687 2688uint32_t kdebug_thread_block = 0; 2689 2690 2691/* 2692 * thread_block_reason: 2693 * 2694 * Forces a reschedule, blocking the caller if a wait 2695 * has been asserted. 2696 * 2697 * If a continuation is specified, then thread_invoke will 2698 * attempt to discard the thread's kernel stack. When the 2699 * thread resumes, it will execute the continuation function 2700 * on a new kernel stack. 
2701 */ 2702counter(mach_counter_t c_thread_block_calls = 0;) 2703 2704wait_result_t 2705thread_block_reason( 2706 thread_continue_t continuation, 2707 void *parameter, 2708 ast_t reason) 2709{ 2710 register thread_t self = current_thread(); 2711 register processor_t processor; 2712 register thread_t new_thread; 2713 spl_t s; 2714 2715 counter(++c_thread_block_calls); 2716 2717 s = splsched(); 2718 2719 if (!(reason & AST_PREEMPT)) 2720 funnel_release_check(self, 2); 2721 2722 processor = current_processor(); 2723 2724 /* If we're explicitly yielding, force a subsequent quantum */ 2725 if (reason & AST_YIELD) 2726 processor->timeslice = 0; 2727 2728 /* We're handling all scheduling AST's */ 2729 ast_off(AST_SCHEDULING); 2730 2731 self->continuation = continuation; 2732 self->parameter = parameter; 2733 2734 if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) { 2735 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2736 MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK), 2737 reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0); 2738 } 2739 2740 do { 2741 thread_lock(self); 2742 new_thread = thread_select(self, processor); 2743 thread_unlock(self); 2744 } while (!thread_invoke(self, new_thread, reason)); 2745 2746 funnel_refunnel_check(self, 5); 2747 splx(s); 2748 2749 return (self->wait_result); 2750} 2751 2752/* 2753 * thread_block: 2754 * 2755 * Block the current thread if a wait has been asserted. 2756 */ 2757wait_result_t 2758thread_block( 2759 thread_continue_t continuation) 2760{ 2761 return thread_block_reason(continuation, NULL, AST_NONE); 2762} 2763 2764wait_result_t 2765thread_block_parameter( 2766 thread_continue_t continuation, 2767 void *parameter) 2768{ 2769 return thread_block_reason(continuation, parameter, AST_NONE); 2770} 2771 2772/* 2773 * thread_run: 2774 * 2775 * Switch directly from the current thread to the 2776 * new thread, handing off our quantum if appropriate. 2777 * 2778 * New thread must be runnable, and not on a run queue. 2779 * 2780 * Called at splsched. 2781 */ 2782int 2783thread_run( 2784 thread_t self, 2785 thread_continue_t continuation, 2786 void *parameter, 2787 thread_t new_thread) 2788{ 2789 ast_t handoff = AST_HANDOFF; 2790 2791 funnel_release_check(self, 3); 2792 2793 self->continuation = continuation; 2794 self->parameter = parameter; 2795 2796 while (!thread_invoke(self, new_thread, handoff)) { 2797 processor_t processor = current_processor(); 2798 2799 thread_lock(self); 2800 new_thread = thread_select(self, processor); 2801 thread_unlock(self); 2802 handoff = AST_NONE; 2803 } 2804 2805 funnel_refunnel_check(self, 6); 2806 2807 return (self->wait_result); 2808} 2809 2810/* 2811 * thread_continue: 2812 * 2813 * Called at splsched when a thread first receives 2814 * a new stack after a continuation. 
2815 */ 2816void 2817thread_continue( 2818 register thread_t thread) 2819{ 2820 register thread_t self = current_thread(); 2821 register thread_continue_t continuation; 2822 register void *parameter; 2823 2824 DTRACE_SCHED(on__cpu); 2825 2826 continuation = self->continuation; 2827 parameter = self->parameter; 2828 2829 thread_dispatch(thread, self); 2830 2831 self->continuation = self->parameter = NULL; 2832 2833 funnel_refunnel_check(self, 4); 2834 2835 if (thread != THREAD_NULL) 2836 (void)spllo(); 2837 2838 TLOG(1, "thread_continue: calling call_continuation \n"); 2839 call_continuation(continuation, parameter, self->wait_result); 2840 /*NOTREACHED*/ 2841} 2842 2843void 2844thread_quantum_init(thread_t thread) 2845{ 2846 if (thread->sched_mode == TH_MODE_REALTIME) { 2847 thread->current_quantum = thread->realtime.computation; 2848 } else { 2849 thread->current_quantum = SCHED(initial_quantum_size)(thread); 2850 } 2851} 2852 2853#if defined(CONFIG_SCHED_TRADITIONAL) 2854static uint32_t 2855sched_traditional_initial_quantum_size(thread_t thread) 2856{ 2857 if ((thread == THREAD_NULL) || thread->priority > MAXPRI_THROTTLE) 2858 return std_quantum; 2859 else 2860 return bg_quantum; 2861} 2862 2863static sched_mode_t 2864sched_traditional_initial_thread_sched_mode(task_t parent_task) 2865{ 2866 if (parent_task == kernel_task) 2867 return TH_MODE_FIXED; 2868 else 2869 return TH_MODE_TIMESHARE; 2870} 2871 2872static boolean_t 2873sched_traditional_supports_timeshare_mode(void) 2874{ 2875 return TRUE; 2876} 2877 2878#endif /* CONFIG_SCHED_TRADITIONAL */ 2879 2880/* 2881 * run_queue_init: 2882 * 2883 * Initialize a run queue before first use. 2884 */ 2885void 2886run_queue_init( 2887 run_queue_t rq) 2888{ 2889 int i; 2890 2891 rq->highq = IDLEPRI; 2892 for (i = 0; i < NRQBM; i++) 2893 rq->bitmap[i] = 0; 2894 setbit(MAXPRI - IDLEPRI, rq->bitmap); 2895 rq->urgency = rq->count = 0; 2896 for (i = 0; i < NRQS; i++) 2897 queue_init(&rq->queues[i]); 2898} 2899 2900#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) 2901int 2902sched_traditional_fairshare_runq_count(void) 2903{ 2904 return fs_runq.count; 2905} 2906 2907uint64_t 2908sched_traditional_fairshare_runq_stats_count_sum(void) 2909{ 2910 return fs_runq.runq_stats.count_sum; 2911} 2912 2913void 2914sched_traditional_fairshare_enqueue(thread_t thread) 2915{ 2916 queue_t queue = &fs_runq.queue; 2917 2918 simple_lock(&fs_lock); 2919 2920 enqueue_tail(queue, (queue_entry_t)thread); 2921 2922 thread->runq = FS_RUNQ; 2923 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2924 fs_runq.count++; 2925 2926 simple_unlock(&fs_lock); 2927} 2928 2929thread_t 2930sched_traditional_fairshare_dequeue(void) 2931{ 2932 thread_t thread; 2933 2934 simple_lock(&fs_lock); 2935 if (fs_runq.count > 0) { 2936 thread = (thread_t)dequeue_head(&fs_runq.queue); 2937 2938 thread->runq = PROCESSOR_NULL; 2939 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2940 fs_runq.count--; 2941 2942 simple_unlock(&fs_lock); 2943 2944 return (thread); 2945 } 2946 simple_unlock(&fs_lock); 2947 2948 return THREAD_NULL; 2949} 2950 2951boolean_t 2952sched_traditional_fairshare_queue_remove(thread_t thread) 2953{ 2954 queue_t q; 2955 2956 simple_lock(&fs_lock); 2957 q = &fs_runq.queue; 2958 2959 if (FS_RUNQ == thread->runq) { 2960 remqueue((queue_entry_t)thread); 2961 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2962 fs_runq.count--; 2963 2964 thread->runq = 
PROCESSOR_NULL; 2965 simple_unlock(&fs_lock); 2966 return (TRUE); 2967 } 2968 else { 2969 /* 2970 * The thread left the run queue before we could 2971 * lock the run queue. 2972 */ 2973 assert(thread->runq == PROCESSOR_NULL); 2974 simple_unlock(&fs_lock); 2975 return (FALSE); 2976 } 2977} 2978 2979#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ 2980 2981/* 2982 * run_queue_dequeue: 2983 * 2984 * Perform a dequeue operation on a run queue, 2985 * and return the resulting thread. 2986 * 2987 * The run queue must be locked (see thread_run_queue_remove() 2988 * for more info), and not empty. 2989 */ 2990thread_t 2991run_queue_dequeue( 2992 run_queue_t rq, 2993 integer_t options) 2994{ 2995 thread_t thread; 2996 queue_t queue = rq->queues + rq->highq; 2997 2998 if (options & SCHED_HEADQ) { 2999 thread = (thread_t)dequeue_head(queue); 3000 } 3001 else { 3002 thread = (thread_t)dequeue_tail(queue); 3003 } 3004 3005 thread->runq = PROCESSOR_NULL; 3006 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3007 rq->count--; 3008 if (SCHED(priority_is_urgent)(rq->highq)) { 3009 rq->urgency--; assert(rq->urgency >= 0); 3010 } 3011 if (queue_empty(queue)) { 3012 if (rq->highq != IDLEPRI) 3013 clrbit(MAXPRI - rq->highq, rq->bitmap); 3014 rq->highq = MAXPRI - ffsbit(rq->bitmap); 3015 } 3016 3017 return (thread); 3018} 3019 3020/* 3021 * run_queue_enqueue: 3022 * 3023 * Perform a enqueue operation on a run queue. 3024 * 3025 * The run queue must be locked (see thread_run_queue_remove() 3026 * for more info). 3027 */ 3028boolean_t 3029run_queue_enqueue( 3030 run_queue_t rq, 3031 thread_t thread, 3032 integer_t options) 3033{ 3034 queue_t queue = rq->queues + thread->sched_pri; 3035 boolean_t result = FALSE; 3036 3037 if (queue_empty(queue)) { 3038 enqueue_tail(queue, (queue_entry_t)thread); 3039 3040 setbit(MAXPRI - thread->sched_pri, rq->bitmap); 3041 if (thread->sched_pri > rq->highq) { 3042 rq->highq = thread->sched_pri; 3043 result = TRUE; 3044 } 3045 } 3046 else 3047 if (options & SCHED_TAILQ) 3048 enqueue_tail(queue, (queue_entry_t)thread); 3049 else 3050 enqueue_head(queue, (queue_entry_t)thread); 3051 3052 if (SCHED(priority_is_urgent)(thread->sched_pri)) 3053 rq->urgency++; 3054 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3055 rq->count++; 3056 3057 return (result); 3058 3059} 3060 3061/* 3062 * run_queue_remove: 3063 * 3064 * Remove a specific thread from a runqueue. 3065 * 3066 * The run queue must be locked. 3067 */ 3068void 3069run_queue_remove( 3070 run_queue_t rq, 3071 thread_t thread) 3072{ 3073 3074 remqueue((queue_entry_t)thread); 3075 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3076 rq->count--; 3077 if (SCHED(priority_is_urgent)(thread->sched_pri)) { 3078 rq->urgency--; assert(rq->urgency >= 0); 3079 } 3080 3081 if (queue_empty(rq->queues + thread->sched_pri)) { 3082 /* update run queue status */ 3083 if (thread->sched_pri != IDLEPRI) 3084 clrbit(MAXPRI - thread->sched_pri, rq->bitmap); 3085 rq->highq = MAXPRI - ffsbit(rq->bitmap); 3086 } 3087 3088 thread->runq = PROCESSOR_NULL; 3089} 3090 3091/* 3092 * fairshare_setrun: 3093 * 3094 * Dispatch a thread for round-robin execution. 3095 * 3096 * Thread must be locked. Associated pset must 3097 * be locked, and is returned unlocked. 
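 * The thread is placed on the global fairshare run queue; if the chosen
 * processor is not the current one, machine_signal_idle() is used to
 * nudge it so the work can be picked up.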
3098 */ 3099static void 3100fairshare_setrun( 3101 processor_t processor, 3102 thread_t thread) 3103{ 3104 processor_set_t pset = processor->processor_set; 3105 3106 thread->chosen_processor = processor; 3107 3108 SCHED(fairshare_enqueue)(thread); 3109 3110 if (processor != current_processor()) 3111 machine_signal_idle(processor); 3112 3113 pset_unlock(pset); 3114 3115} 3116 3117/* 3118 * realtime_queue_insert: 3119 * 3120 * Enqueue a thread for realtime execution. 3121 */ 3122static boolean_t 3123realtime_queue_insert( 3124 thread_t thread) 3125{ 3126 queue_t queue = &rt_runq.queue; 3127 uint64_t deadline = thread->realtime.deadline; 3128 boolean_t preempt = FALSE; 3129 3130 simple_lock(&rt_lock); 3131 3132 if (queue_empty(queue)) { 3133 enqueue_tail(queue, (queue_entry_t)thread); 3134 preempt = TRUE; 3135 } 3136 else { 3137 register thread_t entry = (thread_t)queue_first(queue); 3138 3139 while (TRUE) { 3140 if ( queue_end(queue, (queue_entry_t)entry) || 3141 deadline < entry->realtime.deadline ) { 3142 entry = (thread_t)queue_prev((queue_entry_t)entry); 3143 break; 3144 } 3145 3146 entry = (thread_t)queue_next((queue_entry_t)entry); 3147 } 3148 3149 if ((queue_entry_t)entry == queue) 3150 preempt = TRUE; 3151 3152 insque((queue_entry_t)thread, (queue_entry_t)entry); 3153 } 3154 3155 thread->runq = RT_RUNQ; 3156 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 3157 rt_runq.count++; 3158 3159 simple_unlock(&rt_lock); 3160 3161 return (preempt); 3162} 3163 3164/* 3165 * realtime_setrun: 3166 * 3167 * Dispatch a thread for realtime execution. 3168 * 3169 * Thread must be locked. Associated pset must 3170 * be locked, and is returned unlocked. 3171 */ 3172static void 3173realtime_setrun( 3174 processor_t processor, 3175 thread_t thread) 3176{ 3177 processor_set_t pset = processor->processor_set; 3178 ast_t preempt; 3179 3180 thread->chosen_processor = processor; 3181 3182 /* 3183 * Dispatch directly onto idle processor. 
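 * Only taken when the thread is bound to this processor and the
 * processor is idle: the processor moves to the active queue in
 * DISPATCHING state with next_thread set, and, if it is not the current
 * processor, an idle wakeup is signalled unless one is already pending
 * in pending_AST_cpu_mask.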
3184 */ 3185 if ( (thread->bound_processor == processor) 3186 && processor->state == PROCESSOR_IDLE) { 3187 remqueue((queue_entry_t)processor); 3188 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3189 3190 processor->next_thread = thread; 3191 processor->current_pri = thread->sched_pri; 3192 processor->current_thmode = thread->sched_mode; 3193 processor->deadline = thread->realtime.deadline; 3194 processor->state = PROCESSOR_DISPATCHING; 3195 3196 if (processor != current_processor()) { 3197 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3198 /* cleared on exit from main processor_idle() loop */ 3199 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3200 machine_signal_idle(processor); 3201 } 3202 } 3203 3204 pset_unlock(pset); 3205 return; 3206 } 3207 3208 if (processor->current_pri < BASEPRI_RTQUEUES) 3209 preempt = (AST_PREEMPT | AST_URGENT); 3210 else if (thread->realtime.deadline < processor->deadline) 3211 preempt = (AST_PREEMPT | AST_URGENT); 3212 else 3213 preempt = AST_NONE; 3214 3215 realtime_queue_insert(thread); 3216 3217 if (preempt != AST_NONE) { 3218 if (processor->state == PROCESSOR_IDLE) { 3219 remqueue((queue_entry_t)processor); 3220 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3221 processor->next_thread = THREAD_NULL; 3222 processor->current_pri = thread->sched_pri; 3223 processor->current_thmode = thread->sched_mode; 3224 processor->deadline = thread->realtime.deadline; 3225 processor->state = PROCESSOR_DISPATCHING; 3226 if (processor == current_processor()) { 3227 ast_on(preempt); 3228 } else { 3229 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3230 /* cleared on exit from main processor_idle() loop */ 3231 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3232 machine_signal_idle(processor); 3233 } 3234 } 3235 } else if (processor->state == PROCESSOR_DISPATCHING) { 3236 if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) { 3237 processor->current_pri = thread->sched_pri; 3238 processor->current_thmode = thread->sched_mode; 3239 processor->deadline = thread->realtime.deadline; 3240 } 3241 } else { 3242 if (processor == current_processor()) { 3243 ast_on(preempt); 3244 } else { 3245 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3246 /* cleared after IPI causes csw_check() to be called */ 3247 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3248 cause_ast_check(processor); 3249 } 3250 } 3251 } 3252 } else { 3253 /* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. */ 3254 } 3255 3256 pset_unlock(pset); 3257} 3258 3259#if defined(CONFIG_SCHED_TRADITIONAL) 3260 3261static boolean_t 3262priority_is_urgent(int priority) 3263{ 3264 return testbit(priority, sched_preempt_pri) ? TRUE : FALSE; 3265} 3266 3267/* 3268 * processor_enqueue: 3269 * 3270 * Enqueue thread on a processor run queue. Thread must be locked, 3271 * and not already be on a run queue. 3272 * 3273 * Returns TRUE if a preemption is indicated based on the state 3274 * of the run queue. 3275 * 3276 * The run queue must be locked (see thread_run_queue_remove() 3277 * for more info). 
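 * On return thread->runq points at the processor and the processor's
 * bound-thread count hint has been adjusted via
 * runq_consider_incr_bound_count().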
3278 */ 3279static boolean_t 3280processor_enqueue( 3281 processor_t processor, 3282 thread_t thread, 3283 integer_t options) 3284{ 3285 run_queue_t rq = runq_for_processor(processor); 3286 boolean_t result; 3287 3288 result = run_queue_enqueue(rq, thread, options); 3289 thread->runq = processor; 3290 runq_consider_incr_bound_count(processor, thread); 3291 3292 return (result); 3293} 3294 3295#endif /* CONFIG_SCHED_TRADITIONAL */ 3296 3297/* 3298 * processor_setrun: 3299 * 3300 * Dispatch a thread for execution on a 3301 * processor. 3302 * 3303 * Thread must be locked. Associated pset must 3304 * be locked, and is returned unlocked. 3305 */ 3306static void 3307processor_setrun( 3308 processor_t processor, 3309 thread_t thread, 3310 integer_t options) 3311{ 3312 processor_set_t pset = processor->processor_set; 3313 ast_t preempt; 3314 enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing; 3315 3316 thread->chosen_processor = processor; 3317 3318 /* 3319 * Dispatch directly onto idle processor. 3320 */ 3321 if ( (SCHED(direct_dispatch_to_idle_processors) || 3322 thread->bound_processor == processor) 3323 && processor->state == PROCESSOR_IDLE) { 3324 remqueue((queue_entry_t)processor); 3325 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3326 3327 processor->next_thread = thread; 3328 processor->current_pri = thread->sched_pri; 3329 processor->current_thmode = thread->sched_mode; 3330 processor->deadline = UINT64_MAX; 3331 processor->state = PROCESSOR_DISPATCHING; 3332 3333 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3334 /* cleared on exit from main processor_idle() loop */ 3335 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3336 machine_signal_idle(processor); 3337 } 3338 3339 pset_unlock(pset); 3340 return; 3341 } 3342 3343 /* 3344 * Set preemption mode. 3345 */ 3346 if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri) 3347 preempt = (AST_PREEMPT | AST_URGENT); 3348 else if(processor->active_thread && thread_eager_preemption(processor->active_thread)) 3349 preempt = (AST_PREEMPT | AST_URGENT); 3350 else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) { 3351 if(SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) { 3352 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE; 3353 } else { 3354 preempt = AST_NONE; 3355 } 3356 } else 3357 preempt = (options & SCHED_PREEMPT)? 
AST_PREEMPT: AST_NONE; 3358 3359 SCHED(processor_enqueue)(processor, thread, options); 3360 3361 if (preempt != AST_NONE) { 3362 if (processor->state == PROCESSOR_IDLE) { 3363 remqueue((queue_entry_t)processor); 3364 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3365 processor->next_thread = THREAD_NULL; 3366 processor->current_pri = thread->sched_pri; 3367 processor->current_thmode = thread->sched_mode; 3368 processor->deadline = UINT64_MAX; 3369 processor->state = PROCESSOR_DISPATCHING; 3370 3371 ipi_action = eExitIdle; 3372 } else if ( processor->state == PROCESSOR_DISPATCHING) { 3373 if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) { 3374 processor->current_pri = thread->sched_pri; 3375 processor->current_thmode = thread->sched_mode; 3376 processor->deadline = UINT64_MAX; 3377 } 3378 } else if ( (processor->state == PROCESSOR_RUNNING || 3379 processor->state == PROCESSOR_SHUTDOWN) && 3380 (thread->sched_pri >= processor->current_pri || 3381 processor->current_thmode == TH_MODE_FAIRSHARE)) { 3382 ipi_action = eInterruptRunning; 3383 } 3384 } else { 3385 /* 3386 * New thread is not important enough to preempt what is running, but 3387 * special processor states may need special handling 3388 */ 3389 if (processor->state == PROCESSOR_SHUTDOWN && 3390 thread->sched_pri >= processor->current_pri ) { 3391 ipi_action = eInterruptRunning; 3392 } else if ( processor->state == PROCESSOR_IDLE && 3393 processor != current_processor() ) { 3394 remqueue((queue_entry_t)processor); 3395 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3396 processor->next_thread = THREAD_NULL; 3397 processor->current_pri = thread->sched_pri; 3398 processor->current_thmode = thread->sched_mode; 3399 processor->deadline = UINT64_MAX; 3400 processor->state = PROCESSOR_DISPATCHING; 3401 3402 ipi_action = eExitIdle; 3403 } 3404 } 3405 3406 switch (ipi_action) { 3407 case eDoNothing: 3408 break; 3409 case eExitIdle: 3410 if (processor == current_processor()) { 3411 if (csw_check_locked(processor, pset) != AST_NONE) 3412 ast_on(preempt); 3413 } else { 3414 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3415 /* cleared on exit from main processor_idle() loop */ 3416 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3417 machine_signal_idle(processor); 3418 } 3419 } 3420 break; 3421 case eInterruptRunning: 3422 if (processor == current_processor()) { 3423 if (csw_check_locked(processor, pset) != AST_NONE) 3424 ast_on(preempt); 3425 } else { 3426 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3427 /* cleared after IPI causes csw_check() to be called */ 3428 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3429 cause_ast_check(processor); 3430 } 3431 } 3432 break; 3433 } 3434 3435 pset_unlock(pset); 3436} 3437 3438#if defined(CONFIG_SCHED_TRADITIONAL) 3439 3440static boolean_t 3441processor_queue_empty(processor_t processor) 3442{ 3443 return runq_for_processor(processor)->count == 0; 3444 3445} 3446 3447static boolean_t 3448sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor) 3449{ 3450 processor_set_t pset = processor->processor_set; 3451 int count = runq_for_processor(processor)->count; 3452 3453 /* 3454 * The pset runq contains the count of all runnable threads 3455 * for all processors in the pset. However, for threads that 3456 * are bound to another processor, the current "processor" 3457 * is not eligible to execute the thread. 
So we only 3458 * include bound threads that our bound to the current 3459 * "processor". This allows the processor to idle when the 3460 * count of eligible threads drops to 0, even if there's 3461 * a runnable thread bound to a different processor in the 3462 * shared runq. 3463 */ 3464 3465 count -= pset->pset_runq_bound_count; 3466 count += processor->runq_bound_count; 3467 3468 return count == 0; 3469} 3470 3471static ast_t 3472processor_csw_check(processor_t processor) 3473{ 3474 run_queue_t runq; 3475 boolean_t has_higher; 3476 3477 assert(processor->active_thread != NULL); 3478 3479 runq = runq_for_processor(processor); 3480 if (first_timeslice(processor)) { 3481 has_higher = (runq->highq > processor->current_pri); 3482 } else { 3483 has_higher = (runq->highq >= processor->current_pri); 3484 } 3485 if (has_higher) { 3486 if (runq->urgency > 0) 3487 return (AST_PREEMPT | AST_URGENT); 3488 3489 if (processor->active_thread && thread_eager_preemption(processor->active_thread)) 3490 return (AST_PREEMPT | AST_URGENT); 3491 3492 return AST_PREEMPT; 3493 } 3494 3495 return AST_NONE; 3496} 3497 3498static boolean_t 3499processor_queue_has_priority(processor_t processor, 3500 int priority, 3501 boolean_t gte) 3502{ 3503 if (gte) 3504 return runq_for_processor(processor)->highq >= priority; 3505 else 3506 return runq_for_processor(processor)->highq > priority; 3507} 3508 3509static boolean_t 3510should_current_thread_rechoose_processor(processor_t processor) 3511{ 3512 return (processor->current_pri < BASEPRI_RTQUEUES 3513 && processor->processor_meta != PROCESSOR_META_NULL 3514 && processor->processor_meta->primary != processor); 3515} 3516 3517static int 3518sched_traditional_processor_runq_count(processor_t processor) 3519{ 3520 return runq_for_processor(processor)->count; 3521} 3522 3523 3524static uint64_t 3525sched_traditional_processor_runq_stats_count_sum(processor_t processor) 3526{ 3527 return runq_for_processor(processor)->runq_stats.count_sum; 3528} 3529 3530static uint64_t 3531sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor) 3532{ 3533 if (processor->cpu_id == processor->processor_set->cpu_set_low) 3534 return runq_for_processor(processor)->runq_stats.count_sum; 3535 else 3536 return 0ULL; 3537} 3538 3539#endif /* CONFIG_SCHED_TRADITIONAL */ 3540 3541#define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets) 3542 3543/* 3544 * choose_next_pset: 3545 * 3546 * Return the next sibling pset containing 3547 * available processors. 3548 * 3549 * Returns the original pset if none other is 3550 * suitable. 3551 */ 3552static processor_set_t 3553choose_next_pset( 3554 processor_set_t pset) 3555{ 3556 processor_set_t nset = pset; 3557 3558 do { 3559 nset = next_pset(nset); 3560 } while (nset->online_processor_count < 1 && nset != pset); 3561 3562 return (nset); 3563} 3564 3565/* 3566 * choose_processor: 3567 * 3568 * Choose a processor for the thread, beginning at 3569 * the pset. Accepts an optional processor hint in 3570 * the pset. 3571 * 3572 * Returns a processor, possibly from a different pset. 3573 * 3574 * The thread must be locked. The pset must be locked, 3575 * and the resulting pset is locked on return. 
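 * Never returns PROCESSOR_NULL; if no suitable idle or lightly loaded
 * processor is found, the boot (master) processor is used as a last
 * resort.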
3576 */ 3577processor_t 3578choose_processor( 3579 processor_set_t pset, 3580 processor_t processor, 3581 thread_t thread) 3582{ 3583 processor_set_t nset, cset = pset; 3584 processor_meta_t pmeta = PROCESSOR_META_NULL; 3585 processor_t mprocessor; 3586 3587 /* 3588 * Prefer the hinted processor, when appropriate. 3589 */ 3590 3591 if (processor != PROCESSOR_NULL) { 3592 if (processor->processor_meta != PROCESSOR_META_NULL) 3593 processor = processor->processor_meta->primary; 3594 } 3595 3596 mprocessor = machine_choose_processor(pset, processor); 3597 if (mprocessor != PROCESSOR_NULL) 3598 processor = mprocessor; 3599 3600 if (processor != PROCESSOR_NULL) { 3601 if (processor->processor_set != pset || 3602 processor->state == PROCESSOR_INACTIVE || 3603 processor->state == PROCESSOR_SHUTDOWN || 3604 processor->state == PROCESSOR_OFF_LINE) 3605 processor = PROCESSOR_NULL; 3606 else 3607 if (processor->state == PROCESSOR_IDLE || 3608 ((thread->sched_pri >= BASEPRI_RTQUEUES) && 3609 (processor->current_pri < BASEPRI_RTQUEUES))) 3610 return (processor); 3611 } 3612 3613 /* 3614 * Iterate through the processor sets to locate 3615 * an appropriate processor. 3616 */ 3617 do { 3618 /* 3619 * Choose an idle processor. 3620 */ 3621 if (!queue_empty(&cset->idle_queue)) 3622 return ((processor_t)queue_first(&cset->idle_queue)); 3623 3624 if (thread->sched_pri >= BASEPRI_RTQUEUES) { 3625 integer_t lowest_priority = MAXPRI + 1; 3626 integer_t lowest_unpaired = MAXPRI + 1; 3627 uint64_t furthest_deadline = 1; 3628 processor_t lp_processor = PROCESSOR_NULL; 3629 processor_t lp_unpaired = PROCESSOR_NULL; 3630 processor_t fd_processor = PROCESSOR_NULL; 3631 3632 lp_processor = cset->low_pri; 3633 /* Consider hinted processor */ 3634 if (lp_processor != PROCESSOR_NULL && 3635 ((lp_processor->processor_meta == PROCESSOR_META_NULL) || 3636 ((lp_processor == lp_processor->processor_meta->primary) && 3637 !queue_empty(&lp_processor->processor_meta->idle_queue))) && 3638 lp_processor->state != PROCESSOR_INACTIVE && 3639 lp_processor->state != PROCESSOR_SHUTDOWN && 3640 lp_processor->state != PROCESSOR_OFF_LINE && 3641 (lp_processor->current_pri < thread->sched_pri)) 3642 return lp_processor; 3643 3644 processor = (processor_t)queue_first(&cset->active_queue); 3645 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { 3646 /* Discover the processor executing the 3647 * thread with the lowest priority within 3648 * this pset, or the one with the furthest 3649 * deadline 3650 */ 3651 integer_t cpri = processor->current_pri; 3652 if (cpri < lowest_priority) { 3653 lowest_priority = cpri; 3654 lp_processor = processor; 3655 } 3656 3657 if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) { 3658 furthest_deadline = processor->deadline; 3659 fd_processor = processor; 3660 } 3661 3662 3663 if (processor->processor_meta != PROCESSOR_META_NULL && 3664 !queue_empty(&processor->processor_meta->idle_queue)) { 3665 if (cpri < lowest_unpaired) { 3666 lowest_unpaired = cpri; 3667 lp_unpaired = processor; 3668 pmeta = processor->processor_meta; 3669 } 3670 else 3671 if (pmeta == PROCESSOR_META_NULL) 3672 pmeta = processor->processor_meta; 3673 } 3674 processor = (processor_t)queue_next((queue_entry_t)processor); 3675 } 3676 3677 if (thread->sched_pri > lowest_unpaired) 3678 return lp_unpaired; 3679 3680 if (pmeta != PROCESSOR_META_NULL) 3681 return ((processor_t)queue_first(&pmeta->idle_queue)); 3682 if (thread->sched_pri > lowest_priority) 3683 return lp_processor; 3684 if 
(thread->realtime.deadline < furthest_deadline) 3685 return fd_processor; 3686 3687 processor = PROCESSOR_NULL; 3688 } 3689 else { 3690 /* 3691 * Check any hinted processors in the processor set if available. 3692 */ 3693 if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE && 3694 cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE && 3695 (processor == PROCESSOR_NULL || 3696 (thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) { 3697 processor = cset->low_pri; 3698 } 3699 else 3700 if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE && 3701 cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE && 3702 (processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT && 3703 SCHED(processor_runq_count)(cset->low_count) < SCHED(processor_runq_count)(processor)))) { 3704 processor = cset->low_count; 3705 } 3706 3707 /* 3708 * Otherwise, choose an available processor in the set. 3709 */ 3710 if (processor == PROCESSOR_NULL) { 3711 processor = (processor_t)dequeue_head(&cset->active_queue); 3712 if (processor != PROCESSOR_NULL) 3713 enqueue_tail(&cset->active_queue, (queue_entry_t)processor); 3714 } 3715 3716 if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) { 3717 if (processor->processor_meta != PROCESSOR_META_NULL && 3718 !queue_empty(&processor->processor_meta->idle_queue)) 3719 pmeta = processor->processor_meta; 3720 } 3721 } 3722 3723 /* 3724 * Move onto the next processor set. 3725 */ 3726 nset = next_pset(cset); 3727 3728 if (nset != pset) { 3729 pset_unlock(cset); 3730 3731 cset = nset; 3732 pset_lock(cset); 3733 } 3734 } while (nset != pset); 3735 3736 /* 3737 * Make sure that we pick a running processor, 3738 * and that the correct processor set is locked. 3739 */ 3740 do { 3741 if (pmeta != PROCESSOR_META_NULL) { 3742 if (cset != pmeta->primary->processor_set) { 3743 pset_unlock(cset); 3744 3745 cset = pmeta->primary->processor_set; 3746 pset_lock(cset); 3747 } 3748 3749 if (!queue_empty(&pmeta->idle_queue)) 3750 return ((processor_t)queue_first(&pmeta->idle_queue)); 3751 3752 pmeta = PROCESSOR_META_NULL; 3753 } 3754 3755 /* 3756 * If we haven't been able to choose a processor, 3757 * pick the boot processor and return it. 3758 */ 3759 if (processor == PROCESSOR_NULL) { 3760 processor = master_processor; 3761 3762 /* 3763 * Check that the correct processor set is 3764 * returned locked. 3765 */ 3766 if (cset != processor->processor_set) { 3767 pset_unlock(cset); 3768 3769 cset = processor->processor_set; 3770 pset_lock(cset); 3771 } 3772 3773 return (processor); 3774 } 3775 3776 /* 3777 * Check that the processor set for the chosen 3778 * processor is locked. 3779 */ 3780 if (cset != processor->processor_set) { 3781 pset_unlock(cset); 3782 3783 cset = processor->processor_set; 3784 pset_lock(cset); 3785 } 3786 3787 /* 3788 * We must verify that the chosen processor is still available. 3789 */ 3790 if (processor->state == PROCESSOR_INACTIVE || 3791 processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE) 3792 processor = PROCESSOR_NULL; 3793 } while (processor == PROCESSOR_NULL); 3794 3795 return (processor); 3796} 3797 3798/* 3799 * thread_setrun: 3800 * 3801 * Dispatch thread for execution, onto an idle 3802 * processor or run queue, and signal a preemption 3803 * as appropriate. 3804 * 3805 * Thread must be locked. 
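 * Placement honours, in order: an explicit processor binding, the
 * thread's affinity set, the processor it last ran on, and a per-task
 * pset hint.  Realtime threads are dispatched via realtime_setrun(),
 * fairshare threads via fairshare_setrun(), and everything else via
 * processor_setrun().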
3806 */ 3807void 3808thread_setrun( 3809 thread_t thread, 3810 integer_t options) 3811{ 3812 processor_t processor; 3813 processor_set_t pset; 3814 3815#if DEBUG 3816 assert(thread_runnable(thread)); 3817#endif 3818 3819 /* 3820 * Update priority if needed. 3821 */ 3822 if (SCHED(can_update_priority)(thread)) 3823 SCHED(update_priority)(thread); 3824 3825 assert(thread->runq == PROCESSOR_NULL); 3826 3827 if (thread->bound_processor == PROCESSOR_NULL) { 3828 /* 3829 * Unbound case. 3830 */ 3831 if (thread->affinity_set != AFFINITY_SET_NULL) { 3832 /* 3833 * Use affinity set policy hint. 3834 */ 3835 pset = thread->affinity_set->aset_pset; 3836 pset_lock(pset); 3837 3838 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); 3839 3840 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3841 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); 3842 } 3843 else 3844 if (thread->last_processor != PROCESSOR_NULL) { 3845 /* 3846 * Simple (last processor) affinity case. 3847 */ 3848 processor = thread->last_processor; 3849 pset = processor->processor_set; 3850 pset_lock(pset); 3851 processor = SCHED(choose_processor)(pset, processor, thread); 3852 3853 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3854 (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0); 3855 } 3856 else { 3857 /* 3858 * No Affinity case: 3859 * 3860 * Utilitize a per task hint to spread threads 3861 * among the available processor sets. 3862 */ 3863 task_t task = thread->task; 3864 3865 pset = task->pset_hint; 3866 if (pset == PROCESSOR_SET_NULL) 3867 pset = current_processor()->processor_set; 3868 3869 pset = choose_next_pset(pset); 3870 pset_lock(pset); 3871 3872 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); 3873 task->pset_hint = processor->processor_set; 3874 3875 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3876 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); 3877 } 3878 } 3879 else { 3880 /* 3881 * Bound case: 3882 * 3883 * Unconditionally dispatch on the processor. 3884 */ 3885 processor = thread->bound_processor; 3886 pset = processor->processor_set; 3887 pset_lock(pset); 3888 3889 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3890 (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0); 3891 } 3892 3893 /* 3894 * Dispatch the thread on the choosen processor. 3895 */ 3896 if (thread->sched_pri >= BASEPRI_RTQUEUES) 3897 realtime_setrun(processor, thread); 3898 else if (thread->sched_mode == TH_MODE_FAIRSHARE) 3899 fairshare_setrun(processor, thread); 3900 else 3901 processor_setrun(processor, thread, options); 3902} 3903 3904processor_set_t 3905task_choose_pset( 3906 task_t task) 3907{ 3908 processor_set_t pset = task->pset_hint; 3909 3910 if (pset != PROCESSOR_SET_NULL) 3911 pset = choose_next_pset(pset); 3912 3913 return (pset); 3914} 3915 3916#if defined(CONFIG_SCHED_TRADITIONAL) 3917 3918/* 3919 * processor_queue_shutdown: 3920 * 3921 * Shutdown a processor run queue by 3922 * re-dispatching non-bound threads. 3923 * 3924 * Associated pset must be locked, and is 3925 * returned unlocked. 
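 * Bound threads are left in place; unbound threads are collected on a
 * local queue and re-dispatched with thread_setrun() after the pset
 * lock has been dropped.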
3926 */ 3927void 3928processor_queue_shutdown( 3929 processor_t processor) 3930{ 3931 processor_set_t pset = processor->processor_set; 3932 run_queue_t rq = runq_for_processor(processor); 3933 queue_t queue = rq->queues + rq->highq; 3934 int pri = rq->highq, count = rq->count; 3935 thread_t next, thread; 3936 queue_head_t tqueue; 3937 3938 queue_init(&tqueue); 3939 3940 while (count > 0) { 3941 thread = (thread_t)queue_first(queue); 3942 while (!queue_end(queue, (queue_entry_t)thread)) { 3943 next = (thread_t)queue_next((queue_entry_t)thread); 3944 3945 if (thread->bound_processor == PROCESSOR_NULL) { 3946 remqueue((queue_entry_t)thread); 3947 3948 thread->runq = PROCESSOR_NULL; 3949 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3950 runq_consider_decr_bound_count(processor, thread); 3951 rq->count--; 3952 if (SCHED(priority_is_urgent)(pri)) { 3953 rq->urgency--; assert(rq->urgency >= 0); 3954 } 3955 if (queue_empty(queue)) { 3956 if (pri != IDLEPRI) 3957 clrbit(MAXPRI - pri, rq->bitmap); 3958 rq->highq = MAXPRI - ffsbit(rq->bitmap); 3959 } 3960 3961 enqueue_tail(&tqueue, (queue_entry_t)thread); 3962 } 3963 count--; 3964 3965 thread = next; 3966 } 3967 3968 queue--; pri--; 3969 } 3970 3971 pset_unlock(pset); 3972 3973 while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) { 3974 thread_lock(thread); 3975 3976 thread_setrun(thread, SCHED_TAILQ); 3977 3978 thread_unlock(thread); 3979 } 3980} 3981 3982#endif /* CONFIG_SCHED_TRADITIONAL */ 3983 3984/* 3985 * Check for a preemption point in 3986 * the current context. 3987 * 3988 * Called at splsched. 3989 */ 3990ast_t 3991csw_check( 3992 processor_t processor) 3993{ 3994 processor_set_t pset = processor->processor_set; 3995 ast_t result; 3996 3997 pset_lock(pset); 3998 3999 /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */ 4000 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); 4001 4002 result = csw_check_locked(processor, pset); 4003 4004 pset_unlock(pset); 4005 4006 return result; 4007} 4008 4009/* 4010 * Check for preemption at splsched with 4011 * pset locked 4012 */ 4013ast_t 4014csw_check_locked( 4015 processor_t processor, 4016 processor_set_t pset __unused) 4017{ 4018 ast_t result = AST_NONE; 4019 thread_t thread = processor->active_thread; 4020 4021 if (first_timeslice(processor)) { 4022 if (rt_runq.count > 0) 4023 return (AST_PREEMPT | AST_URGENT); 4024 } 4025 else { 4026 if (rt_runq.count > 0) { 4027 if (BASEPRI_RTQUEUES > processor->current_pri) 4028 return (AST_PREEMPT | AST_URGENT); 4029 else 4030 return (AST_PREEMPT); 4031 } 4032 } 4033 4034 result = SCHED(processor_csw_check)(processor); 4035 if (result != AST_NONE) 4036 return (result); 4037 4038 if (SCHED(should_current_thread_rechoose_processor)(processor)) 4039 return (AST_PREEMPT); 4040 4041 if (machine_processor_is_inactive(processor)) 4042 return (AST_PREEMPT); 4043 4044 if (thread->state & TH_SUSP) 4045 return (AST_PREEMPT); 4046 4047 return (AST_NONE); 4048} 4049 4050/* 4051 * set_sched_pri: 4052 * 4053 * Set the scheduled priority of the specified thread. 4054 * 4055 * This may cause the thread to change queues. 4056 * 4057 * Thread must be locked. 
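 * If the thread was on a run queue it is removed and re-dispatched at
 * the new priority; if it is currently running, a preemption check is
 * made (csw_check() on the local processor, cause_ast_check() on a
 * remote one).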
4058 */ 4059void 4060set_sched_pri( 4061 thread_t thread, 4062 int priority) 4063{ 4064 boolean_t removed = thread_run_queue_remove(thread); 4065 4066 thread->sched_pri = priority; 4067 if (removed) 4068 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 4069 else 4070 if (thread->state & TH_RUN) { 4071 processor_t processor = thread->last_processor; 4072 4073 if (thread == current_thread()) { 4074 ast_t preempt; 4075 4076 processor->current_pri = priority; 4077 processor->current_thmode = thread->sched_mode; 4078 if ((preempt = csw_check(processor)) != AST_NONE) 4079 ast_on(preempt); 4080 } 4081 else 4082 if ( processor != PROCESSOR_NULL && 4083 processor->active_thread == thread ) 4084 cause_ast_check(processor); 4085 } 4086} 4087 4088#if 0 4089 4090static void 4091run_queue_check( 4092 run_queue_t rq, 4093 thread_t thread) 4094{ 4095 queue_t q; 4096 queue_entry_t qe; 4097 4098 if (rq != thread->runq) 4099 panic("run_queue_check: thread runq"); 4100 4101 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI) 4102 panic("run_queue_check: thread sched_pri"); 4103 4104 q = &rq->queues[thread->sched_pri]; 4105 qe = queue_first(q); 4106 while (!queue_end(q, qe)) { 4107 if (qe == (queue_entry_t)thread) 4108 return; 4109 4110 qe = queue_next(qe); 4111 } 4112 4113 panic("run_queue_check: end"); 4114} 4115 4116#endif /* DEBUG */ 4117 4118#if defined(CONFIG_SCHED_TRADITIONAL) 4119 4120/* locks the runqueue itself */ 4121 4122static boolean_t 4123processor_queue_remove( 4124 processor_t processor, 4125 thread_t thread) 4126{ 4127 void * rqlock; 4128 run_queue_t rq; 4129 4130 rqlock = &processor->processor_set->sched_lock; 4131 rq = runq_for_processor(processor); 4132 4133 simple_lock(rqlock); 4134 if (processor == thread->runq) { 4135 /* 4136 * Thread is on a run queue and we have a lock on 4137 * that run queue. 4138 */ 4139 runq_consider_decr_bound_count(processor, thread); 4140 run_queue_remove(rq, thread); 4141 } 4142 else { 4143 /* 4144 * The thread left the run queue before we could 4145 * lock the run queue. 4146 */ 4147 assert(thread->runq == PROCESSOR_NULL); 4148 processor = PROCESSOR_NULL; 4149 } 4150 4151 simple_unlock(rqlock); 4152 4153 return (processor != PROCESSOR_NULL); 4154} 4155 4156#endif /* CONFIG_SCHED_TRADITIONAL */ 4157 4158/* 4159 * thread_run_queue_remove: 4160 * 4161 * Remove a thread from a current run queue and 4162 * return TRUE if successful. 4163 * 4164 * Thread must be locked. 4165 */ 4166boolean_t 4167thread_run_queue_remove( 4168 thread_t thread) 4169{ 4170 processor_t processor = thread->runq; 4171 4172 /* 4173 * If processor is PROCESSOR_NULL, the thread will stay out of the 4174 * run queues because the caller locked the thread. Otherwise 4175 * the thread is on a run queue, but could be chosen for dispatch 4176 * and removed. 4177 */ 4178 if (processor != PROCESSOR_NULL) { 4179 queue_t q; 4180 4181 /* 4182 * The processor run queues are locked by the 4183 * processor set. Real-time priorities use a 4184 * global queue with a dedicated lock. 4185 */ 4186 if (thread->sched_mode == TH_MODE_FAIRSHARE) { 4187 return SCHED(fairshare_queue_remove)(thread); 4188 } 4189 4190 if (thread->sched_pri < BASEPRI_RTQUEUES) { 4191 return SCHED(processor_queue_remove)(processor, thread); 4192 } 4193 4194 simple_lock(&rt_lock); 4195 q = &rt_runq.queue; 4196 4197 if (processor == thread->runq) { 4198 /* 4199 * Thread is on a run queue and we have a lock on 4200 * that run queue. 
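 * Remove it from the global realtime queue and clear the thread's runq binding.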
4201 */ 4202 remqueue((queue_entry_t)thread); 4203 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 4204 rt_runq.count--; 4205 4206 thread->runq = PROCESSOR_NULL; 4207 } 4208 else { 4209 /* 4210 * The thread left the run queue before we could 4211 * lock the run queue. 4212 */ 4213 assert(thread->runq == PROCESSOR_NULL); 4214 processor = PROCESSOR_NULL; 4215 } 4216 4217 simple_unlock(&rt_lock); 4218 } 4219 4220 return (processor != PROCESSOR_NULL); 4221} 4222 4223#if defined(CONFIG_SCHED_TRADITIONAL) 4224 4225/* 4226 * steal_processor_thread: 4227 * 4228 * Locate a thread to steal from the processor and 4229 * return it. 4230 * 4231 * Associated pset must be locked. Returns THREAD_NULL 4232 * on failure. 4233 */ 4234static thread_t 4235steal_processor_thread( 4236 processor_t processor) 4237{ 4238 run_queue_t rq = runq_for_processor(processor); 4239 queue_t queue = rq->queues + rq->highq; 4240 int pri = rq->highq, count = rq->count; 4241 thread_t thread; 4242 4243 while (count > 0) { 4244 thread = (thread_t)queue_first(queue); 4245 while (!queue_end(queue, (queue_entry_t)thread)) { 4246 if (thread->bound_processor == PROCESSOR_NULL) { 4247 remqueue((queue_entry_t)thread); 4248 4249 thread->runq = PROCESSOR_NULL; 4250 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 4251 runq_consider_decr_bound_count(processor, thread); 4252 rq->count--; 4253 if (SCHED(priority_is_urgent)(pri)) { 4254 rq->urgency--; assert(rq->urgency >= 0); 4255 } 4256 if (queue_empty(queue)) { 4257 if (pri != IDLEPRI) 4258 clrbit(MAXPRI - pri, rq->bitmap); 4259 rq->highq = MAXPRI - ffsbit(rq->bitmap); 4260 } 4261 4262 return (thread); 4263 } 4264 count--; 4265 4266 thread = (thread_t)queue_next((queue_entry_t)thread); 4267 } 4268 4269 queue--; pri--; 4270 } 4271 4272 return (THREAD_NULL); 4273} 4274 4275/* 4276 * Locate and steal a thread, beginning 4277 * at the pset. 4278 * 4279 * The pset must be locked, and is returned 4280 * unlocked. 4281 * 4282 * Returns the stolen thread, or THREAD_NULL on 4283 * failure. 
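 * A processor that yields a thread is requeued at the tail of the active queue, so repeated steals tend to rotate across the processors in the pset.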
4284 */ 4285static thread_t 4286steal_thread( 4287 processor_set_t pset) 4288{ 4289 processor_set_t nset, cset = pset; 4290 processor_t processor; 4291 thread_t thread; 4292 4293 do { 4294 processor = (processor_t)queue_first(&cset->active_queue); 4295 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { 4296 if (runq_for_processor(processor)->count > 0) { 4297 thread = steal_processor_thread(processor); 4298 if (thread != THREAD_NULL) { 4299 remqueue((queue_entry_t)processor); 4300 enqueue_tail(&cset->active_queue, (queue_entry_t)processor); 4301 4302 pset_unlock(cset); 4303 4304 return (thread); 4305 } 4306 } 4307 4308 processor = (processor_t)queue_next((queue_entry_t)processor); 4309 } 4310 4311 nset = next_pset(cset); 4312 4313 if (nset != pset) { 4314 pset_unlock(cset); 4315 4316 cset = nset; 4317 pset_lock(cset); 4318 } 4319 } while (nset != pset); 4320 4321 pset_unlock(cset); 4322 4323 return (THREAD_NULL); 4324} 4325 4326static thread_t steal_thread_disabled( 4327 processor_set_t pset) 4328{ 4329 pset_unlock(pset); 4330 4331 return (THREAD_NULL); 4332} 4333 4334#endif /* CONFIG_SCHED_TRADITIONAL */ 4335 4336 4337void 4338sys_override_cpu_throttle(int flag) 4339{ 4340 if (flag == CPU_THROTTLE_ENABLE) 4341 cpu_throttle_enabled = 1; 4342 if (flag == CPU_THROTTLE_DISABLE) 4343 cpu_throttle_enabled = 0; 4344} 4345 4346int 4347thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2) 4348{ 4349 if (thread == NULL || (thread->state & TH_IDLE)) { 4350 *arg1 = 0; 4351 *arg2 = 0; 4352 4353 return (THREAD_URGENCY_NONE); 4354 } else if (thread->sched_mode == TH_MODE_REALTIME) { 4355 *arg1 = thread->realtime.period; 4356 *arg2 = thread->realtime.deadline; 4357 4358 return (THREAD_URGENCY_REAL_TIME); 4359 } else if (cpu_throttle_enabled && 4360 ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) { 4361 /* 4362 * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and the thread is not promoted. 4363 */ 4364 *arg1 = thread->sched_pri; 4365 *arg2 = thread->priority; 4366 4367 return (THREAD_URGENCY_BACKGROUND); 4368 } else { 4369 *arg1 = thread->sched_pri; 4370 *arg2 = thread->priority; 4371 4372 return (THREAD_URGENCY_NORMAL); 4373 } 4374} 4375 4376 4377/* 4378 * This is the processor idle loop, which just looks for other threads 4379 * to execute. Processor idle threads invoke this without supplying a 4380 * current thread, to idle without an asserted wait state. 4381 * 4382 * Returns the next thread to execute if dispatched directly. 4383 */ 4384 4385#if 0 4386#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__) 4387#else 4388#define IDLE_KERNEL_DEBUG_CONSTANT(...)
do { } while(0) 4389#endif 4390 4391thread_t 4392processor_idle( 4393 thread_t thread, 4394 processor_t processor) 4395{ 4396 processor_set_t pset = processor->processor_set; 4397 thread_t new_thread; 4398 int state; 4399 (void)splsched(); 4400 4401 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4402 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, 4403 (uintptr_t)thread_tid(thread), 0, 0, 0, 0); 4404 4405 SCHED_STATS_CPU_IDLE_START(processor); 4406 4407 timer_switch(&PROCESSOR_DATA(processor, system_state), 4408 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state)); 4409 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state); 4410 4411 while (1) { 4412 4413 if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */ 4414 break; 4415 if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id)) 4416 break; 4417 if (!SCHED(processor_queue_empty)(processor)) 4418 break; 4419 if (rt_runq.count) 4420 break; 4421#if CONFIG_SCHED_IDLE_IN_PLACE 4422 if (thread != THREAD_NULL) { 4423 /* Did the idle-in-place thread wake up? */ 4424 if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active) 4425 break; 4426 } 4427#endif 4428 4429 IDLE_KERNEL_DEBUG_CONSTANT( 4430 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0); 4431 4432 machine_track_platform_idle(TRUE); 4433 4434 machine_idle(); 4435 4436 machine_track_platform_idle(FALSE); 4437 4438 (void)splsched(); 4439 4440 IDLE_KERNEL_DEBUG_CONSTANT( 4441 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0); 4442 4443 if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor)) 4444 break; 4445 } 4446 4447 timer_switch(&PROCESSOR_DATA(processor, idle_state), 4448 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state)); 4449 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state); 4450 4451 pset_lock(pset); 4452 4453 /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */ 4454 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); 4455 4456 state = processor->state; 4457 if (state == PROCESSOR_DISPATCHING) { 4458 /* 4459 * Common case -- cpu dispatched.
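 * Take the handoff thread and mark the processor running. If higher-priority work (including realtime) arrived while idling, the handoff thread is pushed back onto a run queue and THREAD_NULL is returned instead.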
4460 */ 4461 new_thread = processor->next_thread; 4462 processor->next_thread = THREAD_NULL; 4463 processor->state = PROCESSOR_RUNNING; 4464 4465 if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) || 4466 (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)) ) { 4467 processor->current_pri = IDLEPRI; 4468 processor->current_thmode = TH_MODE_FIXED; 4469 processor->deadline = UINT64_MAX; 4470 4471 pset_unlock(pset); 4472 4473 thread_lock(new_thread); 4474 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0); 4475 thread_setrun(new_thread, SCHED_HEADQ); 4476 thread_unlock(new_thread); 4477 4478 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4479 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4480 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4481 4482 return (THREAD_NULL); 4483 } 4484 4485 pset_unlock(pset); 4486 4487 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4488 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4489 (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0); 4490 4491 return (new_thread); 4492 } 4493 else 4494 if (state == PROCESSOR_IDLE) { 4495 remqueue((queue_entry_t)processor); 4496 4497 processor->state = PROCESSOR_RUNNING; 4498 processor->current_pri = IDLEPRI; 4499 processor->current_thmode = TH_MODE_FIXED; 4500 processor->deadline = UINT64_MAX; 4501 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 4502 } 4503 else 4504 if (state == PROCESSOR_INACTIVE) { 4505 processor->state = PROCESSOR_RUNNING; 4506 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 4507 } 4508 else 4509 if (state == PROCESSOR_SHUTDOWN) { 4510 /* 4511 * Going off-line. Force a 4512 * reschedule. 4513 */ 4514 if ((new_thread = processor->next_thread) != THREAD_NULL) { 4515 processor->next_thread = THREAD_NULL; 4516 processor->current_pri = IDLEPRI; 4517 processor->current_thmode = TH_MODE_FIXED; 4518 processor->deadline = UINT64_MAX; 4519 4520 pset_unlock(pset); 4521 4522 thread_lock(new_thread); 4523 thread_setrun(new_thread, SCHED_HEADQ); 4524 thread_unlock(new_thread); 4525 4526 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4527 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4528 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4529 4530 return (THREAD_NULL); 4531 } 4532 } 4533 4534 pset_unlock(pset); 4535 4536 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4537 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4538 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4539 4540 return (THREAD_NULL); 4541} 4542 4543/* 4544 * Each processor has a dedicated thread which 4545 * executes the idle loop when there is no suitable 4546 * previous context. 
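 * If processor_idle() hands back a thread for direct dispatch, it is run via thread_run(); otherwise the idle thread blocks and restarts this continuation.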
4547 */ 4548void 4549idle_thread(void) 4550{ 4551 processor_t processor = current_processor(); 4552 thread_t new_thread; 4553 4554 new_thread = processor_idle(THREAD_NULL, processor); 4555 if (new_thread != THREAD_NULL) { 4556 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); 4557 /*NOTREACHED*/ 4558 } 4559 4560 thread_block((thread_continue_t)idle_thread); 4561 /*NOTREACHED*/ 4562} 4563 4564kern_return_t 4565idle_thread_create( 4566 processor_t processor) 4567{ 4568 kern_return_t result; 4569 thread_t thread; 4570 spl_t s; 4571 4572 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread); 4573 if (result != KERN_SUCCESS) 4574 return (result); 4575 4576 s = splsched(); 4577 thread_lock(thread); 4578 thread->bound_processor = processor; 4579 processor->idle_thread = thread; 4580 thread->sched_pri = thread->priority = IDLEPRI; 4581 thread->state = (TH_RUN | TH_IDLE); 4582 thread->options |= TH_OPT_IDLE_THREAD; 4583 thread_unlock(thread); 4584 splx(s); 4585 4586 thread_deallocate(thread); 4587 4588 return (KERN_SUCCESS); 4589} 4590 4591/* 4592 * sched_startup: 4593 * 4594 * Kicks off scheduler services. 4595 * 4596 * Called at splsched. 4597 */ 4598void 4599sched_startup(void) 4600{ 4601 kern_return_t result; 4602 thread_t thread; 4603 4604 result = kernel_thread_start_priority((thread_continue_t)sched_init_thread, 4605 (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread); 4606 if (result != KERN_SUCCESS) 4607 panic("sched_startup"); 4608 4609 thread_deallocate(thread); 4610 4611 /* 4612 * Yield to the sched_init_thread once, to 4613 * initialize our own thread after being switched 4614 * back to. 4615 * 4616 * The current thread is the only other thread 4617 * active at this point. 4618 */ 4619 thread_block(THREAD_CONTINUE_NULL); 4620} 4621 4622#if defined(CONFIG_SCHED_TRADITIONAL) 4623 4624static volatile uint64_t sched_maintenance_deadline; 4625static uint64_t sched_tick_last_abstime; 4626static uint64_t sched_tick_delta; 4627uint64_t sched_tick_max_delta; 4628/* 4629 * sched_traditional_maintenance_continue: 4630 * 4631 * Perform periodic bookkeeping functions about ten 4632 * times per second. 4633 */ 4634static void 4635sched_traditional_maintenance_continue(void) 4636{ 4637 uint64_t sched_tick_ctime; 4638 sched_tick_ctime = mach_absolute_time(); 4639 4640 if (__improbable(sched_tick_last_abstime == 0)) { 4641 sched_tick_last_abstime = sched_tick_ctime; 4642 sched_tick_delta = 1; 4643 } else { 4644 sched_tick_delta = ((sched_tick_ctime) - sched_tick_last_abstime) / sched_tick_interval; 4645 /* Ensure a delta of at least 1, since the interval could be slightly 4646 * smaller than the sched_tick_interval due to dispatch 4647 * latencies. 4648 */ 4649 sched_tick_delta = MAX(sched_tick_delta, 1); 4650 4651 /* In the event that interrupt latencies or platform 4652 * idle events that advanced the timebase resulted 4653 * in periods where no threads were dispatched, 4654 * cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA 4655 * iterations. 4656 */ 4657 sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA); 4658 4659 sched_tick_last_abstime = sched_tick_ctime; 4660 sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta); 4661 } 4662 4663 /* Add a number of pseudo-ticks corresponding to the elapsed interval. 4664 * This could be greater than 1 if there are substantial intervals during 4665 * which all processors are idle, which rarely happens in practice.
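 * Advancing sched_tick by the full delta lets the priority averaging and decay machinery account for the entire elapsed interval.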
4666 */ 4667 4668 sched_tick += sched_tick_delta; 4669 4670 /* 4671 * Compute various averages. 4672 */ 4673 compute_averages(sched_tick_delta); 4674 4675 /* 4676 * Scan the run queues for threads which 4677 * may need to be updated. 4678 */ 4679 thread_update_scan(); 4680 4681 assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT); 4682 thread_block((thread_continue_t)sched_traditional_maintenance_continue); 4683 /*NOTREACHED*/ 4684} 4685 4686static uint64_t sched_maintenance_wakeups; 4687 4688/* 4689 * Determine if the set of routines formerly driven by a maintenance timer 4690 * must be invoked, based on a deadline comparison. Signals the scheduler 4691 * maintenance thread on deadline expiration. Must be invoked at an interval 4692 * lower than the "sched_tick_interval", currently accomplished by 4693 * invocation via the quantum expiration timer and at context switch time. 4694 * Performance matters: this routine reuses a timestamp approximating the 4695 * current absolute time received from the caller, and should perform 4696 * no more than a comparison against the deadline in the common case. 4697 */ 4698void 4699sched_traditional_consider_maintenance(uint64_t ctime) { 4700 uint64_t ndeadline, deadline = sched_maintenance_deadline; 4701 4702 if (__improbable(ctime >= deadline)) { 4703 if (__improbable(current_thread() == sched_maintenance_thread)) 4704 return; 4705 OSMemoryBarrier(); 4706 4707 ndeadline = ctime + sched_tick_interval; 4708 4709 if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) { 4710 thread_wakeup((event_t)sched_traditional_maintenance_continue); 4711 sched_maintenance_wakeups++; 4712 } 4713 } 4714} 4715 4716#endif /* CONFIG_SCHED_TRADITIONAL */ 4717 4718void 4719sched_init_thread(void (*continuation)(void)) 4720{ 4721 thread_block(THREAD_CONTINUE_NULL); 4722 4723 sched_maintenance_thread = current_thread(); 4724 continuation(); 4725 4726 /*NOTREACHED*/ 4727} 4728 4729#if defined(CONFIG_SCHED_TRADITIONAL) 4730 4731/* 4732 * thread_update_scan / runq_scan: 4733 * 4734 * Scan the run queues to account for timesharing threads 4735 * which need to be updated. 4736 * 4737 * Scanner runs in two passes. Pass one squirrels likely 4738 * threads away in an array, pass two does the update. 4739 * 4740 * This is necessary because the run queue is locked for 4741 * the candidate scan, but the thread is locked for the update. 4742 * 4743 * Array should be sized to make forward progress, without 4744 * disabling preemption for long periods. 4745 */ 4746 4747#define THREAD_UPDATE_SIZE 128 4748 4749static thread_t thread_update_array[THREAD_UPDATE_SIZE]; 4750static int thread_update_count = 0; 4751 4752/* 4753 * Scan a runq for candidate threads. 4754 * 4755 * Returns TRUE if retry is needed. 
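 * A TRUE return indicates the update array filled up; the caller drains the array and rescans.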
4756 */ 4757static boolean_t 4758runq_scan( 4759 run_queue_t runq) 4760{ 4761 register int count; 4762 register queue_t q; 4763 register thread_t thread; 4764 4765 if ((count = runq->count) > 0) { 4766 q = runq->queues + runq->highq; 4767 while (count > 0) { 4768 queue_iterate(q, thread, thread_t, links) { 4769 if ( thread->sched_stamp != sched_tick && 4770 (thread->sched_mode == TH_MODE_TIMESHARE) ) { 4771 if (thread_update_count == THREAD_UPDATE_SIZE) 4772 return (TRUE); 4773 4774 thread_update_array[thread_update_count++] = thread; 4775 thread_reference_internal(thread); 4776 } 4777 4778 count--; 4779 } 4780 4781 q--; 4782 } 4783 } 4784 4785 return (FALSE); 4786} 4787 4788static void 4789thread_update_scan(void) 4790{ 4791 boolean_t restart_needed = FALSE; 4792 processor_t processor = processor_list; 4793 processor_set_t pset; 4794 thread_t thread; 4795 spl_t s; 4796 4797 do { 4798 do { 4799 pset = processor->processor_set; 4800 4801 s = splsched(); 4802 pset_lock(pset); 4803 4804 restart_needed = runq_scan(runq_for_processor(processor)); 4805 4806 pset_unlock(pset); 4807 splx(s); 4808 4809 if (restart_needed) 4810 break; 4811 4812 thread = processor->idle_thread; 4813 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { 4814 if (thread_update_count == THREAD_UPDATE_SIZE) { 4815 restart_needed = TRUE; 4816 break; 4817 } 4818 4819 thread_update_array[thread_update_count++] = thread; 4820 thread_reference_internal(thread); 4821 } 4822 } while ((processor = processor->processor_list) != NULL); 4823 4824 /* 4825 * Ok, we now have a collection of candidates -- fix them. 4826 */ 4827 while (thread_update_count > 0) { 4828 thread = thread_update_array[--thread_update_count]; 4829 thread_update_array[thread_update_count] = THREAD_NULL; 4830 4831 s = splsched(); 4832 thread_lock(thread); 4833 if ( !(thread->state & (TH_WAIT)) ) { 4834 if (SCHED(can_update_priority)(thread)) 4835 SCHED(update_priority)(thread); 4836 } 4837 thread_unlock(thread); 4838 splx(s); 4839 4840 thread_deallocate(thread); 4841 } 4842 } while (restart_needed); 4843} 4844 4845#endif /* CONFIG_SCHED_TRADITIONAL */ 4846 4847boolean_t 4848thread_eager_preemption(thread_t thread) 4849{ 4850 return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0); 4851} 4852 4853void 4854thread_set_eager_preempt(thread_t thread) 4855{ 4856 spl_t x; 4857 processor_t p; 4858 ast_t ast = AST_NONE; 4859 4860 x = splsched(); 4861 p = current_processor(); 4862 4863 thread_lock(thread); 4864 thread->sched_flags |= TH_SFLAG_EAGERPREEMPT; 4865 4866 if (thread == current_thread()) { 4867 thread_unlock(thread); 4868 4869 ast = csw_check(p); 4870 if (ast != AST_NONE) { 4871 (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast); 4872 } 4873 } else { 4874 p = thread->last_processor; 4875 4876 if (p != PROCESSOR_NULL && p->state == PROCESSOR_RUNNING && 4877 p->active_thread == thread) { 4878 cause_ast_check(p); 4879 } 4880 4881 thread_unlock(thread); 4882 } 4883 4884 splx(x); 4885} 4886 4887void 4888thread_clear_eager_preempt(thread_t thread) 4889{ 4890 spl_t x; 4891 4892 x = splsched(); 4893 thread_lock(thread); 4894 4895 thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT; 4896 4897 thread_unlock(thread); 4898 splx(x); 4899} 4900/* 4901 * Scheduling statistics 4902 */ 4903void 4904sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri) 4905{ 4906 struct processor_sched_statistics *stats; 4907 boolean_t to_realtime = FALSE; 4908 4909 stats = &processor->processor_data.sched_stats; 4910 stats->csw_count++; 4911 
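	/* otherpri is the priority of the thread being switched to; switches into realtime and preemptions are tallied separately below. */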
4912 if (otherpri >= BASEPRI_REALTIME) { 4913 stats->rt_sched_count++; 4914 to_realtime = TRUE; 4915 } 4916 4917 if ((reasons & AST_PREEMPT) != 0) { 4918 stats->preempt_count++; 4919 4920 if (selfpri >= BASEPRI_REALTIME) { 4921 stats->preempted_rt_count++; 4922 } 4923 4924 if (to_realtime) { 4925 stats->preempted_by_rt_count++; 4926 } 4927 4928 } 4929} 4930 4931void 4932sched_stats_handle_runq_change(struct runq_stats *stats, int old_count) 4933{ 4934 uint64_t timestamp = mach_absolute_time(); 4935 4936 stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count; 4937 stats->last_change_timestamp = timestamp; 4938} 4939 4940/* 4941 * For calls from assembly code 4942 */ 4943#undef thread_wakeup 4944void 4945thread_wakeup( 4946 event_t x); 4947 4948void 4949thread_wakeup( 4950 event_t x) 4951{ 4952 thread_wakeup_with_result(x, THREAD_AWAKENED); 4953} 4954 4955boolean_t 4956preemption_enabled(void) 4957{ 4958 return (get_preemption_level() == 0 && ml_get_interrupts_enabled()); 4959} 4960 4961#if DEBUG 4962static boolean_t 4963thread_runnable( 4964 thread_t thread) 4965{ 4966 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN); 4967} 4968#endif /* DEBUG */ 4969 4970static void 4971sched_timer_deadline_tracking_init(void) { 4972 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1); 4973 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2); 4974} 4975