1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_FREE_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: sched_prim.c 60 * Author: Avadis Tevanian, Jr. 
61 * Date: 1986 62 * 63 * Scheduling primitives 64 * 65 */ 66 67#include <debug.h> 68 69#include <mach/mach_types.h> 70#include <mach/machine.h> 71#include <mach/policy.h> 72#include <mach/sync_policy.h> 73#include <mach/thread_act.h> 74 75#include <machine/machine_routines.h> 76#include <machine/sched_param.h> 77#include <machine/machine_cpu.h> 78#include <machine/machlimits.h> 79 80#ifdef CONFIG_MACH_APPROXIMATE_TIME 81#include <machine/commpage.h> 82#endif 83 84#include <kern/kern_types.h> 85#include <kern/clock.h> 86#include <kern/counters.h> 87#include <kern/cpu_number.h> 88#include <kern/cpu_data.h> 89#include <kern/debug.h> 90#include <kern/macro_help.h> 91#include <kern/machine.h> 92#include <kern/misc_protos.h> 93#include <kern/processor.h> 94#include <kern/queue.h> 95#include <kern/sched.h> 96#include <kern/sched_prim.h> 97#include <kern/sfi.h> 98#include <kern/syscall_subr.h> 99#include <kern/task.h> 100#include <kern/thread.h> 101#include <kern/wait_queue.h> 102#include <kern/ledger.h> 103#include <kern/timer_queue.h> 104 105#include <vm/pmap.h> 106#include <vm/vm_kern.h> 107#include <vm/vm_map.h> 108 109#include <mach/sdt.h> 110 111#include <sys/kdebug.h> 112 113#include <kern/pms.h> 114 115#if defined(CONFIG_TELEMETRY) && defined(CONFIG_SCHED_TIMESHARE_CORE) 116#include <kern/telemetry.h> 117#endif 118 119struct rt_queue rt_runq; 120#define RT_RUNQ ((processor_t)-1) 121decl_simple_lock_data(static,rt_lock); 122 123#if defined(CONFIG_SCHED_FAIRSHARE_CORE) 124static struct fairshare_queue fs_runq; 125#define FS_RUNQ ((processor_t)-2) 126decl_simple_lock_data(static,fs_lock); 127#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ 128 129#define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ 130int default_preemption_rate = DEFAULT_PREEMPTION_RATE; 131 132#define DEFAULT_BG_PREEMPTION_RATE 400 /* (1/s) */ 133int default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE; 134 135#define MAX_UNSAFE_QUANTA 800 136int max_unsafe_quanta = MAX_UNSAFE_QUANTA; 137 138#define MAX_POLL_QUANTA 2 139int max_poll_quanta = MAX_POLL_QUANTA; 140 141#define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */ 142int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT; 143 144uint64_t max_poll_computation; 145 146uint64_t max_unsafe_computation; 147uint64_t sched_safe_duration; 148 149#if defined(CONFIG_SCHED_TIMESHARE_CORE) 150 151uint32_t std_quantum; 152uint32_t min_std_quantum; 153uint32_t bg_quantum; 154 155uint32_t std_quantum_us; 156uint32_t bg_quantum_us; 157 158#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 159 160uint32_t thread_depress_time; 161uint32_t default_timeshare_computation; 162uint32_t default_timeshare_constraint; 163 164uint32_t max_rt_quantum; 165uint32_t min_rt_quantum; 166 167#if defined(CONFIG_SCHED_TIMESHARE_CORE) 168 169unsigned sched_tick; 170uint32_t sched_tick_interval; 171#if defined(CONFIG_TELEMETRY) 172uint32_t sched_telemetry_interval; 173#endif /* CONFIG_TELEMETRY */ 174 175uint32_t sched_pri_shift = INT8_MAX; 176uint32_t sched_background_pri_shift = INT8_MAX; 177uint32_t sched_combined_fgbg_pri_shift = INT8_MAX; 178uint32_t sched_fixed_shift; 179uint32_t sched_use_combined_fgbg_decay = 0; 180 181uint32_t sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */ 182 183/* Allow foreground to decay past default to resolve inversions */ 184#define DEFAULT_DECAY_BAND_LIMIT ((BASEPRI_FOREGROUND - BASEPRI_DEFAULT) + 2) 185int sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT; 186 187/* Defaults for timer deadline profiling */ 188#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with 
deadlines <= 189 * 2ms */ 190#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines 191 <= 5ms */ 192 193uint64_t timer_deadline_tracking_bin_1; 194uint64_t timer_deadline_tracking_bin_2; 195 196thread_t sched_maintenance_thread; 197 198#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 199 200#if defined(CONFIG_SCHED_TRADITIONAL) 201 202static boolean_t sched_traditional_use_pset_runqueue = FALSE; 203 204__attribute__((always_inline)) 205static inline run_queue_t runq_for_processor(processor_t processor) 206{ 207 if (sched_traditional_use_pset_runqueue) 208 return &processor->processor_set->pset_runq; 209 else 210 return &processor->runq; 211} 212 213__attribute__((always_inline)) 214static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread) 215{ 216 if (thread->bound_processor == PROCESSOR_NULL) 217 return; 218 219 assert(thread->bound_processor == processor); 220 221 if (sched_traditional_use_pset_runqueue) 222 processor->processor_set->pset_runq_bound_count++; 223 224 processor->runq_bound_count++; 225} 226 227__attribute__((always_inline)) 228static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread) 229{ 230 if (thread->bound_processor == PROCESSOR_NULL) 231 return; 232 233 assert(thread->bound_processor == processor); 234 235 if (sched_traditional_use_pset_runqueue) 236 processor->processor_set->pset_runq_bound_count--; 237 238 processor->runq_bound_count--; 239} 240 241#endif /* CONFIG_SCHED_TRADITIONAL */ 242 243uint64_t sched_one_second_interval; 244 245uint32_t sched_run_count, sched_share_count, sched_background_count; 246uint32_t sched_load_average, sched_mach_factor; 247 248/* Forwards */ 249 250#if defined(CONFIG_SCHED_TIMESHARE_CORE) 251 252static void load_shift_init(void); 253static void preempt_pri_init(void); 254 255#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 256 257static thread_t thread_select( 258 thread_t thread, 259 processor_t processor, 260 ast_t reason); 261 262#if CONFIG_SCHED_IDLE_IN_PLACE 263static thread_t thread_select_idle( 264 thread_t thread, 265 processor_t processor); 266#endif 267 268thread_t processor_idle( 269 thread_t thread, 270 processor_t processor); 271 272ast_t 273csw_check_locked( processor_t processor, 274 processor_set_t pset, 275 ast_t check_reason); 276 277#if defined(CONFIG_SCHED_TRADITIONAL) 278 279static thread_t steal_thread( 280 processor_set_t pset); 281 282static thread_t steal_thread_disabled( 283 processor_set_t pset) __attribute__((unused)); 284 285 286static thread_t steal_processor_thread( 287 processor_t processor); 288 289static void thread_update_scan(void); 290 291static void processor_setrun( 292 processor_t processor, 293 thread_t thread, 294 integer_t options); 295 296static boolean_t 297processor_enqueue( 298 processor_t processor, 299 thread_t thread, 300 integer_t options); 301 302static boolean_t 303processor_queue_remove( 304 processor_t processor, 305 thread_t thread); 306 307static boolean_t processor_queue_empty(processor_t processor); 308 309static ast_t processor_csw_check(processor_t processor); 310 311static boolean_t processor_queue_has_priority(processor_t processor, 312 int priority, 313 boolean_t gte); 314 315static boolean_t should_current_thread_rechoose_processor(processor_t processor); 316 317static int sched_traditional_processor_runq_count(processor_t processor); 318 319static boolean_t sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor); 320 321static uint64_t 
sched_traditional_processor_runq_stats_count_sum(processor_t processor); 322 323static uint64_t sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor); 324 325static int sched_traditional_processor_bound_count(processor_t processor); 326 327#endif 328 329 330#if defined(CONFIG_SCHED_TRADITIONAL) 331 332static void 333sched_traditional_processor_init(processor_t processor); 334 335static void 336sched_traditional_pset_init(processor_set_t pset); 337 338static void 339sched_traditional_with_pset_runqueue_init(void); 340 341#endif 342 343static void 344sched_realtime_init(void); 345 346static void 347sched_realtime_timebase_init(void); 348 349static void 350sched_timer_deadline_tracking_init(void); 351 352#if defined(CONFIG_SCHED_TRADITIONAL) 353 354static sched_mode_t 355sched_traditional_initial_thread_sched_mode(task_t parent_task); 356 357static thread_t 358sched_traditional_choose_thread( 359 processor_t processor, 360 int priority, 361 __unused ast_t reason); 362 363#endif 364 365#if DEBUG 366extern int debug_task; 367#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args) 368#else 369#define TLOG(a, fmt, args...) do {} while (0) 370#endif 371 372__assert_only static 373boolean_t thread_runnable( 374 thread_t thread); 375 376/* 377 * State machine 378 * 379 * states are combinations of: 380 * R running 381 * W waiting (or on wait queue) 382 * N non-interruptible 383 * O swapped out 384 * I being swapped in 385 * 386 * init action 387 * assert_wait thread_block clear_wait swapout swapin 388 * 389 * R RW, RWN R; setrun - - 390 * RN RWN RN; setrun - - 391 * 392 * RW W R - 393 * RWN WN RN - 394 * 395 * W R; setrun WO 396 * WN RN; setrun - 397 * 398 * RO - - R 399 * 400 */ 401 402#if defined(CONFIG_SCHED_TIMESHARE_CORE) 403int8_t sched_load_shifts[NRQS]; 404int sched_preempt_pri[NRQBM]; 405#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 406 407 408#if defined(CONFIG_SCHED_TRADITIONAL) 409 410const struct sched_dispatch_table sched_traditional_dispatch = { 411 .init = sched_traditional_init, 412 .timebase_init = sched_traditional_timebase_init, 413 .processor_init = sched_traditional_processor_init, 414 .pset_init = sched_traditional_pset_init, 415 .maintenance_continuation = sched_traditional_maintenance_continue, 416 .choose_thread = sched_traditional_choose_thread, 417 .steal_thread = steal_thread, 418 .compute_priority = compute_priority, 419 .choose_processor = choose_processor, 420 .processor_enqueue = processor_enqueue, 421 .processor_queue_shutdown = processor_queue_shutdown, 422 .processor_queue_remove = processor_queue_remove, 423 .processor_queue_empty = processor_queue_empty, 424 .priority_is_urgent = priority_is_urgent, 425 .processor_csw_check = processor_csw_check, 426 .processor_queue_has_priority = processor_queue_has_priority, 427 .initial_quantum_size = sched_traditional_initial_quantum_size, 428 .initial_thread_sched_mode = sched_traditional_initial_thread_sched_mode, 429 .can_update_priority = can_update_priority, 430 .update_priority = update_priority, 431 .lightweight_update_priority = lightweight_update_priority, 432 .quantum_expire = sched_traditional_quantum_expire, 433 .should_current_thread_rechoose_processor = should_current_thread_rechoose_processor, 434 .processor_runq_count = sched_traditional_processor_runq_count, 435 .processor_runq_stats_count_sum = sched_traditional_processor_runq_stats_count_sum, 436 .fairshare_init = sched_traditional_fairshare_init, 437 .fairshare_runq_count = 
sched_traditional_fairshare_runq_count, 438 .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, 439 .fairshare_enqueue = sched_traditional_fairshare_enqueue, 440 .fairshare_dequeue = sched_traditional_fairshare_dequeue, 441 .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, 442 .processor_bound_count = sched_traditional_processor_bound_count, 443 .thread_update_scan = thread_update_scan, 444 .direct_dispatch_to_idle_processors = TRUE, 445}; 446 447const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = { 448 .init = sched_traditional_with_pset_runqueue_init, 449 .timebase_init = sched_traditional_timebase_init, 450 .processor_init = sched_traditional_processor_init, 451 .pset_init = sched_traditional_pset_init, 452 .maintenance_continuation = sched_traditional_maintenance_continue, 453 .choose_thread = sched_traditional_choose_thread, 454 .steal_thread = steal_thread, 455 .compute_priority = compute_priority, 456 .choose_processor = choose_processor, 457 .processor_enqueue = processor_enqueue, 458 .processor_queue_shutdown = processor_queue_shutdown, 459 .processor_queue_remove = processor_queue_remove, 460 .processor_queue_empty = sched_traditional_with_pset_runqueue_processor_queue_empty, 461 .priority_is_urgent = priority_is_urgent, 462 .processor_csw_check = processor_csw_check, 463 .processor_queue_has_priority = processor_queue_has_priority, 464 .initial_quantum_size = sched_traditional_initial_quantum_size, 465 .initial_thread_sched_mode = sched_traditional_initial_thread_sched_mode, 466 .can_update_priority = can_update_priority, 467 .update_priority = update_priority, 468 .lightweight_update_priority = lightweight_update_priority, 469 .quantum_expire = sched_traditional_quantum_expire, 470 .should_current_thread_rechoose_processor = should_current_thread_rechoose_processor, 471 .processor_runq_count = sched_traditional_processor_runq_count, 472 .processor_runq_stats_count_sum = sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum, 473 .fairshare_init = sched_traditional_fairshare_init, 474 .fairshare_runq_count = sched_traditional_fairshare_runq_count, 475 .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, 476 .fairshare_enqueue = sched_traditional_fairshare_enqueue, 477 .fairshare_dequeue = sched_traditional_fairshare_dequeue, 478 .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, 479 .processor_bound_count = sched_traditional_processor_bound_count, 480 .thread_update_scan = thread_update_scan, 481 .direct_dispatch_to_idle_processors = FALSE, 482}; 483 484#endif 485 486const struct sched_dispatch_table *sched_current_dispatch = NULL; 487 488/* 489 * Statically allocate a buffer to hold the longest possible 490 * scheduler description string, as currently implemented. 491 * bsd/kern/kern_sysctl.c has a corresponding definition in bsd/ 492 * to export to userspace via sysctl(3). If either version 493 * changes, update the other. 494 * 495 * Note that in addition to being an upper bound on the strings 496 * in the kernel, it's also an exact parameter to PE_get_default(), 497 * which interrogates the device tree on some platforms. That 498 * API requires the caller know the exact size of the device tree 499 * property, so we need both a legacy size (32) and the current size 500 * (48) to deal with old and new device trees. 
The device tree property 501 * is similarly padded to a fixed size so that the same kernel image 502 * can run on multiple devices with different schedulers configured 503 * in the device tree. 504 */ 505#define SCHED_STRING_MAX_LENGTH (48) 506 507char sched_string[SCHED_STRING_MAX_LENGTH]; 508static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown; 509 510/* Global flag which indicates whether Background Stepper Context is enabled */ 511static int cpu_throttle_enabled = 1; 512 513void 514sched_init(void) 515{ 516 char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' }; 517 518 /* Check for runtime selection of the scheduler algorithm */ 519 if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) { 520 /* If no boot-args override, look in device tree */ 521 if (!PE_get_default("kern.sched", sched_arg, 522 SCHED_STRING_MAX_LENGTH)) { 523 sched_arg[0] = '\0'; 524 } 525 } 526 527 528 if (!PE_parse_boot_argn("sched_pri_decay_limit", &sched_pri_decay_band_limit, sizeof(sched_pri_decay_band_limit))) { 529 /* No boot-args, check in device tree */ 530 if (!PE_get_default("kern.sched_pri_decay_limit", 531 &sched_pri_decay_band_limit, 532 sizeof(sched_pri_decay_band_limit))) { 533 /* Allow decay all the way to normal limits */ 534 sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT; 535 } 536 } 537 538 kprintf("Setting scheduler priority decay band limit %d\n", sched_pri_decay_band_limit); 539 540 if (strlen(sched_arg) > 0) { 541 if (0) { 542 /* Allow pattern below */ 543#if defined(CONFIG_SCHED_TRADITIONAL) 544 } else if (0 == strcmp(sched_arg, kSchedTraditionalString)) { 545 sched_current_dispatch = &sched_traditional_dispatch; 546 _sched_enum = sched_enum_traditional; 547 strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string)); 548 } else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) { 549 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; 550 _sched_enum = sched_enum_traditional_with_pset_runqueue; 551 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); 552#endif 553#if defined(CONFIG_SCHED_PROTO) 554 } else if (0 == strcmp(sched_arg, kSchedProtoString)) { 555 sched_current_dispatch = &sched_proto_dispatch; 556 _sched_enum = sched_enum_proto; 557 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); 558#endif 559#if defined(CONFIG_SCHED_GRRR) 560 } else if (0 == strcmp(sched_arg, kSchedGRRRString)) { 561 sched_current_dispatch = &sched_grrr_dispatch; 562 _sched_enum = sched_enum_grrr; 563 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); 564#endif 565#if defined(CONFIG_SCHED_MULTIQ) 566 } else if (0 == strcmp(sched_arg, kSchedMultiQString)) { 567 sched_current_dispatch = &sched_multiq_dispatch; 568 _sched_enum = sched_enum_multiq; 569 strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string)); 570 } else if (0 == strcmp(sched_arg, kSchedDualQString)) { 571 sched_current_dispatch = &sched_dualq_dispatch; 572 _sched_enum = sched_enum_dualq; 573 strlcpy(sched_string, kSchedDualQString, sizeof(sched_string)); 574#endif 575 } else { 576#if defined(CONFIG_SCHED_TRADITIONAL) 577 printf("Unrecognized scheduler algorithm: %s\n", sched_arg); 578 printf("Scheduler: Using instead: %s\n", kSchedTraditionalWithPsetRunqueueString); 579 580 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; 581 _sched_enum = sched_enum_traditional_with_pset_runqueue; 582 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); 
583#else 584 panic("Unrecognized scheduler algorithm: %s", sched_arg); 585#endif 586 } 587 kprintf("Scheduler: Runtime selection of %s\n", sched_string); 588 } else { 589#if defined(CONFIG_SCHED_MULTIQ) 590 sched_current_dispatch = &sched_multiq_dispatch; 591 _sched_enum = sched_enum_multiq; 592 strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string)); 593#elif defined(CONFIG_SCHED_TRADITIONAL) 594 sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; 595 _sched_enum = sched_enum_traditional_with_pset_runqueue; 596 strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); 597#elif defined(CONFIG_SCHED_PROTO) 598 sched_current_dispatch = &sched_proto_dispatch; 599 _sched_enum = sched_enum_proto; 600 strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); 601#elif defined(CONFIG_SCHED_GRRR) 602 sched_current_dispatch = &sched_grrr_dispatch; 603 _sched_enum = sched_enum_grrr; 604 strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); 605#else 606#error No default scheduler implementation 607#endif 608 kprintf("Scheduler: Default of %s\n", sched_string); 609 } 610 611 SCHED(init)(); 612 SCHED(fairshare_init)(); 613 sched_realtime_init(); 614 ast_init(); 615 sched_timer_deadline_tracking_init(); 616 617 SCHED(pset_init)(&pset0); 618 SCHED(processor_init)(master_processor); 619} 620 621void 622sched_timebase_init(void) 623{ 624 uint64_t abstime; 625 626 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime); 627 sched_one_second_interval = abstime; 628 629 SCHED(timebase_init)(); 630 sched_realtime_timebase_init(); 631} 632 633#if defined(CONFIG_SCHED_TIMESHARE_CORE) 634 635void 636sched_traditional_init(void) 637{ 638 /* 639 * Calculate the timeslicing quantum 640 * in us. 641 */ 642 if (default_preemption_rate < 1) 643 default_preemption_rate = DEFAULT_PREEMPTION_RATE; 644 std_quantum_us = (1000 * 1000) / default_preemption_rate; 645 646 printf("standard timeslicing quantum is %d us\n", std_quantum_us); 647 648 if (default_bg_preemption_rate < 1) 649 default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE; 650 bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate; 651 652 printf("standard background quantum is %d us\n", bg_quantum_us); 653 654 load_shift_init(); 655 preempt_pri_init(); 656 sched_tick = 0; 657} 658 659void 660sched_traditional_timebase_init(void) 661{ 662 uint64_t abstime; 663 uint32_t shift; 664 665 /* standard timeslicing quantum */ 666 clock_interval_to_absolutetime_interval( 667 std_quantum_us, NSEC_PER_USEC, &abstime); 668 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 669 std_quantum = (uint32_t)abstime; 670 671 /* smallest remaining quantum (250 us) */ 672 clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime); 673 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 674 min_std_quantum = (uint32_t)abstime; 675 676 /* quantum for background tasks */ 677 clock_interval_to_absolutetime_interval( 678 bg_quantum_us, NSEC_PER_USEC, &abstime); 679 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 680 bg_quantum = (uint32_t)abstime; 681 682 /* scheduler tick interval */ 683 clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT, 684 NSEC_PER_USEC, &abstime); 685 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 686 sched_tick_interval = (uint32_t)abstime; 687 688 /* 689 * Compute conversion factor from usage to 690 * timesharing priorities with 5/8 ** n aging. 
691 */ 692 abstime = (abstime * 5) / 3; 693 for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift) 694 abstime >>= 1; 695 sched_fixed_shift = shift; 696 697 max_unsafe_computation = ((uint64_t)max_unsafe_quanta) * std_quantum; 698 sched_safe_duration = 2 * ((uint64_t)max_unsafe_quanta) * std_quantum; 699 700 max_poll_computation = ((uint64_t)max_poll_quanta) * std_quantum; 701 thread_depress_time = 1 * std_quantum; 702 default_timeshare_computation = std_quantum / 2; 703 default_timeshare_constraint = std_quantum; 704 705#if defined(CONFIG_TELEMETRY) 706 /* interval for high frequency telemetry */ 707 clock_interval_to_absolutetime_interval(10, NSEC_PER_MSEC, &abstime); 708 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 709 sched_telemetry_interval = (uint32_t)abstime; 710#endif 711} 712 713#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 714 715 716#if defined(CONFIG_SCHED_TRADITIONAL) 717 718static void 719sched_traditional_processor_init(processor_t processor) 720{ 721 if (!sched_traditional_use_pset_runqueue) { 722 run_queue_init(&processor->runq); 723 } 724 processor->runq_bound_count = 0; 725} 726 727static void 728sched_traditional_pset_init(processor_set_t pset) 729{ 730 if (sched_traditional_use_pset_runqueue) { 731 run_queue_init(&pset->pset_runq); 732 } 733 pset->pset_runq_bound_count = 0; 734} 735 736static void 737sched_traditional_with_pset_runqueue_init(void) 738{ 739 sched_traditional_init(); 740 sched_traditional_use_pset_runqueue = TRUE; 741} 742 743#endif /* CONFIG_SCHED_TRADITIONAL */ 744 745#if defined(CONFIG_SCHED_FAIRSHARE_CORE) 746void 747sched_traditional_fairshare_init(void) 748{ 749 simple_lock_init(&fs_lock, 0); 750 751 fs_runq.count = 0; 752 queue_init(&fs_runq.queue); 753} 754#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ 755 756static void 757sched_realtime_init(void) 758{ 759 simple_lock_init(&rt_lock, 0); 760 761 rt_runq.count = 0; 762 queue_init(&rt_runq.queue); 763} 764 765static void 766sched_realtime_timebase_init(void) 767{ 768 uint64_t abstime; 769 770 /* smallest rt computaton (50 us) */ 771 clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime); 772 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 773 min_rt_quantum = (uint32_t)abstime; 774 775 /* maximum rt computation (50 ms) */ 776 clock_interval_to_absolutetime_interval( 777 50, 1000*NSEC_PER_USEC, &abstime); 778 assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); 779 max_rt_quantum = (uint32_t)abstime; 780 781} 782 783#if defined(CONFIG_SCHED_TIMESHARE_CORE) 784 785/* 786 * Set up values for timeshare 787 * loading factors. 
788 */ 789static void 790load_shift_init(void) 791{ 792 int8_t k, *p = sched_load_shifts; 793 uint32_t i, j; 794 795 uint32_t sched_decay_penalty = 1; 796 797 if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) { 798 kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty); 799 } 800 801 if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) { 802 kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor); 803 } 804 805 if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) { 806 kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay); 807 } 808 809 if (sched_decay_penalty == 0) { 810 /* 811 * There is no penalty for timeshare threads for using too much 812 * CPU, so set all load shifts to INT8_MIN. Even under high load, 813 * sched_pri_shift will be >INT8_MAX, and there will be no 814 * penalty applied to threads (nor will sched_usage be updated per 815 * thread). 816 */ 817 for (i = 0; i < NRQS; i++) { 818 sched_load_shifts[i] = INT8_MIN; 819 } 820 821 return; 822 } 823 824 *p++ = INT8_MIN; *p++ = 0; 825 826 /* 827 * For a given system load "i", the per-thread priority 828 * penalty per quantum of CPU usage is ~2^k priority 829 * levels. "sched_decay_penalty" can cause more 830 * array entries to be filled with smaller "k" values 831 */ 832 for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) { 833 for (j <<= 1; (i < j) && (i < NRQS); ++i) 834 *p++ = k; 835 } 836} 837 838static void 839preempt_pri_init(void) 840{ 841 int i, *p = sched_preempt_pri; 842 843 for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i) 844 setbit(i, p); 845 846 for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i) 847 setbit(i, p); 848} 849 850#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 851 852/* 853 * Thread wait timer expiration. 854 */ 855void 856thread_timer_expire( 857 void *p0, 858 __unused void *p1) 859{ 860 thread_t thread = p0; 861 spl_t s; 862 863 s = splsched(); 864 thread_lock(thread); 865 if (--thread->wait_timer_active == 0) { 866 if (thread->wait_timer_is_set) { 867 thread->wait_timer_is_set = FALSE; 868 clear_wait_internal(thread, THREAD_TIMED_OUT); 869 } 870 } 871 thread_unlock(thread); 872 splx(s); 873} 874 875/* 876 * thread_unblock: 877 * 878 * Unblock thread on wake up. 879 * 880 * Returns TRUE if the thread is still running. 881 * 882 * Thread must be locked. 883 */ 884boolean_t 885thread_unblock( 886 thread_t thread, 887 wait_result_t wresult) 888{ 889 boolean_t result = FALSE; 890 thread_t cthread = current_thread(); 891 uint32_t new_run_count; 892 893 /* 894 * Set wait_result. 895 */ 896 thread->wait_result = wresult; 897 898 /* 899 * Cancel pending wait timer. 900 */ 901 if (thread->wait_timer_is_set) { 902 if (timer_call_cancel(&thread->wait_timer)) 903 thread->wait_timer_active--; 904 thread->wait_timer_is_set = FALSE; 905 } 906 907 /* 908 * Update scheduling state: not waiting, 909 * set running. 910 */ 911 thread->state &= ~(TH_WAIT|TH_UNINT); 912 913 if (!(thread->state & TH_RUN)) { 914 thread->state |= TH_RUN; 915 916 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread); 917 918 /* 919 * Update run counts. 
920 */ 921 new_run_count = sched_run_incr(thread); 922 if (thread->sched_mode == TH_MODE_TIMESHARE) { 923 sched_share_incr(thread); 924 925 if (thread->sched_flags & TH_SFLAG_THROTTLED) 926 sched_background_incr(thread); 927 } 928 } 929 else { 930 /* 931 * Signal if idling on another processor. 932 */ 933#if CONFIG_SCHED_IDLE_IN_PLACE 934 if (thread->state & TH_IDLE) { 935 processor_t processor = thread->last_processor; 936 937 if (processor != current_processor()) 938 machine_signal_idle(processor); 939 } 940#else 941 assert((thread->state & TH_IDLE) == 0); 942#endif 943 944 new_run_count = sched_run_count; /* updated in thread_select_idle() */ 945 result = TRUE; 946 } 947 948 /* 949 * Calculate deadline for real-time threads. 950 */ 951 if (thread->sched_mode == TH_MODE_REALTIME) { 952 uint64_t ctime; 953 954 ctime = mach_absolute_time(); 955 thread->realtime.deadline = thread->realtime.constraint + ctime; 956 } 957 958 /* 959 * Clear old quantum, fail-safe computation, etc. 960 */ 961 thread->quantum_remaining = 0; 962 thread->computation_metered = 0; 963 thread->reason = AST_NONE; 964 965 /* Obtain power-relevant interrupt and "platform-idle exit" statistics. 966 * We also account for "double hop" thread signaling via 967 * the thread callout infrastructure. 968 * DRK: consider removing the callout wakeup counters in the future 969 * they're present for verification at the moment. 970 */ 971 boolean_t aticontext, pidle; 972 ml_get_power_state(&aticontext, &pidle); 973 974 if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) { 975 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); 976 DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); 977 978 uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd); 979 980 if (ttd) { 981 if (ttd <= timer_deadline_tracking_bin_1) 982 thread->thread_timer_wakeups_bin_1++; 983 else 984 if (ttd <= timer_deadline_tracking_bin_2) 985 thread->thread_timer_wakeups_bin_2++; 986 } 987 988 if (pidle) { 989 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); 990 } 991 992 } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) { 993 if (cthread->callout_woken_from_icontext) { 994 ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); 995 thread->thread_callout_interrupt_wakeups++; 996 if (cthread->callout_woken_from_platform_idle) { 997 ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); 998 thread->thread_callout_platform_idle_wakeups++; 999 } 1000 1001 cthread->callout_woke_thread = TRUE; 1002 } 1003 } 1004 1005 if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) { 1006 thread->callout_woken_from_icontext = aticontext; 1007 thread->callout_woken_from_platform_idle = pidle; 1008 thread->callout_woke_thread = FALSE; 1009 } 1010 1011 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1012 MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, 1013 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, new_run_count, 0); 1014 1015 DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); 1016 1017 return (result); 1018} 1019 1020/* 1021 * Routine: thread_go 1022 * Purpose: 1023 * Unblock and dispatch thread. 1024 * Conditions: 1025 * thread lock held, IPC locks may be held. 1026 * thread must have been pulled from wait queue under same lock hold. 
1027 * Returns: 1028 * KERN_SUCCESS - Thread was set running 1029 * KERN_NOT_WAITING - Thread was not waiting 1030 */ 1031kern_return_t 1032thread_go( 1033 thread_t thread, 1034 wait_result_t wresult) 1035{ 1036 assert(thread->at_safe_point == FALSE); 1037 assert(thread->wait_event == NO_EVENT64); 1038 assert(thread->wait_queue == WAIT_QUEUE_NULL); 1039 1040 if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) { 1041 if (!thread_unblock(thread, wresult)) 1042 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 1043 1044 return (KERN_SUCCESS); 1045 } 1046 1047 return (KERN_NOT_WAITING); 1048} 1049 1050/* 1051 * Routine: thread_mark_wait_locked 1052 * Purpose: 1053 * Mark a thread as waiting. If, given the circumstances, 1054 * it doesn't want to wait (i.e. already aborted), then 1055 * indicate that in the return value. 1056 * Conditions: 1057 * at splsched() and thread is locked. 1058 */ 1059__private_extern__ 1060wait_result_t 1061thread_mark_wait_locked( 1062 thread_t thread, 1063 wait_interrupt_t interruptible) 1064{ 1065 boolean_t at_safe_point; 1066 1067 assert(thread == current_thread()); 1068 1069 /* 1070 * The thread may have certain types of interrupts/aborts masked 1071 * off. Even if the wait location says these types of interrupts 1072 * are OK, we have to honor mask settings (outer-scoped code may 1073 * not be able to handle aborts at the moment). 1074 */ 1075 if (interruptible > (thread->options & TH_OPT_INTMASK)) 1076 interruptible = thread->options & TH_OPT_INTMASK; 1077 1078 at_safe_point = (interruptible == THREAD_ABORTSAFE); 1079 1080 if ( interruptible == THREAD_UNINT || 1081 !(thread->sched_flags & TH_SFLAG_ABORT) || 1082 (!at_safe_point && 1083 (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) { 1084 1085 if ( !(thread->state & TH_TERMINATE)) 1086 DTRACE_SCHED(sleep); 1087 1088 thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT); 1089 thread->at_safe_point = at_safe_point; 1090 return (thread->wait_result = THREAD_WAITING); 1091 } 1092 else 1093 if (thread->sched_flags & TH_SFLAG_ABORTSAFELY) 1094 thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK; 1095 1096 return (thread->wait_result = THREAD_INTERRUPTED); 1097} 1098 1099/* 1100 * Routine: thread_interrupt_level 1101 * Purpose: 1102 * Set the maximum interruptible state for the 1103 * current thread. The effective value of any 1104 * interruptible flag passed into assert_wait 1105 * will never exceed this. 1106 * 1107 * Useful for code that must not be interrupted, 1108 * but which calls code that doesn't know that. 1109 * Returns: 1110 * The old interrupt level for the thread. 1111 */ 1112__private_extern__ 1113wait_interrupt_t 1114thread_interrupt_level( 1115 wait_interrupt_t new_level) 1116{ 1117 thread_t thread = current_thread(); 1118 wait_interrupt_t result = thread->options & TH_OPT_INTMASK; 1119 1120 thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK); 1121 1122 return result; 1123} 1124 1125/* 1126 * Check to see if an assert wait is possible, without actually doing one. 1127 * This is used by debug code in locks and elsewhere to verify that it is 1128 * always OK to block when trying to take a blocking lock (since waiting 1129 * for the actual assert_wait to catch the case may make it hard to detect 1130 * this case. 
1131 */ 1132boolean_t 1133assert_wait_possible(void) 1134{ 1135 1136 thread_t thread; 1137 1138#if DEBUG 1139 if(debug_mode) return TRUE; /* Always succeed in debug mode */ 1140#endif 1141 1142 thread = current_thread(); 1143 1144 return (thread == NULL || wait_queue_assert_possible(thread)); 1145} 1146 1147/* 1148 * assert_wait: 1149 * 1150 * Assert that the current thread is about to go to 1151 * sleep until the specified event occurs. 1152 */ 1153wait_result_t 1154assert_wait( 1155 event_t event, 1156 wait_interrupt_t interruptible) 1157{ 1158 register wait_queue_t wq; 1159 register int index; 1160 1161 if(event == NO_EVENT) 1162 panic("assert_wait() called with NO_EVENT"); 1163 1164 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1165 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1166 VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0); 1167 1168 index = wait_hash(event); 1169 wq = &wait_queues[index]; 1170 return wait_queue_assert_wait(wq, event, interruptible, 0); 1171} 1172 1173wait_result_t 1174assert_wait_timeout( 1175 event_t event, 1176 wait_interrupt_t interruptible, 1177 uint32_t interval, 1178 uint32_t scale_factor) 1179{ 1180 thread_t thread = current_thread(); 1181 wait_result_t wresult; 1182 wait_queue_t wqueue; 1183 uint64_t deadline; 1184 spl_t s; 1185 1186 if(event == NO_EVENT) 1187 panic("assert_wait_timeout() called with NO_EVENT"); 1188 1189 wqueue = &wait_queues[wait_hash(event)]; 1190 1191 s = splsched(); 1192 wait_queue_lock(wqueue); 1193 thread_lock(thread); 1194 1195 clock_interval_to_deadline(interval, scale_factor, &deadline); 1196 1197 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1198 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1199 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1200 1201 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), 1202 interruptible, 1203 TIMEOUT_URGENCY_SYS_NORMAL, 1204 deadline, 0, 1205 thread); 1206 1207 thread_unlock(thread); 1208 wait_queue_unlock(wqueue); 1209 splx(s); 1210 1211 return (wresult); 1212} 1213 1214wait_result_t 1215assert_wait_timeout_with_leeway( 1216 event_t event, 1217 wait_interrupt_t interruptible, 1218 wait_timeout_urgency_t urgency, 1219 uint32_t interval, 1220 uint32_t leeway, 1221 uint32_t scale_factor) 1222{ 1223 thread_t thread = current_thread(); 1224 wait_result_t wresult; 1225 wait_queue_t wqueue; 1226 uint64_t deadline; 1227 uint64_t abstime; 1228 uint64_t slop; 1229 uint64_t now; 1230 spl_t s; 1231 1232 now = mach_absolute_time(); 1233 clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); 1234 deadline = now + abstime; 1235 1236 clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop); 1237 1238 if(event == NO_EVENT) 1239 panic("assert_wait_timeout_with_leeway() called with NO_EVENT"); 1240 1241 wqueue = &wait_queues[wait_hash(event)]; 1242 1243 s = splsched(); 1244 wait_queue_lock(wqueue); 1245 thread_lock(thread); 1246 1247 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1248 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1249 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1250 1251 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), 1252 interruptible, 1253 urgency, deadline, slop, 1254 thread); 1255 1256 thread_unlock(thread); 1257 wait_queue_unlock(wqueue); 1258 splx(s); 1259 1260 return (wresult); 1261} 1262 1263wait_result_t 1264assert_wait_deadline( 1265 event_t event, 1266 wait_interrupt_t interruptible, 1267 uint64_t deadline) 1268{ 1269 thread_t thread = current_thread(); 1270 
wait_result_t wresult; 1271 wait_queue_t wqueue; 1272 spl_t s; 1273 1274 assert(event != NO_EVENT); 1275 wqueue = &wait_queues[wait_hash(event)]; 1276 1277 s = splsched(); 1278 wait_queue_lock(wqueue); 1279 thread_lock(thread); 1280 1281 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1282 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1283 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1284 1285 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), 1286 interruptible, 1287 TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0, 1288 thread); 1289 1290 thread_unlock(thread); 1291 wait_queue_unlock(wqueue); 1292 splx(s); 1293 1294 return (wresult); 1295} 1296 1297wait_result_t 1298assert_wait_deadline_with_leeway( 1299 event_t event, 1300 wait_interrupt_t interruptible, 1301 wait_timeout_urgency_t urgency, 1302 uint64_t deadline, 1303 uint64_t leeway) 1304{ 1305 thread_t thread = current_thread(); 1306 wait_result_t wresult; 1307 wait_queue_t wqueue; 1308 spl_t s; 1309 1310 if(event == NO_EVENT) 1311 panic("assert_wait_deadline_with_leeway() called with NO_EVENT"); 1312 1313 wqueue = &wait_queues[wait_hash(event)]; 1314 1315 s = splsched(); 1316 wait_queue_lock(wqueue); 1317 thread_lock(thread); 1318 1319 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 1320 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, 1321 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); 1322 1323 wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), 1324 interruptible, 1325 urgency, deadline, leeway, 1326 thread); 1327 1328 thread_unlock(thread); 1329 wait_queue_unlock(wqueue); 1330 splx(s); 1331 1332 return (wresult); 1333} 1334 1335/* 1336 * thread_isoncpu: 1337 * 1338 * Return TRUE if a thread is running on a processor such that an AST 1339 * is needed to pull it out of userspace execution, or if executing in 1340 * the kernel, bring to a context switch boundary that would cause 1341 * thread state to be serialized in the thread PCB. 1342 * 1343 * Thread locked, returns the same way. While locked, fields 1344 * like "state" cannot change. "runq" can change only from set to unset. 1345 */ 1346static inline boolean_t 1347thread_isoncpu(thread_t thread) 1348{ 1349 /* Not running or runnable */ 1350 if (!(thread->state & TH_RUN)) 1351 return (FALSE); 1352 1353 /* Waiting on a runqueue, not currently running */ 1354 /* TODO: This is invalid - it can get dequeued without thread lock, but not context switched. */ 1355 if (thread->runq != PROCESSOR_NULL) 1356 return (FALSE); 1357 1358 /* 1359 * Thread must be running on a processor, or 1360 * about to run, or just did run. In all these 1361 * cases, an AST to the processor is needed 1362 * to guarantee that the thread is kicked out 1363 * of userspace and the processor has 1364 * context switched (and saved register state). 1365 */ 1366 return (TRUE); 1367} 1368 1369/* 1370 * thread_stop: 1371 * 1372 * Force a preemption point for a thread and wait 1373 * for it to stop running on a CPU. If a stronger 1374 * guarantee is requested, wait until no longer 1375 * runnable. Arbitrates access among 1376 * multiple stop requests. (released by unstop) 1377 * 1378 * The thread must enter a wait state and stop via a 1379 * separate means. 1380 * 1381 * Returns FALSE if interrupted. 
1382 */ 1383boolean_t 1384thread_stop( 1385 thread_t thread, 1386 boolean_t until_not_runnable) 1387{ 1388 wait_result_t wresult; 1389 spl_t s = splsched(); 1390 boolean_t oncpu; 1391 1392 wake_lock(thread); 1393 thread_lock(thread); 1394 1395 while (thread->state & TH_SUSP) { 1396 thread->wake_active = TRUE; 1397 thread_unlock(thread); 1398 1399 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); 1400 wake_unlock(thread); 1401 splx(s); 1402 1403 if (wresult == THREAD_WAITING) 1404 wresult = thread_block(THREAD_CONTINUE_NULL); 1405 1406 if (wresult != THREAD_AWAKENED) 1407 return (FALSE); 1408 1409 s = splsched(); 1410 wake_lock(thread); 1411 thread_lock(thread); 1412 } 1413 1414 thread->state |= TH_SUSP; 1415 1416 while ((oncpu = thread_isoncpu(thread)) || 1417 (until_not_runnable && (thread->state & TH_RUN))) { 1418 processor_t processor; 1419 1420 if (oncpu) { 1421 assert(thread->state & TH_RUN); 1422 processor = thread->chosen_processor; 1423 cause_ast_check(processor); 1424 } 1425 1426 thread->wake_active = TRUE; 1427 thread_unlock(thread); 1428 1429 wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); 1430 wake_unlock(thread); 1431 splx(s); 1432 1433 if (wresult == THREAD_WAITING) 1434 wresult = thread_block(THREAD_CONTINUE_NULL); 1435 1436 if (wresult != THREAD_AWAKENED) { 1437 thread_unstop(thread); 1438 return (FALSE); 1439 } 1440 1441 s = splsched(); 1442 wake_lock(thread); 1443 thread_lock(thread); 1444 } 1445 1446 thread_unlock(thread); 1447 wake_unlock(thread); 1448 splx(s); 1449 1450 /* 1451 * We return with the thread unlocked. To prevent it from 1452 * transitioning to a runnable state (or from TH_RUN to 1453 * being on the CPU), the caller must ensure the thread 1454 * is stopped via an external means (such as an AST) 1455 */ 1456 1457 return (TRUE); 1458} 1459 1460/* 1461 * thread_unstop: 1462 * 1463 * Release a previous stop request and set 1464 * the thread running if appropriate. 1465 * 1466 * Use only after a successful stop operation. 1467 */ 1468void 1469thread_unstop( 1470 thread_t thread) 1471{ 1472 spl_t s = splsched(); 1473 1474 wake_lock(thread); 1475 thread_lock(thread); 1476 1477 if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) { 1478 thread->state &= ~TH_SUSP; 1479 thread_unblock(thread, THREAD_AWAKENED); 1480 1481 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 1482 } 1483 else 1484 if (thread->state & TH_SUSP) { 1485 thread->state &= ~TH_SUSP; 1486 1487 if (thread->wake_active) { 1488 thread->wake_active = FALSE; 1489 thread_unlock(thread); 1490 1491 thread_wakeup(&thread->wake_active); 1492 wake_unlock(thread); 1493 splx(s); 1494 1495 return; 1496 } 1497 } 1498 1499 thread_unlock(thread); 1500 wake_unlock(thread); 1501 splx(s); 1502} 1503 1504/* 1505 * thread_wait: 1506 * 1507 * Wait for a thread to stop running. (non-interruptible) 1508 * 1509 */ 1510void 1511thread_wait( 1512 thread_t thread, 1513 boolean_t until_not_runnable) 1514{ 1515 wait_result_t wresult; 1516 boolean_t oncpu; 1517 processor_t processor; 1518 spl_t s = splsched(); 1519 1520 wake_lock(thread); 1521 thread_lock(thread); 1522 1523 /* 1524 * Wait until not running on a CPU. If stronger requirement 1525 * desired, wait until not runnable. Assumption: if thread is 1526 * on CPU, then TH_RUN is set, so we're not waiting in any case 1527 * where the original, pure "TH_RUN" check would have let us 1528 * finish. 
1529 */ 1530 while ((oncpu = thread_isoncpu(thread)) || 1531 (until_not_runnable && (thread->state & TH_RUN))) { 1532 1533 if (oncpu) { 1534 assert(thread->state & TH_RUN); 1535 processor = thread->chosen_processor; 1536 cause_ast_check(processor); 1537 } 1538 1539 thread->wake_active = TRUE; 1540 thread_unlock(thread); 1541 1542 wresult = assert_wait(&thread->wake_active, THREAD_UNINT); 1543 wake_unlock(thread); 1544 splx(s); 1545 1546 if (wresult == THREAD_WAITING) 1547 thread_block(THREAD_CONTINUE_NULL); 1548 1549 s = splsched(); 1550 wake_lock(thread); 1551 thread_lock(thread); 1552 } 1553 1554 thread_unlock(thread); 1555 wake_unlock(thread); 1556 splx(s); 1557} 1558 1559/* 1560 * Routine: clear_wait_internal 1561 * 1562 * Clear the wait condition for the specified thread. 1563 * Start the thread executing if that is appropriate. 1564 * Arguments: 1565 * thread thread to awaken 1566 * result Wakeup result the thread should see 1567 * Conditions: 1568 * At splsched 1569 * the thread is locked. 1570 * Returns: 1571 * KERN_SUCCESS thread was rousted out a wait 1572 * KERN_FAILURE thread was waiting but could not be rousted 1573 * KERN_NOT_WAITING thread was not waiting 1574 */ 1575__private_extern__ kern_return_t 1576clear_wait_internal( 1577 thread_t thread, 1578 wait_result_t wresult) 1579{ 1580 wait_queue_t wq = thread->wait_queue; 1581 uint32_t i = LockTimeOut; 1582 1583 do { 1584 if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT)) 1585 return (KERN_FAILURE); 1586 1587 if (wq != WAIT_QUEUE_NULL) { 1588 if (wait_queue_lock_try(wq)) { 1589 wait_queue_pull_thread_locked(wq, thread, TRUE); 1590 /* wait queue unlocked, thread still locked */ 1591 } 1592 else { 1593 thread_unlock(thread); 1594 delay(1); 1595 1596 thread_lock(thread); 1597 if (wq != thread->wait_queue) 1598 return (KERN_NOT_WAITING); 1599 1600 continue; 1601 } 1602 } 1603 1604 return (thread_go(thread, wresult)); 1605 } while ((--i > 0) || machine_timeout_suspended()); 1606 1607 panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n", 1608 thread, wq, cpu_number()); 1609 1610 return (KERN_FAILURE); 1611} 1612 1613 1614/* 1615 * clear_wait: 1616 * 1617 * Clear the wait condition for the specified thread. Start the thread 1618 * executing if that is appropriate. 1619 * 1620 * parameters: 1621 * thread thread to awaken 1622 * result Wakeup result the thread should see 1623 */ 1624kern_return_t 1625clear_wait( 1626 thread_t thread, 1627 wait_result_t result) 1628{ 1629 kern_return_t ret; 1630 spl_t s; 1631 1632 s = splsched(); 1633 thread_lock(thread); 1634 ret = clear_wait_internal(thread, result); 1635 thread_unlock(thread); 1636 splx(s); 1637 return ret; 1638} 1639 1640 1641/* 1642 * thread_wakeup_prim: 1643 * 1644 * Common routine for thread_wakeup, thread_wakeup_with_result, 1645 * and thread_wakeup_one. 
1646 * 1647 */ 1648kern_return_t 1649thread_wakeup_prim( 1650 event_t event, 1651 boolean_t one_thread, 1652 wait_result_t result) 1653{ 1654 return (thread_wakeup_prim_internal(event, one_thread, result, -1)); 1655} 1656 1657 1658kern_return_t 1659thread_wakeup_prim_internal( 1660 event_t event, 1661 boolean_t one_thread, 1662 wait_result_t result, 1663 int priority) 1664{ 1665 register wait_queue_t wq; 1666 register int index; 1667 1668 if(event == NO_EVENT) 1669 panic("thread_wakeup_prim() called with NO_EVENT"); 1670 1671 index = wait_hash(event); 1672 wq = &wait_queues[index]; 1673 if (one_thread) 1674 return (wait_queue_wakeup_one(wq, event, result, priority)); 1675 else 1676 return (wait_queue_wakeup_all(wq, event, result)); 1677} 1678 1679/* 1680 * thread_bind: 1681 * 1682 * Force the current thread to execute on the specified processor. 1683 * Takes effect after the next thread_block(). 1684 * 1685 * Returns the previous binding. PROCESSOR_NULL means 1686 * not bound. 1687 * 1688 * XXX - DO NOT export this to users - XXX 1689 */ 1690processor_t 1691thread_bind( 1692 processor_t processor) 1693{ 1694 thread_t self = current_thread(); 1695 processor_t prev; 1696 spl_t s; 1697 1698 s = splsched(); 1699 thread_lock(self); 1700 1701 /* <rdar://problem/15102234> */ 1702 assert(self->sched_pri < BASEPRI_RTQUEUES); 1703 1704 prev = self->bound_processor; 1705 self->bound_processor = processor; 1706 1707 thread_unlock(self); 1708 splx(s); 1709 1710 return (prev); 1711} 1712 1713/* Invoked prior to idle entry to determine if, on SMT capable processors, an SMT 1714 * rebalancing opportunity exists when a core is (instantaneously) idle, but 1715 * other SMT-capable cores may be over-committed. TODO: some possible negatives: 1716 * IPI thrash if this core does not remain idle following the load balancing ASTs 1717 * Idle "thrash", when IPI issue is followed by idle entry/core power down 1718 * followed by a wakeup shortly thereafter. 1719 */ 1720 1721/* Invoked with pset locked, returns with pset unlocked */ 1722#if (DEVELOPMENT || DEBUG) 1723int sched_smt_balance = 1; 1724#endif 1725 1726static void 1727sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) { 1728 processor_t ast_processor = NULL; 1729 1730#if (DEVELOPMENT || DEBUG) 1731 if (__improbable(sched_smt_balance == 0)) 1732 goto smt_balance_exit; 1733#endif 1734 1735 assert(cprocessor == current_processor()); 1736 if (cprocessor->is_SMT == FALSE) 1737 goto smt_balance_exit; 1738 1739 processor_t sib_processor = cprocessor->processor_secondary ? cprocessor->processor_secondary : cprocessor->processor_primary; 1740 1741 /* Determine if both this processor and its sibling are idle, 1742 * indicating an SMT rebalancing opportunity. 
1743 */ 1744 if (sib_processor->state != PROCESSOR_IDLE) 1745 goto smt_balance_exit; 1746 1747 processor_t sprocessor; 1748 1749 sprocessor = (processor_t)queue_first(&cpset->active_queue); 1750 1751 while (!queue_end(&cpset->active_queue, (queue_entry_t)sprocessor)) { 1752 if ((sprocessor->state == PROCESSOR_RUNNING) && 1753 (sprocessor->processor_primary != sprocessor) && 1754 (sprocessor->processor_primary->state == PROCESSOR_RUNNING) && 1755 (sprocessor->current_pri < BASEPRI_RTQUEUES) && 1756 ((cpset->pending_AST_cpu_mask & (1U << sprocessor->cpu_id)) == 0)) { 1757 assert(sprocessor != cprocessor); 1758 ast_processor = sprocessor; 1759 break; 1760 } 1761 sprocessor = (processor_t)queue_next((queue_entry_t)sprocessor); 1762 } 1763 1764smt_balance_exit: 1765 pset_unlock(cpset); 1766 1767 if (ast_processor) { 1768 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_SMT_BALANCE), ast_processor->cpu_id, ast_processor->state, ast_processor->processor_primary->state, 0, 0); 1769 cause_ast_check(ast_processor); 1770 } 1771} 1772 1773/* 1774 * thread_select: 1775 * 1776 * Select a new thread for the current processor to execute. 1777 * 1778 * May select the current thread, which must be locked. 1779 */ 1780static thread_t 1781thread_select( 1782 thread_t thread, 1783 processor_t processor, 1784 ast_t reason) 1785{ 1786 processor_set_t pset = processor->processor_set; 1787 thread_t new_thread = THREAD_NULL; 1788 1789 assert(processor == current_processor()); 1790 1791 do { 1792 /* 1793 * Update the priority. 1794 */ 1795 if (SCHED(can_update_priority)(thread)) 1796 SCHED(update_priority)(thread); 1797 1798 processor->current_pri = thread->sched_pri; 1799 processor->current_thmode = thread->sched_mode; 1800 processor->current_sfi_class = thread->sfi_class; 1801 1802 pset_lock(pset); 1803 1804 assert(processor->state != PROCESSOR_OFF_LINE); 1805 1806 if (processor->processor_primary != processor) { 1807 /* 1808 * Should this secondary SMT processor attempt to find work? For pset runqueue systems, 1809 * we should look for work only under the same conditions that choose_processor() 1810 * would have assigned work, which is when all primary processors have been assigned work. 1811 * 1812 * An exception is that bound threads are dispatched to a processor without going through 1813 * choose_processor(), so in those cases we should continue trying to dequeue work. 1814 */ 1815 if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue) && !rt_runq.count) { 1816 goto idle; 1817 } 1818 } 1819 1820 simple_lock(&rt_lock); 1821 1822 /* 1823 * Test to see if the current thread should continue 1824 * to run on this processor. Must be runnable, and not 1825 * bound to a different processor, nor be in the wrong 1826 * processor set. 
1827 */ 1828 if (((thread->state & ~TH_SUSP) == TH_RUN) && 1829 (thread->sched_pri >= BASEPRI_RTQUEUES || processor->processor_primary == processor) && 1830 (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) && 1831 (thread->affinity_set == AFFINITY_SET_NULL || thread->affinity_set->aset_pset == pset)) { 1832 if (thread->sched_pri >= BASEPRI_RTQUEUES && first_timeslice(processor)) { 1833 if (rt_runq.count > 0) { 1834 thread_t next_rt; 1835 1836 next_rt = (thread_t)queue_first(&rt_runq.queue); 1837 if (next_rt->realtime.deadline < processor->deadline && 1838 (next_rt->bound_processor == PROCESSOR_NULL || next_rt->bound_processor == processor)) { 1839 thread = (thread_t)dequeue_head(&rt_runq.queue); 1840 thread->runq = PROCESSOR_NULL; 1841 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 1842 rt_runq.count--; 1843 } 1844 } 1845 1846 simple_unlock(&rt_lock); 1847 1848 processor->deadline = thread->realtime.deadline; 1849 1850 pset_unlock(pset); 1851 1852 return (thread); 1853 } 1854 1855 if ((thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri, reason)) == THREAD_NULL) { 1856 1857 simple_unlock(&rt_lock); 1858 1859 /* This thread is still the highest priority runnable (non-idle) thread */ 1860 1861 processor->deadline = UINT64_MAX; 1862 1863 pset_unlock(pset); 1864 1865 return (thread); 1866 } 1867 } 1868 1869 if (new_thread != THREAD_NULL || 1870 (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) && 1871 (new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL)) { 1872 simple_unlock(&rt_lock); 1873 1874 processor->deadline = UINT64_MAX; 1875 pset_unlock(pset); 1876 1877 return (new_thread); 1878 } 1879 1880 if (rt_runq.count > 0) { 1881 thread_t next_rt = (thread_t)queue_first(&rt_runq.queue); 1882 1883 if (__probable((next_rt->bound_processor == NULL || (next_rt->bound_processor == processor)))) { 1884 thread = (thread_t)dequeue_head(&rt_runq.queue); 1885 1886 thread->runq = PROCESSOR_NULL; 1887 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 1888 rt_runq.count--; 1889 1890 simple_unlock(&rt_lock); 1891 1892 processor->deadline = thread->realtime.deadline; 1893 pset_unlock(pset); 1894 1895 return (thread); 1896 } 1897 } 1898 1899 simple_unlock(&rt_lock); 1900 1901 /* No realtime threads and no normal threads on the per-processor 1902 * runqueue. Finally check for global fairshare threads. 1903 */ 1904 if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) { 1905 1906 processor->deadline = UINT64_MAX; 1907 pset_unlock(pset); 1908 1909 return (new_thread); 1910 } 1911 1912 processor->deadline = UINT64_MAX; 1913 1914 /* 1915 * No runnable threads, attempt to steal 1916 * from other processors. 1917 */ 1918 new_thread = SCHED(steal_thread)(pset); 1919 if (new_thread != THREAD_NULL) { 1920 return (new_thread); 1921 } 1922 1923 /* 1924 * If other threads have appeared, shortcut 1925 * around again. 1926 */ 1927 if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0) 1928 continue; 1929 1930 pset_lock(pset); 1931 1932 idle: 1933 /* 1934 * Nothing is runnable, so set this processor idle if it 1935 * was running. 
1936 */ 1937 if (processor->state == PROCESSOR_RUNNING) { 1938 remqueue((queue_entry_t)processor); 1939 processor->state = PROCESSOR_IDLE; 1940 1941 if (processor->processor_primary == processor) { 1942 enqueue_head(&pset->idle_queue, (queue_entry_t)processor); 1943 } 1944 else { 1945 enqueue_head(&pset->idle_secondary_queue, (queue_entry_t)processor); 1946 } 1947 } 1948 1949 /* Invoked with pset locked, returns with pset unlocked */ 1950 sched_SMT_balance(processor, pset); 1951 1952#if CONFIG_SCHED_IDLE_IN_PLACE 1953 /* 1954 * Choose idle thread if fast idle is not possible. 1955 */ 1956 if (processor->processor_primary != processor) 1957 return (processor->idle_thread); 1958 1959 if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES) 1960 return (processor->idle_thread); 1961 1962 /* 1963 * Perform idling activities directly without a 1964 * context switch. Return dispatched thread, 1965 * else check again for a runnable thread. 1966 */ 1967 new_thread = thread_select_idle(thread, processor); 1968 1969#else /* !CONFIG_SCHED_IDLE_IN_PLACE */ 1970 1971 /* 1972 * Do a full context switch to idle so that the current 1973 * thread can start running on another processor without 1974 * waiting for the fast-idled processor to wake up. 1975 */ 1976 return (processor->idle_thread); 1977 1978#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */ 1979 1980 } while (new_thread == THREAD_NULL); 1981 1982 return (new_thread); 1983} 1984 1985#if CONFIG_SCHED_IDLE_IN_PLACE 1986/* 1987 * thread_select_idle: 1988 * 1989 * Idle the processor using the current thread context. 1990 * 1991 * Called with thread locked, then dropped and relocked. 1992 */ 1993static thread_t 1994thread_select_idle( 1995 thread_t thread, 1996 processor_t processor) 1997{ 1998 thread_t new_thread; 1999 uint64_t arg1, arg2; 2000 int urgency; 2001 2002 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2003 if (thread->sched_flags & TH_SFLAG_THROTTLED) 2004 sched_background_decr(thread); 2005 2006 sched_share_decr(thread); 2007 } 2008 sched_run_decr(thread); 2009 2010 thread->state |= TH_IDLE; 2011 processor->current_pri = IDLEPRI; 2012 processor->current_thmode = TH_MODE_NONE; 2013 processor->current_sfi_class = SFI_CLASS_KERNEL; 2014 2015 /* Reload precise timing global policy to thread-local policy */ 2016 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); 2017 2018 thread_unlock(thread); 2019 2020 /* 2021 * Switch execution timing to processor idle thread. 2022 */ 2023 processor->last_dispatch = mach_absolute_time(); 2024 2025#ifdef CONFIG_MACH_APPROXIMATE_TIME 2026 commpage_update_mach_approximate_time(processor->last_dispatch); 2027#endif 2028 2029 thread->last_run_time = processor->last_dispatch; 2030 thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer); 2031 PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer; 2032 2033 /* 2034 * Cancel the quantum timer while idling. 2035 */ 2036 timer_call_cancel(&processor->quantum_timer); 2037 processor->timeslice = 0; 2038 2039 (*thread->sched_call)(SCHED_CALL_BLOCK, thread); 2040 2041 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); 2042 2043 /* 2044 * Enable interrupts and perform idling activities. No 2045 * preemption due to TH_IDLE being set. 2046 */ 2047 spllo(); new_thread = processor_idle(thread, processor); 2048 2049 /* 2050 * Return at splsched. 
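	 *
	 * On return the SCHED_CALL_UNBLOCK callout is delivered, the thread
	 * is relocked, and if it was awakened while idling a fresh quantum
	 * is started before TH_IDLE is cleared.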
2051 */ 2052 (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread); 2053 2054 thread_lock(thread); 2055 2056 /* 2057 * If awakened, switch to thread timer and start a new quantum. 2058 * Otherwise skip; we will context switch to another thread or return here. 2059 */ 2060 if (!(thread->state & TH_WAIT)) { 2061 processor->last_dispatch = mach_absolute_time(); 2062 thread_timer_event(processor->last_dispatch, &thread->system_timer); 2063 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2064 2065 thread_quantum_init(thread); 2066 processor->quantum_end = processor->last_dispatch + thread->quantum_remaining; 2067 timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); 2068 processor->timeslice = 1; 2069 2070 thread->computation_epoch = processor->last_dispatch; 2071 } 2072 2073 thread->state &= ~TH_IDLE; 2074 2075 /* 2076 * If we idled in place, simulate a context switch back 2077 * to the original priority of the thread so that the 2078 * platform layer cannot distinguish this from a true 2079 * switch to the idle thread. 2080 */ 2081 2082 urgency = thread_get_urgency(thread, &arg1, &arg2); 2083 2084 thread_tell_urgency(urgency, arg1, arg2, new_thread); 2085 2086 sched_run_incr(thread); 2087 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2088 sched_share_incr(thread); 2089 2090 if (thread->sched_flags & TH_SFLAG_THROTTLED) 2091 sched_background_incr(thread); 2092 } 2093 2094 return (new_thread); 2095} 2096#endif /* CONFIG_SCHED_IDLE_IN_PLACE */ 2097 2098#if defined(CONFIG_SCHED_TRADITIONAL) 2099static thread_t 2100sched_traditional_choose_thread( 2101 processor_t processor, 2102 int priority, 2103 __unused ast_t reason) 2104{ 2105 thread_t thread; 2106 2107 thread = choose_thread_from_runq(processor, runq_for_processor(processor), priority); 2108 if (thread != THREAD_NULL) { 2109 runq_consider_decr_bound_count(processor, thread); 2110 } 2111 2112 return thread; 2113} 2114 2115#endif /* defined(CONFIG_SCHED_TRADITIONAL) */ 2116 2117#if defined(CONFIG_SCHED_TRADITIONAL) 2118 2119/* 2120 * choose_thread_from_runq: 2121 * 2122 * Locate a thread to execute from the processor run queue 2123 * and return it. Only choose a thread with greater or equal 2124 * priority. 2125 * 2126 * Associated pset must be locked. Returns THREAD_NULL 2127 * on failure. 
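 *
 * Outline of the scan below (descriptive sketch, not a substitute for
 * the code): starting at rq->highq, each priority level is searched for
 * a thread that is unbound or bound to this processor; when a level is
 * emptied, the run queue bitmap is updated so rq->highq stays accurate:
 *
 *	if (queue_empty(queue)) {
 *		if (pri != IDLEPRI)
 *			clrbit(MAXPRI - pri, rq->bitmap);
 *		rq->highq = MAXPRI - ffsbit(rq->bitmap);
 *	}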
2128 */ 2129thread_t 2130choose_thread_from_runq( 2131 processor_t processor, 2132 run_queue_t rq, 2133 int priority) 2134{ 2135 queue_t queue = rq->queues + rq->highq; 2136 int pri = rq->highq, count = rq->count; 2137 thread_t thread; 2138 2139 while (count > 0 && pri >= priority) { 2140 thread = (thread_t)queue_first(queue); 2141 while (!queue_end(queue, (queue_entry_t)thread)) { 2142 if (thread->bound_processor == PROCESSOR_NULL || 2143 thread->bound_processor == processor) { 2144 remqueue((queue_entry_t)thread); 2145 2146 thread->runq = PROCESSOR_NULL; 2147 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 2148 rq->count--; 2149 if (SCHED(priority_is_urgent)(pri)) { 2150 rq->urgency--; assert(rq->urgency >= 0); 2151 } 2152 if (queue_empty(queue)) { 2153 if (pri != IDLEPRI) 2154 clrbit(MAXPRI - pri, rq->bitmap); 2155 rq->highq = MAXPRI - ffsbit(rq->bitmap); 2156 } 2157 2158 return (thread); 2159 } 2160 count--; 2161 2162 thread = (thread_t)queue_next((queue_entry_t)thread); 2163 } 2164 2165 queue--; pri--; 2166 } 2167 2168 return (THREAD_NULL); 2169} 2170 2171#endif /* defined(CONFIG_SCHED_TRADITIONAL) */ 2172 2173/* 2174 * Perform a context switch and start executing the new thread. 2175 * 2176 * Returns FALSE on failure, and the thread is re-dispatched. 2177 * 2178 * Called at splsched. 2179 */ 2180 2181/* 2182 * thread_invoke 2183 * 2184 * "self" is what is currently running on the processor, 2185 * "thread" is the new thread to context switch to 2186 * (which may be the same thread in some cases) 2187 */ 2188static boolean_t 2189thread_invoke( 2190 thread_t self, 2191 thread_t thread, 2192 ast_t reason) 2193{ 2194 thread_continue_t continuation = self->continuation; 2195 void *parameter = self->parameter; 2196 processor_t processor; 2197 uint64_t ctime = mach_absolute_time(); 2198 2199#ifdef CONFIG_MACH_APPROXIMATE_TIME 2200 commpage_update_mach_approximate_time(ctime); 2201#endif 2202 2203 if (__improbable(get_preemption_level() != 0)) { 2204 int pl = get_preemption_level(); 2205 panic("thread_invoke: preemption_level %d, possible cause: %s", 2206 pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" : 2207 "blocking while holding a spinlock, or within interrupt context")); 2208 } 2209 2210 assert(self == current_thread()); 2211 assert(self->runq == PROCESSOR_NULL); 2212 2213#if defined(CONFIG_SCHED_TIMESHARE_CORE) 2214 sched_traditional_consider_maintenance(ctime); 2215#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 2216 2217 /* 2218 * Mark thread interruptible. 2219 */ 2220 thread_lock(thread); 2221 thread->state &= ~TH_UNINT; 2222 2223 assert(thread_runnable(thread)); 2224 assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor()); 2225 assert(thread->runq == PROCESSOR_NULL); 2226 2227 /* Reload precise timing global policy to thread-local policy */ 2228 thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); 2229 2230 /* Update SFI class based on other factors */ 2231 thread->sfi_class = sfi_thread_classify(thread); 2232 2233 /* 2234 * Allow time constraint threads to hang onto 2235 * a stack. 2236 */ 2237 if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack) 2238 self->reserved_stack = self->kernel_stack; 2239 2240 if (continuation != NULL) { 2241 if (!thread->kernel_stack) { 2242 /* 2243 * If we are using a privileged stack, 2244 * check to see whether we can exchange it with 2245 * that of the other thread. 
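			 *
			 * If the exchange is possible, the switch is performed as a
			 * stack handoff: the incoming thread has no kernel stack of
			 * its own, so it inherits ours via stack_handoff() and resumes
			 * at its continuation, avoiding a full machine_switch_context().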
2246 */ 2247 if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack) 2248 goto need_stack; 2249 2250 /* 2251 * Context switch by performing a stack handoff. 2252 */ 2253 continuation = thread->continuation; 2254 parameter = thread->parameter; 2255 2256 processor = current_processor(); 2257 processor->active_thread = thread; 2258 processor->current_pri = thread->sched_pri; 2259 processor->current_thmode = thread->sched_mode; 2260 processor->current_sfi_class = thread->sfi_class; 2261 if (thread->last_processor != processor && thread->last_processor != NULL) { 2262 if (thread->last_processor->processor_set != processor->processor_set) 2263 thread->ps_switch++; 2264 thread->p_switch++; 2265 } 2266 thread->last_processor = processor; 2267 thread->c_switch++; 2268 ast_context(thread); 2269 thread_unlock(thread); 2270 2271 self->reason = reason; 2272 2273 processor->last_dispatch = ctime; 2274 self->last_run_time = ctime; 2275 thread_timer_event(ctime, &thread->system_timer); 2276 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2277 2278 /* 2279 * Since non-precise user/kernel time doesn't update the state timer 2280 * during privilege transitions, synthesize an event now. 2281 */ 2282 if (!thread->precise_user_kernel_time) { 2283 timer_switch(PROCESSOR_DATA(processor, current_state), 2284 ctime, 2285 PROCESSOR_DATA(processor, current_state)); 2286 } 2287 2288 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2289 MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE, 2290 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2291 2292 if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) { 2293 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE, 2294 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0); 2295 } 2296 2297 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); 2298 2299 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri); 2300 2301 TLOG(1, "thread_invoke: calling stack_handoff\n"); 2302 stack_handoff(self, thread); 2303 2304 DTRACE_SCHED(on__cpu); 2305 2306 thread_dispatch(self, thread); 2307 2308 thread->continuation = thread->parameter = NULL; 2309 2310 counter(c_thread_invoke_hits++); 2311 2312 (void) spllo(); 2313 2314 assert(continuation); 2315 call_continuation(continuation, parameter, thread->wait_result); 2316 /*NOTREACHED*/ 2317 } 2318 else if (thread == self) { 2319 /* same thread but with continuation */ 2320 ast_context(self); 2321 counter(++c_thread_invoke_same); 2322 thread_unlock(self); 2323 2324 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2325 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2326 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2327 2328 self->continuation = self->parameter = NULL; 2329 2330 (void) spllo(); 2331 2332 call_continuation(continuation, parameter, self->wait_result); 2333 /*NOTREACHED*/ 2334 } 2335 } 2336 else { 2337 /* 2338 * Check that the other thread has a stack 2339 */ 2340 if (!thread->kernel_stack) { 2341need_stack: 2342 if (!stack_alloc_try(thread)) { 2343 counter(c_thread_invoke_misses++); 2344 thread_unlock(thread); 2345 thread_stack_enqueue(thread); 2346 return (FALSE); 2347 } 2348 } 2349 else if (thread == self) { 2350 ast_context(self); 2351 counter(++c_thread_invoke_same); 2352 thread_unlock(self); 2353 2354 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2355 
MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2356 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2357 2358 return (TRUE); 2359 } 2360 } 2361 2362 /* 2363 * Context switch by full context save. 2364 */ 2365 processor = current_processor(); 2366 processor->active_thread = thread; 2367 processor->current_pri = thread->sched_pri; 2368 processor->current_thmode = thread->sched_mode; 2369 processor->current_sfi_class = thread->sfi_class; 2370 if (thread->last_processor != processor && thread->last_processor != NULL) { 2371 if (thread->last_processor->processor_set != processor->processor_set) 2372 thread->ps_switch++; 2373 thread->p_switch++; 2374 } 2375 thread->last_processor = processor; 2376 thread->c_switch++; 2377 ast_context(thread); 2378 thread_unlock(thread); 2379 2380 counter(c_thread_invoke_csw++); 2381 2382 assert(self->runq == PROCESSOR_NULL); 2383 self->reason = reason; 2384 2385 processor->last_dispatch = ctime; 2386 self->last_run_time = ctime; 2387 thread_timer_event(ctime, &thread->system_timer); 2388 PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; 2389 2390 /* 2391 * Since non-precise user/kernel time doesn't update the state timer 2392 * during privilege transitions, synthesize an event now. 2393 */ 2394 if (!thread->precise_user_kernel_time) { 2395 timer_switch(PROCESSOR_DATA(processor, current_state), 2396 ctime, 2397 PROCESSOR_DATA(processor, current_state)); 2398 } 2399 2400 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2401 MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, 2402 self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); 2403 2404 if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) { 2405 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE, 2406 (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0); 2407 } 2408 2409 DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); 2410 2411 SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri); 2412 2413 /* 2414 * This is where we actually switch register context, 2415 * and address space if required. We will next run 2416 * as a result of a subsequent context switch. 2417 */ 2418 assert(continuation == self->continuation); 2419 thread = machine_switch_context(self, continuation, thread); 2420 assert(self == current_thread()); 2421 TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread); 2422 2423 DTRACE_SCHED(on__cpu); 2424 2425 /* 2426 * We have been resumed and are set to run. 2427 */ 2428 thread_dispatch(thread, self); 2429 2430 if (continuation) { 2431 self->continuation = self->parameter = NULL; 2432 2433 (void) spllo(); 2434 2435 call_continuation(continuation, parameter, self->wait_result); 2436 /*NOTREACHED*/ 2437 } 2438 2439 return (TRUE); 2440} 2441 2442/* 2443 * thread_dispatch: 2444 * 2445 * Handle threads at context switch. Re-dispatch other thread 2446 * if still running, otherwise update run state and perform 2447 * special actions. Update quantum for other thread and begin 2448 * the quantum for ourselves. 2449 * 2450 * "self" is our new current thread that we have context switched 2451 * to, "thread" is the old thread that we have switched away from. 2452 * 2453 * Called at splsched. 
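 *
 * Quantum accounting for the outgoing thread (sketch of the code
 * below): the unused portion of the quantum is measured against the
 * processor's quantum deadline, and the consumed portion is billed to
 * the task and thread CPU-time ledgers unless AST_LEDGER is set:
 *
 *	remainder = quantum_end - last_dispatch;	(when positive)
 *	consumed  = thread->quantum_remaining - remainder;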
2454 */ 2455void 2456thread_dispatch( 2457 thread_t thread, 2458 thread_t self) 2459{ 2460 processor_t processor = self->last_processor; 2461 2462 if (thread != THREAD_NULL) { 2463 /* 2464 * If blocked at a continuation, discard 2465 * the stack. 2466 */ 2467 if (thread->continuation != NULL && thread->kernel_stack != 0) 2468 stack_free(thread); 2469 2470 if (!(thread->state & TH_IDLE)) { 2471 int64_t consumed; 2472 int64_t remainder = 0; 2473 2474 if (processor->quantum_end > processor->last_dispatch) 2475 remainder = processor->quantum_end - 2476 processor->last_dispatch; 2477 2478 consumed = thread->quantum_remaining - remainder; 2479 2480 if ((thread->reason & AST_LEDGER) == 0) { 2481 /* 2482 * Bill CPU time to both the task and 2483 * the individual thread. 2484 */ 2485 ledger_credit(thread->t_ledger, 2486 task_ledgers.cpu_time, consumed); 2487 ledger_credit(thread->t_threadledger, 2488 thread_ledgers.cpu_time, consumed); 2489#ifdef CONFIG_BANK 2490 if (thread->t_bankledger) { 2491 ledger_credit(thread->t_bankledger, 2492 bank_ledgers.cpu_time, 2493 (consumed - thread->t_deduct_bank_ledger_time)); 2494 2495 } 2496 thread->t_deduct_bank_ledger_time =0; 2497#endif 2498 } 2499 2500 wake_lock(thread); 2501 thread_lock(thread); 2502 2503 /* 2504 * Compute remainder of current quantum. 2505 */ 2506 if (first_timeslice(processor) && 2507 processor->quantum_end > processor->last_dispatch) 2508 thread->quantum_remaining = (uint32_t)remainder; 2509 else 2510 thread->quantum_remaining = 0; 2511 2512 if (thread->sched_mode == TH_MODE_REALTIME) { 2513 /* 2514 * Cancel the deadline if the thread has 2515 * consumed the entire quantum. 2516 */ 2517 if (thread->quantum_remaining == 0) { 2518 thread->realtime.deadline = UINT64_MAX; 2519 } 2520 } else { 2521#if defined(CONFIG_SCHED_TRADITIONAL) 2522 /* 2523 * For non-realtime threads treat a tiny 2524 * remaining quantum as an expired quantum 2525 * but include what's left next time. 2526 */ 2527 if (thread->quantum_remaining < min_std_quantum) { 2528 thread->reason |= AST_QUANTUM; 2529 thread->quantum_remaining += SCHED(initial_quantum_size)(thread); 2530 } 2531#endif 2532 } 2533 2534 /* 2535 * If we are doing a direct handoff then 2536 * take the remainder of the quantum. 2537 */ 2538 if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) { 2539 self->quantum_remaining = thread->quantum_remaining; 2540 thread->reason |= AST_QUANTUM; 2541 thread->quantum_remaining = 0; 2542 } else { 2543#if defined(CONFIG_SCHED_MULTIQ) 2544 if (sched_groups_enabled && thread->sched_group == self->sched_group) { 2545 /* TODO: Remove tracepoint */ 2546 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2547 MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF) | DBG_FUNC_NONE, 2548 self->reason, (uintptr_t)thread_tid(thread), 2549 self->quantum_remaining, thread->quantum_remaining, 0); 2550 2551 self->quantum_remaining = thread->quantum_remaining; 2552 thread->quantum_remaining = 0; 2553 /* TODO: Should we set AST_QUANTUM here? 
*/ 2554 } 2555#endif /* defined(CONFIG_SCHED_MULTIQ) */ 2556 } 2557 2558 thread->computation_metered += (processor->last_dispatch - thread->computation_epoch); 2559 2560 if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) { 2561 integer_t priority; 2562 2563 priority = thread->sched_pri; 2564 2565 if (priority < thread->priority) 2566 priority = thread->priority; 2567 if (priority < BASEPRI_BACKGROUND) 2568 priority = BASEPRI_BACKGROUND; 2569 2570 if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { 2571 KERNEL_DEBUG_CONSTANT( 2572 MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE, 2573 (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0); 2574 2575 thread->sched_flags |= TH_SFLAG_RW_PROMOTED; 2576 2577 if (thread->sched_pri < priority) 2578 set_sched_pri(thread, priority); 2579 } 2580 } 2581 2582 if (!(thread->state & TH_WAIT)) { 2583 /* 2584 * Still running. 2585 */ 2586 if (thread->reason & AST_QUANTUM) 2587 thread_setrun(thread, SCHED_TAILQ); 2588 else 2589 if (thread->reason & AST_PREEMPT) 2590 thread_setrun(thread, SCHED_HEADQ); 2591 else 2592 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 2593 2594 thread->reason = AST_NONE; 2595 2596 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2597 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE, 2598 (uintptr_t)thread_tid(thread), thread->reason, thread->state, sched_run_count, 0); 2599 2600 if (thread->wake_active) { 2601 thread->wake_active = FALSE; 2602 thread_unlock(thread); 2603 2604 thread_wakeup(&thread->wake_active); 2605 } 2606 else 2607 thread_unlock(thread); 2608 2609 wake_unlock(thread); 2610 } 2611 else { 2612 /* 2613 * Waiting. 2614 */ 2615 boolean_t should_terminate = FALSE; 2616 uint32_t new_run_count; 2617 2618 /* Only the first call to thread_dispatch 2619 * after explicit termination should add 2620 * the thread to the termination queue 2621 */ 2622 if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) { 2623 should_terminate = TRUE; 2624 thread->state |= TH_TERMINATE2; 2625 } 2626 2627 thread->state &= ~TH_RUN; 2628 thread->chosen_processor = PROCESSOR_NULL; 2629 2630 if (thread->sched_mode == TH_MODE_TIMESHARE) { 2631 if (thread->sched_flags & TH_SFLAG_THROTTLED) 2632 sched_background_decr(thread); 2633 2634 sched_share_decr(thread); 2635 } 2636 new_run_count = sched_run_decr(thread); 2637 2638 if ((thread->state & (TH_WAIT | TH_TERMINATE)) == TH_WAIT) { 2639 if (thread->reason & AST_SFI) { 2640 thread->wait_sfi_begin_time = processor->last_dispatch; 2641 } 2642 } 2643 2644 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2645 MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE, 2646 (uintptr_t)thread_tid(thread), thread->reason, thread->state, new_run_count, 0); 2647 2648 (*thread->sched_call)(SCHED_CALL_BLOCK, thread); 2649 2650 if (thread->wake_active) { 2651 thread->wake_active = FALSE; 2652 thread_unlock(thread); 2653 2654 thread_wakeup(&thread->wake_active); 2655 } 2656 else 2657 thread_unlock(thread); 2658 2659 wake_unlock(thread); 2660 2661 if (should_terminate) 2662 thread_terminate_enqueue(thread); 2663 } 2664 } 2665 } 2666 2667 if (!(self->state & TH_IDLE)) { 2668 uint64_t arg1, arg2; 2669 int urgency; 2670 ast_t new_ast; 2671 2672 thread_lock(self); 2673 new_ast = sfi_thread_needs_ast(self, NULL); 2674 thread_unlock(self); 2675 2676 if (new_ast != AST_NONE) { 2677 ast_on(new_ast); 2678 } 2679 2680 urgency = thread_get_urgency(self, &arg1, &arg2); 2681 2682 thread_tell_urgency(urgency, arg1, arg2, self); 2683 
2684 /* 2685 * Get a new quantum if none remaining. 2686 */ 2687 if (self->quantum_remaining == 0) { 2688 thread_quantum_init(self); 2689 } 2690 2691 /* 2692 * Set up quantum timer and timeslice. 2693 */ 2694 processor->quantum_end = processor->last_dispatch + self->quantum_remaining; 2695 timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); 2696 2697 processor->timeslice = 1; 2698 2699 self->computation_epoch = processor->last_dispatch; 2700 } 2701 else { 2702 timer_call_cancel(&processor->quantum_timer); 2703 processor->timeslice = 0; 2704 2705 thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); 2706 } 2707} 2708 2709/* 2710 * thread_block_reason: 2711 * 2712 * Forces a reschedule, blocking the caller if a wait 2713 * has been asserted. 2714 * 2715 * If a continuation is specified, then thread_invoke will 2716 * attempt to discard the thread's kernel stack. When the 2717 * thread resumes, it will execute the continuation function 2718 * on a new kernel stack. 2719 */ 2720counter(mach_counter_t c_thread_block_calls = 0;) 2721 2722wait_result_t 2723thread_block_reason( 2724 thread_continue_t continuation, 2725 void *parameter, 2726 ast_t reason) 2727{ 2728 register thread_t self = current_thread(); 2729 register processor_t processor; 2730 register thread_t new_thread; 2731 spl_t s; 2732 2733 counter(++c_thread_block_calls); 2734 2735 s = splsched(); 2736 2737 processor = current_processor(); 2738 2739 /* If we're explicitly yielding, force a subsequent quantum */ 2740 if (reason & AST_YIELD) 2741 processor->timeslice = 0; 2742 2743 /* We're handling all scheduling AST's */ 2744 ast_off(AST_SCHEDULING); 2745 2746 self->continuation = continuation; 2747 self->parameter = parameter; 2748 2749 if (self->state & ~(TH_RUN | TH_IDLE)) { 2750 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 2751 MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK), 2752 reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0); 2753 } 2754 2755 do { 2756 thread_lock(self); 2757 new_thread = thread_select(self, processor, reason); 2758 thread_unlock(self); 2759 } while (!thread_invoke(self, new_thread, reason)); 2760 2761 splx(s); 2762 2763 return (self->wait_result); 2764} 2765 2766/* 2767 * thread_block: 2768 * 2769 * Block the current thread if a wait has been asserted. 2770 */ 2771wait_result_t 2772thread_block( 2773 thread_continue_t continuation) 2774{ 2775 return thread_block_reason(continuation, NULL, AST_NONE); 2776} 2777 2778wait_result_t 2779thread_block_parameter( 2780 thread_continue_t continuation, 2781 void *parameter) 2782{ 2783 return thread_block_reason(continuation, parameter, AST_NONE); 2784} 2785 2786/* 2787 * thread_run: 2788 * 2789 * Switch directly from the current thread to the 2790 * new thread, handing off our quantum if appropriate. 2791 * 2792 * New thread must be runnable, and not on a run queue. 2793 * 2794 * Called at splsched. 
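 *
 * The first thread_invoke() attempt is made with AST_HANDOFF so the
 * remainder of our quantum can be handed to new_thread; if the invoke
 * fails (for example, no kernel stack could be obtained for it), the
 * loop falls back to thread_select() with AST_NONE, as in an ordinary
 * block.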
2795 */ 2796int 2797thread_run( 2798 thread_t self, 2799 thread_continue_t continuation, 2800 void *parameter, 2801 thread_t new_thread) 2802{ 2803 ast_t handoff = AST_HANDOFF; 2804 2805 self->continuation = continuation; 2806 self->parameter = parameter; 2807 2808 while (!thread_invoke(self, new_thread, handoff)) { 2809 processor_t processor = current_processor(); 2810 2811 thread_lock(self); 2812 new_thread = thread_select(self, processor, AST_NONE); 2813 thread_unlock(self); 2814 handoff = AST_NONE; 2815 } 2816 2817 return (self->wait_result); 2818} 2819 2820/* 2821 * thread_continue: 2822 * 2823 * Called at splsched when a thread first receives 2824 * a new stack after a continuation. 2825 */ 2826void 2827thread_continue( 2828 register thread_t thread) 2829{ 2830 register thread_t self = current_thread(); 2831 register thread_continue_t continuation; 2832 register void *parameter; 2833 2834 DTRACE_SCHED(on__cpu); 2835 2836 continuation = self->continuation; 2837 parameter = self->parameter; 2838 2839 thread_dispatch(thread, self); 2840 2841 self->continuation = self->parameter = NULL; 2842 2843 if (thread != THREAD_NULL) 2844 (void)spllo(); 2845 2846 TLOG(1, "thread_continue: calling call_continuation \n"); 2847 call_continuation(continuation, parameter, self->wait_result); 2848 /*NOTREACHED*/ 2849} 2850 2851void 2852thread_quantum_init(thread_t thread) 2853{ 2854 if (thread->sched_mode == TH_MODE_REALTIME) { 2855 thread->quantum_remaining = thread->realtime.computation; 2856 } else { 2857 thread->quantum_remaining = SCHED(initial_quantum_size)(thread); 2858 } 2859} 2860 2861#if defined(CONFIG_SCHED_TIMESHARE_CORE) 2862 2863uint32_t 2864sched_traditional_initial_quantum_size(thread_t thread) 2865{ 2866 if ((thread == THREAD_NULL) || !(thread->sched_flags & TH_SFLAG_THROTTLED)) 2867 return std_quantum; 2868 else 2869 return bg_quantum; 2870} 2871 2872#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 2873 2874#if defined(CONFIG_SCHED_TRADITIONAL) 2875 2876static sched_mode_t 2877sched_traditional_initial_thread_sched_mode(task_t parent_task) 2878{ 2879 if (parent_task == kernel_task) 2880 return TH_MODE_FIXED; 2881 else 2882 return TH_MODE_TIMESHARE; 2883} 2884 2885#endif /* CONFIG_SCHED_TRADITIONAL */ 2886 2887/* 2888 * run_queue_init: 2889 * 2890 * Initialize a run queue before first use. 
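 *
 * On return the queue is empty: every per-priority queue is initialized,
 * the bitmap has only the IDLEPRI bit set, highq is IDLEPRI, and both
 * count and urgency are zero.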
2891 */ 2892void 2893run_queue_init( 2894 run_queue_t rq) 2895{ 2896 int i; 2897 2898 rq->highq = IDLEPRI; 2899 for (i = 0; i < NRQBM; i++) 2900 rq->bitmap[i] = 0; 2901 setbit(MAXPRI - IDLEPRI, rq->bitmap); 2902 rq->urgency = rq->count = 0; 2903 for (i = 0; i < NRQS; i++) 2904 queue_init(&rq->queues[i]); 2905} 2906 2907#if defined(CONFIG_SCHED_FAIRSHARE_CORE) 2908int 2909sched_traditional_fairshare_runq_count(void) 2910{ 2911 return fs_runq.count; 2912} 2913 2914uint64_t 2915sched_traditional_fairshare_runq_stats_count_sum(void) 2916{ 2917 return fs_runq.runq_stats.count_sum; 2918} 2919 2920void 2921sched_traditional_fairshare_enqueue(thread_t thread) 2922{ 2923 queue_t queue = &fs_runq.queue; 2924 2925 simple_lock(&fs_lock); 2926 2927 enqueue_tail(queue, (queue_entry_t)thread); 2928 2929 thread->runq = FS_RUNQ; 2930 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2931 fs_runq.count++; 2932 2933 simple_unlock(&fs_lock); 2934} 2935 2936thread_t 2937sched_traditional_fairshare_dequeue(void) 2938{ 2939 thread_t thread; 2940 2941 simple_lock(&fs_lock); 2942 if (fs_runq.count > 0) { 2943 thread = (thread_t)dequeue_head(&fs_runq.queue); 2944 2945 thread->runq = PROCESSOR_NULL; 2946 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2947 fs_runq.count--; 2948 2949 simple_unlock(&fs_lock); 2950 2951 return (thread); 2952 } 2953 simple_unlock(&fs_lock); 2954 2955 return THREAD_NULL; 2956} 2957 2958boolean_t 2959sched_traditional_fairshare_queue_remove(thread_t thread) 2960{ 2961 queue_t q; 2962 2963 simple_lock(&fs_lock); 2964 q = &fs_runq.queue; 2965 2966 if (FS_RUNQ == thread->runq) { 2967 remqueue((queue_entry_t)thread); 2968 SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count); 2969 fs_runq.count--; 2970 2971 thread->runq = PROCESSOR_NULL; 2972 simple_unlock(&fs_lock); 2973 return (TRUE); 2974 } 2975 else { 2976 /* 2977 * The thread left the run queue before we could 2978 * lock the run queue. 2979 */ 2980 assert(thread->runq == PROCESSOR_NULL); 2981 simple_unlock(&fs_lock); 2982 return (FALSE); 2983 } 2984} 2985 2986#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ 2987 2988/* 2989 * run_queue_dequeue: 2990 * 2991 * Perform a dequeue operation on a run queue, 2992 * and return the resulting thread. 2993 * 2994 * The run queue must be locked (see thread_run_queue_remove() 2995 * for more info), and not empty. 2996 */ 2997thread_t 2998run_queue_dequeue( 2999 run_queue_t rq, 3000 integer_t options) 3001{ 3002 thread_t thread; 3003 queue_t queue = rq->queues + rq->highq; 3004 3005 if (options & SCHED_HEADQ) { 3006 thread = (thread_t)dequeue_head(queue); 3007 } 3008 else { 3009 thread = (thread_t)dequeue_tail(queue); 3010 } 3011 3012 thread->runq = PROCESSOR_NULL; 3013 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3014 rq->count--; 3015 if (SCHED(priority_is_urgent)(rq->highq)) { 3016 rq->urgency--; assert(rq->urgency >= 0); 3017 } 3018 if (queue_empty(queue)) { 3019 if (rq->highq != IDLEPRI) 3020 clrbit(MAXPRI - rq->highq, rq->bitmap); 3021 rq->highq = MAXPRI - ffsbit(rq->bitmap); 3022 } 3023 3024 return (thread); 3025} 3026 3027/* 3028 * run_queue_enqueue: 3029 * 3030 * Perform a enqueue operation on a run queue. 3031 * 3032 * The run queue must be locked (see thread_run_queue_remove() 3033 * for more info). 
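 *
 * Returns TRUE only when the enqueued thread raises rq->highq, i.e.
 * it is now the highest-priority entry in this run queue.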
3034 */ 3035boolean_t 3036run_queue_enqueue( 3037 run_queue_t rq, 3038 thread_t thread, 3039 integer_t options) 3040{ 3041 queue_t queue = rq->queues + thread->sched_pri; 3042 boolean_t result = FALSE; 3043 3044 if (queue_empty(queue)) { 3045 enqueue_tail(queue, (queue_entry_t)thread); 3046 3047 setbit(MAXPRI - thread->sched_pri, rq->bitmap); 3048 if (thread->sched_pri > rq->highq) { 3049 rq->highq = thread->sched_pri; 3050 result = TRUE; 3051 } 3052 } else { 3053 if (options & SCHED_TAILQ) 3054 enqueue_tail(queue, (queue_entry_t)thread); 3055 else 3056 enqueue_head(queue, (queue_entry_t)thread); 3057 } 3058 if (SCHED(priority_is_urgent)(thread->sched_pri)) 3059 rq->urgency++; 3060 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3061 rq->count++; 3062 3063 return (result); 3064 3065} 3066 3067/* 3068 * run_queue_remove: 3069 * 3070 * Remove a specific thread from a runqueue. 3071 * 3072 * The run queue must be locked. 3073 */ 3074void 3075run_queue_remove( 3076 run_queue_t rq, 3077 thread_t thread) 3078{ 3079 3080 remqueue((queue_entry_t)thread); 3081 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 3082 rq->count--; 3083 if (SCHED(priority_is_urgent)(thread->sched_pri)) { 3084 rq->urgency--; assert(rq->urgency >= 0); 3085 } 3086 3087 if (queue_empty(rq->queues + thread->sched_pri)) { 3088 /* update run queue status */ 3089 if (thread->sched_pri != IDLEPRI) 3090 clrbit(MAXPRI - thread->sched_pri, rq->bitmap); 3091 rq->highq = MAXPRI - ffsbit(rq->bitmap); 3092 } 3093 3094 thread->runq = PROCESSOR_NULL; 3095} 3096 3097/* 3098 * fairshare_setrun: 3099 * 3100 * Dispatch a thread for round-robin execution. 3101 * 3102 * Thread must be locked. Associated pset must 3103 * be locked, and is returned unlocked. 3104 */ 3105static void 3106fairshare_setrun( 3107 processor_t processor, 3108 thread_t thread) 3109{ 3110 processor_set_t pset = processor->processor_set; 3111 3112 thread->chosen_processor = processor; 3113 3114 SCHED(fairshare_enqueue)(thread); 3115 3116 pset_unlock(pset); 3117 3118 if (processor != current_processor()) 3119 machine_signal_idle(processor); 3120 3121 3122} 3123 3124/* 3125 * realtime_queue_insert: 3126 * 3127 * Enqueue a thread for realtime execution. 3128 */ 3129static boolean_t 3130realtime_queue_insert( 3131 thread_t thread) 3132{ 3133 queue_t queue = &rt_runq.queue; 3134 uint64_t deadline = thread->realtime.deadline; 3135 boolean_t preempt = FALSE; 3136 3137 simple_lock(&rt_lock); 3138 3139 if (queue_empty(queue)) { 3140 enqueue_tail(queue, (queue_entry_t)thread); 3141 preempt = TRUE; 3142 } 3143 else { 3144 register thread_t entry = (thread_t)queue_first(queue); 3145 3146 while (TRUE) { 3147 if ( queue_end(queue, (queue_entry_t)entry) || 3148 deadline < entry->realtime.deadline ) { 3149 entry = (thread_t)queue_prev((queue_entry_t)entry); 3150 break; 3151 } 3152 3153 entry = (thread_t)queue_next((queue_entry_t)entry); 3154 } 3155 3156 if ((queue_entry_t)entry == queue) 3157 preempt = TRUE; 3158 3159 insque((queue_entry_t)thread, (queue_entry_t)entry); 3160 } 3161 3162 thread->runq = RT_RUNQ; 3163 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 3164 rt_runq.count++; 3165 3166 simple_unlock(&rt_lock); 3167 3168 return (preempt); 3169} 3170 3171/* 3172 * realtime_setrun: 3173 * 3174 * Dispatch a thread for realtime execution. 3175 * 3176 * Thread must be locked. Associated pset must 3177 * be locked, and is returned unlocked. 
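 *
 * The thread is either dispatched directly onto an idle processor
 * (marking it PROCESSOR_DISPATCHING and signalling it if remote), or
 * inserted into the deadline-ordered rt_runq by realtime_queue_insert(),
 * with an urgent preemption requested when the target processor is
 * running a non-realtime thread or one with a later deadline.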
3178 */ 3179static void 3180realtime_setrun( 3181 processor_t processor, 3182 thread_t thread) 3183{ 3184 processor_set_t pset = processor->processor_set; 3185 ast_t preempt; 3186 3187 boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE; 3188 3189 thread->chosen_processor = processor; 3190 3191 /* <rdar://problem/15102234> */ 3192 assert(thread->bound_processor == PROCESSOR_NULL); 3193 3194 /* 3195 * Dispatch directly onto idle processor. 3196 */ 3197 if ( (thread->bound_processor == processor) 3198 && processor->state == PROCESSOR_IDLE) { 3199 remqueue((queue_entry_t)processor); 3200 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3201 3202 processor->next_thread = thread; 3203 processor->current_pri = thread->sched_pri; 3204 processor->current_thmode = thread->sched_mode; 3205 processor->current_sfi_class = thread->sfi_class; 3206 processor->deadline = thread->realtime.deadline; 3207 processor->state = PROCESSOR_DISPATCHING; 3208 3209 if (processor != current_processor()) { 3210 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3211 /* cleared on exit from main processor_idle() loop */ 3212 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3213 do_signal_idle = TRUE; 3214 } 3215 } 3216 pset_unlock(pset); 3217 3218 if (do_signal_idle) { 3219 machine_signal_idle(processor); 3220 } 3221 return; 3222 } 3223 3224 if (processor->current_pri < BASEPRI_RTQUEUES) 3225 preempt = (AST_PREEMPT | AST_URGENT); 3226 else if (thread->realtime.deadline < processor->deadline) 3227 preempt = (AST_PREEMPT | AST_URGENT); 3228 else 3229 preempt = AST_NONE; 3230 3231 realtime_queue_insert(thread); 3232 3233 if (preempt != AST_NONE) { 3234 if (processor->state == PROCESSOR_IDLE) { 3235 remqueue((queue_entry_t)processor); 3236 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3237 processor->next_thread = THREAD_NULL; 3238 processor->current_pri = thread->sched_pri; 3239 processor->current_thmode = thread->sched_mode; 3240 processor->current_sfi_class = thread->sfi_class; 3241 processor->deadline = thread->realtime.deadline; 3242 processor->state = PROCESSOR_DISPATCHING; 3243 if (processor == current_processor()) { 3244 ast_on(preempt); 3245 } else { 3246 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3247 /* cleared on exit from main processor_idle() loop */ 3248 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3249 do_signal_idle = TRUE; 3250 } 3251 } 3252 } else if (processor->state == PROCESSOR_DISPATCHING) { 3253 if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) { 3254 processor->current_pri = thread->sched_pri; 3255 processor->current_thmode = thread->sched_mode; 3256 processor->current_sfi_class = thread->sfi_class; 3257 processor->deadline = thread->realtime.deadline; 3258 } 3259 } else { 3260 if (processor == current_processor()) { 3261 ast_on(preempt); 3262 } else { 3263 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3264 /* cleared after IPI causes csw_check() to be called */ 3265 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3266 do_cause_ast = TRUE; 3267 } 3268 } 3269 } 3270 } else { 3271 /* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. 
*/ 3272 } 3273 3274 pset_unlock(pset); 3275 3276 if (do_signal_idle) { 3277 machine_signal_idle(processor); 3278 } else if (do_cause_ast) { 3279 cause_ast_check(processor); 3280 } 3281} 3282 3283 3284#if defined(CONFIG_SCHED_TIMESHARE_CORE) 3285 3286boolean_t 3287priority_is_urgent(int priority) 3288{ 3289 return testbit(priority, sched_preempt_pri) ? TRUE : FALSE; 3290} 3291 3292#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 3293 3294#if defined(CONFIG_SCHED_TRADITIONAL) 3295/* 3296 * processor_enqueue: 3297 * 3298 * Enqueue thread on a processor run queue. Thread must be locked, 3299 * and not already be on a run queue. 3300 * 3301 * Returns TRUE if a preemption is indicated based on the state 3302 * of the run queue. 3303 * 3304 * The run queue must be locked (see thread_run_queue_remove() 3305 * for more info). 3306 */ 3307static boolean_t 3308processor_enqueue( 3309 processor_t processor, 3310 thread_t thread, 3311 integer_t options) 3312{ 3313 run_queue_t rq = runq_for_processor(processor); 3314 boolean_t result; 3315 3316 result = run_queue_enqueue(rq, thread, options); 3317 thread->runq = processor; 3318 runq_consider_incr_bound_count(processor, thread); 3319 3320 return (result); 3321} 3322 3323#endif /* CONFIG_SCHED_TRADITIONAL */ 3324 3325/* 3326 * processor_setrun: 3327 * 3328 * Dispatch a thread for execution on a 3329 * processor. 3330 * 3331 * Thread must be locked. Associated pset must 3332 * be locked, and is returned unlocked. 3333 */ 3334static void 3335processor_setrun( 3336 processor_t processor, 3337 thread_t thread, 3338 integer_t options) 3339{ 3340 processor_set_t pset = processor->processor_set; 3341 ast_t preempt; 3342 enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing; 3343 3344 boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE; 3345 3346 thread->chosen_processor = processor; 3347 3348 /* 3349 * Dispatch directly onto idle processor. 3350 */ 3351 if ( (SCHED(direct_dispatch_to_idle_processors) || 3352 thread->bound_processor == processor) 3353 && processor->state == PROCESSOR_IDLE) { 3354 remqueue((queue_entry_t)processor); 3355 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3356 3357 processor->next_thread = thread; 3358 processor->current_pri = thread->sched_pri; 3359 processor->current_thmode = thread->sched_mode; 3360 processor->current_sfi_class = thread->sfi_class; 3361 processor->deadline = UINT64_MAX; 3362 processor->state = PROCESSOR_DISPATCHING; 3363 3364 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3365 /* cleared on exit from main processor_idle() loop */ 3366 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3367 do_signal_idle = TRUE; 3368 } 3369 3370 pset_unlock(pset); 3371 if (do_signal_idle) { 3372 machine_signal_idle(processor); 3373 } 3374 3375 return; 3376 } 3377 3378 /* 3379 * Set preemption mode. 3380 */ 3381 if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri) 3382 preempt = (AST_PREEMPT | AST_URGENT); 3383 else if(processor->active_thread && thread_eager_preemption(processor->active_thread)) 3384 preempt = (AST_PREEMPT | AST_URGENT); 3385 else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) { 3386 if(SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) { 3387 preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE; 3388 } else { 3389 preempt = AST_NONE; 3390 } 3391 } else 3392 preempt = (options & SCHED_PREEMPT)? 
AST_PREEMPT: AST_NONE; 3393 3394 SCHED(processor_enqueue)(processor, thread, options); 3395 3396 if (preempt != AST_NONE) { 3397 if (processor->state == PROCESSOR_IDLE) { 3398 remqueue((queue_entry_t)processor); 3399 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3400 processor->next_thread = THREAD_NULL; 3401 processor->current_pri = thread->sched_pri; 3402 processor->current_thmode = thread->sched_mode; 3403 processor->current_sfi_class = thread->sfi_class; 3404 processor->deadline = UINT64_MAX; 3405 processor->state = PROCESSOR_DISPATCHING; 3406 3407 ipi_action = eExitIdle; 3408 } else if ( processor->state == PROCESSOR_DISPATCHING) { 3409 if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) { 3410 processor->current_pri = thread->sched_pri; 3411 processor->current_thmode = thread->sched_mode; 3412 processor->current_sfi_class = thread->sfi_class; 3413 processor->deadline = UINT64_MAX; 3414 } 3415 } else if ( (processor->state == PROCESSOR_RUNNING || 3416 processor->state == PROCESSOR_SHUTDOWN) && 3417 (thread->sched_pri >= processor->current_pri || 3418 processor->current_thmode == TH_MODE_FAIRSHARE)) { 3419 ipi_action = eInterruptRunning; 3420 } 3421 } else { 3422 /* 3423 * New thread is not important enough to preempt what is running, but 3424 * special processor states may need special handling 3425 */ 3426 if (processor->state == PROCESSOR_SHUTDOWN && 3427 thread->sched_pri >= processor->current_pri ) { 3428 ipi_action = eInterruptRunning; 3429 } else if ( processor->state == PROCESSOR_IDLE && 3430 processor != current_processor() ) { 3431 remqueue((queue_entry_t)processor); 3432 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 3433 processor->next_thread = THREAD_NULL; 3434 processor->current_pri = thread->sched_pri; 3435 processor->current_thmode = thread->sched_mode; 3436 processor->current_sfi_class = thread->sfi_class; 3437 processor->deadline = UINT64_MAX; 3438 processor->state = PROCESSOR_DISPATCHING; 3439 3440 ipi_action = eExitIdle; 3441 } 3442 } 3443 3444 switch (ipi_action) { 3445 case eDoNothing: 3446 break; 3447 case eExitIdle: 3448 if (processor == current_processor()) { 3449 if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE) 3450 ast_on(preempt); 3451 } else { 3452 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3453 /* cleared on exit from main processor_idle() loop */ 3454 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3455 do_signal_idle = TRUE; 3456 } 3457 } 3458 break; 3459 case eInterruptRunning: 3460 if (processor == current_processor()) { 3461 if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE) 3462 ast_on(preempt); 3463 } else { 3464 if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { 3465 /* cleared after IPI causes csw_check() to be called */ 3466 pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); 3467 do_cause_ast = TRUE; 3468 } 3469 } 3470 break; 3471 } 3472 3473 pset_unlock(pset); 3474 3475 if (do_signal_idle) { 3476 machine_signal_idle(processor); 3477 } else if (do_cause_ast) { 3478 cause_ast_check(processor); 3479 } 3480} 3481 3482#if defined(CONFIG_SCHED_TRADITIONAL) 3483 3484static boolean_t 3485processor_queue_empty(processor_t processor) 3486{ 3487 return runq_for_processor(processor)->count == 0; 3488 3489} 3490 3491static boolean_t 3492sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor) 3493{ 3494 processor_set_t pset = processor->processor_set; 3495 int count = 
		runq_for_processor(processor)->count;

	/*
	 * The pset runq contains the count of all runnable threads
	 * for all processors in the pset. However, for threads that
	 * are bound to another processor, the current "processor"
	 * is not eligible to execute the thread. So we only
	 * include bound threads that are bound to the current
	 * "processor". This allows the processor to idle when the
	 * count of eligible threads drops to 0, even if there's
	 * a runnable thread bound to a different processor in the
	 * shared runq.
	 */

	count -= pset->pset_runq_bound_count;
	count += processor->runq_bound_count;

	return count == 0;
}

static ast_t
processor_csw_check(processor_t processor)
{
	run_queue_t		runq;
	boolean_t		has_higher;

	assert(processor->active_thread != NULL);

	runq = runq_for_processor(processor);
	if (first_timeslice(processor)) {
		has_higher = (runq->highq > processor->current_pri);
	} else {
		has_higher = (runq->highq >= processor->current_pri);
	}
	if (has_higher) {
		if (runq->urgency > 0)
			return (AST_PREEMPT | AST_URGENT);

		if (processor->active_thread && thread_eager_preemption(processor->active_thread))
			return (AST_PREEMPT | AST_URGENT);

		return AST_PREEMPT;
	}

	return AST_NONE;
}

static boolean_t
processor_queue_has_priority(processor_t	processor,
			     int		priority,
			     boolean_t		gte)
{
	if (gte)
		return runq_for_processor(processor)->highq >= priority;
	else
		return runq_for_processor(processor)->highq > priority;
}

static boolean_t
should_current_thread_rechoose_processor(processor_t processor)
{
	return (processor->current_pri < BASEPRI_RTQUEUES
		&& processor->processor_primary != processor);
}

static int
sched_traditional_processor_runq_count(processor_t processor)
{
	return runq_for_processor(processor)->count;
}

static uint64_t
sched_traditional_processor_runq_stats_count_sum(processor_t processor)
{
	return runq_for_processor(processor)->runq_stats.count_sum;
}

static uint64_t
sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor)
{
	if (processor->cpu_id == processor->processor_set->cpu_set_low)
		return runq_for_processor(processor)->runq_stats.count_sum;
	else
		return 0ULL;
}

static int
sched_traditional_processor_bound_count(processor_t processor)
{
	return processor->runq_bound_count;
}

#endif /* CONFIG_SCHED_TRADITIONAL */

/*
 * choose_next_pset:
 *
 * Return the next sibling pset containing
 * available processors.
 *
 * Returns the original pset if none other is
 * suitable.
 */
static processor_set_t
choose_next_pset(
	processor_set_t		pset)
{
	processor_set_t		nset = pset;

	do {
		nset = next_pset(nset);
	} while (nset->online_processor_count < 1 && nset != pset);

	return (nset);
}

/*
 * choose_processor:
 *
 * Choose a processor for the thread, beginning at
 * the pset. Accepts an optional processor hint in
 * the pset.
 *
 * Returns a processor, possibly from a different pset.
 *
 * The thread must be locked. The pset must be locked,
 * and the resulting pset is locked on return.
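 *
 * While scanning, the following candidates are remembered and used as
 * fallbacks when no idle primary processor is found:
 *
 *	lp_processor			lowest current priority overall
 *	lp_unpaired_primary_processor	lowest-priority busy primary whose
 *					secondary is idle
 *	fd_processor			furthest realtime deadline
 *	lc_processor			shortest run queue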
3622 */ 3623processor_t 3624choose_processor( 3625 processor_set_t pset, 3626 processor_t processor, 3627 thread_t thread) 3628{ 3629 processor_set_t nset, cset = pset; 3630 3631 /* 3632 * Prefer the hinted processor, when appropriate. 3633 */ 3634 3635 /* Fold last processor hint from secondary processor to its primary */ 3636 if (processor != PROCESSOR_NULL) { 3637 processor = processor->processor_primary; 3638 } 3639 3640 /* 3641 * Only consult platform layer if pset is active, which 3642 * it may not be in some cases when a multi-set system 3643 * is going to sleep. 3644 */ 3645 if (pset->online_processor_count) { 3646 if ((processor == PROCESSOR_NULL) || (processor->processor_set == pset && processor->state == PROCESSOR_IDLE)) { 3647 processor_t mc_processor = machine_choose_processor(pset, processor); 3648 if (mc_processor != PROCESSOR_NULL) 3649 processor = mc_processor->processor_primary; 3650 } 3651 } 3652 3653 /* 3654 * At this point, we may have a processor hint, and we may have 3655 * an initial starting pset. If the hint is not in the pset, or 3656 * if the hint is for a processor in an invalid state, discard 3657 * the hint. 3658 */ 3659 if (processor != PROCESSOR_NULL) { 3660 if (processor->processor_set != pset) { 3661 processor = PROCESSOR_NULL; 3662 } else { 3663 switch (processor->state) { 3664 case PROCESSOR_START: 3665 case PROCESSOR_SHUTDOWN: 3666 case PROCESSOR_OFF_LINE: 3667 /* 3668 * Hint is for a processor that cannot support running new threads. 3669 */ 3670 processor = PROCESSOR_NULL; 3671 break; 3672 case PROCESSOR_IDLE: 3673 /* 3674 * Hint is for an idle processor. Assume it is no worse than any other 3675 * idle processor. The platform layer had an opportunity to provide 3676 * the "least cost idle" processor above. 3677 */ 3678 return (processor); 3679 break; 3680 case PROCESSOR_RUNNING: 3681 case PROCESSOR_DISPATCHING: 3682 /* 3683 * Hint is for an active CPU. This fast-path allows 3684 * realtime threads to preempt non-realtime threads 3685 * to regain their previous executing processor. 3686 */ 3687 if ((thread->sched_pri >= BASEPRI_RTQUEUES) && 3688 (processor->current_pri < BASEPRI_RTQUEUES)) 3689 return (processor); 3690 3691 /* Otherwise, use hint as part of search below */ 3692 break; 3693 default: 3694 processor = PROCESSOR_NULL; 3695 break; 3696 } 3697 } 3698 } 3699 3700 /* 3701 * Iterate through the processor sets to locate 3702 * an appropriate processor. Seed results with 3703 * a last-processor hint, if available, so that 3704 * a search must find something strictly better 3705 * to replace it. 3706 * 3707 * A primary/secondary pair of SMT processors are 3708 * "unpaired" if the primary is busy but its 3709 * corresponding secondary is idle (so the physical 3710 * core has full use of its resources). 3711 */ 3712 3713 integer_t lowest_priority = MAXPRI + 1; 3714 integer_t lowest_unpaired_primary_priority = MAXPRI + 1; 3715 integer_t lowest_count = INT_MAX; 3716 uint64_t furthest_deadline = 1; 3717 processor_t lp_processor = PROCESSOR_NULL; 3718 processor_t lp_unpaired_primary_processor = PROCESSOR_NULL; 3719 processor_t lp_unpaired_secondary_processor = PROCESSOR_NULL; 3720 processor_t lc_processor = PROCESSOR_NULL; 3721 processor_t fd_processor = PROCESSOR_NULL; 3722 3723 if (processor != PROCESSOR_NULL) { 3724 /* All other states should be enumerated above. 
*/ 3725 assert(processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_DISPATCHING); 3726 3727 lowest_priority = processor->current_pri; 3728 lp_processor = processor; 3729 3730 if (processor->current_pri >= BASEPRI_RTQUEUES) { 3731 furthest_deadline = processor->deadline; 3732 fd_processor = processor; 3733 } 3734 3735 lowest_count = SCHED(processor_runq_count)(processor); 3736 lc_processor = processor; 3737 } 3738 3739 do { 3740 3741 /* 3742 * Choose an idle processor, in pset traversal order 3743 */ 3744 if (!queue_empty(&cset->idle_queue)) 3745 return ((processor_t)queue_first(&cset->idle_queue)); 3746 3747 /* 3748 * Otherwise, enumerate active and idle processors to find candidates 3749 * with lower priority/etc. 3750 */ 3751 3752 processor = (processor_t)queue_first(&cset->active_queue); 3753 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { 3754 3755 integer_t cpri = processor->current_pri; 3756 if (cpri < lowest_priority) { 3757 lowest_priority = cpri; 3758 lp_processor = processor; 3759 } 3760 3761 if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) { 3762 furthest_deadline = processor->deadline; 3763 fd_processor = processor; 3764 } 3765 3766 integer_t ccount = SCHED(processor_runq_count)(processor); 3767 if (ccount < lowest_count) { 3768 lowest_count = ccount; 3769 lc_processor = processor; 3770 } 3771 3772 processor = (processor_t)queue_next((queue_entry_t)processor); 3773 } 3774 3775 /* 3776 * For SMT configs, these idle secondary processors must have active primary. Otherwise 3777 * the idle primary would have short-circuited the loop above 3778 */ 3779 processor = (processor_t)queue_first(&cset->idle_secondary_queue); 3780 while (!queue_end(&cset->idle_secondary_queue, (queue_entry_t)processor)) { 3781 processor_t cprimary = processor->processor_primary; 3782 3783 /* If the primary processor is offline or starting up, it's not a candidate for this path */ 3784 if (cprimary->state == PROCESSOR_RUNNING || cprimary->state == PROCESSOR_DISPATCHING) { 3785 integer_t primary_pri = cprimary->current_pri; 3786 3787 if (primary_pri < lowest_unpaired_primary_priority) { 3788 lowest_unpaired_primary_priority = primary_pri; 3789 lp_unpaired_primary_processor = cprimary; 3790 lp_unpaired_secondary_processor = processor; 3791 } 3792 } 3793 3794 processor = (processor_t)queue_next((queue_entry_t)processor); 3795 } 3796 3797 3798 if (thread->sched_pri >= BASEPRI_RTQUEUES) { 3799 3800 /* 3801 * For realtime threads, the most important aspect is 3802 * scheduling latency, so we attempt to assign threads 3803 * to good preemption candidates (assuming an idle primary 3804 * processor was not available above). 
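			 *
			 * The preference order below is: an unpaired primary running at
			 * lower priority, then any active processor running at lower
			 * priority, then the processor whose realtime deadline is
			 * furthest in the future; otherwise the search moves on to the
			 * next pset.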
3805 */ 3806 3807 if (thread->sched_pri > lowest_unpaired_primary_priority) { 3808 /* Move to end of active queue so that the next thread doesn't also pick it */ 3809 remqueue((queue_entry_t)lp_unpaired_primary_processor); 3810 enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor); 3811 return lp_unpaired_primary_processor; 3812 } 3813 if (thread->sched_pri > lowest_priority) { 3814 /* Move to end of active queue so that the next thread doesn't also pick it */ 3815 remqueue((queue_entry_t)lp_processor); 3816 enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor); 3817 return lp_processor; 3818 } 3819 if (thread->realtime.deadline < furthest_deadline) 3820 return fd_processor; 3821 3822 /* 3823 * If all primary and secondary CPUs are busy with realtime 3824 * threads with deadlines earlier than us, move on to next 3825 * pset. 3826 */ 3827 } 3828 else { 3829 3830 if (thread->sched_pri > lowest_unpaired_primary_priority) { 3831 /* Move to end of active queue so that the next thread doesn't also pick it */ 3832 remqueue((queue_entry_t)lp_unpaired_primary_processor); 3833 enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor); 3834 return lp_unpaired_primary_processor; 3835 } 3836 if (thread->sched_pri > lowest_priority) { 3837 /* Move to end of active queue so that the next thread doesn't also pick it */ 3838 remqueue((queue_entry_t)lp_processor); 3839 enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor); 3840 return lp_processor; 3841 } 3842 3843 /* 3844 * If all primary processor in this pset are running a higher 3845 * priority thread, move on to next pset. Only when we have 3846 * exhausted this search do we fall back to other heuristics. 3847 */ 3848 } 3849 3850 /* 3851 * Move onto the next processor set. 3852 */ 3853 nset = next_pset(cset); 3854 3855 if (nset != pset) { 3856 pset_unlock(cset); 3857 3858 cset = nset; 3859 pset_lock(cset); 3860 } 3861 } while (nset != pset); 3862 3863 /* 3864 * Make sure that we pick a running processor, 3865 * and that the correct processor set is locked. 3866 * Since we may have unlock the candidate processor's 3867 * pset, it may have changed state. 3868 * 3869 * All primary processors are running a higher priority 3870 * thread, so the only options left are enqueuing on 3871 * the secondary processor that would perturb the least priority 3872 * primary, or the least busy primary. 3873 */ 3874 do { 3875 3876 /* lowest_priority is evaluated in the main loops above */ 3877 if (lp_unpaired_secondary_processor != PROCESSOR_NULL) { 3878 processor = lp_unpaired_secondary_processor; 3879 lp_unpaired_secondary_processor = PROCESSOR_NULL; 3880 } else if (lc_processor != PROCESSOR_NULL) { 3881 processor = lc_processor; 3882 lc_processor = PROCESSOR_NULL; 3883 } else { 3884 /* 3885 * All processors are executing higher 3886 * priority threads, and the lowest_count 3887 * candidate was not usable 3888 */ 3889 processor = master_processor; 3890 } 3891 3892 /* 3893 * Check that the correct processor set is 3894 * returned locked. 3895 */ 3896 if (cset != processor->processor_set) { 3897 pset_unlock(cset); 3898 cset = processor->processor_set; 3899 pset_lock(cset); 3900 } 3901 3902 /* 3903 * We must verify that the chosen processor is still available. 3904 * master_processor is an exception, since we may need to preempt 3905 * a running thread on it during processor shutdown (for sleep), 3906 * and that thread needs to be enqueued on its runqueue to run 3907 * when the processor is restarted. 
3908 */ 3909 if (processor != master_processor && (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)) 3910 processor = PROCESSOR_NULL; 3911 3912 } while (processor == PROCESSOR_NULL); 3913 3914 return (processor); 3915} 3916 3917/* 3918 * thread_setrun: 3919 * 3920 * Dispatch thread for execution, onto an idle 3921 * processor or run queue, and signal a preemption 3922 * as appropriate. 3923 * 3924 * Thread must be locked. 3925 */ 3926void 3927thread_setrun( 3928 thread_t thread, 3929 integer_t options) 3930{ 3931 processor_t processor; 3932 processor_set_t pset; 3933 3934 assert(thread_runnable(thread)); 3935 3936 /* 3937 * Update priority if needed. 3938 */ 3939 if (SCHED(can_update_priority)(thread)) 3940 SCHED(update_priority)(thread); 3941 3942 thread->sfi_class = sfi_thread_classify(thread); 3943 3944 assert(thread->runq == PROCESSOR_NULL); 3945 3946 if (thread->bound_processor == PROCESSOR_NULL) { 3947 /* 3948 * Unbound case. 3949 */ 3950 if (thread->affinity_set != AFFINITY_SET_NULL) { 3951 /* 3952 * Use affinity set policy hint. 3953 */ 3954 pset = thread->affinity_set->aset_pset; 3955 pset_lock(pset); 3956 3957 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); 3958 3959 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3960 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); 3961 } 3962 else 3963 if (thread->last_processor != PROCESSOR_NULL) { 3964 /* 3965 * Simple (last processor) affinity case. 3966 */ 3967 processor = thread->last_processor; 3968 pset = processor->processor_set; 3969 pset_lock(pset); 3970 processor = SCHED(choose_processor)(pset, processor, thread); 3971 3972 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3973 (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0); 3974 } 3975 else { 3976 /* 3977 * No Affinity case: 3978 * 3979 * Utilize a per task hint to spread threads 3980 * among the available processor sets. 3981 */ 3982 task_t task = thread->task; 3983 3984 pset = task->pset_hint; 3985 if (pset == PROCESSOR_SET_NULL) 3986 pset = current_processor()->processor_set; 3987 3988 pset = choose_next_pset(pset); 3989 pset_lock(pset); 3990 3991 processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); 3992 task->pset_hint = processor->processor_set; 3993 3994 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 3995 (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); 3996 } 3997 } 3998 else { 3999 /* 4000 * Bound case: 4001 * 4002 * Unconditionally dispatch on the processor. 4003 */ 4004 processor = thread->bound_processor; 4005 pset = processor->processor_set; 4006 pset_lock(pset); 4007 4008 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, 4009 (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0); 4010 } 4011 4012 /* 4013 * Dispatch the thread on the chosen processor.
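	 * Realtime-priority threads are handed to realtime_setrun(),
	 * fairshare threads to fairshare_setrun(), and everything else
	 * to processor_setrun().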
4014 * TODO: This should be based on sched_mode, not sched_pri 4015 */ 4016 if (thread->sched_pri >= BASEPRI_RTQUEUES) 4017 realtime_setrun(processor, thread); 4018 else if (thread->sched_mode == TH_MODE_FAIRSHARE) 4019 fairshare_setrun(processor, thread); 4020 else 4021 processor_setrun(processor, thread, options); 4022} 4023 4024processor_set_t 4025task_choose_pset( 4026 task_t task) 4027{ 4028 processor_set_t pset = task->pset_hint; 4029 4030 if (pset != PROCESSOR_SET_NULL) 4031 pset = choose_next_pset(pset); 4032 4033 return (pset); 4034} 4035 4036#if defined(CONFIG_SCHED_TRADITIONAL) 4037 4038/* 4039 * processor_queue_shutdown: 4040 * 4041 * Shutdown a processor run queue by 4042 * re-dispatching non-bound threads. 4043 * 4044 * Associated pset must be locked, and is 4045 * returned unlocked. 4046 */ 4047void 4048processor_queue_shutdown( 4049 processor_t processor) 4050{ 4051 processor_set_t pset = processor->processor_set; 4052 run_queue_t rq = runq_for_processor(processor); 4053 queue_t queue = rq->queues + rq->highq; 4054 int pri = rq->highq, count = rq->count; 4055 thread_t next, thread; 4056 queue_head_t tqueue; 4057 4058 queue_init(&tqueue); 4059 4060 while (count > 0) { 4061 thread = (thread_t)queue_first(queue); 4062 while (!queue_end(queue, (queue_entry_t)thread)) { 4063 next = (thread_t)queue_next((queue_entry_t)thread); 4064 4065 if (thread->bound_processor == PROCESSOR_NULL) { 4066 remqueue((queue_entry_t)thread); 4067 4068 thread->runq = PROCESSOR_NULL; 4069 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 4070 runq_consider_decr_bound_count(processor, thread); 4071 rq->count--; 4072 if (SCHED(priority_is_urgent)(pri)) { 4073 rq->urgency--; assert(rq->urgency >= 0); 4074 } 4075 if (queue_empty(queue)) { 4076 if (pri != IDLEPRI) 4077 clrbit(MAXPRI - pri, rq->bitmap); 4078 rq->highq = MAXPRI - ffsbit(rq->bitmap); 4079 } 4080 4081 enqueue_tail(&tqueue, (queue_entry_t)thread); 4082 } 4083 count--; 4084 4085 thread = next; 4086 } 4087 4088 queue--; pri--; 4089 } 4090 4091 pset_unlock(pset); 4092 4093 while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) { 4094 thread_lock(thread); 4095 4096 thread_setrun(thread, SCHED_TAILQ); 4097 4098 thread_unlock(thread); 4099 } 4100} 4101 4102#endif /* CONFIG_SCHED_TRADITIONAL */ 4103 4104/* 4105 * Check for a preemption point in 4106 * the current context. 4107 * 4108 * Called at splsched with thread locked. 
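 *
 * Returns check_reason OR'd with the reason for preemption, or
 * AST_NONE if no preemption is needed.  A typical caller pattern
 * (see set_sched_pri() below) is roughly:
 *
 *	ast_t preempt = csw_check(processor, AST_NONE);
 *	if (preempt != AST_NONE)
 *		ast_on(preempt);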
4109 */ 4110ast_t 4111csw_check( 4112 processor_t processor, 4113 ast_t check_reason) 4114{ 4115 processor_set_t pset = processor->processor_set; 4116 ast_t result; 4117 4118 pset_lock(pset); 4119 4120 /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */ 4121 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); 4122 4123 result = csw_check_locked(processor, pset, check_reason); 4124 4125 pset_unlock(pset); 4126 4127 return result; 4128} 4129 4130/* 4131 * Check for preemption at splsched with 4132 * pset and thread locked 4133 */ 4134ast_t 4135csw_check_locked( 4136 processor_t processor, 4137 processor_set_t pset __unused, 4138 ast_t check_reason) 4139{ 4140 ast_t result; 4141 thread_t thread = processor->active_thread; 4142 4143 if (first_timeslice(processor)) { 4144 if (rt_runq.count > 0) 4145 return (check_reason | AST_PREEMPT | AST_URGENT); 4146 } 4147 else { 4148 if (rt_runq.count > 0) { 4149 if (BASEPRI_RTQUEUES > processor->current_pri) 4150 return (check_reason | AST_PREEMPT | AST_URGENT); 4151 else 4152 return (check_reason | AST_PREEMPT); 4153 } 4154 } 4155 4156 result = SCHED(processor_csw_check)(processor); 4157 if (result != AST_NONE) 4158 return (check_reason | result); 4159 4160 if (SCHED(should_current_thread_rechoose_processor)(processor)) 4161 return (check_reason | AST_PREEMPT); 4162 4163 if (thread->state & TH_SUSP) 4164 return (check_reason | AST_PREEMPT); 4165 4166 /* 4167 * Current thread may not need to be preempted, but maybe needs 4168 * an SFI wait? 4169 */ 4170 result = sfi_thread_needs_ast(thread, NULL); 4171 if (result != AST_NONE) 4172 return (check_reason | result); 4173 4174 return (AST_NONE); 4175} 4176 4177/* 4178 * set_sched_pri: 4179 * 4180 * Set the scheduled priority of the specified thread. 4181 * 4182 * This may cause the thread to change queues. 4183 * 4184 * Thread must be locked. 4185 */ 4186void 4187set_sched_pri( 4188 thread_t thread, 4189 int priority) 4190{ 4191 boolean_t removed = thread_run_queue_remove(thread); 4192 int curgency, nurgency; 4193 uint64_t urgency_param1, urgency_param2; 4194 thread_t cthread = current_thread(); 4195 4196 if (thread == cthread) { 4197 curgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2); 4198 } 4199 4200 thread->sched_pri = priority; 4201 4202 if (thread == cthread) { 4203 nurgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2); 4204/* set_sched_pri doesn't alter RT params. We expect direct base priority/QoS 4205 * class alterations from user space to occur relatively infrequently, hence 4206 * those are lazily handled. QoS classes have distinct priority bands, and QoS 4207 * inheritance is expected to involve priority changes. 
4208 */ 4209 if (nurgency != curgency) { 4210 thread_tell_urgency(nurgency, urgency_param1, urgency_param2, thread); 4211 } 4212 } 4213 4214 if (removed) 4215 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); 4216 else 4217 if (thread->state & TH_RUN) { 4218 processor_t processor = thread->last_processor; 4219 4220 if (thread == current_thread()) { 4221 ast_t preempt; 4222 4223 processor->current_pri = priority; 4224 processor->current_thmode = thread->sched_mode; 4225 processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread); 4226 if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE) 4227 ast_on(preempt); 4228 } 4229 else 4230 if ( processor != PROCESSOR_NULL && 4231 processor->active_thread == thread ) 4232 cause_ast_check(processor); 4233 } 4234} 4235 4236#if 0 4237 4238static void 4239run_queue_check( 4240 run_queue_t rq, 4241 thread_t thread) 4242{ 4243 queue_t q; 4244 queue_entry_t qe; 4245 4246 if (rq != thread->runq) 4247 panic("run_queue_check: thread runq"); 4248 4249 if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI) 4250 panic("run_queue_check: thread sched_pri"); 4251 4252 q = &rq->queues[thread->sched_pri]; 4253 qe = queue_first(q); 4254 while (!queue_end(q, qe)) { 4255 if (qe == (queue_entry_t)thread) 4256 return; 4257 4258 qe = queue_next(qe); 4259 } 4260 4261 panic("run_queue_check: end"); 4262} 4263 4264#endif /* DEBUG */ 4265 4266#if defined(CONFIG_SCHED_TRADITIONAL) 4267 4268/* 4269 * Locks the runqueue itself. 4270 * 4271 * Thread must be locked. 4272 */ 4273static boolean_t 4274processor_queue_remove( 4275 processor_t processor, 4276 thread_t thread) 4277{ 4278 void * rqlock; 4279 run_queue_t rq; 4280 4281 rqlock = &processor->processor_set->sched_lock; 4282 rq = runq_for_processor(processor); 4283 4284 simple_lock(rqlock); 4285 if (processor == thread->runq) { 4286 /* 4287 * Thread is on a run queue and we have a lock on 4288 * that run queue. 4289 */ 4290 runq_consider_decr_bound_count(processor, thread); 4291 run_queue_remove(rq, thread); 4292 } 4293 else { 4294 /* 4295 * The thread left the run queue before we could 4296 * lock the run queue. 4297 */ 4298 assert(thread->runq == PROCESSOR_NULL); 4299 processor = PROCESSOR_NULL; 4300 } 4301 4302 simple_unlock(rqlock); 4303 4304 return (processor != PROCESSOR_NULL); 4305} 4306 4307#endif /* CONFIG_SCHED_TRADITIONAL */ 4308 4309 4310/* 4311 * thread_run_queue_remove: 4312 * 4313 * Remove a thread from its current run queue and 4314 * return TRUE if successful. 4315 * 4316 * Thread must be locked. 4317 * 4318 * If thread->runq is PROCESSOR_NULL, the thread will not re-enter the 4319 * run queues because the caller locked the thread. Otherwise 4320 * the thread is on a run queue, but could be chosen for dispatch 4321 * and removed by another processor under a different lock, which 4322 * will set thread->runq to PROCESSOR_NULL. 4323 * 4324 * Hence the thread select path must not rely on anything that could 4325 * be changed under the thread lock after calling this function, 4326 * most importantly thread->sched_pri. 
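 *
 * A typical caller pattern (see set_sched_pri() above), sketched
 * here for illustration with a hypothetical new_pri value:
 *
 *	boolean_t removed = thread_run_queue_remove(thread);
 *	thread->sched_pri = new_pri;
 *	if (removed)
 *		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);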
4327 */ 4328boolean_t 4329thread_run_queue_remove( 4330 thread_t thread) 4331{ 4332 boolean_t removed = FALSE; 4333 processor_t processor = thread->runq; 4334 4335 if ((thread->state & (TH_RUN|TH_WAIT)) == TH_WAIT) { 4336 /* Thread isn't runnable */ 4337 assert(thread->runq == PROCESSOR_NULL); 4338 return FALSE; 4339 } 4340 4341 if (processor == PROCESSOR_NULL) { 4342 /* 4343 * The thread is either not on the runq, 4344 * or is in the midst of being removed from the runq. 4345 * 4346 * runq is set to NULL under the pset lock, not the thread 4347 * lock, so the thread may still be in the process of being dequeued 4348 * from the runq. It will wait in invoke for the thread lock to be 4349 * dropped. 4350 */ 4351 4352 return FALSE; 4353 } 4354 4355 if (thread->sched_mode == TH_MODE_FAIRSHARE) { 4356 return SCHED(fairshare_queue_remove)(thread); 4357 } 4358 4359 if (thread->sched_pri < BASEPRI_RTQUEUES) { 4360 return SCHED(processor_queue_remove)(processor, thread); 4361 } 4362 4363 simple_lock(&rt_lock); 4364 4365 if (thread->runq != PROCESSOR_NULL) { 4366 /* 4367 * Thread is on a run queue and we have a lock on 4368 * that run queue. 4369 */ 4370 4371 assert(thread->runq == RT_RUNQ); 4372 4373 remqueue((queue_entry_t)thread); 4374 SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); 4375 rt_runq.count--; 4376 4377 thread->runq = PROCESSOR_NULL; 4378 4379 removed = TRUE; 4380 } 4381 4382 simple_unlock(&rt_lock); 4383 4384 return (removed); 4385} 4386 4387#if defined(CONFIG_SCHED_TRADITIONAL) 4388 4389/* 4390 * steal_processor_thread: 4391 * 4392 * Locate a thread to steal from the processor and 4393 * return it. 4394 * 4395 * Associated pset must be locked. Returns THREAD_NULL 4396 * on failure. 4397 */ 4398static thread_t 4399steal_processor_thread( 4400 processor_t processor) 4401{ 4402 run_queue_t rq = runq_for_processor(processor); 4403 queue_t queue = rq->queues + rq->highq; 4404 int pri = rq->highq, count = rq->count; 4405 thread_t thread; 4406 4407 while (count > 0) { 4408 thread = (thread_t)queue_first(queue); 4409 while (!queue_end(queue, (queue_entry_t)thread)) { 4410 if (thread->bound_processor == PROCESSOR_NULL) { 4411 remqueue((queue_entry_t)thread); 4412 4413 thread->runq = PROCESSOR_NULL; 4414 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); 4415 runq_consider_decr_bound_count(processor, thread); 4416 rq->count--; 4417 if (SCHED(priority_is_urgent)(pri)) { 4418 rq->urgency--; assert(rq->urgency >= 0); 4419 } 4420 if (queue_empty(queue)) { 4421 if (pri != IDLEPRI) 4422 clrbit(MAXPRI - pri, rq->bitmap); 4423 rq->highq = MAXPRI - ffsbit(rq->bitmap); 4424 } 4425 4426 return (thread); 4427 } 4428 count--; 4429 4430 thread = (thread_t)queue_next((queue_entry_t)thread); 4431 } 4432 4433 queue--; pri--; 4434 } 4435 4436 return (THREAD_NULL); 4437} 4438 4439/* 4440 * Locate and steal a thread, beginning 4441 * at the pset. 4442 * 4443 * The pset must be locked, and is returned 4444 * unlocked. 4445 * 4446 * Returns the stolen thread, or THREAD_NULL on 4447 * failure. 
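 *
 * The search walks the active processors of the starting pset and
 * then each successive pset (via next_pset()) until it wraps back
 * around, stealing from the first processor whose run queue holds
 * a non-bound thread.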
4448 */ 4449static thread_t 4450steal_thread( 4451 processor_set_t pset) 4452{ 4453 processor_set_t nset, cset = pset; 4454 processor_t processor; 4455 thread_t thread; 4456 4457 do { 4458 processor = (processor_t)queue_first(&cset->active_queue); 4459 while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { 4460 if (runq_for_processor(processor)->count > 0) { 4461 thread = steal_processor_thread(processor); 4462 if (thread != THREAD_NULL) { 4463 remqueue((queue_entry_t)processor); 4464 enqueue_tail(&cset->active_queue, (queue_entry_t)processor); 4465 4466 pset_unlock(cset); 4467 4468 return (thread); 4469 } 4470 } 4471 4472 processor = (processor_t)queue_next((queue_entry_t)processor); 4473 } 4474 4475 nset = next_pset(cset); 4476 4477 if (nset != pset) { 4478 pset_unlock(cset); 4479 4480 cset = nset; 4481 pset_lock(cset); 4482 } 4483 } while (nset != pset); 4484 4485 pset_unlock(cset); 4486 4487 return (THREAD_NULL); 4488} 4489 4490static thread_t steal_thread_disabled( 4491 processor_set_t pset) 4492{ 4493 pset_unlock(pset); 4494 4495 return (THREAD_NULL); 4496} 4497 4498#endif /* CONFIG_SCHED_TRADITIONAL */ 4499 4500 4501void 4502sys_override_cpu_throttle(int flag) 4503{ 4504 if (flag == CPU_THROTTLE_ENABLE) 4505 cpu_throttle_enabled = 1; 4506 if (flag == CPU_THROTTLE_DISABLE) 4507 cpu_throttle_enabled = 0; 4508} 4509 4510int 4511thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2) 4512{ 4513 if (thread == NULL || (thread->state & TH_IDLE)) { 4514 *arg1 = 0; 4515 *arg2 = 0; 4516 4517 return (THREAD_URGENCY_NONE); 4518 } else if (thread->sched_mode == TH_MODE_REALTIME) { 4519 *arg1 = thread->realtime.period; 4520 *arg2 = thread->realtime.deadline; 4521 4522 return (THREAD_URGENCY_REAL_TIME); 4523 } else if (cpu_throttle_enabled && 4524 ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) { 4525 /* 4526 * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted 4527 * TODO: Use TH_SFLAG_THROTTLED instead? 4528 */ 4529 *arg1 = thread->sched_pri; 4530 *arg2 = thread->priority; 4531 4532 return (THREAD_URGENCY_BACKGROUND); 4533 } else { 4534 /* For otherwise unclassified threads, report throughput QoS 4535 * parameters 4536 */ 4537 *arg1 = thread->effective_policy.t_through_qos; 4538 *arg2 = thread->task->effective_policy.t_through_qos; 4539 4540 return (THREAD_URGENCY_NORMAL); 4541 } 4542} 4543 4544 4545/* 4546 * This is the processor idle loop, which just looks for other threads 4547 * to execute. Processor idle threads invoke this without supplying a 4548 * current thread to idle without an asserted wait state. 4549 * 4550 * Returns a the next thread to execute if dispatched directly. 4551 */ 4552 4553#if 0 4554#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__) 4555#else 4556#define IDLE_KERNEL_DEBUG_CONSTANT(...) 
do { } while(0) 4557#endif 4558 4559thread_t 4560processor_idle( 4561 thread_t thread, 4562 processor_t processor) 4563{ 4564 processor_set_t pset = processor->processor_set; 4565 thread_t new_thread; 4566 int state; 4567 (void)splsched(); 4568 4569 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4570 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, 4571 (uintptr_t)thread_tid(thread), 0, 0, 0, 0); 4572 4573 SCHED_STATS_CPU_IDLE_START(processor); 4574 4575 timer_switch(&PROCESSOR_DATA(processor, system_state), 4576 mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state)); 4577 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state); 4578 4579 while (1) { 4580 if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */ 4581 break; 4582 if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id)) 4583 break; 4584 if (rt_runq.count) 4585 break; 4586#if CONFIG_SCHED_IDLE_IN_PLACE 4587 if (thread != THREAD_NULL) { 4588 /* Did the idle-in-place thread wake up? */ 4589 if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active) 4590 break; 4591 } 4592#endif 4593 4594 IDLE_KERNEL_DEBUG_CONSTANT( 4595 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0); 4596 4597 machine_track_platform_idle(TRUE); 4598 4599 machine_idle(); 4600 4601 machine_track_platform_idle(FALSE); 4602 4603 (void)splsched(); 4604 4605 IDLE_KERNEL_DEBUG_CONSTANT( 4606 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0); 4607 4608 if (!SCHED(processor_queue_empty)(processor)) { 4609 /* Secondary SMT processors respond to directed wakeups 4610 * exclusively. Some platforms induce 'spurious' SMT wakeups. 4611 */ 4612 if (processor->processor_primary == processor) 4613 break; 4614 } 4615 } 4616 4617 timer_switch(&PROCESSOR_DATA(processor, idle_state), 4618 mach_absolute_time(), &PROCESSOR_DATA(processor, system_state)); 4619 PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state); 4620 4621 pset_lock(pset); 4622 4623 /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */ 4624 pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); 4625 4626 state = processor->state; 4627 if (state == PROCESSOR_DISPATCHING) { 4628 /* 4629 * Common case -- cpu dispatched.
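	 * The dispatching cpu stashed the chosen thread in
	 * processor->next_thread; consume it here, unless something of
	 * higher priority has since appeared on a run queue, in which
	 * case the thread is re-dispatched via thread_setrun() and
	 * THREAD_NULL is returned so the caller re-runs selection.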
4630 */ 4631 new_thread = processor->next_thread; 4632 processor->next_thread = THREAD_NULL; 4633 processor->state = PROCESSOR_RUNNING; 4634 4635 if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) || 4636 (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)) ) { 4637 /* Something higher priority has popped up on the runqueue - redispatch this thread elsewhere */ 4638 processor->current_pri = IDLEPRI; 4639 processor->current_thmode = TH_MODE_FIXED; 4640 processor->current_sfi_class = SFI_CLASS_KERNEL; 4641 processor->deadline = UINT64_MAX; 4642 4643 pset_unlock(pset); 4644 4645 thread_lock(new_thread); 4646 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0); 4647 thread_setrun(new_thread, SCHED_HEADQ); 4648 thread_unlock(new_thread); 4649 4650 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4651 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4652 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4653 4654 return (THREAD_NULL); 4655 } 4656 4657 pset_unlock(pset); 4658 4659 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4660 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4661 (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0); 4662 4663 return (new_thread); 4664 } 4665 else 4666 if (state == PROCESSOR_IDLE) { 4667 remqueue((queue_entry_t)processor); 4668 4669 processor->state = PROCESSOR_RUNNING; 4670 processor->current_pri = IDLEPRI; 4671 processor->current_thmode = TH_MODE_FIXED; 4672 processor->current_sfi_class = SFI_CLASS_KERNEL; 4673 processor->deadline = UINT64_MAX; 4674 enqueue_tail(&pset->active_queue, (queue_entry_t)processor); 4675 } 4676 else 4677 if (state == PROCESSOR_SHUTDOWN) { 4678 /* 4679 * Going off-line. Force a 4680 * reschedule. 4681 */ 4682 if ((new_thread = processor->next_thread) != THREAD_NULL) { 4683 processor->next_thread = THREAD_NULL; 4684 processor->current_pri = IDLEPRI; 4685 processor->current_thmode = TH_MODE_FIXED; 4686 processor->current_sfi_class = SFI_CLASS_KERNEL; 4687 processor->deadline = UINT64_MAX; 4688 4689 pset_unlock(pset); 4690 4691 thread_lock(new_thread); 4692 thread_setrun(new_thread, SCHED_HEADQ); 4693 thread_unlock(new_thread); 4694 4695 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4696 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4697 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4698 4699 return (THREAD_NULL); 4700 } 4701 } 4702 4703 pset_unlock(pset); 4704 4705 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4706 MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 4707 (uintptr_t)thread_tid(thread), state, 0, 0, 0); 4708 4709 return (THREAD_NULL); 4710} 4711 4712/* 4713 * Each processor has a dedicated thread which 4714 * executes the idle loop when there is no suitable 4715 * previous context. 
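 *
 * idle_thread_create() below binds that thread to its processor at
 * IDLEPRI, marks it TH_OPT_IDLE_THREAD, and records it in
 * processor->idle_thread.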
4716 */ 4717void 4718idle_thread(void) 4719{ 4720 processor_t processor = current_processor(); 4721 thread_t new_thread; 4722 4723 new_thread = processor_idle(THREAD_NULL, processor); 4724 if (new_thread != THREAD_NULL) { 4725 thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); 4726 /*NOTREACHED*/ 4727 } 4728 4729 thread_block((thread_continue_t)idle_thread); 4730 /*NOTREACHED*/ 4731} 4732 4733kern_return_t 4734idle_thread_create( 4735 processor_t processor) 4736{ 4737 kern_return_t result; 4738 thread_t thread; 4739 spl_t s; 4740 4741 result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread); 4742 if (result != KERN_SUCCESS) 4743 return (result); 4744 4745 s = splsched(); 4746 thread_lock(thread); 4747 thread->bound_processor = processor; 4748 processor->idle_thread = thread; 4749 thread->sched_pri = thread->priority = IDLEPRI; 4750 thread->state = (TH_RUN | TH_IDLE); 4751 thread->options |= TH_OPT_IDLE_THREAD; 4752 thread_unlock(thread); 4753 splx(s); 4754 4755 thread_deallocate(thread); 4756 4757 return (KERN_SUCCESS); 4758} 4759 4760/* 4761 * sched_startup: 4762 * 4763 * Kicks off scheduler services. 4764 * 4765 * Called at splsched. 4766 */ 4767void 4768sched_startup(void) 4769{ 4770 kern_return_t result; 4771 thread_t thread; 4772 4773 result = kernel_thread_start_priority((thread_continue_t)sched_init_thread, 4774 (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread); 4775 if (result != KERN_SUCCESS) 4776 panic("sched_startup"); 4777 4778 thread_deallocate(thread); 4779 4780 /* 4781 * Yield to the sched_init_thread once, to 4782 * initialize our own thread after being switched 4783 * back to. 4784 * 4785 * The current thread is the only other thread 4786 * active at this point. 4787 */ 4788 thread_block(THREAD_CONTINUE_NULL); 4789} 4790 4791#if defined(CONFIG_SCHED_TIMESHARE_CORE) 4792 4793static volatile uint64_t sched_maintenance_deadline; 4794#if defined(CONFIG_TELEMETRY) 4795static volatile uint64_t sched_telemetry_deadline = 0; 4796#endif 4797static uint64_t sched_tick_last_abstime; 4798static uint64_t sched_tick_delta; 4799uint64_t sched_tick_max_delta; 4800/* 4801 * sched_init_thread: 4802 * 4803 * Perform periodic bookkeeping functions about ten 4804 * times per second. 4805 */ 4806void 4807sched_traditional_maintenance_continue(void) 4808{ 4809 uint64_t sched_tick_ctime, late_time; 4810 4811 sched_tick_ctime = mach_absolute_time(); 4812 4813 if (__improbable(sched_tick_last_abstime == 0)) { 4814 sched_tick_last_abstime = sched_tick_ctime; 4815 late_time = 0; 4816 sched_tick_delta = 1; 4817 } else { 4818 late_time = sched_tick_ctime - sched_tick_last_abstime; 4819 sched_tick_delta = late_time / sched_tick_interval; 4820 /* Ensure a delta of 1, since the interval could be slightly 4821 * smaller than the sched_tick_interval due to dispatch 4822 * latencies. 4823 */ 4824 sched_tick_delta = MAX(sched_tick_delta, 1); 4825 4826 /* In the event interrupt latencies or platform 4827 * idle events that advanced the timebase resulted 4828 * in periods where no threads were dispatched, 4829 * cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA 4830 * iterations. 
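	 *
	 * Worked example (illustrative numbers): if late_time spans
	 * 3.7 sched_tick_intervals, the integer division above yields
	 * sched_tick_delta == 3; the MAX() above keeps the delta at
	 * least 1, and the MIN() below caps it at SCHED_TICK_MAX_DELTA.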
4831 */ 4832 sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA); 4833 4834 sched_tick_last_abstime = sched_tick_ctime; 4835 sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta); 4836 } 4837 4838 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_START, 4839 sched_tick_delta, 4840 late_time, 4841 0, 4842 0, 4843 0); 4844 4845 /* Add a number of pseudo-ticks corresponding to the elapsed interval 4846 * This could be greater than 1 if substantial intervals where 4847 * all processors are idle occur, which rarely occurs in practice. 4848 */ 4849 4850 sched_tick += sched_tick_delta; 4851 4852 /* 4853 * Compute various averages. 4854 */ 4855 compute_averages(sched_tick_delta); 4856 4857 /* 4858 * Scan the run queues for threads which 4859 * may need to be updated. 4860 */ 4861 SCHED(thread_update_scan)(); 4862 4863 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_END, 4864 sched_pri_shift, 4865 sched_background_pri_shift, 4866 0, 4867 0, 4868 0); 4869 4870 assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT); 4871 thread_block((thread_continue_t)sched_traditional_maintenance_continue); 4872 /*NOTREACHED*/ 4873} 4874 4875static uint64_t sched_maintenance_wakeups; 4876 4877/* 4878 * Determine if the set of routines formerly driven by a maintenance timer 4879 * must be invoked, based on a deadline comparison. Signals the scheduler 4880 * maintenance thread on deadline expiration. Must be invoked at an interval 4881 * lower than the "sched_tick_interval", currently accomplished by 4882 * invocation via the quantum expiration timer and at context switch time. 4883 * Performance matters: this routine reuses a timestamp approximating the 4884 * current absolute time received from the caller, and should perform 4885 * no more than a comparison against the deadline in the common case. 4886 */ 4887void 4888sched_traditional_consider_maintenance(uint64_t ctime) { 4889 uint64_t ndeadline, deadline = sched_maintenance_deadline; 4890 4891 if (__improbable(ctime >= deadline)) { 4892 if (__improbable(current_thread() == sched_maintenance_thread)) 4893 return; 4894 OSMemoryBarrier(); 4895 4896 ndeadline = ctime + sched_tick_interval; 4897 4898 if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) { 4899 thread_wakeup((event_t)sched_traditional_maintenance_continue); 4900 sched_maintenance_wakeups++; 4901 } 4902 } 4903 4904#if defined(CONFIG_TELEMETRY) 4905 /* 4906 * Windowed telemetry is driven by the scheduler. It should be safe 4907 * to call compute_telemetry_windowed() even when windowed telemetry 4908 * is disabled, but we should try to avoid doing extra work for no 4909 * reason. 
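	 * The telemetry deadline below is re-armed with the same
	 * compare-and-swap pattern used for sched_maintenance_deadline
	 * above, so only one racing CPU performs the window computation.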
4910 */ 4911 if (telemetry_window_enabled) { 4912 deadline = sched_telemetry_deadline; 4913 4914 if (__improbable(ctime >= deadline)) { 4915 ndeadline = ctime + sched_telemetry_interval; 4916 4917 if (__probable(__sync_bool_compare_and_swap(&sched_telemetry_deadline, deadline, ndeadline))) { 4918 compute_telemetry_windowed(); 4919 } 4920 } 4921 } 4922#endif /* CONFIG_TELEMETRY */ 4923} 4924 4925#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 4926 4927void 4928sched_init_thread(void (*continuation)(void)) 4929{ 4930 thread_block(THREAD_CONTINUE_NULL); 4931 4932 sched_maintenance_thread = current_thread(); 4933 continuation(); 4934 4935 /*NOTREACHED*/ 4936} 4937 4938#if defined(CONFIG_SCHED_TIMESHARE_CORE) 4939 4940/* 4941 * thread_update_scan / runq_scan: 4942 * 4943 * Scan the run queues to account for timesharing threads 4944 * which need to be updated. 4945 * 4946 * Scanner runs in two passes. Pass one squirrels likely 4947 * threads away in an array, pass two does the update. 4948 * 4949 * This is necessary because the run queue is locked for 4950 * the candidate scan, but the thread is locked for the update. 4951 * 4952 * Array should be sized to make forward progress, without 4953 * disabling preemption for long periods. 4954 */ 4955 4956#define THREAD_UPDATE_SIZE 128 4957 4958static thread_t thread_update_array[THREAD_UPDATE_SIZE]; 4959static int thread_update_count = 0; 4960 4961/* Returns TRUE if thread was added, FALSE if thread_update_array is full */ 4962boolean_t 4963thread_update_add_thread(thread_t thread) 4964{ 4965 if (thread_update_count == THREAD_UPDATE_SIZE) 4966 return (FALSE); 4967 4968 thread_update_array[thread_update_count++] = thread; 4969 thread_reference_internal(thread); 4970 return (TRUE); 4971} 4972 4973void 4974thread_update_process_threads(void) 4975{ 4976 while (thread_update_count > 0) { 4977 spl_t s; 4978 thread_t thread = thread_update_array[--thread_update_count]; 4979 thread_update_array[thread_update_count] = THREAD_NULL; 4980 4981 s = splsched(); 4982 thread_lock(thread); 4983 if (!(thread->state & (TH_WAIT)) && (SCHED(can_update_priority)(thread))) { 4984 SCHED(update_priority)(thread); 4985 } 4986 thread_unlock(thread); 4987 splx(s); 4988 4989 thread_deallocate(thread); 4990 } 4991} 4992 4993/* 4994 * Scan a runq for candidate threads. 4995 * 4996 * Returns TRUE if retry is needed. 
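 *
 * TRUE means thread_update_array filled up (THREAD_UPDATE_SIZE
 * entries) before the scan finished; the caller is expected to
 * drain it via thread_update_process_threads() and then restart
 * the scan, as thread_update_scan() does below.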
4997 */ 4998boolean_t 4999runq_scan( 5000 run_queue_t runq) 5001{ 5002 register int count; 5003 register queue_t q; 5004 register thread_t thread; 5005 5006 if ((count = runq->count) > 0) { 5007 q = runq->queues + runq->highq; 5008 while (count > 0) { 5009 queue_iterate(q, thread, thread_t, links) { 5010 if ( thread->sched_stamp != sched_tick && 5011 (thread->sched_mode == TH_MODE_TIMESHARE) ) { 5012 if (thread_update_add_thread(thread) == FALSE) 5013 return (TRUE); 5014 } 5015 5016 count--; 5017 } 5018 5019 q--; 5020 } 5021 } 5022 5023 return (FALSE); 5024} 5025 5026#endif /* CONFIG_SCHED_TIMESHARE_CORE */ 5027 5028#if defined(CONFIG_SCHED_TRADITIONAL) 5029 5030static void 5031thread_update_scan(void) 5032{ 5033 boolean_t restart_needed = FALSE; 5034 processor_t processor = processor_list; 5035 processor_set_t pset; 5036 thread_t thread; 5037 spl_t s; 5038 5039 do { 5040 do { 5041 /* 5042 * TODO: in sched_traditional_use_pset_runqueue case, 5043 * avoid scanning the same runq multiple times 5044 */ 5045 pset = processor->processor_set; 5046 5047 s = splsched(); 5048 pset_lock(pset); 5049 5050 restart_needed = runq_scan(runq_for_processor(processor)); 5051 5052 pset_unlock(pset); 5053 splx(s); 5054 5055 if (restart_needed) 5056 break; 5057 5058 thread = processor->idle_thread; 5059 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { 5060 if (thread_update_add_thread(thread) == FALSE) { 5061 restart_needed = TRUE; 5062 break; 5063 } 5064 } 5065 } while ((processor = processor->processor_list) != NULL); 5066 5067 /* Ok, we now have a collection of candidates -- fix them. */ 5068 thread_update_process_threads(); 5069 } while (restart_needed); 5070} 5071 5072#endif /* CONFIG_SCHED_TRADITIONAL */ 5073 5074boolean_t 5075thread_eager_preemption(thread_t thread) 5076{ 5077 return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0); 5078} 5079 5080void 5081thread_set_eager_preempt(thread_t thread) 5082{ 5083 spl_t x; 5084 processor_t p; 5085 ast_t ast = AST_NONE; 5086 5087 x = splsched(); 5088 p = current_processor(); 5089 5090 thread_lock(thread); 5091 thread->sched_flags |= TH_SFLAG_EAGERPREEMPT; 5092 5093 if (thread == current_thread()) { 5094 5095 ast = csw_check(p, AST_NONE); 5096 thread_unlock(thread); 5097 if (ast != AST_NONE) { 5098 (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast); 5099 } 5100 } else { 5101 p = thread->last_processor; 5102 5103 if (p != PROCESSOR_NULL && p->state == PROCESSOR_RUNNING && 5104 p->active_thread == thread) { 5105 cause_ast_check(p); 5106 } 5107 5108 thread_unlock(thread); 5109 } 5110 5111 splx(x); 5112} 5113 5114void 5115thread_clear_eager_preempt(thread_t thread) 5116{ 5117 spl_t x; 5118 5119 x = splsched(); 5120 thread_lock(thread); 5121 5122 thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT; 5123 5124 thread_unlock(thread); 5125 splx(x); 5126} 5127/* 5128 * Scheduling statistics 5129 */ 5130void 5131sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri) 5132{ 5133 struct processor_sched_statistics *stats; 5134 boolean_t to_realtime = FALSE; 5135 5136 stats = &processor->processor_data.sched_stats; 5137 stats->csw_count++; 5138 5139 if (otherpri >= BASEPRI_REALTIME) { 5140 stats->rt_sched_count++; 5141 to_realtime = TRUE; 5142 } 5143 5144 if ((reasons & AST_PREEMPT) != 0) { 5145 stats->preempt_count++; 5146 5147 if (selfpri >= BASEPRI_REALTIME) { 5148 stats->preempted_rt_count++; 5149 } 5150 5151 if (to_realtime) { 5152 stats->preempted_by_rt_count++; 5153 } 5154 5155 } 5156} 5157 5158void 
5159sched_stats_handle_runq_change(struct runq_stats *stats, int old_count) 5160{ 5161 uint64_t timestamp = mach_absolute_time(); 5162 5163 stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count; 5164 stats->last_change_timestamp = timestamp; 5165} 5166 5167/* 5168 * For calls from assembly code 5169 */ 5170#undef thread_wakeup 5171void 5172thread_wakeup( 5173 event_t x); 5174 5175void 5176thread_wakeup( 5177 event_t x) 5178{ 5179 thread_wakeup_with_result(x, THREAD_AWAKENED); 5180} 5181 5182boolean_t 5183preemption_enabled(void) 5184{ 5185 return (get_preemption_level() == 0 && ml_get_interrupts_enabled()); 5186} 5187 5188__assert_only static boolean_t 5189thread_runnable( 5190 thread_t thread) 5191{ 5192 return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN); 5193} 5194 5195static void 5196sched_timer_deadline_tracking_init(void) { 5197 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1); 5198 nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2); 5199} 5200
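#if 0
/*
 * Illustrative only, therefore not compiled: a minimal sketch of how the
 * time-weighted count_sum maintained by sched_stats_handle_runq_change()
 * above could be reduced to an average run-queue depth over a sampling
 * interval.  The runq_depth_sample structure and runq_average_depth()
 * helper are hypothetical and not part of the scheduler interface; the
 * sketch also ignores the partial interval since the last runq change,
 * which is only folded into count_sum on the next change.
 */
struct runq_depth_sample {
	uint64_t	count_sum;
	uint64_t	timestamp;
};

static uint64_t
runq_average_depth(struct runq_stats *stats, struct runq_depth_sample *prev)
{
	uint64_t	now = mach_absolute_time();
	uint64_t	elapsed = now - prev->timestamp;
	uint64_t	depth = 0;

	if (elapsed > 0) {
		/* count_sum integrates (runq count * elapsed time); divide by
		 * the sampling interval to recover the mean depth in threads. */
		depth = (stats->count_sum - prev->count_sum) / elapsed;
	}

	prev->count_sum = stats->count_sum;
	prev->timestamp = now;

	return (depth);
}
#endif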