/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/task_server.h>

#include <kern/sched.h>
#include <kern/task.h>
#include <mach/thread_policy.h>
#include <sys/errno.h>
#include <sys/resource.h>
#include <machine/limits.h>
#include <kern/ledger.h>
#include <kern/thread_call.h>
#if CONFIG_TELEMETRY
#include <kern/telemetry.h>
#endif

#if IMPORTANCE_DEBUG
#include <mach/machine/sdt.h>
#endif /* IMPORTANCE_DEBUG */

#include <sys/kdebug.h>

/*
 * Task Policy
 *
 * This subsystem manages task and thread IO priority and backgrounding,
 * as well as importance inheritance, process suppression, task QoS, and apptype.
 * These properties have a surprising number of complex interactions, so they are
 * centralized here in one state machine to simplify the implementation of those interactions.
 *
 * Architecture:
 * Threads and tasks have three policy fields: requested, effective, and pending.
 * Requested represents the wishes of each interface that influences task policy.
 * Effective represents the distillation of that policy into a set of behaviors.
 * Pending represents updates that haven't been applied yet.
 *
 * Each interface that has an input into the task policy state machine controls a field in requested.
 * If the interface has a getter, it returns what is in the field in requested, but that is
 * not necessarily what is actually in effect.
 *
 * All kernel subsystems that behave differently based on task policy call into
 * the get_effective_policy function, which returns the decision of the task policy state machine
 * for that subsystem by querying only the 'effective' field.
 *
 * Policy change operations:
 * Here are the steps to change a policy on a task or thread:
 * 1) Lock task
 * 2) Change requested field for the relevant policy
 * 3) Run a task policy update, which recalculates effective based on requested,
 *    then takes a diff between the old and new versions of requested and calls the relevant
 *    other subsystems to apply these changes, and updates the pending field.
 * 4) Unlock task
 * 5) Run task policy update complete, which looks at the pending field to update
 *    subsystems which cannot be touched while holding the task lock.
 *
 * To add a new requested policy, add the field in the requested struct, the flavor in task.h,
 * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield,
 * then set up the effects of that behavior in task_policy_update*.
 *
 * Most policies are set via proc_set_task_policy, but policies that don't fit that interface
 * roll their own lock/set/update/unlock/complete code inside this file.
 *
 *
 * Suppression policy
 *
 * These are a set of behaviors that can be requested for a task. They currently have specific
 * implied actions when they're enabled, but they may be made customizable in the future.
 *
 * When the affected task is boosted, we temporarily disable the suppression behaviors
 * so that the affected process has a chance to run so it can call the API to permanently
 * disable the suppression behaviors.
 *
 * Locking
 *
 * Changing task policy on a task or thread takes the task lock, and not the thread lock.
 * TODO: Should changing policy on a thread take the thread lock instead?
 *
 * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code.
 * This means that any notification of state change needs to be externally synchronized.
 *
 */
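
/*
 * Illustrative sketch only (not part of this file's implementation): the
 * lock/set/update/unlock/complete sequence described above, as it is
 * realized by proc_set_task_policy() further down in this file. The
 * wrapper function name is hypothetical; the calls and flavors are real.
 *
 *	static void
 *	example_set_darwinbg(task_t task)
 *	{
 *		task_lock(task);                                         // 1) lock
 *		proc_set_task_policy_locked(task, THREAD_NULL,           // 2) set requested
 *		    TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG,
 *		    TASK_POLICY_ENABLE);
 *		task_policy_update_locked(task, THREAD_NULL);            // 3) recompute effective, pend work
 *		task_unlock(task);                                       // 4) unlock
 *		task_policy_update_complete_unlocked(task, THREAD_NULL); // 5) apply pended updates
 *	}
 */
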
/* for task holds without dropping the lock */
extern void task_hold_locked(task_t task);
extern void task_release_locked(task_t task);
extern void task_wait_locked(task_t task, boolean_t until_not_runnable);

/* Task policy related helper functions */
static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value);

static void task_policy_update_locked(task_t task, thread_t thread);
static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create);
static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle);
static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle);

static void task_policy_update_complete_unlocked(task_t task, thread_t thread);

static int proc_get_effective_policy(task_t task, thread_t thread, int policy);

static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive);
static int proc_tier_to_iopol(int tier, int passive);

static uintptr_t trequested(task_t task, thread_t thread);
static uintptr_t teffective(task_t task, thread_t thread);
static uintptr_t tpending(task_t task, thread_t thread);
static uint64_t task_requested_bitfield(task_t task, thread_t thread);
static uint64_t task_effective_bitfield(task_t task, thread_t thread);
static uint64_t task_pending_bitfield(task_t task, thread_t thread);

void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);

/* CPU Limits related helper functions */
static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
int task_disable_cpumon(task_t task);
static int task_apply_resource_actions(task_t task, int type);
void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
void proc_init_cpumon_params(void);

#ifdef MACH_BSD
int proc_pid(void *proc);
extern int proc_selfpid(void);
extern char * proc_name_address(void *p);
extern void rethrottle_thread(void * uthread);
extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg);
#endif /* MACH_BSD */


/* Importance Inheritance related helper functions */

void task_importance_mark_receiver(task_t task, boolean_t receiving);

#if IMPORTANCE_INHERITANCE
static void task_update_boost_locked(task_t task, boolean_t boost_active);

static int task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count);
static int task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count);
#endif /* IMPORTANCE_INHERITANCE */

#if IMPORTANCE_DEBUG
#define __impdebug_only
#else
#define __impdebug_only __unused
#endif

#if IMPORTANCE_INHERITANCE
#define __imp_only
#else
#define __imp_only __unused
#endif

#define TASK_LOCKED   1
#define TASK_UNLOCKED 0

#define DO_LOWPRI_CPU   1
#define UNDO_LOWPRI_CPU 2

/* Macros for making tracing simpler */

#define tpriority(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task->priority) : (thread->priority)))
#define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD)
#define targetid(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (audit_token_pid_from_task(task)) : (thread->thread_id)))

/*
 * Default parameters for certain policies
 */

int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
int proc_tal_disk_tier        = THROTTLE_LEVEL_TIER1;

int proc_graphics_timer_qos   = (LATENCY_QOS_TIER_0 & 0xFF);

const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;


const struct task_requested_policy default_task_requested_policy = {
	.bg_iotier = proc_default_bg_iotier
};
const struct task_effective_policy default_task_effective_policy = {};
const struct task_pended_policy default_task_pended_policy = {};

/*
 * Default parameters for CPU usage monitor.
 *
 * Default setting is 50% over 3 minutes.
 */
#define DEFAULT_CPUMON_PERCENTAGE 50
#define DEFAULT_CPUMON_INTERVAL   (3 * 60)

uint8_t  proc_max_cpumon_percentage;
uint64_t proc_max_cpumon_interval;

static kern_return_t
task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) {
	if (count < TASK_QOS_POLICY_COUNT)
		return KERN_INVALID_ARGUMENT;

	task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
	task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;

	if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
	    ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0)))
		return KERN_INVALID_ARGUMENT;

	if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
	    ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0)))
		return KERN_INVALID_ARGUMENT;

	return KERN_SUCCESS;
}

static uint32_t
task_qos_extract(uint32_t qv) {
	return (qv & 0xFF);
}

static uint32_t
task_qos_latency_package(uint32_t qv) {
	return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
}
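
/*
 * Worked example of the QoS packing scheme (a sketch derived from the helpers
 * above and task_qos_throughput_package() below): the userspace-visible tier
 * constants carry a magic tag in their upper bits, the kernel stores only the
 * low byte, and the package routines re-apply the tag on the way out.
 *
 *	stored = task_qos_extract(qv);       // qv & 0xFF
 *	task_qos_latency_package(stored);    // (0xFF << 16) | stored
 *	task_qos_throughput_package(stored); // (0xFE << 16) | stored
 *
 * LATENCY_QOS_TIER_UNSPECIFIED and THROUGHPUT_QOS_TIER_UNSPECIFIED pass
 * through the package routines unchanged.
 */
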
static uint32_t
task_qos_throughput_package(uint32_t qv) {
	return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
}

kern_return_t
task_policy_set(
	task_t                  task,
	task_policy_flavor_t    flavor,
	task_policy_t           policy_info,
	mach_msg_type_number_t  count)
{
	kern_return_t result = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	switch (flavor) {

	case TASK_CATEGORY_POLICY: {
		task_category_policy_t info = (task_category_policy_t)policy_info;

		if (count < TASK_CATEGORY_POLICY_COUNT)
			return (KERN_INVALID_ARGUMENT);


		switch(info->role) {
		case TASK_FOREGROUND_APPLICATION:
		case TASK_BACKGROUND_APPLICATION:
		case TASK_DEFAULT_APPLICATION:
			proc_set_task_policy(task, THREAD_NULL,
			                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
			                     info->role);
			break;

		case TASK_CONTROL_APPLICATION:
			if (task != current_task() || task->sec_token.val[0] != 0)
				result = KERN_INVALID_ARGUMENT;
			else
				proc_set_task_policy(task, THREAD_NULL,
				                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
				                     info->role);
			break;

		case TASK_GRAPHICS_SERVER:
			/* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
			if (task != current_task() || task->sec_token.val[0] != 0)
				result = KERN_INVALID_ARGUMENT;
			else
				proc_set_task_policy(task, THREAD_NULL,
				                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
				                     info->role);
			break;
		default:
			result = KERN_INVALID_ARGUMENT;
			break;
		} /* switch (info->role) */

		break;
	}

/* Desired energy-efficiency/performance "quality-of-service" */
	case TASK_BASE_QOS_POLICY:
	{
		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
		kern_return_t kr = task_qos_policy_validate(qosinfo, count);

		if (kr != KERN_SUCCESS)
			return kr;

		task_lock(task);

		/* This uses the latency QoS tracepoint, even though we might be changing both */
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START,
		    proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0);

		task->requested_policy.t_base_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier);
		task->requested_policy.t_base_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier);

		task_policy_update_locked(task, THREAD_NULL);

		task_unlock(task);

		task_policy_update_complete_unlocked(task, THREAD_NULL);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END,
		    proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0);
	}
	break;

	case TASK_OVERRIDE_QOS_POLICY:
	{
		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
		kern_return_t kr = task_qos_policy_validate(qosinfo, count);

		if (kr != KERN_SUCCESS)
			return kr;

		task_lock(task);

		/* This uses the latency QoS tracepoint, even though we might be changing both */
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START,
		    proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0);

		task->requested_policy.t_over_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier);
		task->requested_policy.t_over_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier);

		task_policy_update_locked(task, THREAD_NULL);

		task_unlock(task);

		task_policy_update_complete_unlocked(task, THREAD_NULL);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END,
		    proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0);
	}
	break;

	case TASK_SUPPRESSION_POLICY:
	{

		task_suppression_policy_t info = (task_suppression_policy_t)policy_info;

		if (count < TASK_SUPPRESSION_POLICY_COUNT)
			return (KERN_INVALID_ARGUMENT);

		struct task_qos_policy qosinfo;

		qosinfo.task_latency_qos_tier = info->timer_throttle;
		qosinfo.task_throughput_qos_tier = info->throughput_qos;

		kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);

		if (kr != KERN_SUCCESS)
			return kr;

		task_lock(task);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
		    proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL),
		    0, 0);

		task->requested_policy.t_sup_active     = (info->active)         ? 1 : 0;
		task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu)     ? 1 : 0;
		task->requested_policy.t_sup_timer      = task_qos_extract(info->timer_throttle);
		task->requested_policy.t_sup_disk       = (info->disk_throttle)  ? 1 : 0;
		task->requested_policy.t_sup_cpu_limit  = (info->cpu_limit)      ? 1 : 0;
		task->requested_policy.t_sup_suspend    = (info->suspend)        ? 1 : 0;
		task->requested_policy.t_sup_throughput = task_qos_extract(info->throughput_qos);
		task->requested_policy.t_sup_cpu        = (info->suppressed_cpu) ? 1 : 0;

		task_policy_update_locked(task, THREAD_NULL);

		task_unlock(task);

		task_policy_update_complete_unlocked(task, THREAD_NULL);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
		    proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL),
		    0, 0);

		break;

	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	return (result);
}
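
/*
 * Example caller's view of TASK_SUPPRESSION_POLICY (a sketch; obtaining the
 * task port and error handling are omitted, and the chosen tier values are
 * arbitrary). Field names match the struct consumed above.
 *
 *	struct task_suppression_policy sup = {
 *		.active          = 1,
 *		.lowpri_cpu      = 1,
 *		.timer_throttle  = LATENCY_QOS_TIER_5,
 *		.disk_throttle   = 1,
 *		.cpu_limit       = 0,
 *		.suspend         = 0,
 *		.throughput_qos  = THROUGHPUT_QOS_TIER_5,
 *		.suppressed_cpu  = 1,
 *	};
 *
 *	kr = task_policy_set(task, TASK_SUPPRESSION_POLICY,
 *	    (task_policy_t)&sup, TASK_SUPPRESSION_POLICY_COUNT);
 *
 * While the task holds a boost (requested t_boosted), most of these requested
 * behaviors are ignored when effective policy is computed, so the process can
 * run long enough to disable suppression itself.
 */
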
/* Sets BSD 'nice' value on the task */
kern_return_t
task_importance(
	task_t     task,
	integer_t  importance)
{
	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	if (!task->active) {
		task_unlock(task);

		return (KERN_TERMINATED);
	}

	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
		task_unlock(task);

		return (KERN_INVALID_ARGUMENT);
	}

	task->importance = importance;

	/* TODO: tracepoint? */

	/* Redrive only the task priority calculation */
	task_policy_update_task_locked(task, FALSE, FALSE);

	task_unlock(task);

	return (KERN_SUCCESS);
}

kern_return_t
task_policy_get(
	task_t                  task,
	task_policy_flavor_t    flavor,
	task_policy_t           policy_info,
	mach_msg_type_number_t  *count,
	boolean_t               *get_default)
{
	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	switch (flavor) {

	case TASK_CATEGORY_POLICY:
	{
		task_category_policy_t info = (task_category_policy_t)policy_info;

		if (*count < TASK_CATEGORY_POLICY_COUNT)
			return (KERN_INVALID_ARGUMENT);

		if (*get_default)
			info->role = TASK_UNSPECIFIED;
		else
			info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
		break;
	}

	case TASK_BASE_QOS_POLICY: /* FALLTHRU */
	case TASK_OVERRIDE_QOS_POLICY:
	{
		task_qos_policy_t info = (task_qos_policy_t)policy_info;

		if (*count < TASK_QOS_POLICY_COUNT)
			return (KERN_INVALID_ARGUMENT);

		if (*get_default) {
			info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
			info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		} else if (flavor == TASK_BASE_QOS_POLICY) {
			task_lock(task);

			info->task_latency_qos_tier    = task_qos_latency_package(task->requested_policy.t_base_latency_qos);
			info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_base_through_qos);

			task_unlock(task);
		} else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
			task_lock(task);

			info->task_latency_qos_tier    = task_qos_latency_package(task->requested_policy.t_over_latency_qos);
			info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_over_through_qos);

			task_unlock(task);
		}

		break;
	}

	case TASK_POLICY_STATE:
	{
		task_policy_state_t info = (task_policy_state_t)policy_info;

		if (*count < TASK_POLICY_STATE_COUNT)
			return (KERN_INVALID_ARGUMENT);

		/* Only root can get this info */
		if (current_task()->sec_token.val[0] != 0)
			return KERN_PROTECTION_FAILURE;

		task_lock(task);

		if (*get_default) {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
			info->imp_assertcnt = 0;
			info->imp_externcnt = 0;
			info->flags = 0;
		} else {
			info->requested = task_requested_bitfield(task, THREAD_NULL);
			info->effective = task_effective_bitfield(task, THREAD_NULL);
			info->pending   = task_pending_bitfield(task, THREAD_NULL);
			info->imp_assertcnt = task->task_imp_assertcnt;
			info->imp_externcnt = task->task_imp_externcnt;

			info->flags = 0;
			info->flags |= (task->imp_receiver ? TASK_IMP_RECEIVER : 0);
			info->flags |= (task->imp_donor    ? TASK_IMP_DONOR    : 0);
		}

		task_unlock(task);

		break;
	}

	case TASK_SUPPRESSION_POLICY:
	{
		task_suppression_policy_t info = (task_suppression_policy_t)policy_info;

		if (*count < TASK_SUPPRESSION_POLICY_COUNT)
			return (KERN_INVALID_ARGUMENT);

		task_lock(task);

		if (*get_default) {
			info->active         = 0;
			info->lowpri_cpu     = 0;
			info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
			info->disk_throttle  = 0;
			info->cpu_limit      = 0;
			info->suspend        = 0;
			info->throughput_qos = 0;
			info->suppressed_cpu = 0;
		} else {
			info->active         = task->requested_policy.t_sup_active;
			info->lowpri_cpu     = task->requested_policy.t_sup_lowpri_cpu;
			info->timer_throttle = task_qos_latency_package(task->requested_policy.t_sup_timer);
			info->disk_throttle  = task->requested_policy.t_sup_disk;
			info->cpu_limit      = task->requested_policy.t_sup_cpu_limit;
			info->suspend        = task->requested_policy.t_sup_suspend;
			info->throughput_qos = task_qos_throughput_package(task->requested_policy.t_sup_throughput);
			info->suppressed_cpu = task->requested_policy.t_sup_cpu;
		}

		task_unlock(task);
		break;
	}

	default:
		return (KERN_INVALID_ARGUMENT);
	}

	return (KERN_SUCCESS);
}
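
/*
 * Example caller's view of task_policy_get() (a sketch): reading back the
 * requested base QoS tiers. Passing get_default = TRUE would return the
 * unspecified-tier defaults instead of the stored values.
 *
 *	struct task_qos_policy qos;
 *	mach_msg_type_number_t count = TASK_QOS_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *
 *	kr = task_policy_get(task, TASK_BASE_QOS_POLICY,
 *	    (task_policy_t)&qos, &count, &get_default);
 *	// On success, qos.task_latency_qos_tier and qos.task_throughput_qos_tier
 *	// hold the re-packaged tier values stored via TASK_BASE_QOS_POLICY above.
 */
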
/*
 * Called at task creation
 * We calculate the correct effective but don't apply it to anything yet.
 * The threads, etc will inherit from the task as they get created.
 */
void
task_policy_create(task_t task, int parent_boosted)
{
	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
		if (parent_boosted) {
			task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
			task_importance_mark_donor(task, TRUE);
		} else {
			task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
			task_importance_mark_receiver(task, FALSE);
		}
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
	    proc_selfpid(), audit_token_pid_from_task(task),
	    teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);

	task_policy_update_internal_locked(task, THREAD_NULL, TRUE);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
	    proc_selfpid(), audit_token_pid_from_task(task),
	    teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
}

static void
task_policy_update_locked(task_t task, thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START),
	    proc_selfpid(), targetid(task, thread),
	    teffective(task, thread), tpriority(task, thread), 0);

	task_policy_update_internal_locked(task, thread, FALSE);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END,
	    proc_selfpid(), targetid(task, thread),
	    teffective(task, thread), tpriority(task, thread), 0);
}

/*
 * One state update function TO RULE THEM ALL
 *
 * This function updates the task or thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Must call update_complete after unlocking the task,
 * as some subsystems cannot be updated while holding the task lock.
 *
 * Called with task locked, not thread
 */
static void
task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create)
{
	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;

	/*
	 * Step 1:
	 *  Gather requested policy
	 */

	struct task_requested_policy requested =
	        (on_task) ? task->requested_policy : thread->requested_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy and task state
	 *  Rules:
	 *      If in an 'on_task' block, must only look at and set fields starting with t_
	 *      If operating on a task, don't touch anything starting with th_
	 *      If operating on a thread, don't touch anything starting with t_
	 *      Don't change requested, it won't take effect
	 */

	struct task_effective_policy next = {};

	/* Calculate DARWIN_BG */
	boolean_t wants_darwinbg       = FALSE;
	boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
	boolean_t wants_watchersbg     = FALSE; /* Do I want my pidbound threads to be bg */
	boolean_t wants_tal            = FALSE; /* Do I want the effects of TAL mode */

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * Only true DARWIN_BG changes cause watchers to transition.
	 */
	if (requested.int_darwinbg || requested.ext_darwinbg)
		wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;

	if (on_task) {
		/* Background TAL apps are throttled when TAL is enabled */
		if (requested.t_apptype     == TASK_APPTYPE_APP_TAL &&
		    requested.t_role        == TASK_BACKGROUND_APPLICATION &&
		    requested.t_tal_enabled == 1) {
			wants_tal = TRUE;
			next.t_tal_engaged = 1;
		}

		/* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
		if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
		    requested.t_boosted == 0)
			wants_darwinbg = TRUE;

		/* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
		if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND)
			wants_darwinbg = TRUE;
	} else {
		if (requested.th_pidbind_bg)
			wants_all_sockets_bg = wants_darwinbg = TRUE;

		if (requested.th_workq_bg)
			wants_darwinbg = TRUE;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.darwinbg = 1;
		/* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */
		next.new_sockets_bg = 1;
		next.lowpri_cpu = 1;
	}

	if (wants_all_sockets_bg)
		next.all_sockets_bg = 1;

	if (on_task && wants_watchersbg)
		next.t_watchers_bg = 1;

	/* Calculate low CPU priority */

	boolean_t wants_lowpri_cpu = FALSE;

	if (wants_darwinbg || wants_tal)
		wants_lowpri_cpu = TRUE;

	if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
		wants_lowpri_cpu = TRUE;

	if (wants_lowpri_cpu)
		next.lowpri_cpu = 1;

	/* Calculate IO policy */

	/* Update BG IO policy (so we can see if it has changed) */
	next.bg_iotier = requested.bg_iotier;

	int iopol = THROTTLE_LEVEL_TIER0;

	if (wants_darwinbg)
		iopol = MAX(iopol, requested.bg_iotier);

	if (on_task) {
		if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD)
			iopol = MAX(iopol, proc_standard_daemon_tier);

		if (requested.t_sup_disk && requested.t_boosted == 0)
			iopol = MAX(iopol, proc_suppressed_disk_tier);

		if (wants_tal)
			iopol = MAX(iopol, proc_tal_disk_tier);
	}

	iopol = MAX(iopol, requested.int_iotier);
	iopol = MAX(iopol, requested.ext_iotier);

	next.io_tier = iopol;

	/* Calculate Passive IO policy */

	if (requested.ext_iopassive || requested.int_iopassive)
		next.io_passive = 1;

	/* Calculate miscellaneous policy */

	if (on_task) {
		/* Update role */
		next.t_role = requested.t_role;

		/* Calculate suppression-active flag */
		if (requested.t_sup_active && requested.t_boosted == 0)
			next.t_sup_active = 1;

		/* Calculate suspend policy */
		if (requested.t_sup_suspend && requested.t_boosted == 0)
			next.t_suspended = 1;

		/* Calculate GPU Access policy */
		if (requested.t_int_gpu_deny || requested.t_ext_gpu_deny)
			next.t_gpu_deny = 1;


		/* Calculate timer QOS */
		int latency_qos = requested.t_base_latency_qos;

		if (requested.t_sup_timer && requested.t_boosted == 0)
			latency_qos = requested.t_sup_timer;

		if (requested.t_over_latency_qos != 0)
			latency_qos = requested.t_over_latency_qos;

		/* Treat the windowserver specially */
		if (requested.t_role == TASK_GRAPHICS_SERVER)
			latency_qos = proc_graphics_timer_qos;

		next.t_latency_qos = latency_qos;

		/* Calculate throughput QOS */
		int through_qos = requested.t_base_through_qos;

		if (requested.t_sup_throughput && requested.t_boosted == 0)
			through_qos = requested.t_sup_throughput;

		if (requested.t_over_through_qos != 0)
			through_qos = requested.t_over_through_qos;

		next.t_through_qos = through_qos;

		/* Calculate suppressed CPU priority */
		if (requested.t_sup_cpu && requested.t_boosted == 0)
			next.t_suppressed_cpu = 1;
	}

	if (requested.terminated) {
		/*
		 * Shoot down the throttles that slow down exit or response to SIGTERM
		 * We don't need to shoot down:
		 * passive        (don't want to cause others to throttle)
		 * all_sockets_bg (don't need to iterate FDs on every exit)
		 * new_sockets_bg (doesn't matter for exiting process)
		 * gpu deny       (doesn't matter for exiting process)
		 * pidsuspend     (jetsam-ed BG process shouldn't run again)
		 * watchers_bg    (watcher threads don't need to be unthrottled)
		 * t_latency_qos  (affects userspace timers only)
		 */

		next.terminated = 1;
		next.darwinbg = 0;
		next.lowpri_cpu = 0;
		next.io_tier = THROTTLE_LEVEL_TIER0;
		if (on_task) {
			next.t_tal_engaged = 0;
			next.t_role = TASK_UNSPECIFIED;
			next.t_suppressed_cpu = 0;

			/* TODO: This should only be shot down on SIGTERM, not exit */
			next.t_suspended = 0;
		}
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct task_effective_policy prev =
	        (on_task) ? task->effective_policy : thread->effective_policy;

	/*
	 * Check for invalid transitions here for easier debugging
	 * TODO: dump the structs as hex in the panic string
	 */
	if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg)
		panic("unexpected network change for kernel task");

	/* This is the point where the new values become visible to other threads */
	if (on_task)
		task->effective_policy = next;
	else
		thread->effective_policy = next;

	/* Don't do anything further to a half-formed task or thread */
	if (in_create)
		return;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the task lock
	 *  Preserve pending updates that may still be waiting to be applied
	 */

	struct task_pended_policy pended =
	        (on_task) ? task->pended_policy : thread->pended_policy;

	if (prev.all_sockets_bg != next.all_sockets_bg)
		pended.update_sockets = 1;

	if (on_task) {
		/* Only re-scan the timer list if the qos level is getting less strong */
		if (prev.t_latency_qos > next.t_latency_qos)
			pended.t_update_timers = 1;

	}

	if (on_task)
		task->pended_policy = pended;
	else
		thread->pended_policy = pended;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE;

	if (on_task) {
		if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) {
			task_hold_locked(task);
			task_wait_locked(task, FALSE);
		}
		if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) {
			task_release_locked(task);
		}

		boolean_t update_threads = FALSE;

		if (prev.bg_iotier != next.bg_iotier)
			update_threads = TRUE;

		if (prev.terminated != next.terminated)
			update_threads = TRUE;

		task_policy_update_task_locked(task, update_throttle, update_threads);
	} else {
		int update_cpu = 0;

		if (prev.lowpri_cpu != next.lowpri_cpu)
			update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU);

		task_policy_update_thread_locked(thread, update_cpu, update_throttle);
	}
}

/* Despite the name, the thread's task is locked, the thread is not */
static void
task_policy_update_thread_locked(thread_t thread,
                                 int update_cpu,
                                 boolean_t update_throttle)
{
	thread_precedence_policy_data_t policy;

	if (update_throttle) {
		rethrottle_thread(thread->uthread);
	}

	/*
	 * TODO: pidbind needs to stuff remembered importance into saved_importance,
	 * and properly deal with bg'ed threads being pidbound and un-bg'ing while pidbound
	 *
	 * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this.
	 */
	if (update_cpu == DO_LOWPRI_CPU) {
		thread->saved_importance = thread->importance;
		policy.importance = INT_MIN;
	} else if (update_cpu == UNDO_LOWPRI_CPU) {
		policy.importance = thread->saved_importance;
		thread->saved_importance = 0;
	}

	/* Takes thread lock and thread mtx lock */
	if (update_cpu)
		thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
		                           (thread_policy_t)&policy,
		                           THREAD_PRECEDENCE_POLICY_COUNT);
}

/*
 * Calculate priority on a task, loop through its threads, and tell them about
 * priority changes and throttle changes.
 */
static void
task_policy_update_task_locked(task_t task,
                               boolean_t update_throttle,
                               boolean_t update_threads)
{
	boolean_t update_priority = FALSE;

	if (task == kernel_task)
		panic("Attempting to set task policy on kernel_task");

	int priority     = BASEPRI_DEFAULT;
	int max_priority = MAXPRI_USER;

	if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) {
		priority = MAXPRI_THROTTLE;
		max_priority = MAXPRI_THROTTLE;
	} else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) {
		priority = MAXPRI_SUPPRESSED;
		max_priority = MAXPRI_SUPPRESSED;
	} else {
		switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) {
		case TASK_FOREGROUND_APPLICATION:
			priority = BASEPRI_FOREGROUND;
			break;
		case TASK_BACKGROUND_APPLICATION:
			priority = BASEPRI_BACKGROUND;
			break;
		case TASK_CONTROL_APPLICATION:
			priority = BASEPRI_CONTROL;
			break;
		case TASK_GRAPHICS_SERVER:
			priority = BASEPRI_GRAPHICS;
			max_priority = MAXPRI_RESERVED;
			break;
		default:
			break;
		}

		/* factor in 'nice' value */
		priority += task->importance;
	}

	/* avoid extra work if priority isn't changing */
	if (task->priority != priority || task->max_priority != max_priority) {
		update_priority = TRUE;

		/* update the scheduling priority for the task */
		task->max_priority = max_priority;

		if (priority > task->max_priority)
			priority = task->max_priority;
		else if (priority < MINPRI)
			priority = MINPRI;

		task->priority = priority;
	}

	/* Loop over the threads in the task only once, and only if necessary */
	if (update_threads || update_throttle || update_priority) {
		thread_t thread;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (update_priority) {
				thread_mtx_lock(thread);

				if (thread->active)
					thread_task_priority(thread, priority, max_priority);

				thread_mtx_unlock(thread);
			}

			if (update_throttle) {
				rethrottle_thread(thread->uthread);
			}

			if (update_threads) {
				thread->requested_policy.bg_iotier  = task->effective_policy.bg_iotier;
				thread->requested_policy.terminated = task->effective_policy.terminated;

				task_policy_update_internal_locked(task, thread, FALSE);
				/* The thread policy must not emit any completion actions due to this change. */
			}
		}
	}
}
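
/*
 * Worked example of the priority calculation above (a sketch): a foreground
 * application whose BSD nice value yielded task->importance == -5, with
 * neither lowpri_cpu nor suppressed_cpu in effect, gets
 *
 *	priority     = BASEPRI_FOREGROUND + (-5);
 *	max_priority = MAXPRI_USER;
 *
 * after which priority is clamped to [MINPRI, task->max_priority] and pushed
 * to each active thread via thread_task_priority().
 */
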
/*
 * Called with task unlocked to do things that can't be done while holding the task lock
 * To keep things consistent, only one thread can make progress through here at a time for any one task.
 *
 * TODO: tracepoints
 */
static void
task_policy_update_complete_unlocked(task_t task, thread_t thread)
{
	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;

	task_lock(task);

	while (task->pended_policy.t_updating_policy != 0) {
		assert_wait((event_t)&task->pended_policy, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
		task_lock(task);
	}

	/* Take a snapshot of the current state */

	struct task_pended_policy pended =
	        (on_task) ? task->pended_policy : thread->pended_policy;

	struct task_effective_policy effective =
	        (on_task) ? task->effective_policy : thread->effective_policy;

	/* Mark the pended operations as being handled */
	if (on_task)
		task->pended_policy = default_task_pended_policy;
	else
		thread->pended_policy = default_task_pended_policy;

	task->pended_policy.t_updating_policy = 1;

	task_unlock(task);

	/* Update the other subsystems with the new state */

#ifdef MACH_BSD
	if (pended.update_sockets)
		proc_apply_task_networkbg(task->bsd_info, thread, effective.all_sockets_bg);
#endif /* MACH_BSD */

	if (on_task) {
		/* The timer throttle has been removed, we need to look for expired timers and fire them */
		if (pended.t_update_timers)
			ml_timer_evaluate();

	}

	/* Wake up anyone waiting to make another update */
	task_lock(task);
	task->pended_policy.t_updating_policy = 0;
	thread_wakeup(&task->pended_policy);
	task_unlock(task);
}

/*
 * Initiate a task policy state transition
 *
 * Everything that modifies requested except functions that need to hold the task lock
 * should use this function
 *
 * Argument validation should be performed before reaching this point.
 *
 * TODO: Do we need to check task->active or thread->active?
 */
void
proc_set_task_policy(task_t   task,
                     thread_t thread,
                     int      category,
                     int      flavor,
                     int      value)
{
	task_lock(task);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
	    proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0);

	proc_set_task_policy_locked(task, thread, category, flavor, value);

	task_policy_update_locked(task, thread);

	task_unlock(task);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
	    proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0);

	task_policy_update_complete_unlocked(task, thread);
}

/*
 * Initiate a task policy state transition on a thread with its TID
 * Useful if you cannot guarantee the thread won't get terminated
 */
void
proc_set_task_policy_thread(task_t   task,
                            uint64_t tid,
                            int      category,
                            int      flavor,
                            int      value)
{
	thread_t thread;
	thread_t self = current_thread();

	task_lock(task);

	if (tid == TID_NULL || tid == self->thread_id)
		thread = self;
	else
		thread = task_findtid(task, tid);

	if (thread == THREAD_NULL) {
		task_unlock(task);
		return;
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0);

	proc_set_task_policy_locked(task, thread, category, flavor, value);

	task_policy_update_locked(task, thread);

	task_unlock(task);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0);

	task_policy_update_complete_unlocked(task, thread);
}
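
/*
 * Example caller's view of proc_set_task_policy_thread() (a sketch; 'tid' is
 * a hypothetical thread ID the caller obtained elsewhere): pid-binding a
 * thread into the background by TID rather than by thread pointer.
 *
 *	proc_set_task_policy_thread(task, tid, TASK_POLICY_ATTRIBUTE,
 *	    TASK_POLICY_PIDBIND_BG, TASK_POLICY_ENABLE);
 *
 * If the TID no longer names a live thread in the task, the call is silently
 * a no-op, which is the point of this variant.
 */
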
/*
 * Set the requested state for a specific flavor to a specific value.
 *
 * TODO:
 * Verify that arguments to non iopol things are 1 or 0
 */
static void
proc_set_task_policy_locked(task_t   task,
                            thread_t thread,
                            int      category,
                            int      flavor,
                            int      value)
{
	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;

	int tier, passive;

	struct task_requested_policy requested =
	        (on_task) ? task->requested_policy : thread->requested_policy;

	switch (flavor) {

	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL)
			requested.ext_darwinbg = value;
		else
			requested.int_darwinbg = value;
		break;

	case TASK_POLICY_IOPOL:
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.ext_iotier    = tier;
			requested.ext_iopassive = passive;
		} else {
			requested.int_iotier    = tier;
			requested.int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL)
			requested.ext_iotier = value;
		else
			requested.int_iotier = value;
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL)
			requested.ext_iopassive = value;
		else
			requested.int_iopassive = value;
		break;

	/* Category: EXTERNAL and INTERNAL, task only */

	case TASK_POLICY_GPU_DENY:
		assert(on_task);
		if (category == TASK_POLICY_EXTERNAL)
			requested.t_ext_gpu_deny = value;
		else
			requested.t_int_gpu_deny = value;
		break;

	case TASK_POLICY_DARWIN_BG_AND_GPU:
		assert(on_task);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.ext_darwinbg   = value;
			requested.t_ext_gpu_deny = value;
		} else {
			requested.int_darwinbg   = value;
			requested.t_int_gpu_deny = value;
		}
		break;

	/* Category: INTERNAL, task only */

	case TASK_POLICY_DARWIN_BG_IOPOL:
		assert(on_task && category == TASK_POLICY_INTERNAL);
		proc_iopol_to_tier(value, &tier, &passive);
		requested.bg_iotier = tier;
		break;

	/* Category: ATTRIBUTE, task only */

	case TASK_POLICY_TAL:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.t_tal_enabled = value;
		break;

	case TASK_POLICY_BOOST:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.t_boosted = value;
		break;

	case TASK_POLICY_ROLE:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.t_role = value;
		break;

	case TASK_POLICY_TERMINATED:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.terminated = value;
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.th_pidbind_bg = value;
		break;

	case TASK_POLICY_WORKQ_BG:
		assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
		requested.th_workq_bg = value;
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	if (on_task)
		task->requested_policy = requested;
	else
		thread->requested_policy = requested;
}
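
/*
 * A sketch of the set/get contract implemented above and below:
 * proc_get_task_policy() returns the stored *requested* value, which may
 * differ from what is in effect.
 *
 *	proc_set_task_policy(task, THREAD_NULL,
 *	    TASK_POLICY_EXTERNAL, TASK_POLICY_IOPOL, IOPOL_THROTTLE);
 *
 *	int iopol = proc_get_task_policy(task, THREAD_NULL,
 *	    TASK_POLICY_EXTERNAL, TASK_POLICY_IOPOL);
 *	// iopol == IOPOL_THROTTLE, even if the effective io_tier ended up
 *	// different once all the other inputs were factored in.
 */
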
/*
 * Gets what you set. Effective values may be different.
 */
int
proc_get_task_policy(task_t   task,
                     thread_t thread,
                     int      category,
                     int      flavor)
{
	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;

	int value = 0;

	task_lock(task);

	struct task_requested_policy requested =
	        (on_task) ? task->requested_policy : thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL)
			value = requested.ext_darwinbg;
		else
			value = requested.int_darwinbg;
		break;
	case TASK_POLICY_IOPOL:
		if (category == TASK_POLICY_EXTERNAL)
			value = proc_tier_to_iopol(requested.ext_iotier,
			                           requested.ext_iopassive);
		else
			value = proc_tier_to_iopol(requested.int_iotier,
			                           requested.int_iopassive);
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL)
			value = requested.ext_iotier;
		else
			value = requested.int_iotier;
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL)
			value = requested.ext_iopassive;
		else
			value = requested.int_iopassive;
		break;
	case TASK_POLICY_GPU_DENY:
		assert(on_task);
		if (category == TASK_POLICY_EXTERNAL)
			value = requested.t_ext_gpu_deny;
		else
			value = requested.t_int_gpu_deny;
		break;
	case TASK_POLICY_DARWIN_BG_IOPOL:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		value = proc_tier_to_iopol(requested.bg_iotier, 0);
		break;
	case TASK_POLICY_ROLE:
		assert(on_task && category == TASK_POLICY_ATTRIBUTE);
		value = requested.t_role;
		break;
	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	task_unlock(task);

	return value;
}


/*
 * Functions for querying effective state for relevant subsystems
 * ONLY the relevant subsystem should query these.
 * NEVER take a value from one of the 'effective' functions and stuff it into a setter.
 */

int
proc_get_effective_task_policy(task_t task, int flavor)
{
	return proc_get_effective_policy(task, THREAD_NULL, flavor);
}

int
proc_get_effective_thread_policy(thread_t thread, int flavor)
{
	return proc_get_effective_policy(thread->task, thread, flavor);
}

/*
 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
 *
 * NOTE: This accessor does not take the task lock.
 * Notifications of state updates need to be externally synchronized with state queries.
 * This routine *MUST* remain interrupt safe, as it is potentially invoked
 * within the context of a timer interrupt.
 */
static int
proc_get_effective_policy(task_t   task,
                          thread_t thread,
                          int      flavor)
{
	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This backs the KPI call proc_pidbackgrounded to find
		 * out if a pid is backgrounded,
		 * as well as proc_get_effective_thread_policy.
		 * Its main use is within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * Returns 1 for background mode, 0 for normal mode
		 */
		if (on_task)
			value = task->effective_policy.darwinbg;
		else
			value = (task->effective_policy.darwinbg ||
			         thread->effective_policy.darwinbg) ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		if (on_task)
			value = task->effective_policy.io_tier;
		else {
			value = MAX(task->effective_policy.io_tier,
			            thread->effective_policy.io_tier);
			if (thread->iotier_override != THROTTLE_LEVEL_NONE)
				value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 */
		if (on_task)
			value = task->effective_policy.io_passive;
		else
			value = (task->effective_policy.io_passive ||
			         thread->effective_policy.io_passive) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		if (on_task)
			value = task->effective_policy.new_sockets_bg;
		else
			value = (task->effective_policy.new_sockets_bg ||
			         thread->effective_policy.new_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_LOWPRI_CPU:
		/*
		 * Returns 1 for low priority cpu mode, 0 for normal mode
		 */
		if (on_task)
			value = task->effective_policy.lowpri_cpu;
		else
			value = (task->effective_policy.lowpri_cpu ||
			         thread->effective_policy.lowpri_cpu) ? 1 : 0;
		break;
	case TASK_POLICY_SUPPRESSED_CPU:
		/*
		 * Returns 1 for suppressed cpu mode, 0 for normal mode
		 */
		assert(on_task);
		value = task->effective_policy.t_suppressed_cpu;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a QoS tier (0-6)
		 */
		assert(on_task);
		value = task->effective_policy.t_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * Returns a QoS tier (0-6)
		 */
		assert(on_task);
		value = task->effective_policy.t_through_qos;
		break;
	case TASK_POLICY_GPU_DENY:
		/*
		 * This is where IOKit calls into task_policy to find out whether
		 * it should allow access to the GPU.
		 * Returns 1 for NOT allowed, returns 0 for allowed
		 */
		assert(on_task);
		value = task->effective_policy.t_gpu_deny;
		break;
	case TASK_POLICY_ROLE:
		assert(on_task);
		value = task->effective_policy.t_role;
		break;
	case TASK_POLICY_WATCHERS_BG:
		assert(on_task);
		value = task->effective_policy.t_watchers_bg;
		break;
	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
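
/*
 * Worked example of the thread-level TASK_POLICY_IO resolution above (a
 * sketch): the stricter (higher) of the task and thread tiers wins, and a
 * kernel-granted iotier_override can only make the result less restrictive.
 *
 *	task effective io_tier   = THROTTLE_LEVEL_TIER2
 *	thread effective io_tier = THROTTLE_LEVEL_TIER0
 *	thread iotier_override   = THROTTLE_LEVEL_TIER1
 *
 *	value = MAX(TIER2, TIER0);      // TIER2
 *	value = MIN(value, TIER1);      // THROTTLE_LEVEL_TIER1 is returned
 */
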
/*
 * Convert from IOPOL_* values to throttle tiers.
 *
 * TODO: Can this be made more compact, like an array lookup
 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
 */

static void
proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
{
	*passive = 0;
	*tier = 0;
	switch (iopolicy) {
	case IOPOL_IMPORTANT:
		*tier = THROTTLE_LEVEL_TIER0;
		break;
	case IOPOL_PASSIVE:
		*tier = THROTTLE_LEVEL_TIER0;
		*passive = 1;
		break;
	case IOPOL_STANDARD:
		*tier = THROTTLE_LEVEL_TIER1;
		break;
	case IOPOL_UTILITY:
		*tier = THROTTLE_LEVEL_TIER2;
		break;
	case IOPOL_THROTTLE:
		*tier = THROTTLE_LEVEL_TIER3;
		break;
	default:
		panic("unknown I/O policy %d", iopolicy);
		break;
	}
}

static int
proc_tier_to_iopol(int tier, int passive)
{
	if (passive == 1) {
		switch (tier) {
		case THROTTLE_LEVEL_TIER0:
			return IOPOL_PASSIVE;
			break;
		default:
			panic("unknown passive tier %d", tier);
			return IOPOL_DEFAULT;
			break;
		}
	} else {
		switch (tier) {
		case THROTTLE_LEVEL_NONE:
			return IOPOL_DEFAULT;
			break;
		case THROTTLE_LEVEL_TIER0:
			return IOPOL_IMPORTANT;
			break;
		case THROTTLE_LEVEL_TIER1:
			return IOPOL_STANDARD;
			break;
		case THROTTLE_LEVEL_TIER2:
			return IOPOL_UTILITY;
			break;
		case THROTTLE_LEVEL_TIER3:
			return IOPOL_THROTTLE;
			break;
		default:
			panic("unknown tier %d", tier);
			return IOPOL_DEFAULT;
			break;
		}
	}
}
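
/*
 * The resulting mapping, summarized (derived from the two converters above;
 * passive exists only at tier 0, and THROTTLE_LEVEL_NONE appears only in the
 * tier-to-iopol direction):
 *
 *	IOPOL_IMPORTANT  <->  THROTTLE_LEVEL_TIER0
 *	IOPOL_PASSIVE    <->  THROTTLE_LEVEL_TIER0 + passive
 *	IOPOL_STANDARD   <->  THROTTLE_LEVEL_TIER1
 *	IOPOL_UTILITY    <->  THROTTLE_LEVEL_TIER2
 *	IOPOL_THROTTLE   <->  THROTTLE_LEVEL_TIER3
 *	IOPOL_DEFAULT    <-   THROTTLE_LEVEL_NONE
 */
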
/* apply internal backgrounding for workqueue threads */
int
proc_apply_workq_bgthreadpolicy(thread_t thread)
{
	if (thread == THREAD_NULL)
		return ESRCH;

	proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
	                     TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE);

	return(0);
}

/*
 * remove internal backgrounding for workqueue threads
 * does NOT go find sockets created while BG and unbackground them
 */
int
proc_restore_workq_bgthreadpolicy(thread_t thread)
{
	if (thread == THREAD_NULL)
		return ESRCH;

	proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
	                     TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE);

	return(0);
}

/* here for temporary compatibility */
int
proc_setthread_saved_importance(__unused thread_t thread, __unused int importance)
{
	return(0);
}

/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE)
			policy = MIN(current_override, policy);

		if (current_override == policy) {
			/* no effective change */
			return;
		}
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(thread->uthread);
}

/*
 * Called at process exec to initialize the apptype of a process
 */
void
proc_set_task_apptype(task_t task, int apptype)
{
	task_lock(task);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
	    proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL),
	    apptype, 0);

	switch (apptype) {
	case TASK_APPTYPE_APP_TAL:
		/* TAL starts off enabled by default */
		task->requested_policy.t_tal_enabled = 1;
		/* fall through */

	case TASK_APPTYPE_APP_DEFAULT:
	case TASK_APPTYPE_DAEMON_INTERACTIVE:
		task->requested_policy.t_apptype = apptype;

		task_importance_mark_donor(task, TRUE);
		/* Apps (and interactive daemons) are boost receivers on desktop for suppression behaviors */
		task_importance_mark_receiver(task, TRUE);
		break;

	case TASK_APPTYPE_DAEMON_STANDARD:
		task->requested_policy.t_apptype = apptype;

		task_importance_mark_donor(task, TRUE);
		task_importance_mark_receiver(task, FALSE);
		break;

	case TASK_APPTYPE_DAEMON_ADAPTIVE:
		task->requested_policy.t_apptype = apptype;

		task_importance_mark_donor(task, FALSE);
		task_importance_mark_receiver(task, TRUE);
		break;

	case TASK_APPTYPE_DAEMON_BACKGROUND:
		task->requested_policy.t_apptype = apptype;

		task_importance_mark_donor(task, FALSE);
		task_importance_mark_receiver(task, FALSE);
		break;

	default:
		panic("invalid apptype %d", apptype);
		break;
	}

	task_policy_update_locked(task, THREAD_NULL);

	task_unlock(task);

	task_policy_update_complete_unlocked(task, THREAD_NULL);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
	    proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL),
	    task->imp_receiver, 0);
}
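
/*
 * Summary of the importance defaults applied by the switch above:
 *
 *	apptype                          donor  receiver
 *	TASK_APPTYPE_APP_DEFAULT         yes    yes
 *	TASK_APPTYPE_APP_TAL             yes    yes   (TAL starts enabled)
 *	TASK_APPTYPE_DAEMON_INTERACTIVE  yes    yes
 *	TASK_APPTYPE_DAEMON_STANDARD     yes    no
 *	TASK_APPTYPE_DAEMON_ADAPTIVE     no     yes
 *	TASK_APPTYPE_DAEMON_BACKGROUND   no     no
 */
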
/* for process_policy to check before attempting to set */
boolean_t
proc_task_is_tal(task_t task)
{
	return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
}

/* for telemetry */
integer_t
task_grab_latency_qos(task_t task)
{
	return task_qos_latency_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
}

/* update the darwin background action state in the flags field for libproc */
int
proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
{
	if (task->requested_policy.ext_darwinbg)
		*flagsp |= PROC_FLAG_EXT_DARWINBG;

	if (task->requested_policy.int_darwinbg)
		*flagsp |= PROC_FLAG_DARWINBG;


	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE)
		*flagsp |= PROC_FLAG_ADAPTIVE;

	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1)
		*flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;

	if (task->imp_donor)
		*flagsp |= PROC_FLAG_IMPORTANCE_DONOR;

	if (task->effective_policy.t_sup_active)
		*flagsp |= PROC_FLAG_SUPPRESSED;

	return(0);
}

/* All per-thread state is in the first 32-bits of the bitfield */
void
proc_get_thread_policy(thread_t thread, thread_policy_state_t info)
{
	task_t task = thread->task;
	task_lock(task);
	info->requested = (integer_t)task_requested_bitfield(task, thread);
	info->effective = (integer_t)task_effective_bitfield(task, thread);
	info->pending   = (integer_t)task_pending_bitfield(task, thread);
	task_unlock(task);
}


/* dump requested for tracepoint */
static uintptr_t
trequested(task_t task, thread_t thread)
{
	return (uintptr_t) task_requested_bitfield(task, thread);
}

/* dump effective for tracepoint */
static uintptr_t
teffective(task_t task, thread_t thread)
{
	return (uintptr_t) task_effective_bitfield(task, thread);
}

/* dump pending for tracepoint */
static uintptr_t
tpending(task_t task, thread_t thread)
{
	return (uintptr_t) task_pending_bitfield(task, thread);
}

uint64_t
task_requested_bitfield(task_t task, thread_t thread)
{
	uint64_t bits = 0;
	struct task_requested_policy requested =
	        (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;

	bits |= (requested.int_darwinbg     ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.ext_darwinbg     ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.int_iotier       ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.ext_iotier       ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.int_iopassive    ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.ext_iopassive    ? POLICY_REQ_EXT_PASSIVE_IO : 0);
	bits |= (requested.bg_iotier        ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
	bits |= (requested.terminated       ? POLICY_REQ_TERMINATED     : 0);

	bits |= (requested.th_pidbind_bg    ? POLICY_REQ_PIDBIND_BG     : 0);
	bits |= (requested.th_workq_bg      ? POLICY_REQ_WORKQ_BG       : 0);

	bits |= (requested.t_boosted        ? POLICY_REQ_BOOSTED        : 0);
	bits |= (requested.t_tal_enabled    ? POLICY_REQ_TAL_ENABLED    : 0);
	bits |= (requested.t_int_gpu_deny   ? POLICY_REQ_INT_GPU_DENY   : 0);
	bits |= (requested.t_ext_gpu_deny   ? POLICY_REQ_EXT_GPU_DENY   : 0);
	bits |= (requested.t_apptype        ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
	bits |= (requested.t_role           ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0);

	bits |= (requested.t_sup_active     ? POLICY_REQ_SUP_ACTIVE     : 0);
	bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
	bits |= (requested.t_sup_cpu        ? POLICY_REQ_SUP_CPU        : 0);
	bits |= (requested.t_sup_timer      ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
	bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
	bits |= (requested.t_sup_disk       ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
	bits |= (requested.t_sup_cpu_limit  ? POLICY_REQ_SUP_CPU_LIMIT  : 0);
	bits |= (requested.t_sup_suspend    ? POLICY_REQ_SUP_SUSPEND    : 0);
	bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
	bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);

	return bits;
}
uint64_t
task_requested_bitfield(task_t task, thread_t thread)
{
	uint64_t bits = 0;
	struct task_requested_policy requested =
	    (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;

	bits |= (requested.int_darwinbg  ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.ext_darwinbg  ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.int_iotier    ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.ext_iotier    ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
	bits |= (requested.bg_iotier     ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
	bits |= (requested.terminated    ? POLICY_REQ_TERMINATED     : 0);

	bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG     : 0);
	bits |= (requested.th_workq_bg   ? POLICY_REQ_WORKQ_BG       : 0);

	bits |= (requested.t_boosted     ? POLICY_REQ_BOOSTED        : 0);
	bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED    : 0);
	bits |= (requested.t_int_gpu_deny ? POLICY_REQ_INT_GPU_DENY  : 0);
	bits |= (requested.t_ext_gpu_deny ? POLICY_REQ_EXT_GPU_DENY  : 0);
	bits |= (requested.t_apptype     ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
	bits |= (requested.t_role        ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0);

	bits |= (requested.t_sup_active  ? POLICY_REQ_SUP_ACTIVE     : 0);
	bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
	bits |= (requested.t_sup_cpu     ? POLICY_REQ_SUP_CPU        : 0);
	bits |= (requested.t_sup_timer   ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
	bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
	bits |= (requested.t_sup_disk    ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
	bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0);
	bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND    : 0);
	bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
	bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);

	return bits;
}

uint64_t
task_effective_bitfield(task_t task, thread_t thread)
{
	uint64_t bits = 0;
	struct task_effective_policy effective =
	    (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy;

	bits |= (effective.io_tier       ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.io_passive    ? POLICY_EFF_IO_PASSIVE     : 0);
	bits |= (effective.darwinbg      ? POLICY_EFF_DARWIN_BG      : 0);
	bits |= (effective.lowpri_cpu    ? POLICY_EFF_LOWPRI_CPU     : 0);
	bits |= (effective.terminated    ? POLICY_EFF_TERMINATED     : 0);
	bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
	bits |= (effective.bg_iotier     ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);

	bits |= (effective.t_gpu_deny    ? POLICY_EFF_GPU_DENY       : 0);
	bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED    : 0);
	bits |= (effective.t_suspended   ? POLICY_EFF_SUSPENDED      : 0);
	bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG    : 0);
	bits |= (effective.t_sup_active  ? POLICY_EFF_SUP_ACTIVE     : 0);
	bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU     : 0);
	bits |= (effective.t_role        ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0);
	bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	return bits;
}

uint64_t
task_pending_bitfield(task_t task, thread_t thread)
{
	uint64_t bits = 0;
	struct task_pended_policy pended =
	    (thread == THREAD_NULL) ? task->pended_policy : thread->pended_policy;

	bits |= (pended.t_updating_policy ? POLICY_PEND_UPDATING : 0);
	bits |= (pended.update_sockets    ? POLICY_PEND_SOCKETS  : 0);

	bits |= (pended.t_update_timers   ? POLICY_PEND_TIMERS   : 0);
	bits |= (pended.t_update_watchers ? POLICY_PEND_WATCHERS : 0);

	return bits;
}
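/*
 * Illustrative sketch (assumption, not part of the original file): the three
 * packed words above can be snapshotted together for debugging; userspace
 * normally reaches this data via the THREAD_POLICY_STATE flavor of
 * thread_policy_get(). The helper name is hypothetical.
 */
#if 0
static void
example_snapshot_thread_policy(thread_t thread)
{
	struct thread_policy_state info;

	proc_get_thread_policy(thread, &info);
	printf("requested 0x%x effective 0x%x pending 0x%x\n",
	       info.requested, info.effective, info.pending);
}
#endif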
/*
 * Resource usage and CPU related routines
 */

int
proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
{
	int error = 0;
	int scope;

	task_lock(task);
	error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
	task_unlock(task);

	/*
	 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
	 */
	if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
	} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
	} else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
	}

	return(error);
}

/*
 * Configure the default CPU usage monitor parameters.
 *
 * For tasks which have this mechanism activated: if any thread in the
 * task consumes more CPU than this percentage over the monitor's refill
 * interval, an EXC_RESOURCE exception will be generated.
 */
void
proc_init_cpumon_params(void)
{
	if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
	        sizeof (proc_max_cpumon_percentage))) {
		proc_max_cpumon_percentage = DEFAULT_CPUMON_PERCENTAGE;
	}

	if (proc_max_cpumon_percentage > 100) {
		proc_max_cpumon_percentage = 100;
	}

	/* The interval should be specified in seconds. */
	if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
	        sizeof (proc_max_cpumon_interval))) {
		proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
	}

	proc_max_cpumon_interval *= NSEC_PER_SEC;
}
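/*
 * Note (illustrative, not from the original source): the defaults parsed
 * above can be overridden from the boot-args. The boot-arg names are exactly
 * the ones parsed by proc_init_cpumon_params(); the values here are examples:
 *
 *	nvram boot-args="max_cpumon_percentage=50 max_cpumon_interval=300"
 *
 * which would arm the default per-thread monitor at 50% of a CPU over a
 * 300-second refill interval.
 */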
/*
 * Currently supported configurations for CPU limits.
 *
 * Policy                               | Deadline-based CPU limit | Percentage-based CPU limit
 * -------------------------------------+--------------------------+------------------------------
 * PROC_POLICY_RSRCACT_THROTTLE         | ENOTSUP                  | Task-wide scope only
 * PROC_POLICY_RSRCACT_SUSPEND          | Task-wide scope only     | ENOTSUP
 * PROC_POLICY_RSRCACT_TERMINATE        | Task-wide scope only     | ENOTSUP
 * PROC_POLICY_RSRCACT_NOTIFY_KQ        | Task-wide scope only     | ENOTSUP
 * PROC_POLICY_RSRCACT_NOTIFY_EXC       | ENOTSUP                  | Per-thread scope only
 *
 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
 * after the specified amount of wallclock time has elapsed.
 *
 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
 * in the task is added together), or by any one thread in the task (so-called "per-thread" scope).
 *
 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
 * but the potential consumer of the API at the time was insisting on wallclock time instead.
 *
 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
 * CPU limit. All other types of notifications force task-wide scope for the limit.
 */
int
proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
    int cpumon_entitled)
{
	int error = 0;
	int scope;

	/*
	 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
	 */
	switch (policy) {
	// If no policy is explicitly given, the default is to throttle.
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
		if (deadline != 0)
			return (ENOTSUP);
		scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
		break;
	case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
		if (percentage != 0)
			return (ENOTSUP);
		scope = TASK_RUSECPU_FLAGS_DEADLINE;
		break;
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
		if (deadline != 0)
			return (ENOTSUP);
		scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
#ifdef CONFIG_NOMONITORS
		return (error);
#endif /* CONFIG_NOMONITORS */
		break;
	default:
		return (EINVAL);
	}

	task_lock(task);
	if (task != current_task()) {
		task->policy_ru_cpu_ext = policy;
	} else {
		task->policy_ru_cpu = policy;
	}
	error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
	task_unlock(task);
	return(error);
}
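/*
 * Illustrative sketch (assumption, not part of the original file): arming the
 * per-thread CPU usage monitor so that any single thread consuming more than
 * 90% of a CPU over a 5-minute refill interval raises EXC_RESOURCE. The
 * helper name and values are hypothetical; an unentitled caller's values are
 * clamped to the global caps by task_set_cpuusage().
 */
#if 0
static int
example_arm_per_thread_cpumon(task_t task)
{
	return proc_set_task_ruse_cpu(task,
	    TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
	    90,                  /* percentage */
	    300 * NSEC_PER_SEC,  /* interval, in nanoseconds */
	    0,                   /* no deadline */
	    0);                  /* not entitled to escape the caps */
}
#endif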
int
proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
{
	int error = 0;
	int action;
	void *bsdinfo = NULL;

	task_lock(task);
	if (task != current_task()) {
		task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
	} else {
		task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
	}

	error = task_clear_cpuusage_locked(task, cpumon_entitled);
	if (error != 0)
		goto out;

	action = task->applied_ru_cpu;
	if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		/* reset action */
		task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
	}
	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		bsdinfo = task->bsd_info;
		task_unlock(task);
		proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
		goto out1;
	}

out:
	task_unlock(task);
out1:
	return(error);
}

/* used to apply resource limit related actions */
static int
task_apply_resource_actions(task_t task, int type)
{
	int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
	void *bsdinfo = NULL;

	switch (type) {
	case TASK_POLICY_CPU_RESOURCE_USAGE:
		break;
	case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
	case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
	case TASK_POLICY_DISK_RESOURCE_USAGE:
	case TASK_POLICY_NETWORK_RESOURCE_USAGE:
	case TASK_POLICY_POWER_RESOURCE_USAGE:
		return(0);

	default:
		return(1);
	}

	/* only cpu actions for now */
	task_lock(task);

	if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		/* apply action */
		task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
		action = task->applied_ru_cpu_ext;
	} else {
		action = task->applied_ru_cpu_ext;
	}

	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		bsdinfo = task->bsd_info;
		task_unlock(task);
		proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
	} else
		task_unlock(task);

	return(0);
}

/*
 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
 * only allows for one at a time. This means that if there is a per-thread limit active, the other
 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
 * to the caller, and prefer that, but there's no need for that at the moment.
 */
int
task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
{
	*percentagep = 0;
	*intervalp = 0;
	*deadlinep = 0;

	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
		*scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
		*percentagep = task->rusage_cpu_perthr_percentage;
		*intervalp = task->rusage_cpu_perthr_interval;
	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
		*scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
		*percentagep = task->rusage_cpu_percentage;
		*intervalp = task->rusage_cpu_interval;
	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
		*scope = TASK_RUSECPU_FLAGS_DEADLINE;
		*deadlinep = task->rusage_cpu_deadline;
	} else {
		*scope = 0;
	}

	return(0);
}

/*
 * Disable the CPU usage monitor for the task. Returns KERN_SUCCESS if the
 * monitor was actually enabled, and KERN_INVALID_ARGUMENT if it was not.
 */
int
task_disable_cpumon(task_t task)
{
	thread_t thread;

	task_lock_assert_owned(task);

	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
		return (KERN_INVALID_ARGUMENT);
	}

#if CONFIG_TELEMETRY
	/*
	 * Disable task-wide telemetry if it was ever enabled by the CPU usage
	 * monitor's warning zone.
	 */
	telemetry_task_ctl_locked(current_task(), TF_CPUMON_WARNING, 0);
#endif

	/*
	 * Disable the monitor for the task, and propagate that change to each thread.
	 */
	task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		set_astledger(thread);
	}
	task->rusage_cpu_perthr_percentage = 0;
	task->rusage_cpu_perthr_interval = 0;

	return (KERN_SUCCESS);
}
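/*
 * Illustrative sketch (assumption, not part of the original file):
 * task_disable_cpumon() asserts that the task lock is held, so a caller
 * from outside this file would wrap it in lock/unlock, the same way
 * task_clear_cpuusage() wraps task_clear_cpuusage_locked() below.
 */
#if 0
static int
example_disable_cpumon(task_t task)
{
	int kr;

	task_lock(task);
	kr = task_disable_cpumon(task);
	task_unlock(task);

	return kr;
}
#endif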
int
task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
{
	thread_t thread;
	uint64_t abstime = 0;
	uint64_t limittime = 0;

	lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);

	/* By default, refill once per second */
	if (interval == 0)
		interval = NSEC_PER_SEC;

	if (percentage != 0) {
		if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
			boolean_t warn = FALSE;

			/*
			 * A per-thread CPU limit on a task generates an exception
			 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
			 * exceeds the limit.
			 */

			if (percentage == TASK_POLICY_CPUMON_DISABLE) {
				if (cpumon_entitled) {
					task_disable_cpumon(task);
					return (0);
				}

				/*
				 * This task wishes to disable the CPU usage monitor, but it's
				 * missing the required entitlement:
				 *     com.apple.private.kernel.override-cpumon
				 *
				 * Instead, treat this as a request to reset its params
				 * back to the defaults.
				 */
				warn = TRUE;
				percentage = TASK_POLICY_CPUMON_DEFAULTS;
			}

			if (percentage == TASK_POLICY_CPUMON_DEFAULTS) {
				percentage = proc_max_cpumon_percentage;
				interval = proc_max_cpumon_interval;
			}

			if (percentage > 100) {
				percentage = 100;
			}

			/*
			 * Passing in an interval of -1 means either:
			 * - Leave the interval as-is, if there's already a per-thread
			 *   limit configured
			 * - Use the system default.
			 */
			if (interval == -1ULL) {
				if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
					interval = task->rusage_cpu_perthr_interval;
				} else {
					interval = proc_max_cpumon_interval;
				}
			}

			/*
			 * Enforce global caps on CPU usage monitor here if the process is not
			 * entitled to escape the global caps.
			 */
			if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
				warn = TRUE;
				percentage = proc_max_cpumon_percentage;
			}

			if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
				warn = TRUE;
				interval = proc_max_cpumon_interval;
			}

			if (warn) {
				int pid = 0;
				char *procname = (char *)"unknown";

#ifdef MACH_BSD
				pid = proc_selfpid();
				if (current_task()->bsd_info != NULL) {
					procname = proc_name_address(current_task()->bsd_info);
				}
#endif

				printf("process %s[%d] denied attempt to escape CPU monitor"
				       " (missing required entitlement).\n", procname, pid);
			}

			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
			task->rusage_cpu_perthr_percentage = percentage;
			task->rusage_cpu_perthr_interval = interval;
			queue_iterate(&task->threads, thread, thread_t, task_threads) {
				set_astledger(thread);
			}
		} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
			/*
			 * Currently, a proc-wide CPU limit always blocks if the limit is
			 * exceeded (LEDGER_ACTION_BLOCK).
			 */
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
			task->rusage_cpu_percentage = percentage;
			task->rusage_cpu_interval = interval;

			limittime = (interval * percentage) / 100;
			nanoseconds_to_absolutetime(limittime, &abstime);

			ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
			ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
			ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
		}
	}

	if (deadline != 0) {
		assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);

		/* if already in use, cancel and wait for it to clean out */
		if (task->rusage_cpu_callt != NULL) {
			task_unlock(task);
			thread_call_cancel_wait(task->rusage_cpu_callt);
			task_lock(task);
		}
		if (task->rusage_cpu_callt == NULL) {
			task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
		}
		/* setup callout */
		if (task->rusage_cpu_callt != 0) {
			uint64_t save_abstime = 0;

			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
			task->rusage_cpu_deadline = deadline;

			nanoseconds_to_absolutetime(deadline, &abstime);
			save_abstime = abstime;
			clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
			thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
		}
	}

	return(0);
}

int
task_clear_cpuusage(task_t task, int cpumon_entitled)
{
	int retval = 0;

	task_lock(task);
	retval = task_clear_cpuusage_locked(task, cpumon_entitled);
	task_unlock(task);

	return(retval);
}

int
task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
{
	thread_call_t savecallt;

	/* cancel percentage handling if set */
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
		ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
		task->rusage_cpu_percentage = 0;
		task->rusage_cpu_interval = 0;
	}

	/*
	 * Disable the CPU usage monitor.
	 */
	if (cpumon_entitled) {
		task_disable_cpumon(task);
	}

	/* cancel deadline handling if set */
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
		if (task->rusage_cpu_callt != 0) {
			savecallt = task->rusage_cpu_callt;
			task->rusage_cpu_callt = NULL;
			task->rusage_cpu_deadline = 0;
			task_unlock(task);
			thread_call_cancel_wait(savecallt);
			thread_call_free(savecallt);
			task_lock(task);
		}
	}
	return(0);
}

/* called by ledger unit to enforce action due to resource usage criteria being met */
void
task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
{
	task_t task = (task_t)param0;
	(void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
	return;
}
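/*
 * Illustrative sketch (assumption, not part of the original file): arming a
 * deadline-based limit that suspends the task after 60 seconds of wallclock
 * time. The deadline path in task_set_cpuusage() above allocates a thread
 * call that fires task_action_cpuusage() when the timer expires. The helper
 * name and values are hypothetical.
 */
#if 0
static int
example_suspend_after_deadline(task_t task)
{
	return proc_set_task_ruse_cpu(task,
	    TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND,
	    0,                   /* no percentage */
	    0,                   /* interval unused for deadlines */
	    60 * NSEC_PER_SEC,   /* deadline, in nanoseconds */
	    0);
}
#endif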
/*
 * Routines for taskwatch and pidbind
 */


/*
 * Routines for importance donation/inheritance/boosting
 */

void
task_importance_mark_donor(task_t task, boolean_t donating)
{
#if IMPORTANCE_INHERITANCE
	task->imp_donor = (donating ? 1 : 0);
#endif /* IMPORTANCE_INHERITANCE */
}

void
task_importance_mark_receiver(task_t task, boolean_t receiving)
{
#if IMPORTANCE_INHERITANCE
	if (receiving) {
		assert(task->task_imp_assertcnt == 0);
		task->imp_receiver = 1;  /* task can receive importance boost */
		task->task_imp_assertcnt = 0;
		task->task_imp_externcnt = 0;
	} else {
		if (task->task_imp_assertcnt != 0 || task->task_imp_externcnt != 0)
			panic("disabling imp_receiver on task with pending boosts!");

		task->imp_receiver = 0;
		task->task_imp_assertcnt = 0;
		task->task_imp_externcnt = 0;
	}
#endif /* IMPORTANCE_INHERITANCE */
}


#if IMPORTANCE_INHERITANCE

static void
task_update_boost_locked(task_t task, boolean_t boost_active)
{
#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
	        proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), 0, 0);
#endif

	/* assert(boost_active ? task->requested_policy.t_boosted == 0 : task->requested_policy.t_boosted == 1); */

	proc_set_task_policy_locked(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BOOST, boost_active);

	task_policy_update_locked(task, THREAD_NULL);

#if IMPORTANCE_DEBUG
	if (boost_active == TRUE) {
		DTRACE_BOOST2(boost, task_t, task, int, audit_token_pid_from_task(task));
	} else {
		DTRACE_BOOST2(unboost, task_t, task, int, audit_token_pid_from_task(task));
	}
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
	        proc_selfpid(), audit_token_pid_from_task(task),
	        trequested(task, THREAD_NULL), tpending(task, THREAD_NULL), 0);
#endif
}

/*
 * Check if this task should donate importance.
 *
 * May be called without taking the task lock. In that case, donor status can change
 * so you must check only once for each donation event.
 */
boolean_t
task_is_importance_donor(task_t task)
{
	return (task->imp_donor == 1 || task->task_imp_assertcnt > 0) ? TRUE : FALSE;
}

/*
 * This routine may be called without holding task lock
 * since the value of imp_receiver can never be unset.
 */
boolean_t
task_is_importance_receiver(task_t task)
{
	return (task->imp_receiver) ? TRUE : FALSE;
}

/*
 * External importance assertions are managed by the process in userspace
 * Internal importance assertions are the responsibility of the kernel
 * Assertions are changed from internal to external via task_importance_externalize_assertion
 */
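/*
 * Illustrative sketch (assumption, not part of the original file): the
 * typical lifecycle of an IPC-delivered boost on a task already marked as
 * an importance receiver. The kernel holds an internal assertion while the
 * boosting message is in flight; once the message is received, the assertion
 * is externalized and userspace becomes responsible for dropping it.
 */
#if 0
static void
example_boost_lifecycle(task_t receiver, int sender_pid)
{
	/* 1) kernel takes an internal assertion on behalf of the message */
	task_importance_hold_internal_assertion(receiver, 1);

	/* 2) message delivered: transfer responsibility to userspace */
	task_importance_externalize_assertion(receiver, 1, sender_pid);

	/* 3) userspace completes the work and drops its external assertion */
	task_importance_drop_external_assertion(receiver, 1);
}
#endif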
int
task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
{
	int rval = 0;

	task_lock(target_task);
	rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_INTERNAL, count);
	task_unlock(target_task);

	task_policy_update_complete_unlocked(target_task, THREAD_NULL);

	return(rval);
}

int
task_importance_hold_external_assertion(task_t target_task, uint32_t count)
{
	int rval = 0;

	task_lock(target_task);
	rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count);
	task_unlock(target_task);

	task_policy_update_complete_unlocked(target_task, THREAD_NULL);

	return(rval);
}

int
task_importance_drop_internal_assertion(task_t target_task, uint32_t count)
{
	int rval = 0;

	task_lock(target_task);
	rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_INTERNAL, count);
	task_unlock(target_task);

	task_policy_update_complete_unlocked(target_task, THREAD_NULL);

	return(rval);
}

int
task_importance_drop_external_assertion(task_t target_task, uint32_t count)
{
	int rval = 0;

	task_lock(target_task);
	rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count);
	task_unlock(target_task);

	task_policy_update_complete_unlocked(target_task, THREAD_NULL);

	return(rval);
}

/*
 * Returns EOVERFLOW if an external assertion is taken when not holding an external boost.
 */
static int
task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count)
{
	boolean_t apply_boost = FALSE;
	int ret = 0;

	assert(target_task->imp_receiver != 0);

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_START,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
#endif

	/* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */

	if (external == TASK_POLICY_EXTERNAL) {
		if (target_task->task_imp_externcnt == 0) {
			/* Only allowed to take a new boost assertion when holding an external boost */
			printf("BUG in process %s[%d]: it attempted to acquire a new boost assertion without holding an existing external assertion. "
			       "(%d total, %d external)\n",
			       proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task),
			       target_task->task_imp_assertcnt, target_task->task_imp_externcnt);
			ret = EOVERFLOW;
			count = 0;
		} else {
			target_task->task_imp_assertcnt += count;
			target_task->task_imp_externcnt += count;
		}
	} else {
		if (target_task->task_imp_assertcnt == 0)
			apply_boost = TRUE;
		target_task->task_imp_assertcnt += count;
	}

	if (apply_boost == TRUE)
		task_update_boost_locked(target_task, TRUE);

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_END,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
	DTRACE_BOOST6(receive_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), task_t, current_task(), int, proc_selfpid(), int, count, int, target_task->task_imp_assertcnt);
	if (external == TASK_POLICY_EXTERNAL) {
		DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, proc_selfpid(), int, count, int, target_task->task_imp_externcnt);
	}
#endif
	return(ret);
}


/*
 * Returns EOVERFLOW if an external assertion is over-released.
 * Panics if an internal assertion is over-released.
 */
static int
task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count)
{
	int ret = 0;

	assert(target_task->imp_receiver != 0);

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_START,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
#endif

	/* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */

	if (external == TASK_POLICY_EXTERNAL) {
		assert(count == 1);
		if (count <= target_task->task_imp_externcnt) {
			target_task->task_imp_externcnt -= count;
			if (count <= target_task->task_imp_assertcnt)
				target_task->task_imp_assertcnt -= count;
		} else {
			/* Process over-released its boost count */
			printf("BUG in process %s[%d]: over-released external boost assertions (%d total, %d external)\n",
			       proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task),
			       target_task->task_imp_assertcnt, target_task->task_imp_externcnt);

			/* TODO: If count > 1, we should clear out as many external assertions as there are left. */
			ret = EOVERFLOW;
			count = 0;
		}
	} else {
		if (count <= target_task->task_imp_assertcnt) {
			target_task->task_imp_assertcnt -= count;
		} else {
			/* TODO: Turn this back into a panic <rdar://problem/12592649> */
			printf("Over-release of kernel-internal importance assertions for task %p (%s), dropping %d assertion(s) but task only has %d remaining (%d external).\n",
			       target_task,
			       (target_task->bsd_info == NULL) ? "" : proc_name_address(target_task->bsd_info),
			       count,
			       target_task->task_imp_assertcnt,
			       target_task->task_imp_externcnt);
			count = 0;
		}
	}

	/* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */

	if (target_task->task_imp_assertcnt == 0 && ret == 0)
		task_update_boost_locked(target_task, FALSE);

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_END,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
	if (external == TASK_POLICY_EXTERNAL) {
		DTRACE_BOOST4(drop_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_externcnt);
	}
	DTRACE_BOOST4(drop_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_assertcnt);
#endif

	return(ret);
}

/* Transfer an assertion to userspace responsibility */
int
task_importance_externalize_assertion(task_t target_task, uint32_t count, __unused int sender_pid)
{
	assert(target_task != TASK_NULL);
	assert(target_task->imp_receiver != 0);

	task_lock(target_task);

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_START,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
#endif

	/* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt + count); */

	target_task->task_imp_externcnt += count;

#if IMPORTANCE_DEBUG
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_END,
	        proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0);
	DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task),
	        int, sender_pid, int, count, int, target_task->task_imp_externcnt);
#endif /* IMPORTANCE_DEBUG */

	task_unlock(target_task);

	return(0);
}


#endif /* IMPORTANCE_INHERITANCE */

void
task_hold_multiple_assertion(__imp_only task_t task, __imp_only uint32_t count)
{
#if IMPORTANCE_INHERITANCE
	assert(task->imp_receiver != 0);

	task_importance_hold_internal_assertion(task, count);
#endif /* IMPORTANCE_INHERITANCE */
}

void
task_add_importance_watchport(__imp_only task_t task, __imp_only __impdebug_only int pid, __imp_only mach_port_t port, int *boostp)
{
	int boost = 0;

	__impdebug_only int released_pid = 0;

#if IMPORTANCE_INHERITANCE
	task_t release_imp_task = TASK_NULL;

	if (task->imp_receiver == 0) {
		*boostp = boost;
		return;
	}

	if (IP_VALID(port) != 0) {
		ip_lock(port);

		/*
		 * The port must have been marked tempowner already.
		 * This also filters out ports whose receive rights
		 * are already enqueued in a message, as you can't
		 * change the right's destination once it's already
		 * on its way.
		 */
		if (port->ip_tempowner != 0) {
			assert(port->ip_impdonation != 0);

			boost = port->ip_impcount;
			if (port->ip_taskptr != 0) {
				/*
				 * if this port is already bound to a task,
				 * release the task reference and drop any
				 * watchport-forwarded boosts
				 */
				release_imp_task = port->ip_imp_task;
			}

			/* mark that the port is watching another task */
			port->ip_taskptr = 1;
			port->ip_imp_task = task;
			task_reference(task);
		}
		ip_unlock(port);

		if (release_imp_task != TASK_NULL) {
			if (boost > 0)
				task_importance_drop_internal_assertion(release_imp_task, boost);
			released_pid = audit_token_pid_from_task(release_imp_task);
			task_deallocate(release_imp_task);
		}
#if IMPORTANCE_DEBUG
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
		        proc_selfpid(), pid, boost, released_pid, 0);
#endif /* IMPORTANCE_DEBUG */
	}
#endif /* IMPORTANCE_INHERITANCE */

	*boostp = boost;
	return;
}


/*
 * Routines for VM to query task importance
 */


/*
 * Order to be considered while estimating importance
 * for low memory notification and purging purgeable memory.
 */
#define TASK_IMPORTANCE_FOREGROUND     4
#define TASK_IMPORTANCE_NOTDARWINBG    1


/*
 * Checks if the task is already notified.
 *
 * Condition: task lock should be held while calling this function.
 */
boolean_t
task_has_been_notified(task_t task, int pressurelevel)
{
	if (task == NULL) {
		return FALSE;
	}

	if (pressurelevel == kVMPressureWarning)
		return (task->low_mem_notified_warn ? TRUE : FALSE);
	else if (pressurelevel == kVMPressureCritical)
		return (task->low_mem_notified_critical ? TRUE : FALSE);
	else
		return TRUE;
}


/*
 * Checks if the task is used for purging.
 *
 * Condition: task lock should be held while calling this function.
 */
boolean_t
task_used_for_purging(task_t task, int pressurelevel)
{
	if (task == NULL) {
		return FALSE;
	}

	if (pressurelevel == kVMPressureWarning)
		return (task->purged_memory_warn ? TRUE : FALSE);
	else if (pressurelevel == kVMPressureCritical)
		return (task->purged_memory_critical ? TRUE : FALSE);
	else
		return TRUE;
}


/*
 * Mark the task as notified with memory notification.
 *
 * Condition: task lock should be held while calling this function.
 */
void
task_mark_has_been_notified(task_t task, int pressurelevel)
{
	if (task == NULL) {
		return;
	}

	if (pressurelevel == kVMPressureWarning)
		task->low_mem_notified_warn = 1;
	else if (pressurelevel == kVMPressureCritical)
		task->low_mem_notified_critical = 1;
}


/*
 * Mark the task as purged.
 *
 * Condition: task lock should be held while calling this function.
 */
void
task_mark_used_for_purging(task_t task, int pressurelevel)
{
	if (task == NULL) {
		return;
	}

	if (pressurelevel == kVMPressureWarning)
		task->purged_memory_warn = 1;
	else if (pressurelevel == kVMPressureCritical)
		task->purged_memory_critical = 1;
}
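/*
 * Illustrative sketch (assumption, not part of the original file): the VM
 * pressure code is expected to pair the check with the mark under a single
 * task-lock hold, so that a task is notified at most once per pressure level.
 */
#if 0
static boolean_t
example_should_notify_once(task_t task, int pressurelevel)
{
	boolean_t should_notify = FALSE;

	task_lock(task);
	if (!task_has_been_notified(task, pressurelevel)) {
		task_mark_has_been_notified(task, pressurelevel);
		should_notify = TRUE;
	}
	task_unlock(task);

	return should_notify;
}
#endif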
/*
 * Mark the task eligible for low memory notification.
 *
 * Condition: task lock should be held while calling this function.
 */
void
task_clear_has_been_notified(task_t task, int pressurelevel)
{
	if (task == NULL) {
		return;
	}

	if (pressurelevel == kVMPressureWarning)
		task->low_mem_notified_warn = 0;
	else if (pressurelevel == kVMPressureCritical)
		task->low_mem_notified_critical = 0;
}


/*
 * Mark the task eligible for purging its purgeable memory.
 *
 * Condition: task lock should be held while calling this function.
 */
void
task_clear_used_for_purging(task_t task)
{
	if (task == NULL) {
		return;
	}

	task->purged_memory_warn = 0;
	task->purged_memory_critical = 0;
}


/*
 * Estimate task importance for purging its purgeable memory
 * and low memory notification.
 *
 * Importance is calculated in the following order of criteria:
 * - Task role: Background vs Foreground
 * - Boost status: Not boosted vs Boosted
 * - Darwin BG status.
 *
 * Returns: Estimated task importance. A less important task will have a
 * lower estimated importance.
 */
int
task_importance_estimate(task_t task)
{
	int task_importance = 0;

	if (task == NULL) {
		return 0;
	}

	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION)
		task_importance += TASK_IMPORTANCE_FOREGROUND;

	if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0)
		task_importance += TASK_IMPORTANCE_NOTDARWINBG;

	return task_importance;
}
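/*
 * Illustrative sketch (assumption, not part of the original file): with the
 * weights above, a foreground non-DarwinBG task scores 5 while a DarwinBG
 * background task scores 0, so a purging loop can simply prefer the lower
 * score when choosing a victim. The helper name is hypothetical.
 */
#if 0
static task_t
example_pick_purge_victim(task_t a, task_t b)
{
	/* prefer purging the less important of the two candidates */
	return (task_importance_estimate(a) <= task_importance_estimate(b)) ? a : b;
}
#endif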