1/*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach/mach_types.h>
30#include <mach/task_server.h>
31
32#include <kern/sched.h>
33#include <kern/task.h>
34#include <mach/thread_policy.h>
35#include <sys/errno.h>
36#include <sys/resource.h>
37#include <machine/limits.h>
38#include <kern/ledger.h>
39#include <kern/thread_call.h>
40#include <kern/sfi.h>
41#if CONFIG_TELEMETRY
42#include <kern/telemetry.h>
43#endif
44
45#if IMPORTANCE_INHERITANCE
46#include <ipc/ipc_importance.h>
47#if IMPORTANCE_DEBUG
48#include <mach/machine/sdt.h>
49#endif /* IMPORTANCE_DEBUG */
#endif /* IMPORTANCE_INHERITANCE */
51
52#include <sys/kdebug.h>
53
54/*
55 *  Task Policy
56 *
57 *  This subsystem manages task and thread IO priority and backgrounding,
58 *  as well as importance inheritance, process suppression, task QoS, and apptype.
 *  These properties have a surprising number of complex interactions, so they are
60 *  centralized here in one state machine to simplify the implementation of those interactions.
61 *
62 *  Architecture:
63 *  Threads and tasks have three policy fields: requested, effective, and pending.
64 *  Requested represents the wishes of each interface that influences task policy.
65 *  Effective represents the distillation of that policy into a set of behaviors.
66 *  Pending represents updates that haven't been applied yet.
67 *
68 *  Each interface that has an input into the task policy state machine controls a field in requested.
69 *  If the interface has a getter, it returns what is in the field in requested, but that is
70 *  not necessarily what is actually in effect.
71 *
72 *  All kernel subsystems that behave differently based on task policy call into
73 *  the get_effective_policy function, which returns the decision of the task policy state machine
74 *  for that subsystem by querying only the 'effective' field.
75 *
76 *  Policy change operations:
 *  Here are the steps to change a policy on a task or thread (sketched in the example following this comment block):
78 *  1) Lock task
79 *  2) Change requested field for the relevant policy
80 *  3) Run a task policy update, which recalculates effective based on requested,
81 *     then takes a diff between the old and new versions of requested and calls the relevant
82 *     other subsystems to apply these changes, and updates the pending field.
83 *  4) Unlock task
84 *  5) Run task policy update complete, which looks at the pending field to update
85 *     subsystems which cannot be touched while holding the task lock.
86 *
87 *  To add a new requested policy, add the field in the requested struct, the flavor in task.h,
88 *  the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield,
89 *  then set up the effects of that behavior in task_policy_update*. If the policy manifests
90 *  itself as a distinct effective policy, add it to the effective struct and add it to the
91 *  proc_get_effective_policy accessor.
92 *
93 *  Most policies are set via proc_set_task_policy, but policies that don't fit that interface
94 *  roll their own lock/set/update/unlock/complete code inside this file.
95 *
96 *
97 *  Suppression policy
98 *
99 *  These are a set of behaviors that can be requested for a task.  They currently have specific
100 *  implied actions when they're enabled, but they may be made customizable in the future.
101 *
102 *  When the affected task is boosted, we temporarily disable the suppression behaviors
103 *  so that the affected process has a chance to run so it can call the API to permanently
104 *  disable the suppression behaviors.
105 *
106 *  Locking
107 *
108 *  Changing task policy on a task or thread takes the task lock, and not the thread lock.
109 *  TODO: Should changing policy on a thread take the thread lock instead?
110 *
111 *  Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code.
112 *  This means that any notification of state change needs to be externally synchronized.
113 *
114 */
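
/*
 * A minimal sketch of the lock/set/update/unlock/complete sequence described
 * above, modeled on proc_set_task_policy() later in this file; the flavor and
 * value shown are illustrative only:
 *
 *	struct task_pend_token pend_token = {};
 *
 *	task_lock(task);                                            // 1) lock task
 *	proc_set_task_policy_locked(task, THREAD_NULL,              // 2) change requested
 *	                            TASK_POLICY_ATTRIBUTE,
 *	                            TASK_POLICY_ROLE,
 *	                            TASK_BACKGROUND_APPLICATION);
 *	task_policy_update_locked(task, THREAD_NULL, &pend_token);  // 3) recompute effective, diff, pend work
 *	task_unlock(task);                                          // 4) unlock task
 *	task_policy_update_complete_unlocked(task, THREAD_NULL,     // 5) apply updates that need the lock dropped
 *	                                     &pend_token);
 */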
115
116extern const qos_policy_params_t thread_qos_policy_params;
117
118/* for task holds without dropping the lock */
119extern void task_hold_locked(task_t task);
120extern void task_release_locked(task_t task);
121extern void task_wait_locked(task_t task, boolean_t until_not_runnable);
122
123extern void thread_recompute_qos(thread_t thread);
124
125/* Task policy related helper functions */
126static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value);
127static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2);
128
129static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token);
130static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token);
static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_threads, boolean_t update_sfi);
132static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos);
133
134static int proc_get_effective_policy(task_t task, thread_t thread, int policy);
135
136static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive);
137static int proc_tier_to_iopol(int tier, int passive);
138
139static uintptr_t trequested_0(task_t task, thread_t thread);
140static uintptr_t trequested_1(task_t task, thread_t thread);
141static uintptr_t teffective_0(task_t task, thread_t thread);
142static uintptr_t teffective_1(task_t task, thread_t thread);
143static uint32_t tpending(task_pend_token_t pend_token);
144static uint64_t task_requested_bitfield(task_t task, thread_t thread);
145static uint64_t task_effective_bitfield(task_t task, thread_t thread);
146
147void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);
148
149/* CPU Limits related helper functions */
150static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
151int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
152static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
153int task_disable_cpumon(task_t task);
154static int task_apply_resource_actions(task_t task, int type);
155void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
156void proc_init_cpumon_params(void);
157
158#ifdef MACH_BSD
159int             proc_pid(void *proc);
160extern int      proc_selfpid(void);
161extern char *   proc_name_address(void *p);
162extern void     rethrottle_thread(void * uthread);
163extern void     proc_apply_task_networkbg(void * bsd_info, thread_t thread);
164#endif /* MACH_BSD */
165
166
167/* Importance Inheritance related helper functions */
168
169#if IMPORTANCE_INHERITANCE
170
171static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
172static void task_importance_update_live_donor(task_t target_task);
173
174#endif /* IMPORTANCE_INHERITANCE */
175
176#if IMPORTANCE_DEBUG
177#define __impdebug_only
178#else
179#define __impdebug_only __unused
180#endif
181
182#if IMPORTANCE_INHERITANCE
183#define __imp_only
184#else
185#define __imp_only __unused
186#endif
187
188#define TASK_LOCKED   1
189#define TASK_UNLOCKED 0
190
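/* Values for the 'update_cpu' argument of task_policy_update_thread_locked() */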
191#define DO_LOWPRI_CPU   1
192#define UNDO_LOWPRI_CPU 2
193
194/* Macros for making tracing simpler */
195
196#define tpriority(task, thread)  ((uintptr_t)(thread == THREAD_NULL ? (task->priority)  : (thread->priority)))
197#define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK  : TASK_POLICY_THREAD)
198#define targetid(task, thread)   ((uintptr_t)(thread == THREAD_NULL ? (audit_token_pid_from_task(task)) : (thread->thread_id)))
199
200/*
201 * Default parameters for certain policies
202 */
203
204int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
205int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
206int proc_tal_disk_tier        = THROTTLE_LEVEL_TIER1;
207
208int proc_graphics_timer_qos   = (LATENCY_QOS_TIER_0 & 0xFF);
209
210const int proc_default_bg_iotier  = THROTTLE_LEVEL_TIER2;
211
212/* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
213const struct task_requested_policy default_task_requested_policy = {
214	.bg_iotier = proc_default_bg_iotier
215};
216const struct task_effective_policy default_task_effective_policy = {};
217const struct task_pended_policy default_task_pended_policy = {};
218
219/*
220 * Default parameters for CPU usage monitor.
221 *
222 * Default setting is 50% over 3 minutes.
223 */
224#define         DEFAULT_CPUMON_PERCENTAGE 50
225#define         DEFAULT_CPUMON_INTERVAL   (3 * 60)
226
227uint8_t         proc_max_cpumon_percentage;
228uint64_t	proc_max_cpumon_interval;
229
230kern_return_t
231qos_latency_policy_validate(task_latency_qos_t ltier) {
232	if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
233	    ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0)))
234		return KERN_INVALID_ARGUMENT;
235
236	return KERN_SUCCESS;
237}
238
239kern_return_t
240qos_throughput_policy_validate(task_throughput_qos_t ttier) {
241	if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
242	    ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0)))
243		return KERN_INVALID_ARGUMENT;
244
245	return KERN_SUCCESS;
246}
247
248static kern_return_t
249task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) {
250	if (count < TASK_QOS_POLICY_COUNT)
251		return KERN_INVALID_ARGUMENT;
252
253	task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
254	task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
255
256	kern_return_t kr = qos_latency_policy_validate(ltier);
257
258	if (kr != KERN_SUCCESS)
259		return kr;
260
261	kr = qos_throughput_policy_validate(ttier);
262
263	return kr;
264}
265
266uint32_t
267qos_extract(uint32_t qv) {
268	return (qv & 0xFF);
269}
270
271uint32_t
272qos_latency_policy_package(uint32_t qv) {
273	return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
274}
275
276uint32_t
277qos_throughput_policy_package(uint32_t qv) {
278	return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
279}
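
/*
 * Note on the encoding above: the userspace QoS tier constants carry a magic
 * byte in bits 16-23 (0xFF for latency, 0xFE for throughput) with the tier
 * index in the low byte; UNSPECIFIED passes through unchanged.  qos_extract()
 * keeps only the index for storage in the policy bitfields, and the package
 * functions re-apply the magic byte when handing values back out.  A minimal
 * sketch of the round trip (mirroring task_policy_set/get below):
 *
 *	uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
 *	info->task_latency_qos_tier = qos_latency_policy_package(lqos);
 */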
280
281/* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
282static boolean_t task_policy_suppression_disable = FALSE;
283
284kern_return_t
285task_policy_set(
286	task_t					task,
287	task_policy_flavor_t	flavor,
288	task_policy_t			policy_info,
289	mach_msg_type_number_t	count)
290{
291	kern_return_t		result = KERN_SUCCESS;
292
293	if (task == TASK_NULL || task == kernel_task)
294		return (KERN_INVALID_ARGUMENT);
295
296	switch (flavor) {
297
298	case TASK_CATEGORY_POLICY: {
299		task_category_policy_t info = (task_category_policy_t)policy_info;
300
301		if (count < TASK_CATEGORY_POLICY_COUNT)
302			return (KERN_INVALID_ARGUMENT);
303
304
305		switch(info->role) {
306			case TASK_FOREGROUND_APPLICATION:
307			case TASK_BACKGROUND_APPLICATION:
308			case TASK_DEFAULT_APPLICATION:
309				proc_set_task_policy(task, THREAD_NULL,
310				                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
311				                     info->role);
312				break;
313
314			case TASK_CONTROL_APPLICATION:
315				if (task != current_task() || task->sec_token.val[0] != 0)
316					result = KERN_INVALID_ARGUMENT;
317				else
318					proc_set_task_policy(task, THREAD_NULL,
319					                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
320					                     info->role);
321				break;
322
323			case TASK_GRAPHICS_SERVER:
324				/* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
325				if (task != current_task() || task->sec_token.val[0] != 0)
326					result = KERN_INVALID_ARGUMENT;
327				else
328					proc_set_task_policy(task, THREAD_NULL,
329					                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
330					                     info->role);
331				break;
332			default:
333				result = KERN_INVALID_ARGUMENT;
334				break;
335		} /* switch (info->role) */
336
337		break;
338	}
339
340/* Desired energy-efficiency/performance "quality-of-service" */
341	case TASK_BASE_QOS_POLICY:
342	case TASK_OVERRIDE_QOS_POLICY:
343	{
344		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
345		kern_return_t kr = task_qos_policy_validate(qosinfo, count);
346
347		if (kr != KERN_SUCCESS)
348			return kr;
349
350
351		uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
352		uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
353
354		proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
355							  flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
356							  lqos, tqos);
357	}
358	break;
359
360	case TASK_BASE_LATENCY_QOS_POLICY:
361	{
362		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
363		kern_return_t kr = task_qos_policy_validate(qosinfo, count);
364
365		if (kr != KERN_SUCCESS)
366			return kr;
367
368		uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
369
		proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
371	}
372	break;
373
374	case TASK_BASE_THROUGHPUT_QOS_POLICY:
375	{
376		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
377		kern_return_t kr = task_qos_policy_validate(qosinfo, count);
378
379		if (kr != KERN_SUCCESS)
380			return kr;
381
382		uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
383
		proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
385	}
386	break;
387
388	case TASK_SUPPRESSION_POLICY:
389	{
390
391		task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
392
393		if (count < TASK_SUPPRESSION_POLICY_COUNT)
394			return (KERN_INVALID_ARGUMENT);
395
396		struct task_qos_policy qosinfo;
397
398		qosinfo.task_latency_qos_tier = info->timer_throttle;
399		qosinfo.task_throughput_qos_tier = info->throughput_qos;
400
401		kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
402
403		if (kr != KERN_SUCCESS)
404			return kr;
405
406		/* TEMPORARY disablement of task suppression */
407		if (task_policy_suppression_disable && info->active)
408			return KERN_SUCCESS;
409
410		struct task_pend_token pend_token = {};
411
412		task_lock(task);
413
414		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
415		                          (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
416		                          proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL),
417		                          trequested_1(task, THREAD_NULL), 0);
418
419		task->requested_policy.t_sup_active      = (info->active)         ? 1 : 0;
420		task->requested_policy.t_sup_lowpri_cpu  = (info->lowpri_cpu)     ? 1 : 0;
421		task->requested_policy.t_sup_timer       = qos_extract(info->timer_throttle);
422		task->requested_policy.t_sup_disk        = (info->disk_throttle)  ? 1 : 0;
423		task->requested_policy.t_sup_cpu_limit   = (info->cpu_limit)      ? 1 : 0;
424		task->requested_policy.t_sup_suspend     = (info->suspend)        ? 1 : 0;
425		task->requested_policy.t_sup_throughput  = qos_extract(info->throughput_qos);
426		task->requested_policy.t_sup_cpu         = (info->suppressed_cpu) ? 1 : 0;
427		task->requested_policy.t_sup_bg_sockets  = (info->background_sockets) ? 1 : 0;
428
429		task_policy_update_locked(task, THREAD_NULL, &pend_token);
430
431		task_unlock(task);
432
433		task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
434
435		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
436		                          (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
437		                          proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL),
438		                          trequested_1(task, THREAD_NULL), 0);
439
440		break;
441
442	}
443
444	default:
445		result = KERN_INVALID_ARGUMENT;
446		break;
447	}
448
449	return (result);
450}
451
452/* Sets BSD 'nice' value on the task */
453kern_return_t
454task_importance(
455	task_t				task,
456	integer_t			importance)
457{
458	if (task == TASK_NULL || task == kernel_task)
459		return (KERN_INVALID_ARGUMENT);
460
461	task_lock(task);
462
463	if (!task->active) {
464		task_unlock(task);
465
466		return (KERN_TERMINATED);
467	}
468
469	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
470		task_unlock(task);
471
472		return (KERN_INVALID_ARGUMENT);
473	}
474
475	task->importance = importance;
476
477	/* TODO: tracepoint? */
478
479	/* Redrive only the task priority calculation */
480	task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
481
482	task_unlock(task);
483
484	return (KERN_SUCCESS);
485}
486
487kern_return_t
488task_policy_get(
489	task_t					task,
490	task_policy_flavor_t	flavor,
491	task_policy_t			policy_info,
492	mach_msg_type_number_t	*count,
493	boolean_t				*get_default)
494{
495	if (task == TASK_NULL || task == kernel_task)
496		return (KERN_INVALID_ARGUMENT);
497
498	switch (flavor) {
499
500	case TASK_CATEGORY_POLICY:
501	{
502		task_category_policy_t		info = (task_category_policy_t)policy_info;
503
504		if (*count < TASK_CATEGORY_POLICY_COUNT)
505			return (KERN_INVALID_ARGUMENT);
506
507		if (*get_default)
508			info->role = TASK_UNSPECIFIED;
509		else
510			info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
511		break;
512	}
513
514	case TASK_BASE_QOS_POLICY: /* FALLTHRU */
515	case TASK_OVERRIDE_QOS_POLICY:
516	{
517		task_qos_policy_t info = (task_qos_policy_t)policy_info;
518
519		if (*count < TASK_QOS_POLICY_COUNT)
520			return (KERN_INVALID_ARGUMENT);
521
522		if (*get_default) {
523			info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
524			info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
525		} else if (flavor == TASK_BASE_QOS_POLICY) {
526			int value1, value2;
527
528			proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
529
530			info->task_latency_qos_tier = qos_latency_policy_package(value1);
531			info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
532
533		} else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
534			int value1, value2;
535
536			proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
537
538			info->task_latency_qos_tier = qos_latency_policy_package(value1);
539			info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
540		}
541
542		break;
543	}
544
545	case TASK_POLICY_STATE:
546	{
547		task_policy_state_t info = (task_policy_state_t)policy_info;
548
549		if (*count < TASK_POLICY_STATE_COUNT)
550			return (KERN_INVALID_ARGUMENT);
551
552		/* Only root can get this info */
553		if (current_task()->sec_token.val[0] != 0)
554			return KERN_PROTECTION_FAILURE;
555
556		if (*get_default) {
557			info->requested = 0;
558			info->effective = 0;
559			info->pending = 0;
560			info->imp_assertcnt = 0;
561			info->imp_externcnt = 0;
562			info->flags = 0;
563			info->imp_transitions = 0;
564		} else {
565			task_lock(task);
566
567			info->requested = task_requested_bitfield(task, THREAD_NULL);
568			info->effective = task_effective_bitfield(task, THREAD_NULL);
569			info->pending   = 0;
570
571			info->flags = 0;
572			if (task->task_imp_base != NULL) {
573				info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
574				info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
575				info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
576				info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
577				info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
578				info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
579				info->imp_transitions = task->task_imp_base->iit_transitions;
580			} else {
581				info->imp_assertcnt = 0;
582				info->imp_externcnt = 0;
583				info->imp_transitions = 0;
584			}
585			task_unlock(task);
586		}
587
588		info->reserved[0] = 0;
589		info->reserved[1] = 0;
590
591		break;
592	}
593
594	case TASK_SUPPRESSION_POLICY:
595	{
596		task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
597
598		if (*count < TASK_SUPPRESSION_POLICY_COUNT)
599			return (KERN_INVALID_ARGUMENT);
600
601		task_lock(task);
602
603		if (*get_default) {
604			info->active            = 0;
605			info->lowpri_cpu        = 0;
606			info->timer_throttle    = LATENCY_QOS_TIER_UNSPECIFIED;
607			info->disk_throttle     = 0;
608			info->cpu_limit         = 0;
609			info->suspend           = 0;
610			info->throughput_qos    = 0;
611			info->suppressed_cpu    = 0;
612		} else {
613			info->active            = task->requested_policy.t_sup_active;
614			info->lowpri_cpu        = task->requested_policy.t_sup_lowpri_cpu;
615			info->timer_throttle    = qos_latency_policy_package(task->requested_policy.t_sup_timer);
616			info->disk_throttle     = task->requested_policy.t_sup_disk;
617			info->cpu_limit         = task->requested_policy.t_sup_cpu_limit;
618			info->suspend           = task->requested_policy.t_sup_suspend;
619			info->throughput_qos    = qos_throughput_policy_package(task->requested_policy.t_sup_throughput);
620			info->suppressed_cpu    = task->requested_policy.t_sup_cpu;
621			info->background_sockets = task->requested_policy.t_sup_bg_sockets;
622		}
623
624		task_unlock(task);
625		break;
626	}
627
628	default:
629		return (KERN_INVALID_ARGUMENT);
630	}
631
632	return (KERN_SUCCESS);
633}
634
635/*
636 * Called at task creation
637 * We calculate the correct effective but don't apply it to anything yet.
638 * The threads, etc will inherit from the task as they get created.
639 */
640void
641task_policy_create(task_t task, int parent_boosted)
642{
643	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
644		if (parent_boosted) {
645			task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
646			task_importance_mark_donor(task, TRUE);
647		} else {
648			task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
649			task_importance_mark_receiver(task, FALSE);
650		}
651	}
652
653	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
654				  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
655				  audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL),
656				  teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
657
658	task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL);
659
660	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
661				  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
662				  audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL),
663				  teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
664
665	task_importance_update_live_donor(task);
666	task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
667}
668
669void
670thread_policy_create(thread_t thread)
671{
672	task_t task = thread->task;
673
674	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
675				  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
676				  targetid(task, thread), teffective_0(task, thread),
677				  teffective_1(task, thread), tpriority(task, thread), 0);
678
679	task_policy_update_internal_locked(task, thread, TRUE, NULL);
680
681	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
682				  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
683				  targetid(task, thread), teffective_0(task, thread),
684				  teffective_1(task, thread), tpriority(task, thread), 0);
685}
686
687static void
688task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token)
689{
690	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
691	                          (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START),
692	                          targetid(task, thread), teffective_0(task, thread),
693	                          teffective_1(task, thread), tpriority(task, thread), 0);
694
695	task_policy_update_internal_locked(task, thread, FALSE, pend_token);
696
697	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
698				  (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END,
699				  targetid(task, thread), teffective_0(task, thread),
700				  teffective_1(task, thread), tpriority(task, thread), 0);
701}
702
703/*
704 * One state update function TO RULE THEM ALL
705 *
706 * This function updates the task or thread effective policy fields
707 * and pushes the results to the relevant subsystems.
708 *
709 * Must call update_complete after unlocking the task,
710 * as some subsystems cannot be updated while holding the task lock.
711 *
712 * Called with task locked, not thread
713 */
714
715static void
716task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token)
717{
718	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
719
720	/*
721	 * Step 1:
722	 *  Gather requested policy
723	 */
724
725	struct task_requested_policy requested =
726	        (on_task) ? task->requested_policy : thread->requested_policy;
727
728
729	/*
730	 * Step 2:
731	 *  Calculate new effective policies from requested policy and task state
732	 *  Rules:
733	 *      If in an 'on_task' block, must only look at and set fields starting with t_
734	 *      If operating on a task, don't touch anything starting with th_
735	 *      If operating on a thread, don't touch anything starting with t_
736	 *      Don't change requested, it won't take effect
737	 */
738
739	struct task_effective_policy next = {};
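	/* Snapshot of the owning task's effective policy; filled in and consulted only on the thread (!on_task) path below */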
740	struct task_effective_policy task_effective;
741
742	/* Calculate QoS policies */
743
744	if (on_task) {
745		/* Update task role */
746		next.t_role = requested.t_role;
747
748		/* Set task qos clamp and ceiling */
749		next.t_qos_clamp = requested.t_qos_clamp;
750
751		if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
752		    requested.t_apptype == TASK_APPTYPE_APP_TAL) {
753
754			switch (next.t_role) {
755				case TASK_FOREGROUND_APPLICATION:
756					/* Foreground apps get urgent scheduler priority */
757					next.qos_ui_is_urgent = 1;
758					next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
759					break;
760
761				case TASK_BACKGROUND_APPLICATION:
762					/* This is really 'non-focal but on-screen' */
763					next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
764					break;
765
766				case TASK_NONUI_APPLICATION:
767					/* i.e. 'off-screen' */
768					next.t_qos_ceiling = THREAD_QOS_LEGACY;
769					break;
770
771				case TASK_CONTROL_APPLICATION:
772				case TASK_GRAPHICS_SERVER:
773					next.qos_ui_is_urgent = 1;
774					next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
775					break;
776
777				case TASK_UNSPECIFIED:
778				default:
779					/* Apps that don't have an application role get
780					 * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
781					next.t_qos_ceiling = THREAD_QOS_LEGACY;
782					break;
783			}
784		} else {
785			/* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */
786			next.t_qos_ceiling = THREAD_QOS_USER_INITIATED;
787		}
788	} else {
789		/*
790		 * Set thread qos tier
791		 * Note that an override only overrides the QoS field, not other policy settings.
792		 * A thread must already be participating in QoS for override to take effect
793		 */
794
795		/* Snapshot the task's effective policy */
796		task_effective = task->effective_policy;
797
798		next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent;
799
800		if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED))
801			next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos);
802		else
803			next.thep_qos = requested.thrp_qos;
804
805		/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
806		if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
807			if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
808				next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos);
809			else
810				next.thep_qos = task_effective.t_qos_clamp;
811		}
812
813		/* The ceiling only applies to threads that are in the QoS world */
814		if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
815		    next.thep_qos                != THREAD_QOS_UNSPECIFIED) {
816			next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos);
817		}
818
819		/*
820		 * The QoS relative priority is only applicable when the original programmer's
821		 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
822		 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
823		 * since otherwise it would be lower than unclamped threads. Similarly, in the
824		 * presence of boosting, the programmer doesn't know what other actors
825		 * are boosting the thread.
826		 */
827		if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
828		    (requested.thrp_qos == next.thep_qos) &&
829		    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
830			next.thep_qos_relprio = requested.thrp_qos_relprio;
831		} else {
832			next.thep_qos_relprio = 0;
833		}
834	}
835
836	/* Calculate DARWIN_BG */
837	boolean_t wants_darwinbg        = FALSE;
838	boolean_t wants_all_sockets_bg  = FALSE; /* Do I want my existing sockets to be bg */
839	boolean_t wants_watchersbg      = FALSE; /* Do I want my pidbound threads to be bg */
840	boolean_t wants_tal             = FALSE; /* Do I want the effects of TAL mode */
841
	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * Only an explicit DARWIN_BG request causes watchers to transition;
	 * backgrounding driven by apptype (below) engages DARWIN_BG without
	 * transitioning watchers.
	 */
848	if (requested.int_darwinbg || requested.ext_darwinbg)
849		wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
850
851	if (on_task) {
852		/* Background TAL apps are throttled when TAL is enabled */
853		if (requested.t_apptype      == TASK_APPTYPE_APP_TAL &&
854		    requested.t_role         == TASK_BACKGROUND_APPLICATION &&
855		    requested.t_tal_enabled  == 1) {
856			wants_tal = TRUE;
857			next.t_tal_engaged = 1;
858		}
859
860		/* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
861		if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
862		    requested.t_boosted == 0)
863			wants_darwinbg = TRUE;
864
865		/* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
866		if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND)
867			wants_darwinbg = TRUE;
868
869		if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE)
870			wants_darwinbg = TRUE;
871	} else {
872		if (requested.th_pidbind_bg)
873			wants_all_sockets_bg = wants_darwinbg = TRUE;
874
875		if (requested.th_workq_bg)
876			wants_darwinbg = TRUE;
877
878		if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE)
879			wants_darwinbg = TRUE;
880	}
881
882	/* Calculate side effects of DARWIN_BG */
883
884	if (wants_darwinbg) {
885		next.darwinbg = 1;
886		/* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */
887		next.new_sockets_bg = 1;
888		next.lowpri_cpu = 1;
889	}
890
891	if (wants_all_sockets_bg)
892		next.all_sockets_bg = 1;
893
894	if (on_task && wants_watchersbg)
895		next.t_watchers_bg = 1;
896
897	/* darwinbg on either task or thread implies background QOS (or lower) */
898	if (!on_task &&
899		(wants_darwinbg || task_effective.darwinbg) &&
900		(next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){
901		next.thep_qos = THREAD_QOS_BACKGROUND;
902		next.thep_qos_relprio = 0;
903	}
904
905	/* Calculate low CPU priority */
906
907	boolean_t wants_lowpri_cpu = FALSE;
908
909	if (wants_darwinbg || wants_tal)
910		wants_lowpri_cpu = TRUE;
911
912	if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
913		wants_lowpri_cpu = TRUE;
914
915	if (wants_lowpri_cpu)
916		next.lowpri_cpu = 1;
917
918	/* Calculate IO policy */
919
920	/* Update BG IO policy (so we can see if it has changed) */
921	next.bg_iotier = requested.bg_iotier;
922
923	int iopol = THROTTLE_LEVEL_TIER0;
924
925	if (wants_darwinbg)
926		iopol = MAX(iopol, requested.bg_iotier);
927
928	if (on_task) {
929		if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD)
930			iopol = MAX(iopol, proc_standard_daemon_tier);
931
932		if (requested.t_sup_disk && requested.t_boosted == 0)
933			iopol = MAX(iopol, proc_suppressed_disk_tier);
934
935		if (wants_tal)
936			iopol = MAX(iopol, proc_tal_disk_tier);
937
938		if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
939			iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]);
940
941	} else {
942		/* Look up the associated IO tier value for the QoS class */
943		iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
944	}
945
946	iopol = MAX(iopol, requested.int_iotier);
947	iopol = MAX(iopol, requested.ext_iotier);
948
949	next.io_tier = iopol;
950
951	/* Calculate Passive IO policy */
952
953	if (requested.ext_iopassive || requested.int_iopassive)
954		next.io_passive = 1;
955
956	/* Calculate miscellaneous policy */
957
958	if (on_task) {
959		/* Calculate suppression-active flag */
960		if (requested.t_sup_active && requested.t_boosted == 0)
961			next.t_sup_active = 1;
962
963		/* Calculate suspend policy */
964		if (requested.t_sup_suspend && requested.t_boosted == 0)
965			next.t_suspended = 1;
966
967		/* Calculate timer QOS */
968		int latency_qos = requested.t_base_latency_qos;
969
970		if (requested.t_sup_timer && requested.t_boosted == 0)
971			latency_qos = requested.t_sup_timer;
972
973		if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
974			latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]);
975
976		if (requested.t_over_latency_qos != 0)
977			latency_qos = requested.t_over_latency_qos;
978
		/* Treat the windowserver specially */
980		if (requested.t_role == TASK_GRAPHICS_SERVER)
981			latency_qos = proc_graphics_timer_qos;
982
983		next.t_latency_qos = latency_qos;
984
985		/* Calculate throughput QOS */
986		int through_qos = requested.t_base_through_qos;
987
988		if (requested.t_sup_throughput && requested.t_boosted == 0)
989			through_qos = requested.t_sup_throughput;
990
991		if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
992			through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]);
993
994		if (requested.t_over_through_qos != 0)
995			through_qos = requested.t_over_through_qos;
996
997		next.t_through_qos = through_qos;
998
999		/* Calculate suppressed CPU priority */
1000		if (requested.t_sup_cpu && requested.t_boosted == 0)
1001			next.t_suppressed_cpu = 1;
1002
1003		/*
1004		 * Calculate background sockets
1005		 * Don't take into account boosting to limit transition frequency.
1006		 */
1007		if (requested.t_sup_bg_sockets){
1008			next.all_sockets_bg = 1;
1009			next.new_sockets_bg = 1;
1010		}
1011
1012		/* Apply SFI Managed class bit */
1013		next.t_sfi_managed = requested.t_sfi_managed;
1014
1015		/* Calculate 'live donor' status for live importance */
1016		switch (requested.t_apptype) {
1017			case TASK_APPTYPE_APP_TAL:
1018			case TASK_APPTYPE_APP_DEFAULT:
1019				if (requested.ext_darwinbg == 0)
1020					next.t_live_donor = 1;
1021				else
1022					next.t_live_donor = 0;
1023				break;
1024
1025			case TASK_APPTYPE_DAEMON_INTERACTIVE:
1026			case TASK_APPTYPE_DAEMON_STANDARD:
1027			case TASK_APPTYPE_DAEMON_ADAPTIVE:
1028			case TASK_APPTYPE_DAEMON_BACKGROUND:
1029			default:
1030				next.t_live_donor = 0;
1031				break;
1032		}
1033	}
1034
1035	if (requested.terminated) {
1036		/*
1037		 * Shoot down the throttles that slow down exit or response to SIGTERM
1038		 * We don't need to shoot down:
1039		 * passive        (don't want to cause others to throttle)
1040		 * all_sockets_bg (don't need to iterate FDs on every exit)
1041		 * new_sockets_bg (doesn't matter for exiting process)
1042		 * pidsuspend     (jetsam-ed BG process shouldn't run again)
1043		 * watchers_bg    (watcher threads don't need to be unthrottled)
1044		 * t_latency_qos  (affects userspace timers only)
1045		 */
1046
1047		next.terminated         = 1;
1048		next.darwinbg           = 0;
1049		next.lowpri_cpu         = 0;
1050		next.io_tier            = THROTTLE_LEVEL_TIER0;
1051		if (on_task) {
1052			next.t_tal_engaged = 0;
1053			next.t_role = TASK_UNSPECIFIED;
1054			next.t_suppressed_cpu = 0;
1055
1056			/* TODO: This should only be shot down on SIGTERM, not exit */
1057			next.t_suspended   = 0;
1058		} else {
1059			next.thep_qos = 0;
1060		}
1061	}
1062
1063	/*
1064	 * Step 3:
1065	 *  Swap out old policy for new policy
1066	 */
1067
1068	if (!on_task) {
1069		/* Acquire thread mutex to synchronize against
1070		 * thread_policy_set(). Consider reworking to separate qos
1071		 * fields, or locking the task in thread_policy_set.
1072		 * A more efficient model would be to make the thread bits
1073		 * authoritative.
1074		 */
1075		thread_mtx_lock(thread);
1076	}
1077
1078	struct task_effective_policy prev =
1079	        (on_task) ? task->effective_policy : thread->effective_policy;
1080
1081	/*
1082	 * Check for invalid transitions here for easier debugging
1083	 * TODO: dump the structs as hex in the panic string
1084	 */
1085	if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg)
1086		panic("unexpected network change for kernel task");
1087
1088	/* This is the point where the new values become visible to other threads */
1089	if (on_task)
1090		task->effective_policy = next;
1091	else {
1092		/* Preserve thread specific latency/throughput QoS modified via
1093		 * thread_policy_set(). Inelegant in the extreme, to be reworked.
1094		 *
1095		 * If thread QoS class is set, we don't need to preserve the previously set values.
		 * Take care not to accidentally preserve stale thread QoS values when a thread
		 * is set back to default QoS.
1098		 */
1099		uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos;
1100
1101		if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1102			next.t_latency_qos = lqos;
1103			next.t_through_qos = tqos;
1104		} else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1105			next.t_latency_qos = 0;
1106			next.t_through_qos = 0;
1107		} else {
1108			next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos];
1109			next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos];
1110		}
1111
1112		thread_update_qos_cpu_time(thread, TRUE);
1113		thread->effective_policy = next;
1114		thread_mtx_unlock(thread);
1115	}
1116
1117	/* Don't do anything further to a half-formed task or thread */
1118	if (in_create)
1119		return;
1120
1121	/*
1122	 * Step 4:
1123	 *  Pend updates that can't be done while holding the task lock
1124	 */
1125
1126	if (prev.all_sockets_bg != next.all_sockets_bg)
1127		pend_token->tpt_update_sockets = 1;
1128
1129	if (on_task) {
1130		/* Only re-scan the timer list if the qos level is getting less strong */
1131		if (prev.t_latency_qos > next.t_latency_qos)
1132			pend_token->tpt_update_timers = 1;
1133
1134
1135		if (prev.t_live_donor != next.t_live_donor)
1136			pend_token->tpt_update_live_donor = 1;
1137	}
1138
1139	/*
1140	 * Step 5:
1141	 *  Update other subsystems as necessary if something has changed
1142	 */
1143
1144	boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE;
1145
1146	if (on_task) {
1147		if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) {
1148			task_hold_locked(task);
1149			task_wait_locked(task, FALSE);
1150		}
1151		if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) {
1152			task_release_locked(task);
1153		}
1154
1155		boolean_t update_threads = FALSE;
1156		boolean_t update_sfi = FALSE;
1157
1158		if (prev.bg_iotier          != next.bg_iotier        ||
1159		    prev.terminated         != next.terminated       ||
1160		    prev.t_qos_clamp        != next.t_qos_clamp      ||
1161		    prev.t_qos_ceiling      != next.t_qos_ceiling    ||
1162		    prev.qos_ui_is_urgent   != next.qos_ui_is_urgent ||
1163		    prev.darwinbg           != next.darwinbg)
1164			update_threads = TRUE;
1165
1166		/*
1167		 * A bit of a layering violation. We know what task policy attributes
1168		 * sfi_thread_classify() consults, so if they change, trigger SFI
1169		 * re-evaluation.
1170		 */
1171		if ((prev.t_latency_qos != next.t_latency_qos) ||
1172			(prev.t_role != next.t_role) ||
1173			(prev.darwinbg != next.darwinbg) ||
1174			(prev.t_sfi_managed != next.t_sfi_managed))
1175			update_sfi = TRUE;
1176
1177		task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi);
1178	} else {
1179		int update_cpu = 0;
1180		boolean_t update_sfi = FALSE;
1181		boolean_t update_qos = FALSE;
1182
1183		if (prev.lowpri_cpu != next.lowpri_cpu)
1184			update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU);
1185
1186		if (prev.darwinbg != next.darwinbg ||
1187		    prev.thep_qos != next.thep_qos)
1188			update_sfi = TRUE;
1189
1190		if (prev.thep_qos           != next.thep_qos          ||
1191		    prev.thep_qos_relprio   != next.thep_qos_relprio  ||
1192		    prev.qos_ui_is_urgent   != next.qos_ui_is_urgent) {
1193			update_qos = TRUE;
1194		}
1195
1196		task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos);
1197	}
1198}
1199
1200/* Despite the name, the thread's task is locked, the thread is not */
1201void
1202task_policy_update_thread_locked(thread_t thread,
1203                                 int update_cpu,
1204                                 boolean_t update_throttle,
1205                                 boolean_t update_sfi,
1206                                 boolean_t update_qos)
1207{
1208	thread_precedence_policy_data_t policy;
1209
1210	if (update_throttle) {
1211		rethrottle_thread(thread->uthread);
1212	}
1213
1214	if (update_sfi) {
1215		sfi_reevaluate(thread);
1216	}
1217
	/*
	 * TODO: pidbind needs to stuff remembered importance into saved_importance,
	 * and to properly deal with bg'ed threads being pidbound and un-bg'ing while pidbound.
	 *
	 * TODO: A BG thread's priority is 0 on desktop and 4 on embedded.  Need to reconcile this.
	 */
1224	if (update_cpu == DO_LOWPRI_CPU) {
1225		thread->saved_importance = thread->importance;
1226		policy.importance = INT_MIN;
1227	} else if (update_cpu == UNDO_LOWPRI_CPU) {
1228		policy.importance = thread->saved_importance;
1229		thread->saved_importance = 0;
1230	}
1231
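	/* 'policy' is consumed below only when update_cpu is nonzero, in which case one of the branches above has initialized it */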
1232	/* Takes thread lock and thread mtx lock */
1233	if (update_cpu)
1234		thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
1235                                           (thread_policy_t)&policy,
1236                                           THREAD_PRECEDENCE_POLICY_COUNT);
1237
1238	if (update_qos)
1239		thread_recompute_qos(thread);
1240}
1241
1242/*
1243 * Calculate priority on a task, loop through its threads, and tell them about
1244 * priority changes and throttle changes.
1245 */
1246void
1247task_policy_update_task_locked(task_t    task,
1248                               boolean_t update_throttle,
1249                               boolean_t update_threads,
1250                               boolean_t update_sfi)
1251{
1252	boolean_t update_priority = FALSE;
1253
1254	if (task == kernel_task)
1255		panic("Attempting to set task policy on kernel_task");
1256
1257	int priority     = BASEPRI_DEFAULT;
1258	int max_priority = MAXPRI_USER;
1259
1260	if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) {
1261		priority = MAXPRI_THROTTLE;
1262		max_priority = MAXPRI_THROTTLE;
1263	} else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) {
1264		priority = MAXPRI_SUPPRESSED;
1265		max_priority = MAXPRI_SUPPRESSED;
1266	} else {
1267		switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) {
1268			case TASK_CONTROL_APPLICATION:
1269				priority = BASEPRI_CONTROL;
1270				break;
1271			case TASK_GRAPHICS_SERVER:
1272				priority = BASEPRI_GRAPHICS;
1273				max_priority = MAXPRI_RESERVED;
1274				break;
1275			default:
1276				break;
1277		}
1278
1279		/* factor in 'nice' value */
1280		priority += task->importance;
1281
1282		if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1283			int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp];
1284
1285			priority        = MIN(priority, qos_clamp_priority);
1286			max_priority    = MIN(max_priority, qos_clamp_priority);
1287		}
1288	}
1289
1290	/* avoid extra work if priority isn't changing */
1291	if (task->priority != priority || task->max_priority != max_priority) {
1292		update_priority = TRUE;
1293
1294		/* update the scheduling priority for the task */
1295		task->max_priority = max_priority;
1296
1297		if (priority > task->max_priority)
1298			priority = task->max_priority;
1299		else if (priority < MINPRI)
1300			priority = MINPRI;
1301
1302		task->priority = priority;
1303	}
1304
1305	/* Loop over the threads in the task only once, and only if necessary */
1306	if (update_threads || update_throttle || update_priority || update_sfi ) {
1307		thread_t thread;
1308
1309		queue_iterate(&task->threads, thread, thread_t, task_threads) {
1310			if (update_priority) {
1311				thread_mtx_lock(thread);
1312
1313				thread_task_priority(thread, priority, max_priority);
1314
1315				thread_mtx_unlock(thread);
1316			}
1317
1318			if (update_throttle) {
1319				rethrottle_thread(thread->uthread);
1320			}
1321
1322			if (update_sfi) {
1323				sfi_reevaluate(thread);
1324			}
1325
1326			if (update_threads) {
1327				thread->requested_policy.bg_iotier  = task->effective_policy.bg_iotier;
1328				thread->requested_policy.terminated = task->effective_policy.terminated;
1329
1330				task_policy_update_internal_locked(task, thread, FALSE, NULL);
1331				/*  The thread policy must not emit any completion actions due to this change. */
1332			}
1333		}
1334	}
1335}
1336
1337/*
1338 * Called with task unlocked to do things that can't be done while holding the task lock
1339 */
1340void
1341task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token)
1342{
1343	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1344
1345#ifdef MACH_BSD
1346	if (pend_token->tpt_update_sockets)
1347		proc_apply_task_networkbg(task->bsd_info, thread);
1348#endif /* MACH_BSD */
1349
1350	if (on_task) {
1351		/* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */
1352		if (pend_token->tpt_update_timers)
1353			ml_timer_evaluate();
1354
1355
1356		if (pend_token->tpt_update_live_donor)
1357			task_importance_update_live_donor(task);
1358	}
1359}
1360
1361/*
1362 * Initiate a task policy state transition
1363 *
1364 * Everything that modifies requested except functions that need to hold the task lock
1365 * should use this function
1366 *
1367 * Argument validation should be performed before reaching this point.
1368 *
1369 * TODO: Do we need to check task->active or thread->active?
1370 */
1371void
1372proc_set_task_policy(task_t     task,
1373                     thread_t   thread,
1374                     int        category,
1375                     int        flavor,
1376                     int        value)
1377{
1378	struct task_pend_token pend_token = {};
1379
1380	task_lock(task);
1381
1382	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1383				  (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1384				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1385
1386	proc_set_task_policy_locked(task, thread, category, flavor, value);
1387
1388	task_policy_update_locked(task, thread, &pend_token);
1389
1390	task_unlock(task);
1391
1392	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1393				  (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1394				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1395
1396	task_policy_update_complete_unlocked(task, thread, &pend_token);
1397}
1398
1399/*
1400 * Initiate a task policy state transition on a thread with its TID
1401 * Useful if you cannot guarantee the thread won't get terminated
1402 */
1403void
1404proc_set_task_policy_thread(task_t     task,
1405                            uint64_t   tid,
1406                            int        category,
1407                            int        flavor,
1408                            int        value)
1409{
1410	thread_t thread;
1411	thread_t self = current_thread();
1412	struct task_pend_token pend_token = {};
1413
1414	task_lock(task);
1415
1416	if (tid == TID_NULL || tid == self->thread_id)
1417		thread = self;
1418	else
1419		thread = task_findtid(task, tid);
1420
1421	if (thread == THREAD_NULL) {
1422		task_unlock(task);
1423		return;
1424	}
1425
1426	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1427				  (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1428				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1429
1430	proc_set_task_policy_locked(task, thread, category, flavor, value);
1431
1432	task_policy_update_locked(task, thread, &pend_token);
1433
1434	task_unlock(task);
1435
1436	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1437				  (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1438				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1439
1440	task_policy_update_complete_unlocked(task, thread, &pend_token);
1441}
1442
1443/*
1444 * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1445 * Same locking rules apply.
1446 */
1447void
1448proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2)
1449{
1450	struct task_pend_token pend_token = {};
1451
1452	task_lock(task);
1453
1454	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1455				  (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1456				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0);
1457
1458	proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2);
1459
1460	task_policy_update_locked(task, thread, &pend_token);
1461
1462	task_unlock(task);
1463
1464	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1465				  (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
				  targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1467
1468	task_policy_update_complete_unlocked(task, thread, &pend_token);
1469}
1470
1471/*
1472 * Set the requested state for a specific flavor to a specific value.
1473 *
1474 *  TODO:
1475 *  Verify that arguments to non iopol things are 1 or 0
1476 */
1477static void
1478proc_set_task_policy_locked(task_t      task,
1479                            thread_t    thread,
1480                            int         category,
1481                            int         flavor,
1482                            int         value)
1483{
1484	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1485
1486	int tier, passive;
1487
1488	struct task_requested_policy requested =
1489	        (on_task) ? task->requested_policy : thread->requested_policy;
1490
1491	switch (flavor) {
1492
1493	/* Category: EXTERNAL and INTERNAL, thread and task */
1494
1495		case TASK_POLICY_DARWIN_BG:
1496			if (category == TASK_POLICY_EXTERNAL)
1497				requested.ext_darwinbg = value;
1498			else
1499				requested.int_darwinbg = value;
1500			break;
1501
1502		case TASK_POLICY_IOPOL:
1503			proc_iopol_to_tier(value, &tier, &passive);
1504			if (category == TASK_POLICY_EXTERNAL) {
1505				requested.ext_iotier  = tier;
1506				requested.ext_iopassive = passive;
1507			} else {
1508				requested.int_iotier  = tier;
1509				requested.int_iopassive = passive;
1510			}
1511			break;
1512
1513		case TASK_POLICY_IO:
1514			if (category == TASK_POLICY_EXTERNAL)
1515				requested.ext_iotier = value;
1516			else
1517				requested.int_iotier = value;
1518			break;
1519
1520		case TASK_POLICY_PASSIVE_IO:
1521			if (category == TASK_POLICY_EXTERNAL)
1522				requested.ext_iopassive = value;
1523			else
1524				requested.int_iopassive = value;
1525			break;
1526
1527	/* Category: INTERNAL, task only */
1528
1529		case TASK_POLICY_DARWIN_BG_IOPOL:
1530			assert(on_task && category == TASK_POLICY_INTERNAL);
1531			proc_iopol_to_tier(value, &tier, &passive);
1532			requested.bg_iotier = tier;
1533			break;
1534
1535	/* Category: ATTRIBUTE, task only */
1536
1537		case TASK_POLICY_TAL:
1538			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1539			requested.t_tal_enabled = value;
1540			break;
1541
1542		case TASK_POLICY_BOOST:
1543			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1544			requested.t_boosted = value;
1545			break;
1546
1547		case TASK_POLICY_ROLE:
1548			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1549			requested.t_role = value;
1550			break;
1551
1552		case TASK_POLICY_TERMINATED:
1553			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1554			requested.terminated = value;
1555			break;
1556		case TASK_BASE_LATENCY_QOS_POLICY:
1557			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1558			requested.t_base_latency_qos = value;
1559			break;
1560		case TASK_BASE_THROUGHPUT_QOS_POLICY:
1561			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1562			requested.t_base_through_qos = value;
1563			break;
1564		case TASK_POLICY_SFI_MANAGED:
1565			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1566			requested.t_sfi_managed = value;
1567			break;
1568
1569	/* Category: ATTRIBUTE, thread only */
1570
1571		case TASK_POLICY_PIDBIND_BG:
1572			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1573			requested.th_pidbind_bg = value;
1574			break;
1575
1576		case TASK_POLICY_WORKQ_BG:
1577			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1578			requested.th_workq_bg = value;
1579			break;
1580
1581		case TASK_POLICY_QOS:
1582			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1583			requested.thrp_qos = value;
1584			break;
1585
1586		case TASK_POLICY_QOS_OVERRIDE:
1587			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1588			requested.thrp_qos_override = value;
1589			break;
1590
1591		default:
1592			panic("unknown task policy: %d %d %d", category, flavor, value);
1593			break;
1594	}
1595
1596	if (on_task)
1597		task->requested_policy = requested;
1598	else
1599		thread->requested_policy = requested;
1600}
1601
1602/*
1603 * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure.
1604 */
1605static void
1606proc_set_task_policy2_locked(task_t      task,
1607                             thread_t    thread,
1608                             int         category,
1609                             int         flavor,
1610                             int         value1,
1611                             int         value2)
1612{
1613	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1614
1615	struct task_requested_policy requested =
1616	        (on_task) ? task->requested_policy : thread->requested_policy;
1617
1618	switch (flavor) {
1619
1620	/* Category: ATTRIBUTE, task only */
1621
1622		case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1623			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1624			requested.t_base_latency_qos = value1;
1625			requested.t_base_through_qos = value2;
1626			break;
1627
1628		case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1629			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1630			requested.t_over_latency_qos = value1;
1631			requested.t_over_through_qos = value2;
1632			break;
1633
1634	/* Category: ATTRIBUTE, thread only */
1635
1636		case TASK_POLICY_QOS_AND_RELPRIO:
1637
1638			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1639			requested.thrp_qos = value1;
1640			requested.thrp_qos_relprio = value2;
1641			DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1642			break;
1643
1644		default:
1645			panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2);
1646			break;
1647	}
1648
1649	if (on_task)
1650		task->requested_policy = requested;
1651	else
1652		thread->requested_policy = requested;
1653}
1654
1655
1656/*
1657 * Gets what you set. Effective values may be different.
1658 */
1659int
1660proc_get_task_policy(task_t     task,
1661                     thread_t   thread,
1662                     int        category,
1663                     int        flavor)
1664{
1665	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1666
1667	int value = 0;
1668
1669	task_lock(task);
1670
1671	struct task_requested_policy requested =
1672	        (on_task) ? task->requested_policy : thread->requested_policy;
1673
1674	switch (flavor) {
1675		case TASK_POLICY_DARWIN_BG:
1676			if (category == TASK_POLICY_EXTERNAL)
1677				value = requested.ext_darwinbg;
1678			else
1679				value = requested.int_darwinbg;
1680			break;
1681		case TASK_POLICY_IOPOL:
1682			if (category == TASK_POLICY_EXTERNAL)
1683				value = proc_tier_to_iopol(requested.ext_iotier,
1684				                            requested.ext_iopassive);
1685			else
1686				value = proc_tier_to_iopol(requested.int_iotier,
1687				                            requested.int_iopassive);
1688			break;
1689		case TASK_POLICY_IO:
1690			if (category == TASK_POLICY_EXTERNAL)
1691				value = requested.ext_iotier;
1692			else
1693				value = requested.int_iotier;
1694			break;
1695		case TASK_POLICY_PASSIVE_IO:
1696			if (category == TASK_POLICY_EXTERNAL)
1697				value = requested.ext_iopassive;
1698			else
1699				value = requested.int_iopassive;
1700			break;
1701		case TASK_POLICY_DARWIN_BG_IOPOL:
1702			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1703			value = proc_tier_to_iopol(requested.bg_iotier, 0);
1704			break;
1705		case TASK_POLICY_ROLE:
1706			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1707			value = requested.t_role;
1708			break;
1709		case TASK_POLICY_SFI_MANAGED:
1710			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1711			value = requested.t_sfi_managed;
1712			break;
1713		case TASK_POLICY_QOS:
1714			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1715			value = requested.thrp_qos;
1716			break;
1717		case TASK_POLICY_QOS_OVERRIDE:
1718			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1719			value = requested.thrp_qos_override;
1720			break;
1721		default:
1722			panic("unknown policy_flavor %d", flavor);
1723			break;
1724	}
1725
1726	task_unlock(task);
1727
1728	return value;
1729}
1730
1731/*
1732 * Variant of proc_get_task_policy() that returns two scalar outputs.
1733 */
1734void
1735proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2)
1736{
1737	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1738
1739	task_lock(task);
1740
1741	struct task_requested_policy requested =
1742	        (on_task) ? task->requested_policy : thread->requested_policy;
1743
1744	switch (flavor) {
1745		/* TASK attributes */
1746		case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1747			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1748			*value1 = requested.t_base_latency_qos;
1749			*value2 = requested.t_base_through_qos;
1750			break;
1751
1752		case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1753			assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1754			*value1 = requested.t_over_latency_qos;
1755			*value2 = requested.t_over_through_qos;
1756			break;
1757
1758		/* THREAD attributes */
1759		case TASK_POLICY_QOS_AND_RELPRIO:
1760			assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1761			*value1 = requested.thrp_qos;
1762			*value2 = requested.thrp_qos_relprio;
1763			break;
1764
1765		default:
1766			panic("unknown policy_flavor %d", flavor);
1767			break;
1768	}
1769
1770	task_unlock(task);
1771}
1772
1773
1774/*
1775 * Functions for querying effective state for relevant subsystems
1776 * ONLY the relevant subsystem should query these.
1777 * NEVER take a value from one of the 'effective' functions and stuff it into a setter.
1778 */
1779
1780int
1781proc_get_effective_task_policy(task_t task, int flavor)
1782{
1783	return proc_get_effective_policy(task, THREAD_NULL, flavor);
1784}
1785
1786int
1787proc_get_effective_thread_policy(thread_t thread, int flavor)
1788{
1789	return proc_get_effective_policy(thread->task, thread, flavor);
1790}
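
/*
 * Hypothetical consumer sketch (illustrative only, not compiled): how a
 * subsystem such as the I/O layer might pull the effective throttle tier and
 * passivity for the current thread.  The function name is invented for this
 * example.
 */
#if 0
static void
example_query_effective_io_policy(int *tier, int *passive)
{
	thread_t self = current_thread();

	/* Which throttle tier applies to this thread's next I/O */
	*tier    = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

	/* Whether that I/O should be issued as passive */
	*passive = proc_get_effective_thread_policy(self, TASK_POLICY_PASSIVE_IO);
}
#endif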
1791
1792/*
1793 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1794 *
1795 * NOTE: This accessor does not take the task lock.
1796 * Notifications of state updates need to be externally synchronized with state queries.
1797 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1798 * within the context of a timer interrupt.  It is also called in KDP context for stackshot.
1799 */
1800static int
1801proc_get_effective_policy(task_t   task,
1802                          thread_t thread,
1803                          int      flavor)
1804{
1805	boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1806	int value = 0;
1807
1808	switch (flavor) {
1809		case TASK_POLICY_DARWIN_BG:
1810			/*
1811			 * This backs the KPI call proc_pidbackgrounded to find
1812			 * out if a pid is backgrounded,
1813			 * as well as proc_get_effective_thread_policy.
1814			 * Its main use is within the timer layer, as well as
1815			 * prioritizing requests to the graphics system.
1816			 * Returns 1 for background mode, 0 for normal mode
1817			 */
1818			if (on_task)
1819				value = task->effective_policy.darwinbg;
1820			else
1821				value = (task->effective_policy.darwinbg ||
1822				          thread->effective_policy.darwinbg) ? 1 : 0;
1823			break;
1824		case TASK_POLICY_IO:
1825			/*
1826			 * The I/O system calls here to find out what throttling tier to apply to an operation.
1827			 * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply
1828			 * a temporary iotier override to make the I/O more aggressive to get the lock
1829			 * owner to release the spinlock.
1830			 */
1831			if (on_task)
1832				value = task->effective_policy.io_tier;
1833			else {
1834				value = MAX(task->effective_policy.io_tier,
1835				             thread->effective_policy.io_tier);
1836				if (thread->iotier_override != THROTTLE_LEVEL_NONE)
1837					value = MIN(value, thread->iotier_override);
1838			}
1839			break;
1840		case TASK_POLICY_PASSIVE_IO:
1841			/*
1842			 * The I/O system calls here to find out whether an operation should be passive.
1843			 * (i.e. not cause operations with lower throttle tiers to be throttled)
1844			 * Returns 1 for passive mode, 0 for normal mode.
1845			 * If a userspace spinlock has applied an override, that I/O should always
1846			 * be passive to avoid self-throttling when the override is removed and lower
1847			 * iotier I/Os are issued.
1848			 */
1849			if (on_task)
1850				value = task->effective_policy.io_passive;
1851			else {
1852				int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier);
1853				boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier);
1854
1855				value = (task->effective_policy.io_passive ||
1856				          thread->effective_policy.io_passive || override_in_effect) ? 1 : 0;
1857			}
1858			break;
1859		case TASK_POLICY_ALL_SOCKETS_BG:
1860			/*
1861			 * do_background_socket() calls this to determine what it should do to the proc's sockets
1862			 * Returns 1 for background mode, 0 for normal mode
1863			 *
1864			 * This consults both thread and task so un-DBGing a thread while the task is BG
1865			 * doesn't get you out of the network throttle.
1866			 */
1867			if (on_task)
1868				value = task->effective_policy.all_sockets_bg;
1869			else
1870				value = (task->effective_policy.all_sockets_bg ||
1871				         thread->effective_policy.all_sockets_bg) ? 1 : 0;
1872			break;
1873		case TASK_POLICY_NEW_SOCKETS_BG:
1874			/*
1875			 * socreate() calls this to determine if it should mark a new socket as background
1876			 * Returns 1 for background mode, 0 for normal mode
1877			 */
1878			if (on_task)
1879				value = task->effective_policy.new_sockets_bg;
1880			else
1881				value = (task->effective_policy.new_sockets_bg ||
1882				          thread->effective_policy.new_sockets_bg) ? 1 : 0;
1883			break;
1884		case TASK_POLICY_LOWPRI_CPU:
1885			/*
1886			 * Returns 1 for low priority cpu mode, 0 for normal mode
1887			 */
1888			if (on_task)
1889				value = task->effective_policy.lowpri_cpu;
1890			else
1891				value = (task->effective_policy.lowpri_cpu ||
1892				          thread->effective_policy.lowpri_cpu) ? 1 : 0;
1893			break;
1894		case TASK_POLICY_SUPPRESSED_CPU:
1895			/*
1896			 * Returns 1 for suppressed cpu mode, 0 for normal mode
1897			 */
1898			assert(on_task);
1899			value = task->effective_policy.t_suppressed_cpu;
1900			break;
1901		case TASK_POLICY_LATENCY_QOS:
1902			/*
1903			 * timer arming calls into here to find out the timer coalescing level
1904			 * Returns a QoS tier (0-6)
1905			 */
1906			if (on_task) {
1907				value = task->effective_policy.t_latency_qos;
1908			} else {
1909				value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos);
1910			}
1911			break;
1912		case TASK_POLICY_THROUGH_QOS:
1913			/*
1914			 * Returns a QoS tier (0-6)
1915			 */
1916			assert(on_task);
1917			value = task->effective_policy.t_through_qos;
1918			break;
1919		case TASK_POLICY_ROLE:
1920			assert(on_task);
1921			value = task->effective_policy.t_role;
1922			break;
1923		case TASK_POLICY_WATCHERS_BG:
1924			assert(on_task);
1925			value = task->effective_policy.t_watchers_bg;
1926			break;
1927		case TASK_POLICY_SFI_MANAGED:
1928			assert(on_task);
1929			value = task->effective_policy.t_sfi_managed;
1930			break;
1931		case TASK_POLICY_QOS:
1932			assert(!on_task);
1933			value = thread->effective_policy.thep_qos;
1934			break;
1935		default:
1936			panic("unknown policy_flavor %d", flavor);
1937			break;
1938	}
1939
1940	return value;
1941}
1942
1943/*
1944 * Convert from IOPOL_* values to throttle tiers.
1945 *
1946 * TODO: Can this be made more compact, like an array lookup?
1947 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
1948 */
1949
1950static void
1951proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
1952{
1953	*passive = 0;
1954	*tier = 0;
1955	switch (iopolicy) {
1956		case IOPOL_IMPORTANT:
1957			*tier = THROTTLE_LEVEL_TIER0;
1958			break;
1959		case IOPOL_PASSIVE:
1960			*tier = THROTTLE_LEVEL_TIER0;
1961			*passive = 1;
1962			break;
1963		case IOPOL_STANDARD:
1964			*tier = THROTTLE_LEVEL_TIER1;
1965			break;
1966		case IOPOL_UTILITY:
1967			*tier = THROTTLE_LEVEL_TIER2;
1968			break;
1969		case IOPOL_THROTTLE:
1970			*tier = THROTTLE_LEVEL_TIER3;
1971			break;
1972		default:
1973			panic("unknown I/O policy %d", iopolicy);
1974			break;
1975	}
1976}
1977
1978static int
1979proc_tier_to_iopol(int tier, int passive)
1980{
1981	if (passive == 1) {
1982		switch (tier) {
1983			case THROTTLE_LEVEL_TIER0:
1984				return IOPOL_PASSIVE;
1985				break;
1986			default:
1987				panic("unknown passive tier %d", tier);
1988				return IOPOL_DEFAULT;
1989				break;
1990		}
1991	} else {
1992		switch (tier) {
1993			case THROTTLE_LEVEL_NONE:
1994			case THROTTLE_LEVEL_TIER0:
1995				return IOPOL_DEFAULT;
1996				break;
1997			case THROTTLE_LEVEL_TIER1:
1998				return IOPOL_STANDARD;
1999				break;
2000			case THROTTLE_LEVEL_TIER2:
2001				return IOPOL_UTILITY;
2002				break;
2003			case THROTTLE_LEVEL_TIER3:
2004				return IOPOL_THROTTLE;
2005				break;
2006			default:
2007				panic("unknown tier %d", tier);
2008				return IOPOL_DEFAULT;
2009				break;
2010		}
2011	}
2012}
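
/*
 * One possible shape for the table-driven alternative suggested by the TODO
 * above.  Illustrative sketch only (not compiled); the switch statements
 * above remain the implementation, and the struct/function names here are
 * invented for the example.
 */
#if 0
struct iopol_tier_entry {
	int       tier;
	int       passive;
	boolean_t valid;
};

static const struct iopol_tier_entry iopol_tier_table[] = {
	[IOPOL_IMPORTANT] = { THROTTLE_LEVEL_TIER0, 0, TRUE },
	[IOPOL_PASSIVE]   = { THROTTLE_LEVEL_TIER0, 1, TRUE },
	[IOPOL_STANDARD]  = { THROTTLE_LEVEL_TIER1, 0, TRUE },
	[IOPOL_UTILITY]   = { THROTTLE_LEVEL_TIER2, 0, TRUE },
	[IOPOL_THROTTLE]  = { THROTTLE_LEVEL_TIER3, 0, TRUE },
};

static void
proc_iopol_to_tier_table(int iopolicy, int *tier, int *passive)
{
	if (iopolicy < 0 ||
	    iopolicy >= (int)(sizeof(iopol_tier_table) / sizeof(iopol_tier_table[0])) ||
	    !iopol_tier_table[iopolicy].valid)
		panic("unknown I/O policy %d", iopolicy);

	*tier    = iopol_tier_table[iopolicy].tier;
	*passive = iopol_tier_table[iopolicy].passive;
}
#endif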
2013
2014/* apply internal backgrounding for workqueue threads */
2015int
2016proc_apply_workq_bgthreadpolicy(thread_t thread)
2017{
2018	if (thread == THREAD_NULL)
2019		return ESRCH;
2020
2021	proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2022	                     TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE);
2023
2024	return(0);
2025}
2026
2027/*
2028 * remove internal backgrounding for workqueue threads
2029 * does NOT go find sockets created while BG and unbackground them
2030 */
2031int
2032proc_restore_workq_bgthreadpolicy(thread_t thread)
2033{
2034	if (thread == THREAD_NULL)
2035		return ESRCH;
2036
2037	proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2038	                     TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE);
2039
2040	return(0);
2041}
2042
2043/* here for temporary compatibility */
2044int
2045proc_setthread_saved_importance(__unused thread_t thread, __unused int importance)
2046{
2047	return(0);
2048}
2049
2050/*
2051 * Set an override on the thread which is consulted with a
2052 * higher priority than the task/thread policy. This should
2053 * only be set for temporary grants until the thread
2054 * returns to the userspace boundary
2055 *
2056 * We use atomic operations to swap in the override, with
2057 * the assumption that the thread itself can
2058 * read the override and clear it on return to userspace.
2059 *
2060 * No locking is performed, since it is acceptable to see
2061 * a stale override for one loop through throttle_lowpri_io().
2062 * However, a thread reference must be held on the thread.
2063 */
2064
2065void set_thread_iotier_override(thread_t thread, int policy)
2066{
2067	int current_override;
2068
2069	/* Let most aggressive I/O policy win until user boundary */
2070	do {
2071		current_override = thread->iotier_override;
2072
2073		if (current_override != THROTTLE_LEVEL_NONE)
2074			policy = MIN(current_override, policy);
2075
2076		if (current_override == policy) {
2077			/* no effective change */
2078			return;
2079		}
2080	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2081
2082	/*
2083	 * Since the thread may be currently throttled,
2084	 * re-evaluate tiers and potentially break out
2085	 * of an msleep
2086	 */
2087	rethrottle_thread(thread->uthread);
2088}
2089
2090/*
2091 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2092 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2093 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2094 * priority thread. In these cases, we attempt to propagate the priority token, as long
2095 * as the subsystem informs us of the relationships between the threads. The userspace
2096 * synchronization subsystem should maintain the information of owner->resource and
2097 * resource->waiters itself.
2098 *
2099 * The add/remove routines can return failure if the target of the override cannot be
2100 * found, perhaps because the resource subsystem doesn't have an accurate view of the
2101 * resource owner in the face of race conditions.
2102 */
2103
2104boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource)
2105{
2106	thread_t	self = current_thread();
2107	int			resource_count;
2108	struct task_pend_token pend_token = {};
2109
2110	/* XXX move to thread mutex when thread policy does */
2111	task_lock(task);
2112
2113	/*
2114	 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2115	 * to the thread
2116	 */
2117
2118	if (thread != THREAD_NULL) {
2119		assert(task == thread->task);
2120	} else {
2121		if (tid == self->thread_id) {
2122			thread = self;
2123		} else {
2124			thread = task_findtid(task, tid);
2125
2126			if (thread == THREAD_NULL) {
2127				KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2128									  tid, 0, 0xdead, 0, 0);
2129				task_unlock(task);
2130				return FALSE;
2131			}
2132		}
2133	}
2134
2135	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2136						  thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2137
2138	DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos,
2139		uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource);
2140
2141	if (first_override_for_resource) {
2142		resource_count = ++thread->usynch_override_contended_resource_count;
2143	} else {
2144		resource_count = thread->usynch_override_contended_resource_count;
2145	}
2146
2147	struct task_requested_policy requested = thread->requested_policy;
2148
2149	if (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)
2150		requested.thrp_qos_override = override_qos;
2151	else
2152		requested.thrp_qos_override = MAX(requested.thrp_qos_override, override_qos);
2153
2154	thread->requested_policy = requested;
2155
2156	task_policy_update_locked(task, thread, &pend_token);
2157
2158	thread_reference(thread);
2159
2160	task_unlock(task);
2161
2162	task_policy_update_complete_unlocked(task, thread, &pend_token);
2163
2164	DTRACE_BOOST3(qos_add_override_post, uint64_t, requested.thrp_qos_override,
2165		uint64_t, thread->effective_policy.thep_qos, int, resource_count);
2166
2167	thread_deallocate(thread);
2168
2169	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2170						  requested.thrp_qos_override, resource_count, 0, 0, 0);
2171
2172	return TRUE;
2173}
2174
2175boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid)
2176{
2177	thread_t	self = current_thread();
2178	int			resource_count;
2179	struct task_pend_token pend_token = {};
2180
2181	/* XXX move to thread mutex when thread policy does */
2182	task_lock(task);
2183
2184	/*
2185	 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2186	 * to the thread
2187	 */
2188	if (thread != THREAD_NULL) {
2189		assert(task == thread->task);
2190	} else {
2191		if (tid == self->thread_id) {
2192			thread = self;
2193		} else {
2194			thread = task_findtid(task, tid);
2195
2196			if (thread == THREAD_NULL) {
2197				KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2198									  tid, 0, 0xdead, 0, 0);
2199				task_unlock(task);
2200				return FALSE;
2201			}
2202		}
2203	}
2204
2205	resource_count = --thread->usynch_override_contended_resource_count;
2206
2207	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2208						  thread_tid(thread), resource_count, 0, 0, 0);
2209
2210	if (0 == resource_count) {
2211		thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2212
2213		task_policy_update_locked(task, thread, &pend_token);
2214
2215		thread_reference(thread);
2216
2217		task_unlock(task);
2218
2219		task_policy_update_complete_unlocked(task, thread, &pend_token);
2220
2221		thread_deallocate(thread);
2222	} else if (0 > resource_count) {
2223		// panic("usynch_override_contended_resource_count underflow for thread %p", thread);
2224		task_unlock(task);
2225	} else {
2226		task_unlock(task);
2227	}
2228
2229	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2230						  0, 0, 0, 0, 0);
2231
2232	return TRUE;
2233}
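
/*
 * Hypothetical caller sketch (illustrative only, not compiled): a userspace
 * synchronization shim that has learned that 'owner_tid' in 'task' holds a
 * resource needed by a higher-QoS waiter.  The shim tracks whether this is
 * the first override it has requested for the resource and removes the
 * override once the owner releases it.  The function names and the choice of
 * THREAD_QOS_USER_INITIATED are invented for the example.
 */
#if 0
static void
example_boost_resource_owner(task_t task, uint64_t owner_tid, boolean_t first_for_resource)
{
	/* Returns FALSE if the owner thread could not be found (e.g. it already exited) */
	if (!proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
	                                  THREAD_QOS_USER_INITIATED, first_for_resource)) {
		/* Stale ownership information; the caller should re-resolve the owner */
		return;
	}
}

static void
example_unboost_resource_owner(task_t task, uint64_t owner_tid)
{
	/* Drops one contended-resource count; the override clears when it reaches zero */
	(void)proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid);
}
#endif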
2234
2235/* TODO: remove this variable when interactive daemon audit period is over */
2236extern boolean_t ipc_importance_interactive_receiver;
2237
2238/*
2239 * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
2240 *
2241 * TODO: Make this function more table-driven instead of ad-hoc
2242 */
2243void
2244proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp,
2245                          ipc_port_t * portwatch_ports, int portwatch_count)
2246{
2247	struct task_pend_token pend_token = {};
2248
2249	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2250				  (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
2251				  audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2252				  apptype, 0);
2253
2254	switch (apptype) {
2255		case TASK_APPTYPE_APP_TAL:
2256		case TASK_APPTYPE_APP_DEFAULT:
2257			/* Apps become donors via the 'live-donor' flag instead of the static donor flag */
2258			task_importance_mark_donor(task, FALSE);
2259			task_importance_mark_live_donor(task, TRUE);
2260			task_importance_mark_receiver(task, FALSE);
2261			/* Apps are de-nap receivers on desktop for suppression behaviors */
2262			task_importance_mark_denap_receiver(task, TRUE);
2263			break;
2264
2265		case TASK_APPTYPE_DAEMON_INTERACTIVE:
2266			task_importance_mark_donor(task, TRUE);
2267			task_importance_mark_live_donor(task, FALSE);
2268
2269			/*
2270			 * A boot arg controls whether interactive daemons are importance receivers.
2271			 * Normally, they are not.  But for testing their behavior as an adaptive
2272			 * daemon, the boot-arg can be set.
2273			 *
2274			 * TODO: remove this when the interactive daemon audit period is over.
2275			 */
2276			task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
2277			task_importance_mark_denap_receiver(task, FALSE);
2278			break;
2279
2280		case TASK_APPTYPE_DAEMON_STANDARD:
2281			task_importance_mark_donor(task, TRUE);
2282			task_importance_mark_live_donor(task, FALSE);
2283			task_importance_mark_receiver(task, FALSE);
2284			task_importance_mark_denap_receiver(task, FALSE);
2285			break;
2286
2287		case TASK_APPTYPE_DAEMON_ADAPTIVE:
2288			task_importance_mark_donor(task, FALSE);
2289			task_importance_mark_live_donor(task, FALSE);
2290			task_importance_mark_receiver(task, TRUE);
2291			task_importance_mark_denap_receiver(task, FALSE);
2292			break;
2293
2294		case TASK_APPTYPE_DAEMON_BACKGROUND:
2295			task_importance_mark_donor(task, FALSE);
2296			task_importance_mark_live_donor(task, FALSE);
2297			task_importance_mark_receiver(task, FALSE);
2298			task_importance_mark_denap_receiver(task, FALSE);
2299			break;
2300
2301		case TASK_APPTYPE_NONE:
2302			break;
2303	}
2304
2305	if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2306		int portwatch_boosts = 0;
2307
2308		for (int i = 0; i < portwatch_count; i++) {
2309			ipc_port_t port = NULL;
2310
2311			if ((port = portwatch_ports[i]) != NULL) {
2312				int boost = 0;
2313				task_add_importance_watchport(task, port, &boost);
2314				portwatch_boosts += boost;
2315			}
2316		}
2317
2318		if (portwatch_boosts > 0) {
2319			task_importance_hold_internal_assertion(task, portwatch_boosts);
2320		}
2321	}
2322
2323	task_lock(task);
2324
2325	if (apptype == TASK_APPTYPE_APP_TAL) {
2326		/* TAL starts off enabled by default */
2327		task->requested_policy.t_tal_enabled = 1;
2328	}
2329
2330	if (apptype != TASK_APPTYPE_NONE) {
2331		task->requested_policy.t_apptype = apptype;
2333	}
2334
2335	if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2336		task->requested_policy.t_qos_clamp = qos_clamp;
2337	}
2338
2339	task_policy_update_locked(task, THREAD_NULL, &pend_token);
2340
2341	task_unlock(task);
2342
2343	/* Ensure the donor bit is updated to be in sync with the new live donor status */
2344	pend_token.tpt_update_live_donor = 1;
2345
2346	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
2347
2348	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2349				  (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
2350				  audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2351				  task_is_importance_receiver(task), 0);
2352}
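
/*
 * A sketch of the table-driven form suggested by the TODO above: each
 * apptype row records the four importance marks applied at spawn.
 * Illustrative only (not compiled); the switch above is authoritative, and it
 * additionally special-cases the interactive-daemon receiver boot-arg, which
 * a static table cannot express.  Names here are invented for the example.
 */
#if 0
struct apptype_importance_row {
	boolean_t donor;
	boolean_t live_donor;
	boolean_t receiver;
	boolean_t denap_receiver;
};

static const struct apptype_importance_row apptype_rows[] = {
	[TASK_APPTYPE_APP_DEFAULT]        = { FALSE, TRUE,  FALSE, TRUE  },
	[TASK_APPTYPE_APP_TAL]            = { FALSE, TRUE,  FALSE, TRUE  },
	[TASK_APPTYPE_DAEMON_INTERACTIVE] = { TRUE,  FALSE, FALSE, FALSE },
	[TASK_APPTYPE_DAEMON_STANDARD]    = { TRUE,  FALSE, FALSE, FALSE },
	[TASK_APPTYPE_DAEMON_ADAPTIVE]    = { FALSE, FALSE, TRUE,  FALSE },
	[TASK_APPTYPE_DAEMON_BACKGROUND]  = { FALSE, FALSE, FALSE, FALSE },
};

static void
proc_apply_apptype_importance(task_t task, int apptype)
{
	if (apptype == TASK_APPTYPE_NONE ||
	    apptype < 0 ||
	    apptype >= (int)(sizeof(apptype_rows) / sizeof(apptype_rows[0])))
		return;

	const struct apptype_importance_row *row = &apptype_rows[apptype];

	task_importance_mark_donor(task, row->donor);
	task_importance_mark_live_donor(task, row->live_donor);
	task_importance_mark_receiver(task, row->receiver);
	task_importance_mark_denap_receiver(task, row->denap_receiver);
}
#endif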
2353
2354/* Set up the primordial thread's QoS */
2355void
2356task_set_main_thread_qos(task_t task, thread_t main_thread) {
2357	struct task_pend_token pend_token = {};
2358
2359	assert(main_thread->task == task);
2360
2361	task_lock(task);
2362
2363	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2364	                          (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2365	                          audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2366	                          main_thread->requested_policy.thrp_qos, 0);
2367
2368	int primordial_qos = THREAD_QOS_UNSPECIFIED;
2369
2370	int qos_clamp = task->requested_policy.t_qos_clamp;
2371
2372	switch (task->requested_policy.t_apptype) {
2373		case TASK_APPTYPE_APP_TAL:
2374		case TASK_APPTYPE_APP_DEFAULT:
2375			primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2376			break;
2377
2378		case TASK_APPTYPE_DAEMON_INTERACTIVE:
2379		case TASK_APPTYPE_DAEMON_STANDARD:
2380		case TASK_APPTYPE_DAEMON_ADAPTIVE:
2381			primordial_qos = THREAD_QOS_LEGACY;
2382			break;
2383
2384		case TASK_APPTYPE_DAEMON_BACKGROUND:
2385			primordial_qos = THREAD_QOS_BACKGROUND;
2386			break;
2387	}
2388
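	/*
	 * QoS tiers are ordered numerically (UNSPECIFIED lowest, USER_INTERACTIVE
	 * highest), so MIN() picks the lower of the clamp and the apptype default;
	 * e.g. a UTILITY clamp pulls an app's USER_INTERACTIVE main thread down
	 * to UTILITY.
	 */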
2389	if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2390		if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2391			primordial_qos = MIN(qos_clamp, primordial_qos);
2392		} else {
2393			primordial_qos = qos_clamp;
2394		}
2395	}
2396
2397	main_thread->requested_policy.thrp_qos = primordial_qos;
2398
2399	task_policy_update_locked(task, main_thread, &pend_token);
2400
2401	task_unlock(task);
2402
2403	task_policy_update_complete_unlocked(task, main_thread, &pend_token);
2404
2405	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2406	                          (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2407	                          audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2408	                          primordial_qos, 0);
2409}
2410
2411/* for process_policy to check before attempting to set */
2412boolean_t
2413proc_task_is_tal(task_t task)
2414{
2415	return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2416}
2417
2418/* for telemetry */
2419integer_t
2420task_grab_latency_qos(task_t task)
2421{
2422	return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2423}
2424
2425/* update the darwin background action state in the flags field for libproc */
2426int
2427proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2428{
2429	if (task->requested_policy.ext_darwinbg)
2430		*flagsp |= PROC_FLAG_EXT_DARWINBG;
2431
2432	if (task->requested_policy.int_darwinbg)
2433		*flagsp |= PROC_FLAG_DARWINBG;
2434
2436	if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
2437	    task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL)
2438		*flagsp |= PROC_FLAG_APPLICATION;
2439
2440	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE)
2441		*flagsp |= PROC_FLAG_ADAPTIVE;
2442
2443	if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1)
2444		*flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2445
2446	if (task_is_importance_donor(task))
2447		*flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2448
2449	if (task->effective_policy.t_sup_active)
2450		*flagsp |= PROC_FLAG_SUPPRESSED;
2451
2452	return(0);
2453}
2454
2455/* All per-thread state is in the first 32-bits of the bitfield */
2456void
2457proc_get_thread_policy(thread_t thread, thread_policy_state_t info)
2458{
2459	task_t task = thread->task;
2460	task_lock(task);
2461	info->requested = (integer_t)task_requested_bitfield(task, thread);
2462	info->effective = (integer_t)task_effective_bitfield(task, thread);
2463	info->pending   = 0;
2464	task_unlock(task);
2465}
2466
2467/*
2468 * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2469 * The current scheme packs as much data into a single tracepoint as it can.
2470 *
2471 * Each task/thread requested/effective structure is 64 bits in size. Any
2472 * given tracepoint will emit either requested or effective data, but not both.
2473 *
2474 * A tracepoint may emit any of task, thread, or task & thread data.
2475 *
2476 * The type of data emitted varies with pointer size. Where possible, both
2477 * task and thread data are emitted. In LP32 systems, the first and second
2478 * halves of either the task or thread data are emitted.
2479 *
2480 * The code uses uintptr_t array indexes instead of high/low to avoid
2481 * confusion WRT big vs little endian.
2482 *
2483 * The truth table for the tracepoint data functions is below, and has the
2484 * following invariants:
2485 *
2486 * 1) task and thread are uintptr_t*
2487 * 2) task may never be NULL
2488 *
2489 *
2490 *                                     LP32            LP64
2491 * trequested_0(task, NULL)            task[0]         task[0]
2492 * trequested_1(task, NULL)            task[1]         NULL
2493 * trequested_0(task, thread)          thread[0]       task[0]
2494 * trequested_1(task, thread)          thread[1]       thread[0]
2495 *
2496 * Basically, you get a full task or thread on LP32, and both on LP64.
2497 *
2498 * The uintptr_t munging here is squicky enough to deserve a comment.
2499 *
2500 * The variables we are accessing are laid out in memory like this:
2501 *
2502 * [            LP64 uintptr_t  0          ]
2503 * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2504 *
2505 *      1   2   3   4     5   6   7   8
2506 *
2507 */
2508
2509static uintptr_t
2510trequested_0(task_t task, thread_t thread)
2511{
2512	assert(task);
2513	_Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2514	_Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2515
2516	uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2517	return raw[0];
2518}
2519
2520static uintptr_t
2521trequested_1(task_t task, thread_t thread)
2522{
2523	assert(task);
2524	_Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2525	_Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2526
2527#if defined __LP64__
2528	return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy;
2529#else
2530	uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2531	return raw[1];
2532#endif
2533}
2534
2535static uintptr_t
2536teffective_0(task_t task, thread_t thread)
2537{
2538	assert(task);
2539	_Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2540	_Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2541
2542	uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2543	return raw[0];
2544}
2545
2546static uintptr_t
2547teffective_1(task_t task, thread_t thread)
2548{
2549	assert(task);
2550	_Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2551	_Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2552
2553#if defined __LP64__
2554	return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy;
2555#else
2556	uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2557	return raw[1];
2558#endif
2559}
2560
2561/* dump pending for tracepoint */
2562static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); }
2563
2564uint64_t
2565task_requested_bitfield(task_t task, thread_t thread)
2566{
2567	uint64_t bits = 0;
2568	struct task_requested_policy requested =
2569	        (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;
2570
2571	bits |= (requested.int_darwinbg         ? POLICY_REQ_INT_DARWIN_BG  : 0);
2572	bits |= (requested.ext_darwinbg         ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2573	bits |= (requested.int_iotier           ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2574	bits |= (requested.ext_iotier           ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2575	bits |= (requested.int_iopassive        ? POLICY_REQ_INT_PASSIVE_IO : 0);
2576	bits |= (requested.ext_iopassive        ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2577	bits |= (requested.bg_iotier            ? (((uint64_t)requested.bg_iotier)  << POLICY_REQ_BG_IOTIER_SHIFT)   : 0);
2578	bits |= (requested.terminated           ? POLICY_REQ_TERMINATED     : 0);
2579
2580	bits |= (requested.th_pidbind_bg        ? POLICY_REQ_PIDBIND_BG     : 0);
2581	bits |= (requested.th_workq_bg          ? POLICY_REQ_WORKQ_BG       : 0);
2582
2583	if (thread != THREAD_NULL) {
2584		bits |= (requested.thrp_qos     ? (((uint64_t)requested.thrp_qos)   << POLICY_REQ_TH_QOS_SHIFT)  : 0);
2585		bits |= (requested.thrp_qos_override     ? (((uint64_t)requested.thrp_qos_override)   << POLICY_REQ_TH_QOS_OVER_SHIFT)  : 0);
2586	}
2587
2588	bits |= (requested.t_boosted            ? POLICY_REQ_BOOSTED        : 0);
2589	bits |= (requested.t_tal_enabled        ? POLICY_REQ_TAL_ENABLED    : 0);
2590	bits |= (requested.t_apptype            ? (((uint64_t)requested.t_apptype)    << POLICY_REQ_APPTYPE_SHIFT)  : 0);
2591	bits |= (requested.t_role               ? (((uint64_t)requested.t_role)       << POLICY_REQ_ROLE_SHIFT)     : 0);
2592
2593	bits |= (requested.t_sup_active         ? POLICY_REQ_SUP_ACTIVE         : 0);
2594	bits |= (requested.t_sup_lowpri_cpu     ? POLICY_REQ_SUP_LOWPRI_CPU     : 0);
2595	bits |= (requested.t_sup_cpu            ? POLICY_REQ_SUP_CPU            : 0);
2596	bits |= (requested.t_sup_timer          ? (((uint64_t)requested.t_sup_timer)  << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2597	bits |= (requested.t_sup_throughput     ? (((uint64_t)requested.t_sup_throughput)   << POLICY_REQ_SUP_THROUGHPUT_SHIFT)   : 0);
2598	bits |= (requested.t_sup_disk           ? POLICY_REQ_SUP_DISK_THROTTLE  : 0);
2599	bits |= (requested.t_sup_cpu_limit      ? POLICY_REQ_SUP_CPU_LIMIT      : 0);
2600	bits |= (requested.t_sup_suspend        ? POLICY_REQ_SUP_SUSPEND        : 0);
2601	bits |= (requested.t_sup_bg_sockets     ? POLICY_REQ_SUP_BG_SOCKETS     : 0);
2602	bits |= (requested.t_base_latency_qos   ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2603	bits |= (requested.t_over_latency_qos   ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2604	bits |= (requested.t_base_through_qos   ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2605	bits |= (requested.t_over_through_qos   ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
2606	bits |= (requested.t_sfi_managed        ? POLICY_REQ_SFI_MANAGED        : 0);
2607	bits |= (requested.t_qos_clamp          ? (((uint64_t)requested.t_qos_clamp)        << POLICY_REQ_QOS_CLAMP_SHIFT)        : 0);
2608
2609	return bits;
2610}
2611
2612uint64_t
2613task_effective_bitfield(task_t task, thread_t thread)
2614{
2615	uint64_t bits = 0;
2616	struct task_effective_policy effective =
2617	        (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy;
2618
2619	bits |= (effective.io_tier              ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2620	bits |= (effective.io_passive           ? POLICY_EFF_IO_PASSIVE     : 0);
2621	bits |= (effective.darwinbg             ? POLICY_EFF_DARWIN_BG      : 0);
2622	bits |= (effective.lowpri_cpu           ? POLICY_EFF_LOWPRI_CPU     : 0);
2623	bits |= (effective.terminated           ? POLICY_EFF_TERMINATED     : 0);
2624	bits |= (effective.all_sockets_bg       ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2625	bits |= (effective.new_sockets_bg       ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2626	bits |= (effective.bg_iotier            ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
2627	bits |= (effective.qos_ui_is_urgent     ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
2628
2629	if (thread != THREAD_NULL)
2630		bits |= (effective.thep_qos     ? (((uint64_t)effective.thep_qos)   << POLICY_EFF_TH_QOS_SHIFT)  : 0);
2631
2632	bits |= (effective.t_tal_engaged        ? POLICY_EFF_TAL_ENGAGED    : 0);
2633	bits |= (effective.t_suspended          ? POLICY_EFF_SUSPENDED      : 0);
2634	bits |= (effective.t_watchers_bg        ? POLICY_EFF_WATCHERS_BG    : 0);
2635	bits |= (effective.t_sup_active         ? POLICY_EFF_SUP_ACTIVE     : 0);
2636	bits |= (effective.t_suppressed_cpu     ? POLICY_EFF_SUP_CPU        : 0);
2637	bits |= (effective.t_role               ? (((uint64_t)effective.t_role)        << POLICY_EFF_ROLE_SHIFT)        : 0);
2638	bits |= (effective.t_latency_qos        ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2639	bits |= (effective.t_through_qos        ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2640	bits |= (effective.t_sfi_managed        ? POLICY_EFF_SFI_MANAGED    : 0);
2641	bits |= (effective.t_qos_ceiling        ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
2642
2643	return bits;
2644}
2645
2646
2647/*
2648 * Resource usage and CPU related routines
2649 */
2650
2651int
2652proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
2653{
2654
2655	int error = 0;
2656	int scope;
2657
2658	task_lock(task);
2659
2661	error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
2662	task_unlock(task);
2663
2664	/*
2665	 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
2666	 */
2667	if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2668		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
2669	} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2670		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
2671	} else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
2672		*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2673	}
2674
2675	return(error);
2676}
2677
2678/*
2679 * Configure the default CPU usage monitor parameters.
2680 *
2681 * For tasks which have this mechanism activated: if any thread in the
2682 * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
2683 */
2684void
2685proc_init_cpumon_params(void)
2686{
2687	if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
2688		sizeof (proc_max_cpumon_percentage))) {
2689	 	proc_max_cpumon_percentage = DEFAULT_CPUMON_PERCENTAGE;
2690	}
2691
2692	if (proc_max_cpumon_percentage > 100) {
2693		proc_max_cpumon_percentage = 100;
2694	}
2695
2696	/* The interval should be specified in seconds. */
2697	if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
2698	 	sizeof (proc_max_cpumon_interval))) {
2699	 	proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
2700	}
2701
2702	proc_max_cpumon_interval *= NSEC_PER_SEC;
2703
2704	/* TEMPORARY boot arg to control App suppression */
2705	PE_parse_boot_argn("task_policy_suppression_disable",
2706			   &task_policy_suppression_disable,
2707			   sizeof(task_policy_suppression_disable));
2708}
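
/*
 * Example (illustrative only): booting with
 * "max_cpumon_percentage=65 max_cpumon_interval=300" caps the per-thread CPU
 * monitor for unentitled processes at 65% of a 300-second window.
 */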
2709
2710/*
2711 * Currently supported configurations for CPU limits.
2712 *
2713 * Policy				| Deadline-based CPU limit | Percentage-based CPU limit
2714 * -------------------------------------+--------------------------+------------------------------
2715 * PROC_POLICY_RSRCACT_THROTTLE		| ENOTSUP		   | Task-wide scope only
2716 * PROC_POLICY_RSRCACT_SUSPEND		| Task-wide scope only	   | ENOTSUP
2717 * PROC_POLICY_RSRCACT_TERMINATE	| Task-wide scope only	   | ENOTSUP
2718 * PROC_POLICY_RSRCACT_NOTIFY_KQ	| Task-wide scope only	   | ENOTSUP
2719 * PROC_POLICY_RSRCACT_NOTIFY_EXC	| ENOTSUP		   | Per-thread scope only
2720 *
2721 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
2722 * after the specified amount of wallclock time has elapsed.
2723 *
2724 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
2725 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
2726 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
2727 * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
2728 *
2729 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
2730 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
2731 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
2732 * but the potential consumer of the API at the time was insisting on wallclock time instead.
2733 *
2734 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
2735 * CPU limit. All other types of notifications force task-wide scope for the limit.
2736 */
2737int
2738proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
2739	int cpumon_entitled)
2740{
2741	int error = 0;
2742	int scope;
2743
2744 	/*
2745 	 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
2746 	 */
2747 	switch (policy) {
2748 	// If no policy is explicitly given, the default is to throttle.
2749 	case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
2750	case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
2751		if (deadline != 0)
2752			return (ENOTSUP);
2753		scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2754		break;
2755	case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
2756	case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
2757	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
2758		if (percentage != 0)
2759			return (ENOTSUP);
2760		scope = TASK_RUSECPU_FLAGS_DEADLINE;
2761		break;
2762 	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
2763		if (deadline != 0)
2764			return (ENOTSUP);
2765		scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2766#ifdef CONFIG_NOMONITORS
2767		return (error);
2768#endif /* CONFIG_NOMONITORS */
2769		break;
2770	default:
2771		return (EINVAL);
2772	}
2773
2774	task_lock(task);
2775	if (task != current_task()) {
2776		task->policy_ru_cpu_ext = policy;
2777	} else {
2778		task->policy_ru_cpu = policy;
2779	}
2780	error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
2781	task_unlock(task);
2782	return(error);
2783}
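
/*
 * Hypothetical caller sketch (illustrative only, not compiled) showing two
 * rows of the matrix above.  The function name and the 50% / 5-minute
 * numbers are invented for the example.
 */
#if 0
static void
example_configure_cpu_limits(task_t task)
{
	/* Task-wide scope: limit the task to 50% CPU over a 5-minute interval (blocks when exceeded) */
	(void)proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE,
	                             50, 5 * 60 * NSEC_PER_SEC, 0, 0);

	/* Per-thread scope: deliver EXC_RESOURCE using the default cpumon parameters */
	(void)proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
	                             TASK_POLICY_CPUMON_DEFAULTS, -1ULL, 0, 0);
}
#endif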
2784
2785int
2786proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
2787{
2788	int error = 0;
2789	int action;
2790	void * bsdinfo = NULL;
2791
2792	task_lock(task);
2793	if (task != current_task()) {
2794		task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2795	} else {
2796		task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2797	}
2798
2799	error = task_clear_cpuusage_locked(task, cpumon_entitled);
2800	if (error != 0)
2801		goto out;
2802
2803	action = task->applied_ru_cpu;
2804	if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2805		/* reset action */
2806		task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2807	}
2808	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2809		bsdinfo = task->bsd_info;
2810		task_unlock(task);
2811		proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2812		goto out1;
2813	}
2814
2815out:
2816	task_unlock(task);
2817out1:
2818	return(error);
2819
2820}
2821
2822/* used to apply resource limit related actions */
2823static int
2824task_apply_resource_actions(task_t task, int type)
2825{
2826	int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2827	void * bsdinfo = NULL;
2828
2829	switch (type) {
2830		case TASK_POLICY_CPU_RESOURCE_USAGE:
2831			break;
2832		case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
2833		case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
2834		case TASK_POLICY_DISK_RESOURCE_USAGE:
2835		case TASK_POLICY_NETWORK_RESOURCE_USAGE:
2836		case TASK_POLICY_POWER_RESOURCE_USAGE:
2837			return(0);
2838
2839		default:
2840			return(1);
2841	};
2842
2843	/* only cpu actions for now */
2844	task_lock(task);
2845
2846	if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2847		/* apply action */
2848		task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
2849		action = task->applied_ru_cpu_ext;
2850	} else {
2851		action = task->applied_ru_cpu_ext;
2852	}
2853
2854	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2855		bsdinfo = task->bsd_info;
2856		task_unlock(task);
2857		proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2858	} else
2859		task_unlock(task);
2860
2861	return(0);
2862}
2863
2864/*
2865 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
2866 * only allows for one at a time. This means that if there is a per-thread limit active, the other
2867 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
2868 * to the caller, and prefer that, but there's no need for that at the moment.
2869 */
2870int
2871task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
2872{
2873	*percentagep = 0;
2874	*intervalp = 0;
2875	*deadlinep = 0;
2876
2877	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
2878		*scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2879		*percentagep = task->rusage_cpu_perthr_percentage;
2880		*intervalp = task->rusage_cpu_perthr_interval;
2881	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
2882		*scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2883		*percentagep = task->rusage_cpu_percentage;
2884		*intervalp = task->rusage_cpu_interval;
2885	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
2886		*scope = TASK_RUSECPU_FLAGS_DEADLINE;
2887		*deadlinep = task->rusage_cpu_deadline;
2888	} else {
2889		*scope = 0;
2890	}
2891
2892	return(0);
2893}
2894
2895/*
2896 * Disable the CPU usage monitor for the task. Return value indicates
2897 * if the mechanism was actually enabled.
2898 */
2899int
2900task_disable_cpumon(task_t task) {
2901	thread_t thread;
2902
2903	task_lock_assert_owned(task);
2904
2905	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
2906		return (KERN_INVALID_ARGUMENT);
2907	}
2908
2909#if CONFIG_TELEMETRY
2910	/*
2911	 * Disable task-wide telemetry if it was ever enabled by the CPU usage
2912	 * monitor's warning zone.
2913	 */
2914	telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
2915#endif
2916
2917	/*
2918	 * Disable the monitor for the task, and propagate that change to each thread.
2919	 */
2920	task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
2921	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2922		set_astledger(thread);
2923	}
2924	task->rusage_cpu_perthr_percentage = 0;
2925	task->rusage_cpu_perthr_interval = 0;
2926
2927	return (KERN_SUCCESS);
2928}
2929
2930int
2931task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
2932{
2933	thread_t thread;
2934	uint64_t abstime = 0;
2935	uint64_t limittime = 0;
2936
2937	lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
2938
2939	/* By default, refill once per second */
2940	if (interval == 0)
2941		interval = NSEC_PER_SEC;
2942
2943	if (percentage != 0) {
2944		if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2945			boolean_t warn = FALSE;
2946
2947			/*
2948			 * A per-thread CPU limit on a task generates an exception
2949			 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
2950			 * exceeds the limit.
2951			 */
2952
2953			if (percentage == TASK_POLICY_CPUMON_DISABLE) {
2954				if (cpumon_entitled) {
2955					task_disable_cpumon(task);
2956					return (0);
2957				}
2958
2959				/*
2960				 * This task wishes to disable the CPU usage monitor, but it's
2961				 * missing the required entitlement:
2962				 *     com.apple.private.kernel.override-cpumon
2963				 *
2964				 * Instead, treat this as a request to reset its params
2965				 * back to the defaults.
2966				 */
2967				warn = TRUE;
2968				percentage = TASK_POLICY_CPUMON_DEFAULTS;
2969			}
2970
2971			if (percentage == TASK_POLICY_CPUMON_DEFAULTS) {
2972				percentage = proc_max_cpumon_percentage;
2973				interval   = proc_max_cpumon_interval;
2974			}
2975
2976			if (percentage > 100) {
2977				percentage = 100;
2978			}
2979
2980			/*
2981			 * Passing in an interval of -1 means either:
2982			 * - Leave the interval as-is, if there's already a per-thread
2983			 *   limit configured
2984			 * - Use the system default.
2985		  	 */
2986			if (interval == -1ULL) {
2987				if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2988			 		interval = task->rusage_cpu_perthr_interval;
2989				} else {
2990					interval = proc_max_cpumon_interval;
2991				}
2992			}
2993
2994			/*
2995			 * Enforce global caps on CPU usage monitor here if the process is not
2996			 * entitled to escape the global caps.
2997			 */
2998			 if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
2999				warn = TRUE;
3000			 	percentage = proc_max_cpumon_percentage;
3001			 }
3002
3003			 if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
3004				warn = TRUE;
3005			 	interval = proc_max_cpumon_interval;
3006			 }
3007
3008			if (warn) {
3009				int 	  pid = 0;
3010				char 	  *procname = (char *)"unknown";
3011
3012#ifdef MACH_BSD
3013				pid = proc_selfpid();
3014				if (current_task()->bsd_info != NULL) {
3015					procname = proc_name_address(current_task()->bsd_info);
3016				}
3017#endif
3018
3019				printf("process %s[%d] denied attempt to escape CPU monitor"
3020					" (missing required entitlement).\n", procname, pid);
3021			}
3022
3023			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3024			task->rusage_cpu_perthr_percentage = percentage;
3025			task->rusage_cpu_perthr_interval = interval;
3026			queue_iterate(&task->threads, thread, thread_t, task_threads) {
3027				set_astledger(thread);
3028			}
3029		} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3030			/*
3031			 * Currently, a proc-wide CPU limit always blocks if the limit is
3032			 * exceeded (LEDGER_ACTION_BLOCK).
3033			 */
3034			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
3035			task->rusage_cpu_percentage = percentage;
3036			task->rusage_cpu_interval = interval;
3037
3038			limittime = (interval * percentage) / 100;
3039			nanoseconds_to_absolutetime(limittime, &abstime);
3040
3041			ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
3042			ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
3043			ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
3044		}
3045	}
3046
3047	if (deadline != 0) {
3048		assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
3049
3050		/* if already in use, cancel and wait for it to clean out */
3051		if (task->rusage_cpu_callt != NULL) {
3052			task_unlock(task);
3053			thread_call_cancel_wait(task->rusage_cpu_callt);
3054			task_lock(task);
3055		}
3056		if (task->rusage_cpu_callt == NULL) {
3057			task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
3058		}
3059		/* setup callout */
3060		if (task->rusage_cpu_callt != 0) {
3061			uint64_t save_abstime = 0;
3062
3063			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
3064			task->rusage_cpu_deadline = deadline;
3065
3066			nanoseconds_to_absolutetime(deadline, &abstime);
3067			save_abstime = abstime;
3068			clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
3069			thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
3070		}
3071	}
3072
3073	return(0);
3074}
3075
3076int
3077task_clear_cpuusage(task_t task, int cpumon_entitled)
3078{
3079	int retval = 0;
3080
3081	task_lock(task);
3082	retval = task_clear_cpuusage_locked(task, cpumon_entitled);
3083	task_unlock(task);
3084
3085	return(retval);
3086}
3087
3088int
3089task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
3090{
3091	thread_call_t savecallt;
3092
3093	/* cancel percentage handling if set */
3094	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3095		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
3096		ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
3097		task->rusage_cpu_percentage = 0;
3098		task->rusage_cpu_interval = 0;
3099	}
3100
3101	/*
3102	 * Disable the CPU usage monitor.
3103	 */
3104	if (cpumon_entitled) {
3105		task_disable_cpumon(task);
3106	}
3107
3108	/* cancel deadline handling if set */
3109	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
3110		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
3111		if (task->rusage_cpu_callt != 0) {
3112			savecallt = task->rusage_cpu_callt;
3113			task->rusage_cpu_callt = NULL;
3114			task->rusage_cpu_deadline = 0;
3115			task_unlock(task);
3116			thread_call_cancel_wait(savecallt);
3117			thread_call_free(savecallt);
3118			task_lock(task);
3119		}
3120	}
3121	return(0);
3122}
3123
3124/* Called by the ledger unit to enforce an action when resource usage criteria are met */
3125void
3126task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
3127{
3128	task_t task = (task_t)param0;
3129	(void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
3130	return;
3131}
3132
3133
3134/*
3135 * Routines for taskwatch and pidbind
3136 */
3137
3138
3139/*
3140 * Routines for importance donation/inheritance/boosting
3141 */
3142
3143static void
3144task_importance_update_live_donor(task_t target_task)
3145{
3146#if IMPORTANCE_INHERITANCE
3147
3148	ipc_importance_task_t task_imp;
3149
3150	task_imp = ipc_importance_for_task(target_task, FALSE);
3151	if (IIT_NULL != task_imp) {
3152		ipc_importance_task_update_live_donor(task_imp);
3153		ipc_importance_task_release(task_imp);
3154	}
3155#endif /* IMPORTANCE_INHERITANCE */
3156}
3157
3158void
3159task_importance_mark_donor(task_t task, boolean_t donating)
3160{
3161#if IMPORTANCE_INHERITANCE
3162	ipc_importance_task_t task_imp;
3163
3164	task_imp = ipc_importance_for_task(task, FALSE);
3165	if (IIT_NULL != task_imp) {
3166		ipc_importance_task_mark_donor(task_imp, donating);
3167		ipc_importance_task_release(task_imp);
3168	}
3169#endif /* IMPORTANCE_INHERITANCE */
3170}
3171
3172void
3173task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3174{
3175#if IMPORTANCE_INHERITANCE
3176	ipc_importance_task_t task_imp;
3177
3178	task_imp = ipc_importance_for_task(task, FALSE);
3179	if (IIT_NULL != task_imp) {
3180		ipc_importance_task_mark_live_donor(task_imp, live_donating);
3181		ipc_importance_task_release(task_imp);
3182	}
3183#endif /* IMPORTANCE_INHERITANCE */
3184}
3185
3186void
3187task_importance_mark_receiver(task_t task, boolean_t receiving)
3188{
3189#if IMPORTANCE_INHERITANCE
3190	ipc_importance_task_t task_imp;
3191
3192	task_imp = ipc_importance_for_task(task, FALSE);
3193	if (IIT_NULL != task_imp) {
3194		ipc_importance_task_mark_receiver(task_imp, receiving);
3195		ipc_importance_task_release(task_imp);
3196	}
3197#endif /* IMPORTANCE_INHERITANCE */
3198}
3199
3200void
3201task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3202{
3203#if IMPORTANCE_INHERITANCE
3204	ipc_importance_task_t task_imp;
3205
3206	task_imp = ipc_importance_for_task(task, FALSE);
3207	if (IIT_NULL != task_imp) {
3208		ipc_importance_task_mark_denap_receiver(task_imp, denap);
3209		ipc_importance_task_release(task_imp);
3210	}
3211#endif /* IMPORTANCE_INHERITANCE */
3212}
3213
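/*
 * Disconnect the task from its importance state: reset the underlying IPC
 * importance structure and re-evaluate the task's live donor status.
 */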
3214void
3215task_importance_reset(__imp_only task_t task)
3216{
3217#if IMPORTANCE_INHERITANCE
3218	ipc_importance_task_t task_imp;
3219
3220	/* TODO: Lower importance downstream before disconnect */
3221	task_imp = task->task_imp_base;
3222	ipc_importance_reset(task_imp, FALSE);
3223	task_importance_update_live_donor(task);
3224#endif /* IMPORTANCE_INHERITANCE */
3225}
3226
3227#if IMPORTANCE_INHERITANCE
3228
3229/*
3230 * Sets the task boost bit to the provided value.  Does NOT run the update function.
3231 *
3232 * Task lock must be held.
3233 */
3234void
3235task_set_boost_locked(task_t task, boolean_t boost_active)
3236{
3237#if IMPORTANCE_DEBUG
3238	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3239	                          proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3240#endif
3241
3242	task->requested_policy.t_boosted = boost_active;
3243
3244#if IMPORTANCE_DEBUG
3245	if (boost_active == TRUE) {
3246		DTRACE_BOOST2(boost, task_t, task, int, audit_token_pid_from_task(task));
3247	} else {
3248		DTRACE_BOOST2(unboost, task_t, task, int, audit_token_pid_from_task(task));
3249	}
3250	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3251	                          proc_selfpid(), audit_token_pid_from_task(task),
3252	                          trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3253#endif
3254}
3255
3256/*
3257 * Sets the task boost bit to the provided value and applies the update.
3258 *
3259 * Task lock must be held.  Must call update complete after unlocking the task.
3260 */
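/*
 * A minimal caller sketch; the pend_token initialization and the name of
 * the update-complete routine are assumptions based on the conventions used
 * elsewhere in this file:
 *
 *	struct task_pend_token pend_token = {};
 *
 *	task_lock(task);
 *	task_update_boost_locked(task, TRUE, &pend_token);
 *	task_unlock(task);
 *	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
 */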
3261void
3262task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3263{
3264	task_set_boost_locked(task, boost_active);
3265
3266	task_policy_update_locked(task, THREAD_NULL, pend_token);
3267}
3268
3269/*
3270 * Check if this task should donate importance.
3271 *
3272 * May be called without taking the task lock.  In that case, donor status can
3273 * change at any time, so check it only once per donation event.
3274 */
3275boolean_t
3276task_is_importance_donor(task_t task)
3277{
3278	if (task->task_imp_base == IIT_NULL)
3279		return FALSE;
3280	return ipc_importance_task_is_donor(task->task_imp_base);
3281}
3282
3283/*
3284 * Query the status of the task's donor mark.
3285 */
3286boolean_t
3287task_is_marked_importance_donor(task_t task)
3288{
3289	if (task->task_imp_base == IIT_NULL)
3290		return FALSE;
3291	return ipc_importance_task_is_marked_donor(task->task_imp_base);
3292}
3293
3294/*
3295 * Query the status of the task's live donor and donor mark.
3296 */
3297boolean_t
3298task_is_marked_live_importance_donor(task_t task)
3299{
3300	if (task->task_imp_base == IIT_NULL)
3301		return FALSE;
3302	return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3303}
3304
3305
3306/*
3307 * This routine may be called without holding task lock
3308 * since the value of imp_receiver can never be unset.
3309 */
3310boolean_t
3311task_is_importance_receiver(task_t task)
3312{
3313	if (task->task_imp_base == IIT_NULL)
3314		return FALSE;
3315	return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3316}
3317
3318/*
3319 * Query the task's receiver mark.
3320 */
3321boolean_t
3322task_is_marked_importance_receiver(task_t task)
3323{
3324	if (task->task_imp_base == IIT_NULL)
3325		return FALSE;
3326	return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3327}
3328
3329/*
3330 * This routine may be called without holding task lock
3331 * since the value of de-nap receiver can never be unset.
3332 */
3333boolean_t
3334task_is_importance_denap_receiver(task_t task)
3335{
3336	if (task->task_imp_base == IIT_NULL)
3337		return FALSE;
3338	return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3339}
3340
3341/*
3342 * Query the task's de-nap receiver mark.
3343 */
3344boolean_t
3345task_is_marked_importance_denap_receiver(task_t task)
3346{
3347	if (task->task_imp_base == IIT_NULL)
3348		return FALSE;
3349	return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3350}
3351
3352/*
3353 * This routine may be called without holding task lock
3354 * since the imp_receiver and denap receiver marks can never be unset.
3355 */
3356boolean_t
3357task_is_importance_receiver_type(task_t task)
3358{
3359	if (task->task_imp_base == IIT_NULL)
3360		return FALSE;
3361	return (task_is_importance_receiver(task) ||
3362		task_is_importance_denap_receiver(task));
3363}
3364
3365/*
3366 * External importance assertions are managed by the process in userspace.
3367 * Internal importance assertions are the responsibility of the kernel.
3368 * Assertions are converted from internal to external via task_importance_externalize_assertion().
3369 */
3370
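/*
 * A sketch of the expected hold/drop pairing (illustrative only; real
 * callers track their outstanding counts themselves):
 *
 *	if (task_importance_hold_file_lock_assertion(task, 1) == 0) {
 *		... boosted work ...
 *		(void)task_importance_drop_file_lock_assertion(task, 1);
 *	}
 */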
3371int
3372task_importance_hold_watchport_assertion(task_t target_task, uint32_t count)
3373{
3374	ipc_importance_task_t task_imp;
3375	kern_return_t ret;
3376
3377	/* must already have set up an importance */
3378	task_imp = target_task->task_imp_base;
3379	assert(IIT_NULL != task_imp);
3380
3381	ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3382	return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3383}
3384
3385int
3386task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3387{
3388	ipc_importance_task_t task_imp;
3389	kern_return_t ret;
3390
3391	/* may be first time, so allow for possible importance setup */
3392	task_imp = ipc_importance_for_task(target_task, FALSE);
3393	if (IIT_NULL == task_imp) {
3394		return EOVERFLOW;
3395	}
3396	ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3397	ipc_importance_task_release(task_imp);
3398
3399	return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3400}
3401
3402int
3403task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3404{
3405	ipc_importance_task_t task_imp;
3406	kern_return_t ret;
3407
3408	/* may be first time, so allow for possible importance setup */
3409	task_imp = ipc_importance_for_task(target_task, FALSE);
3410	if (IIT_NULL == task_imp) {
3411		return EOVERFLOW;
3412	}
3413	ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3414	ipc_importance_task_release(task_imp);
3415
3416	return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3417}
3418
3419int
3420task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3421{
3422	ipc_importance_task_t task_imp;
3423	kern_return_t ret;
3424
3425	/* must already have set up an importance */
3426	task_imp = target_task->task_imp_base;
3427	if (IIT_NULL == task_imp) {
3428		return EOVERFLOW;
3429	}
3430	ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3431	return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3432}
3433
3434int
3435task_importance_drop_internal_assertion(task_t target_task, uint32_t count)
3436{
3437	ipc_importance_task_t task_imp;
3438	kern_return_t ret;
3439
3440	/* must already have set up an importance */
3441	task_imp = target_task->task_imp_base;
3442	if (IIT_NULL == task_imp) {
3443		return EOVERFLOW;
3444	}
3445	ret = ipc_importance_task_drop_internal_assertion(task_imp, count);
3446	return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3447}
3448
3449int
3450task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3451{
3452	ipc_importance_task_t task_imp;
3453	kern_return_t ret;
3454
3455	/* must already have set up an importance */
3456	task_imp = target_task->task_imp_base;
3457	if (IIT_NULL == task_imp) {
3458		return EOVERFLOW;
3459	}
3460	ret = ipc_importance_task_drop_file_lock_assertion(task_imp, count);
3461	return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3462}
3463
3464int
3465task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3466{
3467	ipc_importance_task_t task_imp;
3468	kern_return_t ret;
3469
3470	/* must already have set up an importance */
3471	task_imp = target_task->task_imp_base;
3472	if (IIT_NULL == task_imp) {
3473		return EOVERFLOW;
3474	}
3475	ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3476	return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3477}
3478
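/*
 * Point an importance-donating watchport at this task.  If the port was
 * previously bound to another task, that task's reference is released and
 * its watchport-forwarded boosts are dropped.  The number of boosts carried
 * by the port is returned in *boostp.
 */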
3479static void
3480task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3481{
3482	int boost = 0;
3483
3484	__impdebug_only int released_pid = 0;
3485	__impdebug_only int pid = audit_token_pid_from_task(task);
3486
3487	ipc_importance_task_t release_imp_task = IIT_NULL;
3488
3489	if (IP_VALID(port) != 0) {
3490		ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3491
3492		ip_lock(port);
3493
3494		/*
3495		 * The port must have been marked tempowner already.
3496		 * This also filters out ports whose receive rights
3497		 * are already enqueued in a message, as you can't
3498		 * change the right's destination once it's already
3499		 * on its way.
3500		 */
3501		if (port->ip_tempowner != 0) {
3502			assert(port->ip_impdonation != 0);
3503
3504			boost = port->ip_impcount;
3505			if (IIT_NULL != port->ip_imp_task) {
3506				/*
3507				 * if this port is already bound to a task,
3508				 * release the task reference and drop any
3509				 * watchport-forwarded boosts
3510				 */
3511				release_imp_task = port->ip_imp_task;
3512				port->ip_imp_task = IIT_NULL;
3513			}
3514
3515		/* mark that the port is watching another task (reference held in port->ip_imp_task) */
3516			if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3517				port->ip_imp_task = new_imp_task;
3518				new_imp_task = IIT_NULL;
3519			}
3520		}
3521		ip_unlock(port);
3522
3523		if (IIT_NULL != new_imp_task) {
3524			ipc_importance_task_release(new_imp_task);
3525		}
3526
3527		if (IIT_NULL != release_imp_task) {
3528			if (boost > 0)
3529				ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3530
3531			// released_pid = audit_token_pid_from_task(release_imp_task); /* TODO: Need ref-safe way to get pid */
3532			ipc_importance_task_release(release_imp_task);
3533		}
3534#if IMPORTANCE_DEBUG
3535		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3536		        proc_selfpid(), pid, boost, released_pid, 0);
3537#endif /* IMPORTANCE_DEBUG */
3538	}
3539
3540	*boostp = boost;
3541	return;
3542}
3543
3544#endif /* IMPORTANCE_INHERITANCE */
3545
3546/*
3547 * Routines for VM to query task importance
3548 */
3549
3550
3551/*
3552 * Relative weights used when estimating task importance
3553 * for low memory notification and for purging purgeable memory.
3554 */
3555#define TASK_IMPORTANCE_FOREGROUND     4
3556#define TASK_IMPORTANCE_NOTDARWINBG    1
3557
3558
3559/*
3560 * Checks whether the task has already been notified at the given pressure level.
3561 *
3562 * Condition: task lock should be held while calling this function.
3563 */
3564boolean_t
3565task_has_been_notified(task_t task, int pressurelevel)
3566{
3567	if (task == NULL) {
3568		return FALSE;
3569	}
3570
3571	if (pressurelevel == kVMPressureWarning)
3572		return (task->low_mem_notified_warn ? TRUE : FALSE);
3573	else if (pressurelevel == kVMPressureCritical)
3574		return (task->low_mem_notified_critical ? TRUE : FALSE);
3575	else
3576		return TRUE;
3577}
3578
3579
3580/*
3581 * Checks whether the task has already been used for purging at the given pressure level.
3582 *
3583 * Condition: task lock should be held while calling this function.
3584 */
3585boolean_t
3586task_used_for_purging(task_t task, int pressurelevel)
3587{
3588	if (task == NULL) {
3589		return FALSE;
3590	}
3591
3592	if (pressurelevel == kVMPressureWarning)
3593		return (task->purged_memory_warn ? TRUE : FALSE);
3594	else if (pressurelevel == kVMPressureCritical)
3595		return (task->purged_memory_critical ? TRUE : FALSE);
3596	else
3597		return TRUE;
3598}
3599
3600
3601/*
3602 * Mark the task as notified at the given memory pressure level.
3603 *
3604 * Condition: task lock should be held while calling this function.
3605 */
3606void
3607task_mark_has_been_notified(task_t task, int pressurelevel)
3608{
3609	if (task == NULL) {
3610		return;
3611	}
3612
3613	if (pressurelevel == kVMPressureWarning)
3614		task->low_mem_notified_warn = 1;
3615	else if (pressurelevel == kVMPressureCritical)
3616		task->low_mem_notified_critical = 1;
3617}
3618
3619
3620/*
3621 * Mark the task as purged.
3622 *
3623 * Condition: task lock should be held while calling this function.
3624 */
3625void
3626task_mark_used_for_purging(task_t task, int pressurelevel)
3627{
3628	if (task == NULL) {
3629		return;
3630	}
3631
3632	if (pressurelevel == kVMPressureWarning)
3633		task->purged_memory_warn = 1;
3634	else if (pressurelevel == kVMPressureCritical)
3635		task->purged_memory_critical = 1;
3636}
3637
3638
3639/*
3640 * Mark the task eligible for low memory notification.
3641 *
3642 * Condition: task lock should be held while calling this function.
3643 */
3644void
3645task_clear_has_been_notified(task_t task, int pressurelevel)
3646{
3647	if (task == NULL) {
3648		return;
3649	}
3650
3651	if (pressurelevel == kVMPressureWarning)
3652		task->low_mem_notified_warn = 0;
3653	else if (pressurelevel == kVMPressureCritical)
3654		task->low_mem_notified_critical = 0;
3655}
3656
3657
3658/*
3659 * Mark the task eligible for purging its purgeable memory.
3660 *
3661 * Condition: task lock should be held while calling this function.
3662 */
3663void
3664task_clear_used_for_purging(task_t task)
3665{
3666	if (task == NULL) {
3667		return;
3668	}
3669
3670	task->purged_memory_warn = 0;
3671	task->purged_memory_critical = 0;
3672}
3673
3674
3675/*
3676 * Estimate task importance for purging its purgeable memory
3677 * and low memory notification.
3678 *
3679 * Importance is calculated in the following order of criteria:
3680 * - Task role: background vs. foreground
3681 * - Boost status: not boosted vs. boosted
3682 * - Darwin BG status.
3683 *
3684 * Returns: Estimated task importance.  A less important task has a lower
3685 *          estimated importance.
3686 */
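/*
 * For example, a foreground application that is not Darwin-BG scores
 * TASK_IMPORTANCE_FOREGROUND + TASK_IMPORTANCE_NOTDARWINBG = 5, while a
 * Darwin-BG task with no special role scores 0.
 */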
3687int
3688task_importance_estimate(task_t task)
3689{
3690	int task_importance = 0;
3691
3692	if (task == NULL) {
3693		return 0;
3694	}
3695
3696	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION)
3697		task_importance += TASK_IMPORTANCE_FOREGROUND;
3698
3699	if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0)
3700		task_importance += TASK_IMPORTANCE_NOTDARWINBG;
3701
3702	return task_importance;
3703}
3704
3705