/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/thread_act_server.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/affinity.h>
#include <mach/task_policy.h>
#include <kern/sfi.h>

#include <mach/machine/sdt.h>

#define QOS_EXTRACT(q)        ((q) & 0xff)

/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
	/*
	 * These tables define the highest throughput and latency QoS tiers that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};

void
thread_recompute_qos(thread_t thread);

static void
thread_recompute_priority(
	thread_t		thread);

static void
thread_set_user_sched_mode(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);


extern void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);

boolean_t
thread_has_qos_policy(thread_t thread) {
	return (proc_get_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
}

kern_return_t
thread_remove_qos_policy(thread_t thread)
{
	thread_qos_policy_data_t unspec_qos;
	unspec_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
	unspec_qos.tier_importance = 0;

	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	return thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&unspec_qos, THREAD_QOS_POLICY_COUNT);
}

boolean_t
thread_is_static_param(thread_t thread)
{
	if (thread->static_param) {
		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
		return TRUE;
	}
	return FALSE;
}

/*
 * Relative priorities can range between 0REL and -15REL. These
 * map to QoS-specific ranges, to create non-overlapping priority
 * ranges.
 */
static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio)
{
	int next_lower_qos;

	/* Fast path, since no validation or scaling is needed */
	if (qos_relprio == 0) return 0;

	switch (qos) {
		case THREAD_QOS_USER_INTERACTIVE:
			next_lower_qos = THREAD_QOS_USER_INITIATED;
			break;
		case THREAD_QOS_USER_INITIATED:
			next_lower_qos = THREAD_QOS_LEGACY;
			break;
		case THREAD_QOS_LEGACY:
			next_lower_qos = THREAD_QOS_UTILITY;
			break;
		case THREAD_QOS_UTILITY:
			next_lower_qos = THREAD_QOS_BACKGROUND;
			break;
		case THREAD_QOS_MAINTENANCE:
		case THREAD_QOS_BACKGROUND:
			next_lower_qos = 0;
			break;
		default:
			panic("Unrecognized QoS %d", qos);
			return 0;
	}

	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;

	/*
	 * We now have the valid range that the scaled relative priority can map to. Note
	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
	 * remainder.
	 */
	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);

	return scaled_relprio;
}
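
/*
 * Worked example (illustrative only, assuming BASEPRI_DEFAULT == 31 and
 * BASEPRI_UTILITY == 20): a THREAD_QOS_LEGACY thread scales over the
 * range (20, 31]. For qos_relprio == -15:
 *
 *	scaled_relprio = -(((31 - 20) * 15) >> 4) = -(165 >> 4) = -10
 *
 * so the thread's base priority becomes 31 - 10 = 21, which stays above
 * the top of the UTILITY range as intended.
 */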

/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
boolean_t allow_qos_policy_set = FALSE;

kern_return_t
thread_policy_set(
	thread_t				thread,
	thread_policy_flavor_t	flavor,
	thread_policy_t			policy_info,
	mach_msg_type_number_t	count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (allow_qos_policy_set == FALSE) {
		if (thread_is_static_param(thread))
			return (KERN_POLICY_STATIC);

		if (flavor == THREAD_QOS_POLICY || flavor == THREAD_QOS_POLICY_OVERRIDE)
			return (KERN_INVALID_ARGUMENT);
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier; if this call fails, it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/* If we removed a QoS policy above, restore it when the requested set fails. */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
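
/*
 * Example (user-space sketch, not part of this file): the simplest client
 * of this call sets THREAD_EXTENDED_POLICY to opt a thread out of
 * timesharing. Assumes <mach/mach.h>, <mach/thread_policy.h> and
 * <pthread.h> in a user process.
 *
 *	thread_extended_policy_data_t ext = { .timeshare = FALSE };
 *	kern_return_t kr = thread_policy_set(pthread_mach_thread_np(pthread_self()),
 *	    THREAD_EXTENDED_POLICY, (thread_policy_t)&ext,
 *	    THREAD_EXTENDED_POLICY_COUNT);
 */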

kern_return_t
thread_policy_set_internal(
	thread_t				thread,
	thread_policy_flavor_t	flavor,
	thread_policy_t			policy_info,
	mach_msg_type_number_t	count)
{
	kern_return_t			result = KERN_SUCCESS;
	spl_t					s;

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return (KERN_TERMINATED);
	}

	switch (flavor) {

	case THREAD_EXTENDED_POLICY:
	{
		boolean_t				timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t	info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		s = splsched();
		thread_lock(thread);

		boolean_t removed = thread_run_queue_remove(thread);

		thread_set_user_sched_mode(thread, mode);
		thread_recompute_priority(thread);

		if (removed)
			thread_setrun(thread, SCHED_TAILQ);

		thread_unlock(thread);
		splx(s);

		sfi_reevaluate(thread);

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t		info;

		if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;
		if (	info->constraint < info->computation	||
				info->computation > max_rt_quantum		||
				info->computation < min_rt_quantum		) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		s = splsched();
		thread_lock(thread);

		boolean_t removed = thread_run_queue_remove(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		thread_set_user_sched_mode(thread, TH_MODE_REALTIME);
		thread_recompute_priority(thread);

		if (removed)
			thread_setrun(thread, SCHED_TAILQ);

		thread_unlock(thread);
		splx(s);

		sfi_reevaluate(thread);

		break;
	}
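
	/*
	 * Example (user-space sketch): a typical audio-style client converts
	 * milliseconds to absolute-time units via mach_timebase_info() before
	 * requesting this policy; the parameter values below are illustrative.
	 *
	 *	mach_timebase_info_data_t tb;
	 *	mach_timebase_info(&tb);
	 *	uint32_t ms = (uint32_t)(1000000ULL * tb.denom / tb.numer);
	 *	thread_time_constraint_policy_data_t tc = {
	 *		.period = 10 * ms, .computation = 2 * ms,
	 *		.constraint = 5 * ms, .preemptible = TRUE,
	 *	};
	 *	thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
	 *	    (thread_policy_t)&tc, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
	 *
	 * Per the validation above, computation must fall within
	 * [min_rt_quantum, max_rt_quantum] and constraint must be >= computation.
	 */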

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t		info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t	info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}
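
	/*
	 * Example (user-space sketch): cooperating threads that want to share
	 * cache ask for the same non-null affinity tag; the tag namespace is
	 * private to each task. The tag value 1 below is arbitrary.
	 *
	 *	thread_affinity_policy_data_t ap = { .affinity_tag = 1 };
	 *	thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
	 *	    (thread_policy_t)&ap, THREAD_AFFINITY_POLICY_COUNT);
	 */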

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		int tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) !=
		    KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);
		thread->effective_policy.t_through_qos = tqos;
	}
		break;

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		int lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) !=
		    KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);
		/*
		 * The expected use cases (opt-in) of per-thread latency QoS would seem to
		 * preclude any requirement at present to re-evaluate timers on a thread level
		 * latency QoS change.
		 */
		thread->effective_policy.t_latency_qos = lqos;

	}
		break;

	case THREAD_QOS_POLICY:
	case THREAD_QOS_POLICY_OVERRIDE:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Going into task policy requires the task mutex,
		 * because of the way synchronization against the IO policy
		 * subsystem works.
		 *
		 * We need to move thread policy to the thread mutex instead.
		 * <rdar://problem/15831652> separate thread policy from task policy
		 */

		if (flavor == THREAD_QOS_POLICY_OVERRIDE) {
			int strongest_override = info->qos_tier;

			if (info->qos_tier != THREAD_QOS_UNSPECIFIED &&
			    thread->requested_policy.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
				strongest_override = MAX(thread->requested_policy.thrp_qos_override, info->qos_tier);

			thread_mtx_unlock(thread);

			/* There is a race here. To be closed in <rdar://problem/15831652> separate thread policy from task policy */

			proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, strongest_override);

			return (result);
		}

		thread_mtx_unlock(thread);

		proc_set_task_policy2(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, info->qos_tier, -info->tier_importance);

		thread_mtx_lock(thread);
		if (!thread->active) {
			thread_mtx_unlock(thread);
			return (KERN_TERMINATED);
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);
	return (result);
}
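
/*
 * Example (kernel-internal sketch): in-kernel callers reach this path
 * directly, e.g. to tag a thread as UTILITY with a small relative priority.
 * From user space the same flavor is rejected by thread_policy_set()
 * unless the -qos-policy-allow boot-arg is set, as noted above.
 *
 *	thread_qos_policy_data_t qp = {
 *		.qos_tier = THREAD_QOS_UTILITY,
 *		.tier_importance = -2,
 *	};
 *	kern_return_t kr = thread_policy_set_internal(thread, THREAD_QOS_POLICY,
 *	    (thread_policy_t)&qp, THREAD_QOS_POLICY_COUNT);
 */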

/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 * Both result in FIXED mode scheduling.
 *
 * Called with thread mutex locked.
 */
kern_return_t
thread_set_mode_and_absolute_pri(
	thread_t		thread,
	integer_t		policy,
	integer_t		priority)
{
	spl_t s;
	sched_mode_t mode;
	kern_return_t kr = KERN_SUCCESS;

	if (thread_is_static_param(thread))
		return (KERN_POLICY_STATIC);

	if (thread->policy_reset)
		return (KERN_SUCCESS);

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_mtx_unlock(thread);

		kr = thread_remove_qos_policy(thread);

		thread_mtx_lock(thread);
		if (!thread->active) {
			return (KERN_TERMINATED);
		}
	}

	switch (policy) {
		case POLICY_TIMESHARE:
			mode = TH_MODE_TIMESHARE;
			break;
		case POLICY_RR:
		case POLICY_FIFO:
			mode = TH_MODE_FIXED;
			break;
		default:
			panic("unexpected sched policy: %d", policy);
			break;
	}

	s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode != TH_MODE_REALTIME) &&
	    (thread->saved_mode != TH_MODE_REALTIME)) {

		/*
		 * Reverse engineer and apply the correct importance value
		 * from the requested absolute priority value.
		 */

		if (priority >= thread->max_priority)
			priority = thread->max_priority - thread->task_priority;
		else if (priority >= MINPRI_KERNEL)
			priority -=  MINPRI_KERNEL;
		else if (priority >= MINPRI_RESERVED)
			priority -=  MINPRI_RESERVED;
		else
			priority -= BASEPRI_DEFAULT;

		priority += thread->task_priority;

		if (priority > thread->max_priority)
			priority = thread->max_priority;
		else if (priority < MINPRI)
			priority = MINPRI;

		thread->importance = priority - thread->task_priority;

		boolean_t removed = thread_run_queue_remove(thread);

		thread_set_user_sched_mode(thread, mode);

		thread_recompute_priority(thread);

		if (removed)
			thread_setrun(thread, SCHED_TAILQ);
	}

	thread_unlock(thread);
	splx(s);

	sfi_reevaluate(thread);

	return (kr);
}
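
/*
 * Worked example (illustrative, assuming BASEPRI_DEFAULT == 31,
 * MINPRI_RESERVED == 64 and MINPRI_KERNEL == 80): a request for absolute
 * priority 40 on a thread whose task_priority is 31 and max_priority is 63
 * takes the BASEPRI_DEFAULT branch: 40 - 31 = 9, then 9 + 31 = 40, so the
 * thread ends up with importance 9 and base priority 40.
 */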

/*
 * Set the thread's requested mode
 * Called with thread mutex and thread locked
 */
static void
thread_set_user_sched_mode(thread_t thread, sched_mode_t mode)
{
	if (thread->policy_reset)
		return;

	/*
	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
	 * That way there's zero confusion over which the user wants
	 * and which the kernel wants.
	 */
	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
		thread->saved_mode = mode;
	else
		sched_set_thread_mode(thread, mode);
}

/* called with task lock locked */
void
thread_recompute_qos(thread_t thread) {
	spl_t s;

	thread_mtx_lock(thread);

	if (!thread->active) {
		thread_mtx_unlock(thread);
		return;
	}

	s = splsched();
	thread_lock(thread);

	thread_recompute_priority(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}

/* called with task lock locked and thread_mtx_lock locked */
void
thread_update_qos_cpu_time(thread_t thread, boolean_t lock_needed)
{
	uint64_t last_qos_change_balance;
	ledger_amount_t thread_balance_credit;
	ledger_amount_t thread_balance_debit;
	ledger_amount_t effective_qos_time;
	uint64_t ctime;
	uint64_t remainder = 0, consumed = 0;
	processor_t		processor;
	spl_t s;
	kern_return_t kr;

	if (lock_needed) {
		s = splsched();
		thread_lock(thread);
	}

	/*
	 * Calculate the time the thread has spent at its current QoS.
	 * The timeline below shows all the variables used in the calculation.
	 *
	 *       thread ledger      thread ledger
	 *      cpu_time_last_qos     cpu_time
	 *              |                |<-   consumed  ->|<- remainder  ->|
	 * timeline  ----------------------------------------------------------->
	 *                               |                 |                |
	 *                            thread_dispatch    ctime           quantum end
	 *
	 *              |<-----  effective qos time  ----->|
	 */

	/*
	 * Calculate the time elapsed since the last QoS change on this thread.
	 * For CPU time on the thread ledger, do not use ledger_get_balance;
	 * use only the credit field of the ledger, since
	 * debit is used by per-thread CPU limits and is not zero.
	 */
	kr = ledger_get_entries(thread->t_threadledger, thread_ledgers.cpu_time, &thread_balance_credit, &thread_balance_debit);
	if (kr != KERN_SUCCESS)
		goto out;
	last_qos_change_balance = thread->cpu_time_last_qos;

	/*
	 * If the thread is running on a CPU, calculate the time elapsed since it was last dispatched.
	 * The thread ledger is only updated at context switch; the time since the last context switch
	 * is not reflected in the thread ledger CPU time.
	 */
	processor = thread->last_processor;
	if ((processor != PROCESSOR_NULL) && (processor->state == PROCESSOR_RUNNING) &&
		   (processor->active_thread == thread)) {
		ctime = mach_absolute_time();

		if (processor->quantum_end > ctime)
			remainder = processor->quantum_end - ctime;

		consumed = thread->quantum_remaining - remainder;
	}
	/*
	 * There can be multiple QoS changes in a quantum, in which case cpu_time_last_qos will
	 * lie between the cpu_time marker and the ctime marker shown below. The result of
	 * thread_balance - last_qos_change_balance is negative in that case, but the overall
	 * outcome is positive once consumed is added to it.
	 *
	 *          thread ledger
	 *            cpu_time
	 *               |<------------  consumed    --------->|<- remainder  ->|
	 * timeline  ----------------------------------------------------------->
	 *               |              |                      |                |
	 *         thread_dispatch  thread ledger            ctime           quantum end
	 *                          cpu_time_last_qos
	 *
	 *                              |<-effective qos time->|
	 */
	effective_qos_time = (ledger_amount_t) consumed;
	effective_qos_time += thread_balance_credit - last_qos_change_balance;

	if (lock_needed) {
		thread_unlock(thread);
		splx(s);
	}

	if (effective_qos_time < 0)
		return;

	thread->cpu_time_last_qos += (uint64_t)effective_qos_time;

	/*
	 * Update the task-level QoS stats. It's safe to operate on these fields, since we
	 * hold the task lock.
	 */
	switch (thread->effective_policy.thep_qos) {

	case THREAD_QOS_DEFAULT:
		thread->task->cpu_time_qos_stats.cpu_time_qos_default += effective_qos_time;
		break;

	case THREAD_QOS_MAINTENANCE:
		thread->task->cpu_time_qos_stats.cpu_time_qos_maintenance += effective_qos_time;
		break;

	case THREAD_QOS_BACKGROUND:
		thread->task->cpu_time_qos_stats.cpu_time_qos_background += effective_qos_time;
		break;

	case THREAD_QOS_UTILITY:
		thread->task->cpu_time_qos_stats.cpu_time_qos_utility += effective_qos_time;
		break;

	case THREAD_QOS_LEGACY:
		thread->task->cpu_time_qos_stats.cpu_time_qos_legacy += effective_qos_time;
		break;

	case THREAD_QOS_USER_INITIATED:
		thread->task->cpu_time_qos_stats.cpu_time_qos_user_initiated += effective_qos_time;
		break;

	case THREAD_QOS_USER_INTERACTIVE:
		thread->task->cpu_time_qos_stats.cpu_time_qos_user_interactive += effective_qos_time;
		break;
	}

	return;

out:
	if (lock_needed) {
		thread_unlock(thread);
		splx(s);
	}
}
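
/*
 * Worked example (illustrative units only): suppose the ledger credit is
 * 900 units of CPU time, cpu_time_last_qos was recorded at 850, and the
 * running thread has consumed 30 units of its current quantum since
 * dispatch. Then effective_qos_time = 30 + (900 - 850) = 80 units are
 * charged to the thread's current effective QoS bucket.
 */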

/*
 * Calculate base priority from thread attributes, and set it on the thread
 *
 * Called with thread_lock and thread mutex held.
 */
static void
thread_recompute_priority(
	thread_t		thread)
{
	integer_t		priority;

	if (thread->policy_reset)
		return;

	if (thread->sched_mode == TH_MODE_REALTIME) {
		sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
		return;
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		priority += qos_scaled_relprio;
	} else {
		if (thread->importance > MAXPRI)
			priority = MAXPRI;
		else if (thread->importance < -MAXPRI)
			priority = -MAXPRI;
		else
			priority = thread->importance;

		priority += thread->task_priority;
	}

	if (priority > thread->max_priority)
		priority = thread->max_priority;
	else if (priority < MINPRI)
		priority = MINPRI;


	sched_set_thread_base_priority(thread, priority);
}
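
/*
 * Worked example (illustrative, using the qos_pri table above): a
 * THREAD_QOS_USER_INTERACTIVE thread with relpri 0 starts from qos_pri 46;
 * if qos_ui_is_urgent is set (frontmost app), the +1 bump yields 47. The
 * result is then clamped to [MINPRI, thread->max_priority] before
 * sched_set_thread_base_priority() applies it.
 */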

/* Called with the thread mutex held */
void
thread_task_priority(
	thread_t		thread,
	integer_t		priority,
	integer_t		max_priority)
{
	spl_t s;

	assert(thread != THREAD_NULL);

	if (!thread->active || thread->policy_reset)
		return;

	s = splsched();
	thread_lock(thread);

	integer_t old_max_priority = thread->max_priority;

	thread->task_priority = priority;
	thread->max_priority = max_priority;

	/* A thread is 'throttled' when its max priority is at or below MAXPRI_THROTTLE */
	if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_set_thread_throttled(thread, FALSE);
	} else if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_set_thread_throttled(thread, TRUE);
	}

	thread_recompute_priority(thread);

	thread_unlock(thread);
	splx(s);
}

/*
 * Reset thread to default state in preparation for termination
 * Called with thread mutex locked
 *
 * Always called on current thread, so we don't need a run queue remove
 */
void
thread_policy_reset(
	thread_t		thread)
{
	spl_t		s;

	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	assert_thread_sched_count(thread);

	if (thread->sched_flags & TH_SFLAG_FAILSAFE)
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);

	assert_thread_sched_count(thread);

	if (thread->sched_flags & TH_SFLAG_THROTTLED)
		sched_set_thread_throttled(thread, FALSE);

	assert_thread_sched_count(thread);

	assert(thread->BG_COUNT == 0);

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode  */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	sched_set_thread_base_priority(thread, thread->task_priority);

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	assert(thread->BG_COUNT == 0);
	assert_thread_sched_count(thread);

	thread_unlock(thread);
	splx(s);
}

kern_return_t
thread_policy_get(
	thread_t				thread,
	thread_policy_flavor_t	flavor,
	thread_policy_t			policy_info,
	mach_msg_type_number_t	*count,
	boolean_t				*get_default)
{
	kern_return_t			result = KERN_SUCCESS;
	spl_t					s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return (KERN_TERMINATED);
	}

	switch (flavor) {

	case THREAD_EXTENDED_POLICY:
	{
		boolean_t		timeshare = TRUE;

		if (!(*get_default)) {
			s = splsched();
			thread_lock(thread);

			if (	 (thread->sched_mode != TH_MODE_REALTIME)	&&
					 (thread->saved_mode != TH_MODE_REALTIME)			) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				else
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
			}
			else
				*get_default = TRUE;

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t	info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	{
		thread_time_constraint_policy_t		info;

		if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_policy_t)policy_info;

		if (!(*get_default)) {
			s = splsched();
			thread_lock(thread);

			if (	(thread->sched_mode == TH_MODE_REALTIME)	||
					(thread->saved_mode == TH_MODE_REALTIME)		) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
			}
			else
				*get_default = TRUE;

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
		}

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t		info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		}
		else
			info->importance = 0;

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t		info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default))
			info->affinity_tag = thread_affinity_get(thread);
		else
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t		info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (current_task()->sec_token.val[0] != 0) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/*
			 * Unlock the thread mutex and directly return.
			 * This is necessary because proc_get_thread_policy()
			 * takes the task lock.
			 */
			thread_mtx_unlock(thread);
			proc_get_thread_policy(thread, info);
			return (result);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		uint32_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = thread->effective_policy.t_latency_qos;
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		uint32_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = thread->effective_policy.t_through_qos;
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	case THREAD_QOS_POLICY_OVERRIDE:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			if (flavor == THREAD_QOS_POLICY_OVERRIDE) {
				info->qos_tier = thread->requested_policy.thrp_qos_override;
				/* TODO: handle importance overrides */
				info->tier_importance = 0;
			} else {
				info->qos_tier = thread->requested_policy.thrp_qos;
				info->tier_importance = thread->importance;
			}
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return (result);
}
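/*
 * Example (user-space sketch): reading back the current time-constraint
 * parameters. Passing get_default == FALSE asks for the live values; per
 * the logic above, it is flipped to TRUE on return if the thread is not
 * realtime and default values were reported instead.
 *
 *	thread_time_constraint_policy_data_t tc;
 *	mach_msg_type_number_t cnt = THREAD_TIME_CONSTRAINT_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *	kern_return_t kr = thread_policy_get(mach_thread_self(),
 *	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&tc,
 *	    &cnt, &get_default);
 */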