1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	sched_prim.c
60 *	Author:	Avadis Tevanian, Jr.
61 *	Date:	1986
62 *
63 *	Scheduling primitives
64 *
65 */
66
67#include <debug.h>
68
69#include <mach/mach_types.h>
70#include <mach/machine.h>
71#include <mach/policy.h>
72#include <mach/sync_policy.h>
73#include <mach/thread_act.h>
74
75#include <machine/machine_routines.h>
76#include <machine/sched_param.h>
77#include <machine/machine_cpu.h>
78#include <machine/machlimits.h>
79
80#include <kern/kern_types.h>
81#include <kern/clock.h>
82#include <kern/counters.h>
83#include <kern/cpu_number.h>
84#include <kern/cpu_data.h>
85#include <kern/debug.h>
86#include <kern/lock.h>
87#include <kern/macro_help.h>
88#include <kern/machine.h>
89#include <kern/misc_protos.h>
90#include <kern/processor.h>
91#include <kern/queue.h>
92#include <kern/sched.h>
93#include <kern/sched_prim.h>
94#include <kern/syscall_subr.h>
95#include <kern/task.h>
96#include <kern/thread.h>
97#include <kern/wait_queue.h>
98#include <kern/ledger.h>
99#include <kern/timer_queue.h>
100
101#include <vm/pmap.h>
102#include <vm/vm_kern.h>
103#include <vm/vm_map.h>
104
105#include <mach/sdt.h>
106
107#include <sys/kdebug.h>
108
109#include <kern/pms.h>
110
111struct rt_queue	rt_runq;
112#define RT_RUNQ		((processor_t)-1)
113decl_simple_lock_data(static,rt_lock);
114
115#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
116static struct fairshare_queue	fs_runq;
117#define FS_RUNQ		((processor_t)-2)
118decl_simple_lock_data(static,fs_lock);
119#endif
120
121#define		DEFAULT_PREEMPTION_RATE		100		/* (1/s) */
122int			default_preemption_rate = DEFAULT_PREEMPTION_RATE;
123
124#define		DEFAULT_BG_PREEMPTION_RATE	400		/* (1/s) */
125int			default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
126
127#define		MAX_UNSAFE_QUANTA			800
128int			max_unsafe_quanta = MAX_UNSAFE_QUANTA;
129
130#define		MAX_POLL_QUANTA				2
131int			max_poll_quanta = MAX_POLL_QUANTA;
132
133#define		SCHED_POLL_YIELD_SHIFT		4		/* 1/16 */
134int			sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
135
136uint64_t	max_poll_computation;
137
138uint64_t	max_unsafe_computation;
139uint64_t	sched_safe_duration;
140
141#if defined(CONFIG_SCHED_TRADITIONAL)
142
143uint32_t	std_quantum;
144uint32_t	min_std_quantum;
145uint32_t	bg_quantum;
146
147uint32_t	std_quantum_us;
148uint32_t	bg_quantum_us;
149
150#endif /* CONFIG_SCHED_TRADITIONAL */
151
152uint32_t	thread_depress_time;
153uint32_t	default_timeshare_computation;
154uint32_t	default_timeshare_constraint;
155
156uint32_t	max_rt_quantum;
157uint32_t	min_rt_quantum;
158
159#if defined(CONFIG_SCHED_TRADITIONAL)
160
161unsigned	sched_tick;
162uint32_t	sched_tick_interval;
163
164uint32_t	sched_pri_shift = INT8_MAX;
165uint32_t	sched_background_pri_shift = INT8_MAX;
166uint32_t	sched_combined_fgbg_pri_shift = INT8_MAX;
167uint32_t	sched_fixed_shift;
168uint32_t	sched_use_combined_fgbg_decay = 0;
169
170uint32_t	sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */
171
172static boolean_t sched_traditional_use_pset_runqueue = FALSE;
173
174/* Defaults for timer deadline profiling */
#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <= 2ms */
#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines <= 5ms */
179
180uint64_t timer_deadline_tracking_bin_1;
181uint64_t timer_deadline_tracking_bin_2;
182
183thread_t sched_maintenance_thread;
184
185__attribute__((always_inline))
186static inline run_queue_t runq_for_processor(processor_t processor)
187{
188	if (sched_traditional_use_pset_runqueue)
189		return &processor->processor_set->pset_runq;
190	else
191		return &processor->runq;
192}
193
194__attribute__((always_inline))
195static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread)
196{
197	if (thread->bound_processor == PROCESSOR_NULL)
198		return;
199
200	assert(thread->bound_processor == processor);
201
202	if (sched_traditional_use_pset_runqueue)
203		processor->processor_set->pset_runq_bound_count++;
204
205	processor->runq_bound_count++;
206}
207
208__attribute__((always_inline))
209static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread)
210{
211	if (thread->bound_processor == PROCESSOR_NULL)
212		return;
213
214	assert(thread->bound_processor == processor);
215
216	if (sched_traditional_use_pset_runqueue)
217		processor->processor_set->pset_runq_bound_count--;
218
219	processor->runq_bound_count--;
220}
221
222#endif /* CONFIG_SCHED_TRADITIONAL */
223
224uint64_t	sched_one_second_interval;
225
226uint32_t	sched_run_count, sched_share_count, sched_background_count;
227uint32_t	sched_load_average, sched_mach_factor;
228
229/* Forwards */
230
231#if defined(CONFIG_SCHED_TRADITIONAL)
232
233static void load_shift_init(void);
234static void preempt_pri_init(void);
235
236#endif /* CONFIG_SCHED_TRADITIONAL */
237
238static thread_t	thread_select(
239					thread_t			thread,
240					processor_t			processor);
241
242#if CONFIG_SCHED_IDLE_IN_PLACE
243static thread_t	thread_select_idle(
244					thread_t			thread,
245					processor_t			processor);
246#endif
247
248thread_t	processor_idle(
249					thread_t			thread,
250					processor_t			processor);
251
252ast_t
253csw_check_locked(	processor_t		processor,
254					processor_set_t	pset);
255
256#if defined(CONFIG_SCHED_TRADITIONAL)
257
258static thread_t	steal_thread(
259					processor_set_t		pset);
260
261static thread_t	steal_thread_disabled(
262					processor_set_t		pset) __attribute__((unused));
263
264
265static thread_t	steal_processor_thread(
266					processor_t			processor);
267
268static void		thread_update_scan(void);
269
270static void processor_setrun(
271				 processor_t			processor,
272				 thread_t			thread,
273				 integer_t			options);
274
275static boolean_t
276processor_enqueue(
277				  processor_t		processor,
278				  thread_t		thread,
279				  integer_t		options);
280
281static boolean_t
282processor_queue_remove(
283					   processor_t			processor,
284					   thread_t		thread);
285
286static boolean_t	processor_queue_empty(processor_t		processor);
287
288static boolean_t	priority_is_urgent(int priority);
289
290static ast_t		processor_csw_check(processor_t processor);
291
292static boolean_t	processor_queue_has_priority(processor_t		processor,
293											int				priority,
294											boolean_t		gte);
295
296static boolean_t	should_current_thread_rechoose_processor(processor_t			processor);
297
298static int     sched_traditional_processor_runq_count(processor_t   processor);
299
300static boolean_t	sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t		processor);
301
302static uint64_t     sched_traditional_processor_runq_stats_count_sum(processor_t   processor);
303
304static uint64_t		sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor);
305#endif
306
307
308#if defined(CONFIG_SCHED_TRADITIONAL)
309
310static void
311sched_traditional_init(void);
312
313static void
314sched_traditional_timebase_init(void);
315
316static void
317sched_traditional_processor_init(processor_t processor);
318
319static void
320sched_traditional_pset_init(processor_set_t pset);
321
322static void
323sched_traditional_with_pset_runqueue_init(void);
324
325#endif
326
327static void
328sched_realtime_init(void);
329
330static void
331sched_realtime_timebase_init(void);
332
333static void
334sched_timer_deadline_tracking_init(void);
335
336#if defined(CONFIG_SCHED_TRADITIONAL)
337static void
338sched_traditional_maintenance_continue(void);
339
340static uint32_t
341sched_traditional_initial_quantum_size(thread_t thread);
342
343static sched_mode_t
344sched_traditional_initial_thread_sched_mode(task_t parent_task);
345
346static boolean_t
347sched_traditional_supports_timeshare_mode(void);
348
349static thread_t
350sched_traditional_choose_thread(
351								processor_t		processor,
352								int				priority);
353
354#endif
355
356#if	DEBUG
357extern int debug_task;
358#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
359#else
360#define TLOG(a, fmt, args...) do {} while (0)
361#endif
362
363#if	DEBUG
364static
365boolean_t	thread_runnable(
366				thread_t		thread);
367
368#endif	/*DEBUG*/
369
370/*
371 *	State machine
372 *
373 * states are combinations of:
374 *  R	running
375 *  W	waiting (or on wait queue)
376 *  N	non-interruptible
377 *  O	swapped out
378 *  I	being swapped in
379 *
380 * init	action
381 *	assert_wait thread_block    clear_wait 		swapout	swapin
382 *
383 * R	RW, RWN	    R;   setrun	    -	       		-
384 * RN	RWN	    RN;  setrun	    -	       		-
385 *
386 * RW		    W		    R	       		-
387 * RWN		    WN		    RN	       		-
388 *
389 * W				    R;   setrun		WO
390 * WN				    RN;  setrun		-
391 *
392 * RO				    -			-	R
393 *
394 */
395
396#if defined(CONFIG_SCHED_TRADITIONAL)
397int8_t		sched_load_shifts[NRQS];
398int		sched_preempt_pri[NRQBM];
399#endif
400
401
402#if defined(CONFIG_SCHED_TRADITIONAL)
403
404const struct sched_dispatch_table sched_traditional_dispatch = {
405	sched_traditional_init,
406	sched_traditional_timebase_init,
407	sched_traditional_processor_init,
408	sched_traditional_pset_init,
409	sched_traditional_maintenance_continue,
410	sched_traditional_choose_thread,
411	steal_thread,
412	compute_priority,
413	choose_processor,
414	processor_enqueue,
415	processor_queue_shutdown,
416	processor_queue_remove,
417	processor_queue_empty,
418	priority_is_urgent,
419	processor_csw_check,
420	processor_queue_has_priority,
421	sched_traditional_initial_quantum_size,
422	sched_traditional_initial_thread_sched_mode,
423	sched_traditional_supports_timeshare_mode,
424	can_update_priority,
425	update_priority,
426	lightweight_update_priority,
427	sched_traditional_quantum_expire,
428	should_current_thread_rechoose_processor,
429	sched_traditional_processor_runq_count,
430	sched_traditional_processor_runq_stats_count_sum,
431	sched_traditional_fairshare_init,
432	sched_traditional_fairshare_runq_count,
433	sched_traditional_fairshare_runq_stats_count_sum,
434	sched_traditional_fairshare_enqueue,
435	sched_traditional_fairshare_dequeue,
436	sched_traditional_fairshare_queue_remove,
437	TRUE /* direct_dispatch_to_idle_processors */
438};
439
440const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
441	sched_traditional_with_pset_runqueue_init,
442	sched_traditional_timebase_init,
443	sched_traditional_processor_init,
444	sched_traditional_pset_init,
445	sched_traditional_maintenance_continue,
446	sched_traditional_choose_thread,
447	steal_thread,
448	compute_priority,
449	choose_processor,
450	processor_enqueue,
451	processor_queue_shutdown,
452	processor_queue_remove,
453	sched_traditional_with_pset_runqueue_processor_queue_empty,
454	priority_is_urgent,
455	processor_csw_check,
456	processor_queue_has_priority,
457	sched_traditional_initial_quantum_size,
458	sched_traditional_initial_thread_sched_mode,
459	sched_traditional_supports_timeshare_mode,
460	can_update_priority,
461	update_priority,
462	lightweight_update_priority,
463	sched_traditional_quantum_expire,
464	should_current_thread_rechoose_processor,
465	sched_traditional_processor_runq_count,
466	sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
467	sched_traditional_fairshare_init,
468	sched_traditional_fairshare_runq_count,
469	sched_traditional_fairshare_runq_stats_count_sum,
470	sched_traditional_fairshare_enqueue,
471	sched_traditional_fairshare_dequeue,
472	sched_traditional_fairshare_queue_remove,
473	FALSE /* direct_dispatch_to_idle_processors */
474};
475
476#endif
477
478const struct sched_dispatch_table *sched_current_dispatch = NULL;
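
/*
 * Illustrative note (added for clarity, not in the original source): the
 * SCHED(x) macro used throughout this file is assumed to dispatch through
 * the table selected in sched_init() below, i.e. a call such as
 *
 *	SCHED(choose_thread)(processor, MINPRI);
 *
 * is expected to resolve to
 *
 *	sched_current_dispatch->choose_thread(processor, MINPRI);
 *
 * The macro itself is defined elsewhere; the expansion shown here is an
 * assumption based on how sched_current_dispatch is initialized.
 */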
479
480/*
481 * Statically allocate a buffer to hold the longest possible
482 * scheduler description string, as currently implemented.
 * bsd/kern/kern_sysctl.c has a corresponding definition used
 * to export it to userspace via sysctl(3). If either version
485 * changes, update the other.
486 *
487 * Note that in addition to being an upper bound on the strings
488 * in the kernel, it's also an exact parameter to PE_get_default(),
489 * which interrogates the device tree on some platforms. That
490 * API requires the caller know the exact size of the device tree
491 * property, so we need both a legacy size (32) and the current size
492 * (48) to deal with old and new device trees. The device tree property
493 * is similarly padded to a fixed size so that the same kernel image
494 * can run on multiple devices with different schedulers configured
495 * in the device tree.
496 */
497#define SCHED_STRING_MAX_LENGTH (48)
498
499char sched_string[SCHED_STRING_MAX_LENGTH];
500static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown;
501
502/* Global flag which indicates whether Background Stepper Context is enabled */
503static int cpu_throttle_enabled = 1;
504
505void
506sched_init(void)
507{
508	char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' };
509
510	/* Check for runtime selection of the scheduler algorithm */
511	if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) {
512		/* If no boot-args override, look in device tree */
513		if (!PE_get_default("kern.sched", sched_arg,
514							SCHED_STRING_MAX_LENGTH)) {
515			sched_arg[0] = '\0';
516		}
517	}
518
519	if (strlen(sched_arg) > 0) {
520		if (0) {
521			/* Allow pattern below */
522#if defined(CONFIG_SCHED_TRADITIONAL)
523		} else if (0 == strcmp(sched_arg, kSchedTraditionalString)) {
524			sched_current_dispatch = &sched_traditional_dispatch;
525			_sched_enum = sched_enum_traditional;
526			strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
527			kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalString);
528		} else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) {
529			sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
530			_sched_enum = sched_enum_traditional_with_pset_runqueue;
531			strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
532			kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalWithPsetRunqueueString);
533#endif
534#if defined(CONFIG_SCHED_PROTO)
535		} else if (0 == strcmp(sched_arg, kSchedProtoString)) {
536			sched_current_dispatch = &sched_proto_dispatch;
537			_sched_enum = sched_enum_proto;
538			strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
539			kprintf("Scheduler: Runtime selection of %s\n", kSchedProtoString);
540#endif
541#if defined(CONFIG_SCHED_GRRR)
542		} else if (0 == strcmp(sched_arg, kSchedGRRRString)) {
543			sched_current_dispatch = &sched_grrr_dispatch;
544			_sched_enum = sched_enum_grrr;
545			strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
546			kprintf("Scheduler: Runtime selection of %s\n", kSchedGRRRString);
547#endif
548#if defined(CONFIG_SCHED_FIXEDPRIORITY)
549		} else if (0 == strcmp(sched_arg, kSchedFixedPriorityString)) {
550			sched_current_dispatch = &sched_fixedpriority_dispatch;
551			_sched_enum = sched_enum_fixedpriority;
552			strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
553			kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityString);
554		} else if (0 == strcmp(sched_arg, kSchedFixedPriorityWithPsetRunqueueString)) {
555			sched_current_dispatch = &sched_fixedpriority_with_pset_runqueue_dispatch;
556			_sched_enum = sched_enum_fixedpriority_with_pset_runqueue;
557			strlcpy(sched_string, kSchedFixedPriorityWithPsetRunqueueString, sizeof(sched_string));
558			kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityWithPsetRunqueueString);
559#endif
560		} else {
561			panic("Unrecognized scheduler algorithm: %s", sched_arg);
562		}
563	} else {
564#if   defined(CONFIG_SCHED_TRADITIONAL)
565		sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
566		_sched_enum = sched_enum_traditional_with_pset_runqueue;
567		strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
568		kprintf("Scheduler: Default of %s\n", kSchedTraditionalWithPsetRunqueueString);
569#elif defined(CONFIG_SCHED_PROTO)
570		sched_current_dispatch = &sched_proto_dispatch;
571		_sched_enum = sched_enum_proto;
572		strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
573		kprintf("Scheduler: Default of %s\n", kSchedProtoString);
574#elif defined(CONFIG_SCHED_GRRR)
575		sched_current_dispatch = &sched_grrr_dispatch;
576		_sched_enum = sched_enum_grrr;
577		strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
578		kprintf("Scheduler: Default of %s\n", kSchedGRRRString);
579#elif defined(CONFIG_SCHED_FIXEDPRIORITY)
580		sched_current_dispatch = &sched_fixedpriority_dispatch;
581		_sched_enum = sched_enum_fixedpriority;
582		strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
583		kprintf("Scheduler: Default of %s\n", kSchedFixedPriorityString);
584#else
585#error No default scheduler implementation
586#endif
587	}
588
589	SCHED(init)();
590	SCHED(fairshare_init)();
591	sched_realtime_init();
592	ast_init();
593	sched_timer_deadline_tracking_init();
594
595	SCHED(pset_init)(&pset0);
596	SCHED(processor_init)(master_processor);
597}
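
/*
 * Illustrative summary (added for clarity, not in the original source) of
 * the runtime scheduler selection performed above, in order of precedence:
 *
 *	1. "sched=<name>" boot-arg           (PE_parse_boot_argn("sched", ...))
 *	2. "kern.sched" device-tree property (PE_get_default("kern.sched", ...))
 *	3. compile-time default              (first CONFIG_SCHED_* tested above)
 *
 * The accepted <name> values are the kSched*String constants compared in
 * sched_init(); an unrecognized name panics.
 */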
598
599void
600sched_timebase_init(void)
601{
602	uint64_t	abstime;
603
604	clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime);
605	sched_one_second_interval = abstime;
606
607	SCHED(timebase_init)();
608	sched_realtime_timebase_init();
609}
610
611#if defined(CONFIG_SCHED_TRADITIONAL)
612
613static void
614sched_traditional_init(void)
615{
616	/*
617	 * Calculate the timeslicing quantum
618	 * in us.
619	 */
620	if (default_preemption_rate < 1)
621		default_preemption_rate = DEFAULT_PREEMPTION_RATE;
622	std_quantum_us = (1000 * 1000) / default_preemption_rate;
623
624	printf("standard timeslicing quantum is %d us\n", std_quantum_us);
625
626	if (default_bg_preemption_rate < 1)
627		default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
628	bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;
629
630	printf("standard background quantum is %d us\n", bg_quantum_us);
631
632	load_shift_init();
633	preempt_pri_init();
634	sched_tick = 0;
635}
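
/*
 * Worked example (added for clarity, not in the original source), assuming
 * the compiled-in defaults are not overridden:
 *
 *	std_quantum_us = 1,000,000 / 100 = 10,000 us  (10 ms timeslice)
 *	bg_quantum_us  = 1,000,000 / 400 =  2,500 us  (2.5 ms timeslice)
 */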
636
637static void
638sched_traditional_timebase_init(void)
639{
640	uint64_t	abstime;
641	uint32_t	shift;
642
643	/* standard timeslicing quantum */
644	clock_interval_to_absolutetime_interval(
645							std_quantum_us, NSEC_PER_USEC, &abstime);
646	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
647	std_quantum = (uint32_t)abstime;
648
649	/* smallest remaining quantum (250 us) */
650	clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
651	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
652	min_std_quantum = (uint32_t)abstime;
653
654	/* quantum for background tasks */
655	clock_interval_to_absolutetime_interval(
656							bg_quantum_us, NSEC_PER_USEC, &abstime);
657	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
658	bg_quantum = (uint32_t)abstime;
659
660	/* scheduler tick interval */
661	clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
662													NSEC_PER_USEC, &abstime);
663	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
664	sched_tick_interval = (uint32_t)abstime;
665
666	/*
667	 * Compute conversion factor from usage to
668	 * timesharing priorities with 5/8 ** n aging.
669	 */
670	abstime = (abstime * 5) / 3;
671	for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
672		abstime >>= 1;
673	sched_fixed_shift = shift;
674
675	max_unsafe_computation = max_unsafe_quanta * std_quantum;
676	sched_safe_duration = 2 * max_unsafe_quanta * std_quantum;
677
678	max_poll_computation = max_poll_quanta * std_quantum;
679	thread_depress_time = 1 * std_quantum;
680	default_timeshare_computation = std_quantum / 2;
681	default_timeshare_constraint = std_quantum;
682
683}
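
/*
 * Sketch of the sched_fixed_shift derivation above (added for clarity, not
 * in the original source): starting from sched_tick_interval * 5 / 3 in
 * absolute-time units, the loop counts how many right shifts bring the
 * value down to BASEPRI_DEFAULT or below; that count becomes
 * sched_fixed_shift, the scale used by the 5/8^n usage decay.  As a purely
 * illustrative example, with a 125 ms tick in a nanosecond-granularity
 * timebase and BASEPRI_DEFAULT of 31 (both assumptions here), the loop
 * would settle on a shift of 23, since (125,000,000 * 5 / 3) >> 23 is
 * roughly 25.
 */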
684
685static void
686sched_traditional_processor_init(processor_t processor)
687{
688	if (!sched_traditional_use_pset_runqueue) {
689		run_queue_init(&processor->runq);
690	}
691	processor->runq_bound_count = 0;
692}
693
694static void
695sched_traditional_pset_init(processor_set_t pset)
696{
697	if (sched_traditional_use_pset_runqueue) {
698		run_queue_init(&pset->pset_runq);
699	}
700	pset->pset_runq_bound_count = 0;
701}
702
703static void
704sched_traditional_with_pset_runqueue_init(void)
705{
706	sched_traditional_init();
707	sched_traditional_use_pset_runqueue = TRUE;
708}
709
710#endif /* CONFIG_SCHED_TRADITIONAL */
711
712#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
713void
714sched_traditional_fairshare_init(void)
715{
716	simple_lock_init(&fs_lock, 0);
717
718	fs_runq.count = 0;
719	queue_init(&fs_runq.queue);
720}
721#endif
722
723static void
724sched_realtime_init(void)
725{
726	simple_lock_init(&rt_lock, 0);
727
728	rt_runq.count = 0;
729	queue_init(&rt_runq.queue);
730}
731
732static void
733sched_realtime_timebase_init(void)
734{
735	uint64_t abstime;
736
	/* smallest rt computation (50 us) */
738	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
739	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
740	min_rt_quantum = (uint32_t)abstime;
741
742	/* maximum rt computation (50 ms) */
743	clock_interval_to_absolutetime_interval(
744		50, 1000*NSEC_PER_USEC, &abstime);
745	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
746	max_rt_quantum = (uint32_t)abstime;
747
748}
749
750#if defined(CONFIG_SCHED_TRADITIONAL)
751
752/*
753 * Set up values for timeshare
754 * loading factors.
755 */
756static void
757load_shift_init(void)
758{
759	int8_t		k, *p = sched_load_shifts;
760	uint32_t	i, j;
761
762	uint32_t	sched_decay_penalty = 1;
763
764	if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) {
765		kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty);
766	}
767
768	if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) {
769		kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor);
770	}
771
772	if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) {
773		kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay);
774	}
775
776	if (sched_decay_penalty == 0) {
777		/*
778		 * There is no penalty for timeshare threads for using too much
779		 * CPU, so set all load shifts to INT8_MIN. Even under high load,
780		 * sched_pri_shift will be >INT8_MAX, and there will be no
781		 * penalty applied to threads (nor will sched_usage be updated per
782		 * thread).
783		 */
784		for (i = 0; i < NRQS; i++) {
785			sched_load_shifts[i] = INT8_MIN;
786		}
787
788		return;
789	}
790
791	*p++ = INT8_MIN; *p++ = 0;
792
793	/*
794	 * For a given system load "i", the per-thread priority
795	 * penalty per quantum of CPU usage is ~2^k priority
796	 * levels. "sched_decay_penalty" can cause more
797	 * array entries to be filled with smaller "k" values
798	 */
799	for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) {
800		for (j <<= 1; (i < j) && (i < NRQS); ++i)
801			*p++ = k;
802	}
803}
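
/*
 * Worked example (added for clarity, not in the original source): with the
 * default sched_decay_penalty of 1, the loop above fills the table as
 *
 *	sched_load_shifts[0] = INT8_MIN, [1] = 0, [2..3] = 1,
 *	[4..7] = 2, [8..15] = 3, [16..31] = 4, ...
 *
 * i.e. roughly floor(log2(load)), so each doubling of the runnable load
 * adds one to the priority-decay shift.  Larger sched_decay_penalty values
 * widen the early buckets, leaving more entries with small shifts.
 */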
804
805static void
806preempt_pri_init(void)
807{
808	int		i, *p = sched_preempt_pri;
809
810	for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i)
811		setbit(i, p);
812
813	for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
814		setbit(i, p);
815}
816
817#endif /* CONFIG_SCHED_TRADITIONAL */
818
819/*
820 *	Thread wait timer expiration.
821 */
822void
823thread_timer_expire(
824	void			*p0,
825	__unused void	*p1)
826{
827	thread_t		thread = p0;
828	spl_t			s;
829
830	s = splsched();
831	thread_lock(thread);
832	if (--thread->wait_timer_active == 0) {
833		if (thread->wait_timer_is_set) {
834			thread->wait_timer_is_set = FALSE;
835			clear_wait_internal(thread, THREAD_TIMED_OUT);
836		}
837	}
838	thread_unlock(thread);
839	splx(s);
840}
841
842/*
843 *	thread_unblock:
844 *
845 *	Unblock thread on wake up.
846 *
847 *	Returns TRUE if the thread is still running.
848 *
849 *	Thread must be locked.
850 */
851boolean_t
852thread_unblock(
853	thread_t		thread,
854	wait_result_t	wresult)
855{
856	boolean_t		result = FALSE;
857	thread_t		cthread = current_thread();
858
859	/*
860	 *	Set wait_result.
861	 */
862	thread->wait_result = wresult;
863
864	/*
865	 *	Cancel pending wait timer.
866	 */
867	if (thread->wait_timer_is_set) {
868		if (timer_call_cancel(&thread->wait_timer))
869			thread->wait_timer_active--;
870		thread->wait_timer_is_set = FALSE;
871	}
872
873	/*
874	 *	Update scheduling state: not waiting,
875	 *	set running.
876	 */
877	thread->state &= ~(TH_WAIT|TH_UNINT);
878
879	if (!(thread->state & TH_RUN)) {
880		thread->state |= TH_RUN;
881
882		(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
883
884		/*
885		 *	Update run counts.
886		 */
887		sched_run_incr();
888		if (thread->sched_mode == TH_MODE_TIMESHARE) {
889			sched_share_incr();
890
891			if (thread->max_priority <= MAXPRI_THROTTLE)
892				sched_background_incr();
893		}
894	}
895	else {
896		/*
897		 *	Signal if idling on another processor.
898		 */
899#if CONFIG_SCHED_IDLE_IN_PLACE
900		if (thread->state & TH_IDLE) {
901			processor_t		processor = thread->last_processor;
902
903			if (processor != current_processor())
904				machine_signal_idle(processor);
905		}
906#else
907		assert((thread->state & TH_IDLE) == 0);
908#endif
909
910		result = TRUE;
911	}
912
913	/*
914	 * Calculate deadline for real-time threads.
915	 */
916	if (thread->sched_mode == TH_MODE_REALTIME) {
917		thread->realtime.deadline = thread->realtime.constraint + mach_absolute_time();
918	}
919
920	/*
921	 * Clear old quantum, fail-safe computation, etc.
922	 */
923	thread->current_quantum = 0;
924	thread->computation_metered = 0;
925	thread->reason = AST_NONE;
926
927	/* Obtain power-relevant interrupt and "platform-idle exit" statistics.
928	 * We also account for "double hop" thread signaling via
929	 * the thread callout infrastructure.
	 * DRK: consider removing the callout wakeup counters in the future;
	 * they're present for verification at the moment.
932	 */
933	boolean_t aticontext, pidle;
934	ml_get_power_state(&aticontext, &pidle);
935
936	if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) {
937		ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
938		DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
939
940		uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd);
941
942		if (ttd) {
943			if (ttd <= timer_deadline_tracking_bin_1)
944				thread->thread_timer_wakeups_bin_1++;
945			else
946				if (ttd <= timer_deadline_tracking_bin_2)
947					thread->thread_timer_wakeups_bin_2++;
948		}
949
950		if (pidle) {
951			ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
952		}
953
954	} else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) {
955		if (cthread->callout_woken_from_icontext) {
956			ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1);
957			thread->thread_callout_interrupt_wakeups++;
958			if (cthread->callout_woken_from_platform_idle) {
959				ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1);
960				thread->thread_callout_platform_idle_wakeups++;
961			}
962
963			cthread->callout_woke_thread = TRUE;
964		}
965	}
966
967	if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) {
968		thread->callout_woken_from_icontext = aticontext;
969		thread->callout_woken_from_platform_idle = pidle;
970		thread->callout_woke_thread = FALSE;
971	}
972
973	/* Event should only be triggered if thread is not already running */
974	if (result == FALSE) {
975		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
976			MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
977			(uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, 0, 0);
978	}
979
980	DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
981
982	return (result);
983}
984
985/*
986 *	Routine:	thread_go
987 *	Purpose:
988 *		Unblock and dispatch thread.
989 *	Conditions:
990 *		thread lock held, IPC locks may be held.
991 *		thread must have been pulled from wait queue under same lock hold.
992 *  Returns:
993 *		KERN_SUCCESS - Thread was set running
994 *		KERN_NOT_WAITING - Thread was not waiting
995 */
996kern_return_t
997thread_go(
998	thread_t		thread,
999	wait_result_t	wresult)
1000{
1001	assert(thread->at_safe_point == FALSE);
1002	assert(thread->wait_event == NO_EVENT64);
1003	assert(thread->wait_queue == WAIT_QUEUE_NULL);
1004
1005	if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
1006		if (!thread_unblock(thread, wresult))
1007			thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1008
1009		return (KERN_SUCCESS);
1010	}
1011
1012	return (KERN_NOT_WAITING);
1013}
1014
1015/*
1016 *	Routine:	thread_mark_wait_locked
1017 *	Purpose:
1018 *		Mark a thread as waiting.  If, given the circumstances,
1019 *		it doesn't want to wait (i.e. already aborted), then
1020 *		indicate that in the return value.
1021 *	Conditions:
1022 *		at splsched() and thread is locked.
1023 */
1024__private_extern__
1025wait_result_t
1026thread_mark_wait_locked(
1027	thread_t			thread,
1028	wait_interrupt_t 	interruptible)
1029{
1030	boolean_t		at_safe_point;
1031
1032	assert(thread == current_thread());
1033
1034	/*
1035	 *	The thread may have certain types of interrupts/aborts masked
1036	 *	off.  Even if the wait location says these types of interrupts
1037	 *	are OK, we have to honor mask settings (outer-scoped code may
1038	 *	not be able to handle aborts at the moment).
1039	 */
1040	if (interruptible > (thread->options & TH_OPT_INTMASK))
1041		interruptible = thread->options & TH_OPT_INTMASK;
1042
1043	at_safe_point = (interruptible == THREAD_ABORTSAFE);
1044
1045	if (	interruptible == THREAD_UNINT			||
1046			!(thread->sched_flags & TH_SFLAG_ABORT)	||
1047			(!at_safe_point &&
1048				(thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {
1049
1050		if ( !(thread->state & TH_TERMINATE))
1051			DTRACE_SCHED(sleep);
1052
1053		thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
1054		thread->at_safe_point = at_safe_point;
1055		return (thread->wait_result = THREAD_WAITING);
1056	}
1057	else
1058	if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
1059		thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
1060
1061	return (thread->wait_result = THREAD_INTERRUPTED);
1062}
1063
1064/*
1065 *	Routine:	thread_interrupt_level
1066 *	Purpose:
1067 *	        Set the maximum interruptible state for the
1068 *		current thread.  The effective value of any
1069 *		interruptible flag passed into assert_wait
1070 *		will never exceed this.
1071 *
1072 *		Useful for code that must not be interrupted,
1073 *		but which calls code that doesn't know that.
1074 *	Returns:
1075 *		The old interrupt level for the thread.
1076 */
1077__private_extern__
1078wait_interrupt_t
1079thread_interrupt_level(
1080	wait_interrupt_t new_level)
1081{
1082	thread_t thread = current_thread();
1083	wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
1084
1085	thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
1086
1087	return result;
1088}
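
/*
 * Illustrative usage sketch (added for clarity, not in the original
 * source): code that must not be interrupted, but which calls code that
 * may block, can clamp the interrupt level and restore it afterwards.
 */
#if 0	/* example only */
	wait_interrupt_t	saved_level;

	saved_level = thread_interrupt_level(THREAD_UNINT);
	/* ... call code that may assert_wait()/thread_block() ... */
	(void) thread_interrupt_level(saved_level);
#endif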
1089
1090/*
1091 * Check to see if an assert wait is possible, without actually doing one.
1092 * This is used by debug code in locks and elsewhere to verify that it is
 * always OK to block when trying to take a blocking lock (since waiting
 * for the actual assert_wait to catch the case may make it hard to detect
 * this case).
1096 */
1097boolean_t
1098assert_wait_possible(void)
1099{
1100
1101	thread_t thread;
1102
1103#if	DEBUG
1104	if(debug_mode) return TRUE;		/* Always succeed in debug mode */
1105#endif
1106
1107	thread = current_thread();
1108
1109	return (thread == NULL || wait_queue_assert_possible(thread));
1110}
1111
1112/*
1113 *	assert_wait:
1114 *
1115 *	Assert that the current thread is about to go to
1116 *	sleep until the specified event occurs.
1117 */
1118wait_result_t
1119assert_wait(
1120	event_t				event,
1121	wait_interrupt_t	interruptible)
1122{
1123	register wait_queue_t	wq;
1124	register int		index;
1125
1126	assert(event != NO_EVENT);
1127
1128	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1129		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1130		VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
1131
1132	index = wait_hash(event);
1133	wq = &wait_queues[index];
1134	return wait_queue_assert_wait(wq, event, interruptible, 0);
1135}
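
/*
 * Illustrative usage sketch (added for clarity, not in the original
 * source): the usual pairing of assert_wait()/thread_block() on the waiter
 * side with a wakeup on the same event; "some_object" is a placeholder.
 */
#if 0	/* example only */
	/* waiter */
	wait_result_t	wr = assert_wait((event_t)&some_object, THREAD_UNINT);
	if (wr == THREAD_WAITING)
		wr = thread_block(THREAD_CONTINUE_NULL);

	/* waker */
	thread_wakeup_prim((event_t)&some_object, FALSE, THREAD_AWAKENED);
#endif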
1136
1137wait_result_t
1138assert_wait_timeout(
1139	event_t				event,
1140	wait_interrupt_t	interruptible,
1141	uint32_t			interval,
1142	uint32_t			scale_factor)
1143{
1144	thread_t			thread = current_thread();
1145	wait_result_t		wresult;
1146	wait_queue_t		wqueue;
1147	uint64_t			deadline;
1148	spl_t				s;
1149
1150	assert(event != NO_EVENT);
1151	wqueue = &wait_queues[wait_hash(event)];
1152
1153	s = splsched();
1154	wait_queue_lock(wqueue);
1155	thread_lock(thread);
1156
1157	clock_interval_to_deadline(interval, scale_factor, &deadline);
1158
1159	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1160		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1161		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1162
1163	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
1164						  interruptible,
1165						  TIMEOUT_URGENCY_SYS_NORMAL,
1166						  deadline, 0,
1167						  thread);
1168
1169	thread_unlock(thread);
1170	wait_queue_unlock(wqueue);
1171	splx(s);
1172
1173	return (wresult);
1174}
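
/*
 * Illustrative sketch (added for clarity, not in the original source): a
 * timed wait of roughly 100 ms, expressed as 100,000 us scaled by
 * NSEC_PER_USEC; if no wakeup arrives before the deadline, the waiter is
 * unblocked with THREAD_TIMED_OUT.  "some_object" is a placeholder.
 */
#if 0	/* example only */
	wait_result_t	wr;

	wr = assert_wait_timeout((event_t)&some_object, THREAD_UNINT,
							 100000, NSEC_PER_USEC);
	if (wr == THREAD_WAITING)
		wr = thread_block(THREAD_CONTINUE_NULL);
	if (wr == THREAD_TIMED_OUT) {
		/* the timeout fired before any wakeup */
	}
#endif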
1175
1176wait_result_t
1177assert_wait_timeout_with_leeway(
1178	event_t				event,
1179	wait_interrupt_t	interruptible,
1180	wait_timeout_urgency_t	urgency,
1181	uint32_t			interval,
1182	uint32_t			leeway,
1183	uint32_t			scale_factor)
1184{
1185	thread_t			thread = current_thread();
1186	wait_result_t		wresult;
1187	wait_queue_t		wqueue;
1188	uint64_t			deadline;
1189	uint64_t			abstime;
1190	uint64_t			slop;
1191	uint64_t			now;
1192	spl_t				s;
1193
1194	now = mach_absolute_time();
1195	clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
1196	deadline = now + abstime;
1197
1198	clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop);
1199
1200	assert(event != NO_EVENT);
1201	wqueue = &wait_queues[wait_hash(event)];
1202
1203	s = splsched();
1204	wait_queue_lock(wqueue);
1205	thread_lock(thread);
1206
1207	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1208		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1209		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1210
1211	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
1212						  interruptible,
1213						  urgency, deadline, slop,
1214						  thread);
1215
1216	thread_unlock(thread);
1217	wait_queue_unlock(wqueue);
1218	splx(s);
1219
1220	return (wresult);
1221}
1222
1223wait_result_t
1224assert_wait_deadline(
1225	event_t				event,
1226	wait_interrupt_t	interruptible,
1227	uint64_t			deadline)
1228{
1229	thread_t			thread = current_thread();
1230	wait_result_t		wresult;
1231	wait_queue_t		wqueue;
1232	spl_t				s;
1233
1234	assert(event != NO_EVENT);
1235	wqueue = &wait_queues[wait_hash(event)];
1236
1237	s = splsched();
1238	wait_queue_lock(wqueue);
1239	thread_lock(thread);
1240
1241	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1242		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1243		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1244
1245	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
1246						  interruptible,
1247						  TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0,
1248						  thread);
1249
1250	thread_unlock(thread);
1251	wait_queue_unlock(wqueue);
1252	splx(s);
1253
1254	return (wresult);
1255}
1256
1257wait_result_t
1258assert_wait_deadline_with_leeway(
1259	event_t				event,
1260	wait_interrupt_t	interruptible,
1261	wait_timeout_urgency_t	urgency,
1262	uint64_t			deadline,
1263	uint64_t			leeway)
1264{
1265	thread_t			thread = current_thread();
1266	wait_result_t		wresult;
1267	wait_queue_t		wqueue;
1268	spl_t				s;
1269
1270	assert(event != NO_EVENT);
1271	wqueue = &wait_queues[wait_hash(event)];
1272
1273	s = splsched();
1274	wait_queue_lock(wqueue);
1275	thread_lock(thread);
1276
1277	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1278		MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
1279		VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
1280
1281	wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
1282						  interruptible,
1283						  urgency, deadline, leeway,
1284						  thread);
1285
1286	thread_unlock(thread);
1287	wait_queue_unlock(wqueue);
1288	splx(s);
1289
1290	return (wresult);
1291}
1292
1293/*
1294 *	thread_sleep_fast_usimple_lock:
1295 *
1296 *	Cause the current thread to wait until the specified event
1297 *	occurs.  The specified simple_lock is unlocked before releasing
1298 *	the cpu and re-acquired as part of waking up.
1299 *
1300 *	This is the simple lock sleep interface for components that use a
1301 *	faster version of simple_lock() than is provided by usimple_lock().
1302 */
1303__private_extern__ wait_result_t
1304thread_sleep_fast_usimple_lock(
1305	event_t			event,
1306	simple_lock_t		lock,
1307	wait_interrupt_t	interruptible)
1308{
1309	wait_result_t res;
1310
1311	res = assert_wait(event, interruptible);
1312	if (res == THREAD_WAITING) {
1313		simple_unlock(lock);
1314		res = thread_block(THREAD_CONTINUE_NULL);
1315		simple_lock(lock);
1316	}
1317	return res;
1318}
1319
1320
1321/*
1322 *	thread_sleep_usimple_lock:
1323 *
1324 *	Cause the current thread to wait until the specified event
1325 *	occurs.  The specified usimple_lock is unlocked before releasing
1326 *	the cpu and re-acquired as part of waking up.
1327 *
1328 *	This is the simple lock sleep interface for components where
1329 *	simple_lock() is defined in terms of usimple_lock().
1330 */
1331wait_result_t
1332thread_sleep_usimple_lock(
1333	event_t			event,
1334	usimple_lock_t		lock,
1335	wait_interrupt_t	interruptible)
1336{
1337	wait_result_t res;
1338
1339	res = assert_wait(event, interruptible);
1340	if (res == THREAD_WAITING) {
1341		usimple_unlock(lock);
1342		res = thread_block(THREAD_CONTINUE_NULL);
1343		usimple_lock(lock);
1344	}
1345	return res;
1346}
1347
1348/*
1349 *	thread_sleep_lock_write:
1350 *
1351 *	Cause the current thread to wait until the specified event
1352 *	occurs.  The specified (write) lock is unlocked before releasing
1353 *	the cpu. The (write) lock will be re-acquired before returning.
1354 */
1355wait_result_t
1356thread_sleep_lock_write(
1357	event_t			event,
1358	lock_t			*lock,
1359	wait_interrupt_t interruptible)
1360{
1361	wait_result_t	res;
1362
1363	res = assert_wait(event, interruptible);
1364	if (res == THREAD_WAITING) {
1365		lock_write_done(lock);
1366		res = thread_block(THREAD_CONTINUE_NULL);
1367		lock_write(lock);
1368	}
1369	return res;
1370}
1371
1372/*
1373 * thread_isoncpu:
1374 *
1375 * Return TRUE if a thread is running on a processor such that an AST
 * is needed to pull it out of userspace execution, or, if executing in
 * the kernel, to bring it to a context switch boundary that would cause
1378 * thread state to be serialized in the thread PCB.
1379 *
1380 * Thread locked, returns the same way. While locked, fields
1381 * like "state" and "runq" cannot change.
1382 */
1383static inline boolean_t
1384thread_isoncpu(thread_t thread)
1385{
1386	/* Not running or runnable */
1387	if (!(thread->state & TH_RUN))
1388		return (FALSE);
1389
1390	/* Waiting on a runqueue, not currently running */
1391	if (thread->runq != PROCESSOR_NULL)
1392		return (FALSE);
1393
1394	/*
1395	 * Thread must be running on a processor, or
1396	 * about to run, or just did run. In all these
1397	 * cases, an AST to the processor is needed
1398	 * to guarantee that the thread is kicked out
1399	 * of userspace and the processor has
1400	 * context switched (and saved register state).
1401	 */
1402	return (TRUE);
1403}
1404
1405/*
1406 * thread_stop:
1407 *
1408 * Force a preemption point for a thread and wait
1409 * for it to stop running on a CPU. If a stronger
1410 * guarantee is requested, wait until no longer
1411 * runnable. Arbitrates access among
1412 * multiple stop requests. (released by unstop)
1413 *
1414 * The thread must enter a wait state and stop via a
1415 * separate means.
1416 *
1417 * Returns FALSE if interrupted.
1418 */
1419boolean_t
1420thread_stop(
1421	thread_t		thread,
1422	boolean_t	until_not_runnable)
1423{
1424	wait_result_t	wresult;
1425	spl_t			s = splsched();
1426	boolean_t		oncpu;
1427
1428	wake_lock(thread);
1429	thread_lock(thread);
1430
1431	while (thread->state & TH_SUSP) {
1432		thread->wake_active = TRUE;
1433		thread_unlock(thread);
1434
1435		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1436		wake_unlock(thread);
1437		splx(s);
1438
1439		if (wresult == THREAD_WAITING)
1440			wresult = thread_block(THREAD_CONTINUE_NULL);
1441
1442		if (wresult != THREAD_AWAKENED)
1443			return (FALSE);
1444
1445		s = splsched();
1446		wake_lock(thread);
1447		thread_lock(thread);
1448	}
1449
1450	thread->state |= TH_SUSP;
1451
1452	while ((oncpu = thread_isoncpu(thread)) ||
1453		   (until_not_runnable && (thread->state & TH_RUN))) {
1454		processor_t		processor;
1455
1456		if (oncpu) {
1457			assert(thread->state & TH_RUN);
1458			processor = thread->chosen_processor;
1459			cause_ast_check(processor);
1460		}
1461
1462		thread->wake_active = TRUE;
1463		thread_unlock(thread);
1464
1465		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
1466		wake_unlock(thread);
1467		splx(s);
1468
1469		if (wresult == THREAD_WAITING)
1470			wresult = thread_block(THREAD_CONTINUE_NULL);
1471
1472		if (wresult != THREAD_AWAKENED) {
1473			thread_unstop(thread);
1474			return (FALSE);
1475		}
1476
1477		s = splsched();
1478		wake_lock(thread);
1479		thread_lock(thread);
1480	}
1481
1482	thread_unlock(thread);
1483	wake_unlock(thread);
1484	splx(s);
1485
1486	/*
1487	 * We return with the thread unlocked. To prevent it from
1488	 * transitioning to a runnable state (or from TH_RUN to
1489	 * being on the CPU), the caller must ensure the thread
1490	 * is stopped via an external means (such as an AST)
1491	 */
1492
1493	return (TRUE);
1494}
1495
1496/*
1497 * thread_unstop:
1498 *
1499 * Release a previous stop request and set
1500 * the thread running if appropriate.
1501 *
1502 * Use only after a successful stop operation.
1503 */
1504void
1505thread_unstop(
1506	thread_t	thread)
1507{
1508	spl_t		s = splsched();
1509
1510	wake_lock(thread);
1511	thread_lock(thread);
1512
1513	if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
1514		thread->state &= ~TH_SUSP;
1515		thread_unblock(thread, THREAD_AWAKENED);
1516
1517		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1518	}
1519	else
1520	if (thread->state & TH_SUSP) {
1521		thread->state &= ~TH_SUSP;
1522
1523		if (thread->wake_active) {
1524			thread->wake_active = FALSE;
1525			thread_unlock(thread);
1526
1527			thread_wakeup(&thread->wake_active);
1528			wake_unlock(thread);
1529			splx(s);
1530
1531			return;
1532		}
1533	}
1534
1535	thread_unlock(thread);
1536	wake_unlock(thread);
1537	splx(s);
1538}
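
/*
 * Illustrative pairing (added for clarity, not in the original source):
 * thread_stop() and thread_unstop() bracket a region where the caller
 * needs the target thread off-CPU; "target_thread" is a placeholder.
 */
#if 0	/* example only */
	if (thread_stop(target_thread, FALSE)) {
		/* target is not running on any CPU; examine or modify it */
		thread_unstop(target_thread);
	} else {
		/* the stop was interrupted; do not call thread_unstop() */
	}
#endif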
1539
1540/*
1541 * thread_wait:
1542 *
1543 * Wait for a thread to stop running. (non-interruptible)
1544 *
1545 */
1546void
1547thread_wait(
1548	thread_t	thread,
1549	boolean_t	until_not_runnable)
1550{
1551	wait_result_t	wresult;
1552	boolean_t 	oncpu;
1553	processor_t	processor;
1554	spl_t		s = splsched();
1555
1556	wake_lock(thread);
1557	thread_lock(thread);
1558
1559	/*
1560	 * Wait until not running on a CPU.  If stronger requirement
1561	 * desired, wait until not runnable.  Assumption: if thread is
1562	 * on CPU, then TH_RUN is set, so we're not waiting in any case
1563	 * where the original, pure "TH_RUN" check would have let us
1564	 * finish.
1565	 */
1566	while ((oncpu = thread_isoncpu(thread)) ||
1567			(until_not_runnable && (thread->state & TH_RUN))) {
1568
1569		if (oncpu) {
1570			assert(thread->state & TH_RUN);
1571			processor = thread->chosen_processor;
1572			cause_ast_check(processor);
1573		}
1574
1575		thread->wake_active = TRUE;
1576		thread_unlock(thread);
1577
1578		wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
1579		wake_unlock(thread);
1580		splx(s);
1581
1582		if (wresult == THREAD_WAITING)
1583			thread_block(THREAD_CONTINUE_NULL);
1584
1585		s = splsched();
1586		wake_lock(thread);
1587		thread_lock(thread);
1588	}
1589
1590	thread_unlock(thread);
1591	wake_unlock(thread);
1592	splx(s);
1593}
1594
1595/*
1596 *	Routine: clear_wait_internal
1597 *
1598 *		Clear the wait condition for the specified thread.
1599 *		Start the thread executing if that is appropriate.
1600 *	Arguments:
1601 *		thread		thread to awaken
1602 *		result		Wakeup result the thread should see
1603 *	Conditions:
1604 *		At splsched
1605 *		the thread is locked.
1606 *	Returns:
1607 *		KERN_SUCCESS		thread was rousted out a wait
1608 *		KERN_FAILURE		thread was waiting but could not be rousted
1609 *		KERN_NOT_WAITING	thread was not waiting
1610 */
1611__private_extern__ kern_return_t
1612clear_wait_internal(
1613	thread_t		thread,
1614	wait_result_t	wresult)
1615{
1616	wait_queue_t	wq = thread->wait_queue;
1617	uint32_t	i = LockTimeOut;
1618
1619	do {
1620		if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1621			return (KERN_FAILURE);
1622
1623		if (wq != WAIT_QUEUE_NULL) {
1624			if (wait_queue_lock_try(wq)) {
1625				wait_queue_pull_thread_locked(wq, thread, TRUE);
1626				/* wait queue unlocked, thread still locked */
1627			}
1628			else {
1629				thread_unlock(thread);
1630				delay(1);
1631
1632				thread_lock(thread);
1633				if (wq != thread->wait_queue)
1634					return (KERN_NOT_WAITING);
1635
1636				continue;
1637			}
1638		}
1639
1640		return (thread_go(thread, wresult));
1641	} while ((--i > 0) || machine_timeout_suspended());
1642
1643	panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
1644		  thread, wq, cpu_number());
1645
1646	return (KERN_FAILURE);
1647}
1648
1649
1650/*
1651 *	clear_wait:
1652 *
1653 *	Clear the wait condition for the specified thread.  Start the thread
1654 *	executing if that is appropriate.
1655 *
1656 *	parameters:
1657 *	  thread		thread to awaken
1658 *	  result		Wakeup result the thread should see
1659 */
1660kern_return_t
1661clear_wait(
1662	thread_t		thread,
1663	wait_result_t	result)
1664{
1665	kern_return_t ret;
1666	spl_t		s;
1667
1668	s = splsched();
1669	thread_lock(thread);
1670	ret = clear_wait_internal(thread, result);
1671	thread_unlock(thread);
1672	splx(s);
1673	return ret;
1674}
1675
1676
1677/*
1678 *	thread_wakeup_prim:
1679 *
1680 *	Common routine for thread_wakeup, thread_wakeup_with_result,
1681 *	and thread_wakeup_one.
1682 *
1683 */
1684kern_return_t
1685thread_wakeup_prim(
1686	event_t			event,
1687	boolean_t		one_thread,
1688	wait_result_t		result)
1689{
1690	return (thread_wakeup_prim_internal(event, one_thread, result, -1));
1691}
1692
1693
1694kern_return_t
1695thread_wakeup_prim_internal(
1696	event_t			event,
1697	boolean_t		one_thread,
1698	wait_result_t		result,
1699	int			priority)
1700{
1701	register wait_queue_t	wq;
1702	register int			index;
1703
1704	index = wait_hash(event);
1705	wq = &wait_queues[index];
1706	if (one_thread)
1707		return (wait_queue_wakeup_one(wq, event, result, priority));
1708	else
1709	    return (wait_queue_wakeup_all(wq, event, result));
1710}
1711
1712/*
1713 *	thread_bind:
1714 *
1715 *	Force the current thread to execute on the specified processor.
1716 *
1717 *	Returns the previous binding.  PROCESSOR_NULL means
1718 *	not bound.
1719 *
1720 *	XXX - DO NOT export this to users - XXX
1721 */
1722processor_t
1723thread_bind(
1724	processor_t		processor)
1725{
1726	thread_t		self = current_thread();
1727	processor_t		prev;
1728	spl_t			s;
1729
1730	s = splsched();
1731	thread_lock(self);
1732
1733	prev = self->bound_processor;
1734	self->bound_processor = processor;
1735
1736	thread_unlock(self);
1737	splx(s);
1738
1739	return (prev);
1740}
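
/*
 * Illustrative usage sketch (added for clarity, not in the original
 * source): temporarily bind the current thread to a specific processor and
 * restore the previous binding.  Blocking after the bind so that the
 * thread is re-dispatched on the target processor is an assumption here,
 * not something this routine documents.
 */
#if 0	/* example only */
	processor_t	prev_bind;

	prev_bind = thread_bind(master_processor);
	thread_block(THREAD_CONTINUE_NULL);	/* resume on master_processor */
	/* ... processor-specific work ... */
	thread_bind(prev_bind);
#endif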
1741
1742/*
1743 *	thread_select:
1744 *
1745 *	Select a new thread for the current processor to execute.
1746 *
1747 *	May select the current thread, which must be locked.
1748 */
1749static thread_t
1750thread_select(
1751	thread_t			thread,
1752	processor_t			processor)
1753{
1754	processor_set_t		pset = processor->processor_set;
1755	thread_t			new_thread = THREAD_NULL;
1756	boolean_t			inactive_state;
1757
1758	assert(processor == current_processor());
1759
1760	do {
1761		/*
1762		 *	Update the priority.
1763		 */
1764		if (SCHED(can_update_priority)(thread))
1765			SCHED(update_priority)(thread);
1766
1767		processor->current_pri = thread->sched_pri;
1768		processor->current_thmode = thread->sched_mode;
1769
1770		pset_lock(pset);
1771
1772		assert(pset->low_count);
1773		assert(pset->low_pri);
1774
1775		if (processor->processor_meta != PROCESSOR_META_NULL && processor->processor_meta->primary != processor) {
1776			/*
1777			 * Should this secondary SMT processor attempt to find work? For pset runqueue systems,
1778			 * we should look for work only under the same conditions that choose_processor()
1779			 * would have assigned work, which is when all primary processors have been assigned work.
1780			 *
1781			 * An exception is that bound threads are dispatched to a processor without going through
1782			 * choose_processor(), so in those cases we should continue trying to dequeue work.
1783			 */
1784			if (!processor->runq_bound_count && !queue_empty(&pset->idle_queue) && !rt_runq.count) {
1785				goto idle;
1786			}
1787		}
1788
1789		inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor);
1790
1791		simple_lock(&rt_lock);
1792
1793		/*
1794		 *	Test to see if the current thread should continue
1795		 *	to run on this processor.  Must be runnable, and not
1796		 *	bound to a different processor, nor be in the wrong
1797		 *	processor set.
1798		 */
1799		if (	((thread->state & ~TH_SUSP) == TH_RUN)			&&
1800		    (thread->sched_pri >= BASEPRI_RTQUEUES		||
1801		    processor->processor_meta == PROCESSOR_META_NULL ||
1802		    processor->processor_meta->primary == processor) &&
1803		    (thread->bound_processor == PROCESSOR_NULL	||
1804		    thread->bound_processor == processor)		&&
1805		    (thread->affinity_set == AFFINITY_SET_NULL	||
1806		    thread->affinity_set->aset_pset == pset)) {
1807			if (thread->sched_pri >= BASEPRI_RTQUEUES &&
1808			    first_timeslice(processor)) {
1809				if (rt_runq.count > 0) {
1810					register queue_t		q;
1811
1812					q = &rt_runq.queue;
1813					if (((thread_t)q->next)->realtime.deadline <
1814					    processor->deadline) {
1815						if ((((thread_t)q->next)->bound_processor == PROCESSOR_NULL) || (((thread_t)q->next)->bound_processor == processor)) {
1816						thread = (thread_t)dequeue_head(q);
1817						thread->runq = PROCESSOR_NULL;
1818						SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
1819						rt_runq.count--;
1820						}
1821					}
1822				}
1823
1824				simple_unlock(&rt_lock);
1825
1826				processor->deadline = thread->realtime.deadline;
1827
1828				pset_unlock(pset);
1829
1830				return (thread);
1831			}
1832
1833			if (!inactive_state && (thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) &&
1834					(new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri)) == THREAD_NULL) {
1835
1836				simple_unlock(&rt_lock);
1837
1838				/* I am the highest priority runnable (non-idle) thread */
1839
1840				pset_pri_hint(pset, processor, processor->current_pri);
1841
1842				pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
1843
1844				processor->deadline = UINT64_MAX;
1845
1846				pset_unlock(pset);
1847
1848				return (thread);
1849			}
1850		}
1851
1852		if (new_thread != THREAD_NULL ||
1853				(SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) &&
1854					 (new_thread = SCHED(choose_thread)(processor, MINPRI)) != THREAD_NULL)) {
1855				simple_unlock(&rt_lock);
1856
1857				if (!inactive_state) {
1858					pset_pri_hint(pset, processor, new_thread->sched_pri);
1859
1860					pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
1861				}
1862
1863				processor->deadline = UINT64_MAX;
1864				pset_unlock(pset);
1865
1866				return (new_thread);
1867		}
1868
1869		if (rt_runq.count > 0) {
1870			thread = (thread_t)dequeue_head(&rt_runq.queue);
1871
1872			if (__probable((thread->bound_processor == NULL || (thread->bound_processor == processor)))) {
1873				thread->runq = PROCESSOR_NULL;
1874				SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
1875				rt_runq.count--;
1876
1877				simple_unlock(&rt_lock);
1878
1879				processor->deadline = thread->realtime.deadline;
1880				pset_unlock(pset);
1881
1882				return (thread);
1883			} else {
1884				enqueue_head(&rt_runq.queue, (queue_entry_t)thread);
1885			}
1886		}
1887
1888		simple_unlock(&rt_lock);
1889
1890		/* No realtime threads and no normal threads on the per-processor
1891		 * runqueue. Finally check for global fairshare threads.
1892		 */
1893		if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) {
1894
1895			processor->deadline = UINT64_MAX;
1896			pset_unlock(pset);
1897
1898			return (new_thread);
1899		}
1900
1901		processor->deadline = UINT64_MAX;
1902
1903		/*
1904		 *	Set processor inactive based on
1905		 *	indication from the platform code.
1906		 */
1907		if (inactive_state) {
1908			if (processor->state == PROCESSOR_RUNNING)
1909				remqueue((queue_entry_t)processor);
1910			else
1911			if (processor->state == PROCESSOR_IDLE)
1912				remqueue((queue_entry_t)processor);
1913
1914			processor->state = PROCESSOR_INACTIVE;
1915
1916			pset_unlock(pset);
1917
1918			return (processor->idle_thread);
1919		}
1920
1921		/*
1922		 *	No runnable threads, attempt to steal
1923		 *	from other processors.
1924		 */
1925		new_thread = SCHED(steal_thread)(pset);
1926		if (new_thread != THREAD_NULL) {
1927			return (new_thread);
1928		}
1929
1930		/*
1931		 *	If other threads have appeared, shortcut
1932		 *	around again.
1933		 */
1934		if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0)
1935			continue;
1936
1937		pset_lock(pset);
1938
1939	idle:
1940		/*
1941		 *	Nothing is runnable, so set this processor idle if it
1942		 *	was running.
1943		 */
1944		if (processor->state == PROCESSOR_RUNNING) {
1945			remqueue((queue_entry_t)processor);
1946			processor->state = PROCESSOR_IDLE;
1947
1948			if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) {
1949				enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
1950				pset_pri_init_hint(pset, processor);
1951				pset_count_init_hint(pset, processor);
1952			}
1953			else {
			enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor);
			pset_unlock(pset);
			return (processor->idle_thread);
1957			}
1958		}
1959
1960		pset_unlock(pset);
1961
1962#if CONFIG_SCHED_IDLE_IN_PLACE
1963		/*
1964		 *	Choose idle thread if fast idle is not possible.
1965		 */
1966		if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES)
1967			return (processor->idle_thread);
1968
1969		/*
1970		 *	Perform idling activities directly without a
1971		 *	context switch.  Return dispatched thread,
1972		 *	else check again for a runnable thread.
1973		 */
1974		new_thread = thread_select_idle(thread, processor);
1975
1976#else /* !CONFIG_SCHED_IDLE_IN_PLACE */
1977
1978		/*
1979		 * Do a full context switch to idle so that the current
1980		 * thread can start running on another processor without
1981		 * waiting for the fast-idled processor to wake up.
1982		 */
1983		return (processor->idle_thread);
1984
1985#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */
1986
1987	} while (new_thread == THREAD_NULL);
1988
1989	return (new_thread);
1990}
1991
1992#if CONFIG_SCHED_IDLE_IN_PLACE
1993/*
1994 *	thread_select_idle:
1995 *
1996 *	Idle the processor using the current thread context.
1997 *
1998 *	Called with thread locked, then dropped and relocked.
1999 */
2000static thread_t
2001thread_select_idle(
2002	thread_t		thread,
2003	processor_t		processor)
2004{
2005	thread_t		new_thread;
2006	uint64_t		arg1, arg2;
2007	int			urgency;
2008
2009	if (thread->sched_mode == TH_MODE_TIMESHARE) {
2010		if (thread->max_priority <= MAXPRI_THROTTLE)
2011			sched_background_decr();
2012
2013		sched_share_decr();
2014	}
2015	sched_run_decr();
2016
2017	thread->state |= TH_IDLE;
2018	processor->current_pri = IDLEPRI;
2019	processor->current_thmode = TH_MODE_NONE;
2020
2021	/* Reload precise timing global policy to thread-local policy */
2022	thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
2023
2024	thread_unlock(thread);
2025
2026	/*
2027	 *	Switch execution timing to processor idle thread.
2028	 */
2029	processor->last_dispatch = mach_absolute_time();
2030	thread->last_run_time = processor->last_dispatch;
2031	thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
2032	PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
2033
2034	/*
2035	 *	Cancel the quantum timer while idling.
2036	 */
2037	timer_call_cancel(&processor->quantum_timer);
2038	processor->timeslice = 0;
2039
2040	(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
2041
2042	thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
2043
2044	/*
2045	 *	Enable interrupts and perform idling activities.  No
2046	 *	preemption due to TH_IDLE being set.
2047	 */
2048	spllo(); new_thread = processor_idle(thread, processor);
2049
2050	/*
2051	 *	Return at splsched.
2052	 */
2053	(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
2054
2055	thread_lock(thread);
2056
2057	/*
2058	 *	If awakened, switch to thread timer and start a new quantum.
2059	 *	Otherwise skip; we will context switch to another thread or return here.
2060	 */
2061	if (!(thread->state & TH_WAIT)) {
2062		processor->last_dispatch = mach_absolute_time();
2063		thread_timer_event(processor->last_dispatch, &thread->system_timer);
2064		PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2065
2066		thread_quantum_init(thread);
2067		thread->last_quantum_refill_time = processor->last_dispatch;
2068
2069		processor->quantum_end = processor->last_dispatch + thread->current_quantum;
2070		timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL);
2071		processor->timeslice = 1;
2072
2073		thread->computation_epoch = processor->last_dispatch;
2074	}
2075
2076	thread->state &= ~TH_IDLE;
2077
2078	/*
2079	 * If we idled in place, simulate a context switch back
2080	 * to the original priority of the thread so that the
2081	 * platform layer cannot distinguish this from a true
2082	 * switch to the idle thread.
2083	 */
2084
2085	urgency = thread_get_urgency(thread, &arg1, &arg2);
2086
2087	thread_tell_urgency(urgency, arg1, arg2, new_thread);
2088
2089	sched_run_incr();
2090	if (thread->sched_mode == TH_MODE_TIMESHARE) {
2091		sched_share_incr();
2092
2093		if (thread->max_priority <= MAXPRI_THROTTLE)
2094			sched_background_incr();
2095	}
2096
2097	return (new_thread);
2098}
2099#endif /* CONFIG_SCHED_IDLE_IN_PLACE */
2100
2101#if defined(CONFIG_SCHED_TRADITIONAL)
2102static thread_t
2103sched_traditional_choose_thread(
2104								processor_t		processor,
2105								int				priority)
2106{
2107	thread_t thread;
2108
2109	thread = choose_thread(processor, runq_for_processor(processor), priority);
2110	if (thread != THREAD_NULL) {
2111		runq_consider_decr_bound_count(processor, thread);
2112	}
2113
2114	return thread;
2115}
2116
2117#endif /* defined(CONFIG_SCHED_TRADITIONAL)  */
2118
2119#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY)
2120
2121/*
2122 *	choose_thread:
2123 *
2124 *	Locate a thread to execute from the processor run queue
2125 *	and return it.  Only choose a thread with greater or equal
2126 *	priority.
2127 *
2128 *	Associated pset must be locked.  Returns THREAD_NULL
2129 *	on failure.
2130 */
2131thread_t
2132choose_thread(
2133	processor_t		processor,
2134	run_queue_t		rq,
2135	int				priority)
2136{
2137	queue_t			queue = rq->queues + rq->highq;
2138	int				pri = rq->highq, count = rq->count;
2139	thread_t		thread;
2140
2141	while (count > 0 && pri >= priority) {
2142		thread = (thread_t)queue_first(queue);
2143		while (!queue_end(queue, (queue_entry_t)thread)) {
2144			if (thread->bound_processor == PROCESSOR_NULL ||
2145							thread->bound_processor == processor) {
2146				remqueue((queue_entry_t)thread);
2147
2148				thread->runq = PROCESSOR_NULL;
2149				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
2150				rq->count--;
2151				if (SCHED(priority_is_urgent)(pri)) {
2152					rq->urgency--; assert(rq->urgency >= 0);
2153				}
2154				if (queue_empty(queue)) {
2155					if (pri != IDLEPRI)
2156						clrbit(MAXPRI - pri, rq->bitmap);
2157					rq->highq = MAXPRI - ffsbit(rq->bitmap);
2158				}
2159
2160				return (thread);
2161			}
2162			count--;
2163
2164			thread = (thread_t)queue_next((queue_entry_t)thread);
2165		}
2166
2167		queue--; pri--;
2168	}
2169
2170	return (THREAD_NULL);
2171}
2172
2173#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
2174
2175/*
2176 *	Perform a context switch and start executing the new thread.
2177 *
2178 *	Returns FALSE on failure, and the thread is re-dispatched.
2179 *
2180 *	Called at splsched.
2181 */
2182
2183#define funnel_release_check(thread, debug)				\
2184MACRO_BEGIN												\
2185	if ((thread)->funnel_state & TH_FN_OWNED) {			\
2186		(thread)->funnel_state = TH_FN_REFUNNEL;		\
2187		KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,			\
2188			(thread)->funnel_lock, (debug), 0, 0, 0);	\
2189		funnel_unlock((thread)->funnel_lock);			\
2190	}													\
2191MACRO_END
2192
2193#define funnel_refunnel_check(thread, debug)				\
2194MACRO_BEGIN													\
2195	if ((thread)->funnel_state & TH_FN_REFUNNEL) {			\
2196		kern_return_t	result = (thread)->wait_result;		\
2197															\
2198		(thread)->funnel_state = 0;							\
2199		KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,				\
2200			(thread)->funnel_lock, (debug), 0, 0, 0);		\
2201		funnel_lock((thread)->funnel_lock);					\
2202		KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE,				\
2203			(thread)->funnel_lock, (debug), 0, 0, 0);		\
2204		(thread)->funnel_state = TH_FN_OWNED;				\
2205		(thread)->wait_result = result;						\
2206	}														\
2207MACRO_END
2208
2209/*
2210 * thread_invoke
2211 *
2212 * "self" is what is currently running on the processor,
2213 * "thread" is the new thread to context switch to
2214 * (which may be the same thread in some cases)
2215 */
2216static boolean_t
2217thread_invoke(
2218	thread_t			self,
2219	thread_t			thread,
2220	ast_t				reason)
2221{
2222	thread_continue_t	continuation = self->continuation;
2223	void			*parameter = self->parameter;
2224	processor_t		processor;
2225	uint64_t		ctime = mach_absolute_time();
2226
2227	if (__improbable(get_preemption_level() != 0)) {
2228		int pl = get_preemption_level();
2229		panic("thread_invoke: preemption_level %d, possible cause: %s",
2230		    pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
2231			"blocking while holding a spinlock, or within interrupt context"));
2232	}
2233
2234	assert(self == current_thread());
2235
2236#if defined(CONFIG_SCHED_TRADITIONAL)
2237	sched_traditional_consider_maintenance(ctime);
2238#endif /* CONFIG_SCHED_TRADITIONAL */
2239
2240	/*
2241	 * Mark thread interruptible.
2242	 */
2243	thread_lock(thread);
2244	thread->state &= ~TH_UNINT;
2245
2246#if DEBUG
2247	assert(thread_runnable(thread));
2248#endif
2249
2250	/* Reload precise timing global policy to thread-local policy */
2251	thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
2252
2253	/*
2254	 * Allow time constraint threads to hang onto
2255	 * a stack.
2256	 */
2257	if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
2258		self->reserved_stack = self->kernel_stack;
2259
2260	if (continuation != NULL) {
2261		if (!thread->kernel_stack) {
2262			/*
2263			 * If we are using a privileged stack,
2264			 * check to see whether we can exchange it with
2265			 * that of the other thread.
2266			 */
2267			if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
2268				goto need_stack;
2269
2270			/*
2271			 * Context switch by performing a stack handoff.
2272			 */
2273			continuation = thread->continuation;
2274			parameter = thread->parameter;
2275
2276			processor = current_processor();
2277			processor->active_thread = thread;
2278			processor->current_pri = thread->sched_pri;
2279			processor->current_thmode = thread->sched_mode;
			if (thread->last_processor != processor && thread->last_processor != PROCESSOR_NULL) {
2281				if (thread->last_processor->processor_set != processor->processor_set)
2282					thread->ps_switch++;
2283				thread->p_switch++;
2284			}
2285			thread->last_processor = processor;
2286			thread->c_switch++;
2287			ast_context(thread);
2288			thread_unlock(thread);
2289
2290			self->reason = reason;
2291
2292			processor->last_dispatch = ctime;
2293			self->last_run_time = ctime;
2294			thread_timer_event(ctime, &thread->system_timer);
2295			PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2296
2297			/*
2298			 * Since non-precise user/kernel time doesn't update the state timer
2299			 * during privilege transitions, synthesize an event now.
2300			 */
2301			if (!thread->precise_user_kernel_time) {
2302				timer_switch(PROCESSOR_DATA(processor, current_state),
2303							ctime,
2304							 PROCESSOR_DATA(processor, current_state));
2305			}
2306
2307			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2308				MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
2309				self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2310
2311			if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
2312				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
2313						(uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2314			}
2315
2316			DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
2317
2318			SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2319
2320			TLOG(1, "thread_invoke: calling stack_handoff\n");
2321			stack_handoff(self, thread);
2322
2323			DTRACE_SCHED(on__cpu);
2324
2325			thread_dispatch(self, thread);
2326
2327			thread->continuation = thread->parameter = NULL;
2328
2329			counter(c_thread_invoke_hits++);
2330
2331			funnel_refunnel_check(thread, 2);
2332			(void) spllo();
2333
2334			assert(continuation);
2335			call_continuation(continuation, parameter, thread->wait_result);
2336			/*NOTREACHED*/
2337		}
2338		else if (thread == self) {
2339			/* same thread but with continuation */
2340			ast_context(self);
2341			counter(++c_thread_invoke_same);
2342			thread_unlock(self);
2343
2344			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2345				MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2346				self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2347
2348			self->continuation = self->parameter = NULL;
2349
2350			funnel_refunnel_check(self, 3);
2351			(void) spllo();
2352
2353			call_continuation(continuation, parameter, self->wait_result);
2354			/*NOTREACHED*/
2355		}
2356	}
2357	else {
2358		/*
2359		 * Check that the other thread has a stack
2360		 */
2361		if (!thread->kernel_stack) {
2362need_stack:
2363			if (!stack_alloc_try(thread)) {
2364				counter(c_thread_invoke_misses++);
2365				thread_unlock(thread);
2366				thread_stack_enqueue(thread);
2367				return (FALSE);
2368			}
2369		}
2370		else if (thread == self) {
2371			ast_context(self);
2372			counter(++c_thread_invoke_same);
2373			thread_unlock(self);
2374
2375			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2376				MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2377				self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2378
2379			return (TRUE);
2380		}
2381	}
2382
2383	/*
2384	 * Context switch by full context save.
2385	 */
2386	processor = current_processor();
2387	processor->active_thread = thread;
2388	processor->current_pri = thread->sched_pri;
2389	processor->current_thmode = thread->sched_mode;
	if (thread->last_processor != processor && thread->last_processor != PROCESSOR_NULL) {
2391		if (thread->last_processor->processor_set != processor->processor_set)
2392			thread->ps_switch++;
2393		thread->p_switch++;
2394	}
2395	thread->last_processor = processor;
2396	thread->c_switch++;
2397	ast_context(thread);
2398	thread_unlock(thread);
2399
2400	counter(c_thread_invoke_csw++);
2401
2402	assert(self->runq == PROCESSOR_NULL);
2403	self->reason = reason;
2404
2405	processor->last_dispatch = ctime;
2406	self->last_run_time = ctime;
2407	thread_timer_event(ctime, &thread->system_timer);
2408	PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
2409
2410	/*
2411	 * Since non-precise user/kernel time doesn't update the state timer
2412	 * during privilege transitions, synthesize an event now.
2413	 */
2414	if (!thread->precise_user_kernel_time) {
2415		timer_switch(PROCESSOR_DATA(processor, current_state),
2416					ctime,
2417					 PROCESSOR_DATA(processor, current_state));
2418	}
2419
2420
2421	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2422		MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
2423		self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
2424
	if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
2426		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
2427				(uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
2428	}
2429
2430	DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
2431
2432	SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
2433
2434	/*
2435	 * This is where we actually switch register context,
2436	 * and address space if required.  We will next run
2437	 * as a result of a subsequent context switch.
2438	 */
2439	assert(continuation == self->continuation);
2440	thread = machine_switch_context(self, continuation, thread);
2441	assert(self == current_thread());
2442	TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
2443
2444	DTRACE_SCHED(on__cpu);
2445
2446	/*
2447	 * We have been resumed and are set to run.
2448	 */
2449	thread_dispatch(thread, self);
2450
2451	if (continuation) {
2452		self->continuation = self->parameter = NULL;
2453
2454		funnel_refunnel_check(self, 3);
2455		(void) spllo();
2456
2457		call_continuation(continuation, parameter, self->wait_result);
2458		/*NOTREACHED*/
2459	}
2460
2461	return (TRUE);
2462}
2463
2464/*
2465 *	thread_dispatch:
2466 *
2467 *	Handle threads at context switch.  Re-dispatch other thread
2468 *	if still running, otherwise update run state and perform
2469 *	special actions.  Update quantum for other thread and begin
2470 *	the quantum for ourselves.
2471 *
2472 *     "self" is our new current thread that we have context switched
2473 *     to, "thread" is the old thread that we have switched away from.
2474 *
2475 *	Called at splsched.
2476 */
2477void
2478thread_dispatch(
2479	thread_t		thread,
2480	thread_t		self)
2481{
2482	processor_t		processor = self->last_processor;
2483
2484	if (thread != THREAD_NULL) {
2485		/*
2486		 *	If blocked at a continuation, discard
2487		 *	the stack.
2488		 */
2489		if (thread->continuation != NULL && thread->kernel_stack != 0)
2490			stack_free(thread);
2491
2492		if (!(thread->state & TH_IDLE)) {
2493			int64_t consumed;
2494			int64_t remainder = 0;
2495
2496			if (processor->quantum_end > processor->last_dispatch)
2497				remainder = processor->quantum_end -
2498				    processor->last_dispatch;
2499
2500			consumed = thread->current_quantum - remainder;
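			/*
			 * Illustrative example: if the thread began this
			 * timeslice with a 10ms quantum and blocks with 6ms
			 * still remaining before quantum_end, then
			 * remainder == 6ms and consumed == 4ms; only the time
			 * actually run is billed to the ledgers below.
			 */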
2501
2502			if ((thread->reason & AST_LEDGER) == 0) {
2503				/*
2504				 * Bill CPU time to both the task and
2505				 * the individual thread.
2506				 */
2507				ledger_credit(thread->t_ledger,
2508				    task_ledgers.cpu_time, consumed);
2509				ledger_credit(thread->t_threadledger,
2510				    thread_ledgers.cpu_time, consumed);
2511			}
2512
2513			wake_lock(thread);
2514			thread_lock(thread);
2515
2516			/*
2517			 *	Compute remainder of current quantum.
2518			 */
2519			if (first_timeslice(processor) &&
2520			    processor->quantum_end > processor->last_dispatch)
2521				thread->current_quantum = (uint32_t)remainder;
2522			else
2523				thread->current_quantum = 0;
2524
2525			if (thread->sched_mode == TH_MODE_REALTIME) {
2526				/*
2527				 *	Cancel the deadline if the thread has
2528				 *	consumed the entire quantum.
2529				 */
2530				if (thread->current_quantum == 0) {
2531					thread->realtime.deadline = UINT64_MAX;
2532					thread->reason |= AST_QUANTUM;
2533				}
2534			} else {
2535#if defined(CONFIG_SCHED_TRADITIONAL)
2536				/*
2537				 *	For non-realtime threads treat a tiny
2538				 *	remaining quantum as an expired quantum
2539				 *	but include what's left next time.
2540				 */
2541				if (thread->current_quantum < min_std_quantum) {
2542					thread->reason |= AST_QUANTUM;
2543					thread->current_quantum += SCHED(initial_quantum_size)(thread);
2544				}
2545#endif
2546			}
2547
2548			/*
2549			 *	If we are doing a direct handoff then
2550			 *	take the remainder of the quantum.
2551			 */
2552			if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
2553				self->current_quantum = thread->current_quantum;
2554				thread->reason |= AST_QUANTUM;
2555				thread->current_quantum = 0;
2556			}
2557
2558			thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
2559
2560			if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) {
2561				integer_t priority;
2562
2563				priority = thread->sched_pri;
2564
2565				if (priority < thread->priority)
2566					priority = thread->priority;
2567				if (priority < BASEPRI_BACKGROUND)
2568					priority = BASEPRI_BACKGROUND;
2569
2570				if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2571					KERNEL_DEBUG_CONSTANT(
2572						MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
2573						(uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0);
2574
2575					thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
2576
2577					if (thread->sched_pri < priority)
2578						set_sched_pri(thread, priority);
2579				}
2580			}
2581
2582			if (!(thread->state & TH_WAIT)) {
2583				/*
2584				 *	Still running.
2585				 */
2586				if (thread->reason & AST_QUANTUM)
2587					thread_setrun(thread, SCHED_TAILQ);
2588				else
2589				if (thread->reason & AST_PREEMPT)
2590					thread_setrun(thread, SCHED_HEADQ);
2591				else
2592					thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2593
2594				thread->reason = AST_NONE;
2595
2596				if (thread->wake_active) {
2597					thread->wake_active = FALSE;
2598					thread_unlock(thread);
2599
2600					thread_wakeup(&thread->wake_active);
2601				}
2602				else
2603					thread_unlock(thread);
2604
2605				wake_unlock(thread);
2606			}
2607			else {
2608				/*
2609				 *	Waiting.
2610				 */
2611				boolean_t should_terminate = FALSE;
2612
2613				/* Only the first call to thread_dispatch
2614				 * after explicit termination should add
2615				 * the thread to the termination queue
2616				 */
2617				if ((thread->state & (TH_TERMINATE|TH_TERMINATE2)) == TH_TERMINATE) {
2618					should_terminate = TRUE;
2619					thread->state |= TH_TERMINATE2;
2620				}
2621
2622				thread->state &= ~TH_RUN;
2623				thread->chosen_processor = PROCESSOR_NULL;
2624
2625				if (thread->sched_mode == TH_MODE_TIMESHARE) {
2626					if (thread->max_priority <= MAXPRI_THROTTLE)
2627						sched_background_decr();
2628
2629					sched_share_decr();
2630				}
2631				sched_run_decr();
2632
2633				(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
2634
2635				if (thread->wake_active) {
2636					thread->wake_active = FALSE;
2637					thread_unlock(thread);
2638
2639					thread_wakeup(&thread->wake_active);
2640				}
2641				else
2642					thread_unlock(thread);
2643
2644				wake_unlock(thread);
2645
2646				if (should_terminate)
2647					thread_terminate_enqueue(thread);
2648			}
2649		}
2650	}
2651
2652	if (!(self->state & TH_IDLE)) {
2653		uint64_t        arg1, arg2;
2654		int             urgency;
2655
2656		urgency = thread_get_urgency(self, &arg1, &arg2);
2657
2658		thread_tell_urgency(urgency, arg1, arg2, self);
2659
2660		/*
2661		 *	Get a new quantum if none remaining.
2662		 */
2663		if (self->current_quantum == 0) {
2664			thread_quantum_init(self);
2665			self->last_quantum_refill_time = processor->last_dispatch;
2666		}
2667
2668		/*
2669		 *	Set up quantum timer and timeslice.
2670		 */
2671		processor->quantum_end = (processor->last_dispatch + self->current_quantum);
2672		timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL);
2673
2674		processor->timeslice = 1;
2675
2676		self->computation_epoch = processor->last_dispatch;
2677	}
2678	else {
2679		timer_call_cancel(&processor->quantum_timer);
2680		processor->timeslice = 0;
2681
2682		thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
2683	}
2684}
2685
2686#include <libkern/OSDebug.h>
2687
2688uint32_t	kdebug_thread_block = 0;
2689
2690
2691/*
2692 *	thread_block_reason:
2693 *
2694 *	Forces a reschedule, blocking the caller if a wait
2695 *	has been asserted.
2696 *
2697 *	If a continuation is specified, then thread_invoke will
2698 *	attempt to discard the thread's kernel stack.  When the
2699 *	thread resumes, it will execute the continuation function
2700 *	on a new kernel stack.
2701 */
2702counter(mach_counter_t  c_thread_block_calls = 0;)
2703
2704wait_result_t
2705thread_block_reason(
2706	thread_continue_t	continuation,
2707	void				*parameter,
2708	ast_t				reason)
2709{
2710	register thread_t		self = current_thread();
2711	register processor_t	processor;
2712	register thread_t		new_thread;
2713	spl_t					s;
2714
2715	counter(++c_thread_block_calls);
2716
2717	s = splsched();
2718
2719	if (!(reason & AST_PREEMPT))
2720		funnel_release_check(self, 2);
2721
2722	processor = current_processor();
2723
2724	/* If we're explicitly yielding, force a subsequent quantum */
2725	if (reason & AST_YIELD)
2726		processor->timeslice = 0;
2727
2728	/* We're handling all scheduling AST's */
2729	ast_off(AST_SCHEDULING);
2730
2731	self->continuation = continuation;
2732	self->parameter = parameter;
2733
2734	if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) {
2735		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2736			MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK),
2737			reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0);
2738	}
2739
2740	do {
2741		thread_lock(self);
2742		new_thread = thread_select(self, processor);
2743		thread_unlock(self);
2744	} while (!thread_invoke(self, new_thread, reason));
2745
2746	funnel_refunnel_check(self, 5);
2747	splx(s);
2748
2749	return (self->wait_result);
2750}
2751
2752/*
2753 *	thread_block:
2754 *
2755 *	Block the current thread if a wait has been asserted.
2756 */
2757wait_result_t
2758thread_block(
2759	thread_continue_t	continuation)
2760{
2761	return thread_block_reason(continuation, NULL, AST_NONE);
2762}
2763
2764wait_result_t
2765thread_block_parameter(
2766	thread_continue_t	continuation,
2767	void				*parameter)
2768{
2769	return thread_block_reason(continuation, parameter, AST_NONE);
2770}
2771
2772/*
2773 *	thread_run:
2774 *
2775 *	Switch directly from the current thread to the
2776 *	new thread, handing off our quantum if appropriate.
2777 *
2778 *	New thread must be runnable, and not on a run queue.
2779 *
2780 *	Called at splsched.
2781 */
2782int
2783thread_run(
2784	thread_t			self,
2785	thread_continue_t	continuation,
2786	void				*parameter,
2787	thread_t			new_thread)
2788{
2789	ast_t		handoff = AST_HANDOFF;
2790
2791	funnel_release_check(self, 3);
2792
2793	self->continuation = continuation;
2794	self->parameter = parameter;
2795
2796	while (!thread_invoke(self, new_thread, handoff)) {
2797		processor_t		processor = current_processor();
2798
2799		thread_lock(self);
2800		new_thread = thread_select(self, processor);
2801		thread_unlock(self);
2802		handoff = AST_NONE;
2803	}
2804
2805	funnel_refunnel_check(self, 6);
2806
2807	return (self->wait_result);
2808}
2809
2810/*
2811 *	thread_continue:
2812 *
2813 *	Called at splsched when a thread first receives
2814 *	a new stack after a continuation.
2815 */
2816void
2817thread_continue(
2818	register thread_t	thread)
2819{
2820	register thread_t		self = current_thread();
2821	register thread_continue_t	continuation;
2822	register void			*parameter;
2823
2824	DTRACE_SCHED(on__cpu);
2825
2826	continuation = self->continuation;
2827	parameter = self->parameter;
2828
2829	thread_dispatch(thread, self);
2830
2831	self->continuation = self->parameter = NULL;
2832
2833	funnel_refunnel_check(self, 4);
2834
2835	if (thread != THREAD_NULL)
2836		(void)spllo();
2837
	TLOG(1, "thread_continue: calling call_continuation\n");
2839	call_continuation(continuation, parameter, self->wait_result);
2840	/*NOTREACHED*/
2841}
2842
2843void
2844thread_quantum_init(thread_t thread)
2845{
2846	if (thread->sched_mode == TH_MODE_REALTIME) {
2847		thread->current_quantum = thread->realtime.computation;
2848	} else {
2849		thread->current_quantum = SCHED(initial_quantum_size)(thread);
2850	}
2851}
2852
2853#if defined(CONFIG_SCHED_TRADITIONAL)
2854static uint32_t
2855sched_traditional_initial_quantum_size(thread_t thread)
2856{
2857	if ((thread == THREAD_NULL) || thread->priority > MAXPRI_THROTTLE)
2858		return std_quantum;
2859	else
2860		return bg_quantum;
2861}
2862
2863static sched_mode_t
2864sched_traditional_initial_thread_sched_mode(task_t parent_task)
2865{
2866	if (parent_task == kernel_task)
2867		return TH_MODE_FIXED;
2868	else
2869		return TH_MODE_TIMESHARE;
2870}
2871
2872static boolean_t
2873sched_traditional_supports_timeshare_mode(void)
2874{
2875	return TRUE;
2876}
2877
2878#endif /* CONFIG_SCHED_TRADITIONAL */
2879
2880/*
2881 *	run_queue_init:
2882 *
2883 *	Initialize a run queue before first use.
2884 */
2885void
2886run_queue_init(
2887	run_queue_t		rq)
2888{
2889	int				i;
2890
2891	rq->highq = IDLEPRI;
2892	for (i = 0; i < NRQBM; i++)
2893		rq->bitmap[i] = 0;
2894	setbit(MAXPRI - IDLEPRI, rq->bitmap);
2895	rq->urgency = rq->count = 0;
2896	for (i = 0; i < NRQS; i++)
2897		queue_init(&rq->queues[i]);
2898}
2899
2900#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
2901int
2902sched_traditional_fairshare_runq_count(void)
2903{
2904	return fs_runq.count;
2905}
2906
2907uint64_t
2908sched_traditional_fairshare_runq_stats_count_sum(void)
2909{
2910	return fs_runq.runq_stats.count_sum;
2911}
2912
2913void
2914sched_traditional_fairshare_enqueue(thread_t thread)
2915{
2916	queue_t				queue = &fs_runq.queue;
2917
2918	simple_lock(&fs_lock);
2919
2920	enqueue_tail(queue, (queue_entry_t)thread);
2921
2922	thread->runq = FS_RUNQ;
2923	SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2924	fs_runq.count++;
2925
2926	simple_unlock(&fs_lock);
2927}
2928
2929thread_t
2930sched_traditional_fairshare_dequeue(void)
2931{
2932	thread_t thread;
2933
2934	simple_lock(&fs_lock);
2935	if (fs_runq.count > 0) {
2936		thread = (thread_t)dequeue_head(&fs_runq.queue);
2937
2938		thread->runq = PROCESSOR_NULL;
2939		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2940		fs_runq.count--;
2941
2942		simple_unlock(&fs_lock);
2943
2944		return (thread);
2945	}
2946	simple_unlock(&fs_lock);
2947
2948	return THREAD_NULL;
2949}
2950
2951boolean_t
2952sched_traditional_fairshare_queue_remove(thread_t thread)
2953{
2954	queue_t			q;
2955
2956	simple_lock(&fs_lock);
2957	q = &fs_runq.queue;
2958
2959	if (FS_RUNQ == thread->runq) {
2960		remqueue((queue_entry_t)thread);
2961		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
2962		fs_runq.count--;
2963
2964		thread->runq = PROCESSOR_NULL;
2965		simple_unlock(&fs_lock);
2966		return (TRUE);
2967	}
2968	else {
2969		/*
2970		 *	The thread left the run queue before we could
2971		 * 	lock the run queue.
2972		 */
2973		assert(thread->runq == PROCESSOR_NULL);
2974		simple_unlock(&fs_lock);
2975		return (FALSE);
2976	}
2977}
2978
2979#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
2980
2981/*
2982 *	run_queue_dequeue:
2983 *
2984 *	Perform a dequeue operation on a run queue,
2985 *	and return the resulting thread.
2986 *
2987 *	The run queue must be locked (see thread_run_queue_remove()
2988 *	for more info), and not empty.
2989 */
2990thread_t
2991run_queue_dequeue(
2992	run_queue_t		rq,
2993	integer_t		options)
2994{
2995	thread_t		thread;
2996	queue_t			queue = rq->queues + rq->highq;
2997
2998	if (options & SCHED_HEADQ) {
2999		thread = (thread_t)dequeue_head(queue);
3000	}
3001	else {
3002		thread = (thread_t)dequeue_tail(queue);
3003	}
3004
3005	thread->runq = PROCESSOR_NULL;
3006	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3007	rq->count--;
3008	if (SCHED(priority_is_urgent)(rq->highq)) {
3009		rq->urgency--; assert(rq->urgency >= 0);
3010	}
3011	if (queue_empty(queue)) {
3012		if (rq->highq != IDLEPRI)
3013			clrbit(MAXPRI - rq->highq, rq->bitmap);
3014		rq->highq = MAXPRI - ffsbit(rq->bitmap);
3015	}
3016
3017	return (thread);
3018}
3019
3020/*
3021 *	run_queue_enqueue:
3022 *
3023 *	Perform a enqueue operation on a run queue.
3024 *
3025 *	The run queue must be locked (see thread_run_queue_remove()
3026 *	for more info).
3027 */
3028boolean_t
3029run_queue_enqueue(
3030							  run_queue_t		rq,
3031							  thread_t			thread,
3032							  integer_t		options)
3033{
3034	queue_t			queue = rq->queues + thread->sched_pri;
3035	boolean_t		result = FALSE;
3036
3037	if (queue_empty(queue)) {
3038		enqueue_tail(queue, (queue_entry_t)thread);
3039
3040		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
3041		if (thread->sched_pri > rq->highq) {
3042			rq->highq = thread->sched_pri;
3043			result = TRUE;
3044		}
3045	}
3046	else
3047		if (options & SCHED_TAILQ)
3048			enqueue_tail(queue, (queue_entry_t)thread);
3049		else
3050			enqueue_head(queue, (queue_entry_t)thread);
3051
3052	if (SCHED(priority_is_urgent)(thread->sched_pri))
3053		rq->urgency++;
3054	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3055	rq->count++;
3056
	return (result);
}
3060
3061/*
3062 *	run_queue_remove:
3063 *
3064 *	Remove a specific thread from a runqueue.
3065 *
3066 *	The run queue must be locked.
3067 */
3068void
3069run_queue_remove(
3070				  run_queue_t		rq,
3071				  thread_t			thread)
3072{
3073
3074	remqueue((queue_entry_t)thread);
3075	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3076	rq->count--;
3077	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
3078		rq->urgency--; assert(rq->urgency >= 0);
3079	}
3080
3081	if (queue_empty(rq->queues + thread->sched_pri)) {
3082		/* update run queue status */
3083		if (thread->sched_pri != IDLEPRI)
3084			clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
3085		rq->highq = MAXPRI - ffsbit(rq->bitmap);
3086	}
3087
3088	thread->runq = PROCESSOR_NULL;
3089}
3090
3091/*
3092 *	fairshare_setrun:
3093 *
3094 *	Dispatch a thread for round-robin execution.
3095 *
3096 *	Thread must be locked.  Associated pset must
3097 *	be locked, and is returned unlocked.
3098 */
3099static void
3100fairshare_setrun(
3101				  processor_t			processor,
3102				  thread_t			thread)
3103{
3104	processor_set_t		pset = processor->processor_set;
3105
3106	thread->chosen_processor = processor;
3107
3108	SCHED(fairshare_enqueue)(thread);
3109
3110	if (processor != current_processor())
3111		machine_signal_idle(processor);
3112
	pset_unlock(pset);
}
3116
3117/*
3118 *	realtime_queue_insert:
3119 *
3120 *	Enqueue a thread for realtime execution.
3121 */
3122static boolean_t
3123realtime_queue_insert(
3124	thread_t			thread)
3125{
3126	queue_t				queue = &rt_runq.queue;
3127	uint64_t			deadline = thread->realtime.deadline;
3128	boolean_t			preempt = FALSE;
3129
3130	simple_lock(&rt_lock);
3131
3132	if (queue_empty(queue)) {
3133		enqueue_tail(queue, (queue_entry_t)thread);
3134		preempt = TRUE;
3135	}
3136	else {
3137		register thread_t	entry = (thread_t)queue_first(queue);
3138
3139		while (TRUE) {
3140			if (	queue_end(queue, (queue_entry_t)entry)	||
3141						deadline < entry->realtime.deadline		) {
3142				entry = (thread_t)queue_prev((queue_entry_t)entry);
3143				break;
3144			}
3145
3146			entry = (thread_t)queue_next((queue_entry_t)entry);
3147		}
3148
3149		if ((queue_entry_t)entry == queue)
3150			preempt = TRUE;
3151
3152		insque((queue_entry_t)thread, (queue_entry_t)entry);
3153	}
3154
3155	thread->runq = RT_RUNQ;
3156	SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
3157	rt_runq.count++;
3158
3159	simple_unlock(&rt_lock);
3160
3161	return (preempt);
3162}
3163
3164/*
3165 *	realtime_setrun:
3166 *
3167 *	Dispatch a thread for realtime execution.
3168 *
3169 *	Thread must be locked.  Associated pset must
3170 *	be locked, and is returned unlocked.
3171 */
3172static void
3173realtime_setrun(
3174	processor_t			processor,
3175	thread_t			thread)
3176{
3177	processor_set_t		pset = processor->processor_set;
3178	ast_t				preempt;
3179
3180	thread->chosen_processor = processor;
3181
3182	/*
3183	 *	Dispatch directly onto idle processor.
3184	 */
3185	if ( (thread->bound_processor == processor)
3186		&& processor->state == PROCESSOR_IDLE) {
3187		remqueue((queue_entry_t)processor);
3188		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3189
3190		processor->next_thread = thread;
3191		processor->current_pri = thread->sched_pri;
3192		processor->current_thmode = thread->sched_mode;
3193		processor->deadline = thread->realtime.deadline;
3194		processor->state = PROCESSOR_DISPATCHING;
3195
3196		if (processor != current_processor()) {
3197			if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3198				/* cleared on exit from main processor_idle() loop */
3199				pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3200				machine_signal_idle(processor);
3201			}
3202		}
3203
3204		pset_unlock(pset);
3205		return;
3206	}
3207
3208	if (processor->current_pri < BASEPRI_RTQUEUES)
3209		preempt = (AST_PREEMPT | AST_URGENT);
3210	else if (thread->realtime.deadline < processor->deadline)
3211		preempt = (AST_PREEMPT | AST_URGENT);
3212	else
3213		preempt = AST_NONE;
3214
3215	realtime_queue_insert(thread);
3216
3217	if (preempt != AST_NONE) {
3218		if (processor->state == PROCESSOR_IDLE) {
3219			remqueue((queue_entry_t)processor);
3220			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3221			processor->next_thread = THREAD_NULL;
3222			processor->current_pri = thread->sched_pri;
3223			processor->current_thmode = thread->sched_mode;
3224			processor->deadline = thread->realtime.deadline;
3225			processor->state = PROCESSOR_DISPATCHING;
3226			if (processor == current_processor()) {
3227				ast_on(preempt);
3228			} else {
3229				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3230					/* cleared on exit from main processor_idle() loop */
3231					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3232					machine_signal_idle(processor);
3233				}
3234			}
3235		} else if (processor->state == PROCESSOR_DISPATCHING) {
3236			if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) {
3237				processor->current_pri = thread->sched_pri;
3238				processor->current_thmode = thread->sched_mode;
3239				processor->deadline = thread->realtime.deadline;
3240			}
3241		} else {
3242			if (processor == current_processor()) {
3243				ast_on(preempt);
3244			} else {
3245				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3246					/* cleared after IPI causes csw_check() to be called */
3247					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3248					cause_ast_check(processor);
3249				}
3250			}
3251		}
3252	} else {
3253		/* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. */
3254	}
3255
3256	pset_unlock(pset);
3257}
3258
3259#if defined(CONFIG_SCHED_TRADITIONAL)
3260
3261static boolean_t
3262priority_is_urgent(int priority)
3263{
3264	return testbit(priority, sched_preempt_pri) ? TRUE : FALSE;
3265}
3266
3267/*
3268 *	processor_enqueue:
3269 *
3270 *	Enqueue thread on a processor run queue.  Thread must be locked,
3271 *	and not already be on a run queue.
3272 *
3273 *	Returns TRUE if a preemption is indicated based on the state
3274 *	of the run queue.
3275 *
3276 *	The run queue must be locked (see thread_run_queue_remove()
3277 *	for more info).
3278 */
3279static boolean_t
3280processor_enqueue(
3281	processor_t		processor,
3282	thread_t		thread,
3283	integer_t		options)
3284{
3285	run_queue_t		rq = runq_for_processor(processor);
3286	boolean_t		result;
3287
3288	result = run_queue_enqueue(rq, thread, options);
3289	thread->runq = processor;
3290	runq_consider_incr_bound_count(processor, thread);
3291
3292	return (result);
3293}
3294
3295#endif /* CONFIG_SCHED_TRADITIONAL */
3296
3297/*
3298 *	processor_setrun:
3299 *
3300 *	Dispatch a thread for execution on a
3301 *	processor.
3302 *
3303 *	Thread must be locked.  Associated pset must
3304 *	be locked, and is returned unlocked.
3305 */
3306static void
3307processor_setrun(
3308	processor_t			processor,
3309	thread_t			thread,
3310	integer_t			options)
3311{
3312	processor_set_t		pset = processor->processor_set;
3313	ast_t				preempt;
3314	enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing;
3315
3316	thread->chosen_processor = processor;
3317
3318	/*
3319	 *	Dispatch directly onto idle processor.
3320	 */
3321	if ( (SCHED(direct_dispatch_to_idle_processors) ||
3322		  thread->bound_processor == processor)
3323		&& processor->state == PROCESSOR_IDLE) {
3324		remqueue((queue_entry_t)processor);
3325		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3326
3327		processor->next_thread = thread;
3328		processor->current_pri = thread->sched_pri;
3329		processor->current_thmode = thread->sched_mode;
3330		processor->deadline = UINT64_MAX;
3331		processor->state = PROCESSOR_DISPATCHING;
3332
3333		if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3334			/* cleared on exit from main processor_idle() loop */
3335			pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3336			machine_signal_idle(processor);
3337		}
3338
3339		pset_unlock(pset);
3340		return;
3341	}
3342
3343	/*
3344	 *	Set preemption mode.
3345	 */
3346	if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
3347		preempt = (AST_PREEMPT | AST_URGENT);
	else if (processor->active_thread && thread_eager_preemption(processor->active_thread))
		preempt = (AST_PREEMPT | AST_URGENT);
	else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) {
		if (SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) {
			preempt = (options & SCHED_PREEMPT) ? AST_PREEMPT : AST_NONE;
		} else {
			preempt = AST_NONE;
		}
	} else
		preempt = (options & SCHED_PREEMPT) ? AST_PREEMPT : AST_NONE;
3358
3359	SCHED(processor_enqueue)(processor, thread, options);
3360
3361	if (preempt != AST_NONE) {
3362		if (processor->state == PROCESSOR_IDLE) {
3363			remqueue((queue_entry_t)processor);
3364			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3365			processor->next_thread = THREAD_NULL;
3366			processor->current_pri = thread->sched_pri;
3367			processor->current_thmode = thread->sched_mode;
3368			processor->deadline = UINT64_MAX;
3369			processor->state = PROCESSOR_DISPATCHING;
3370
3371			ipi_action = eExitIdle;
3372		} else if ( processor->state == PROCESSOR_DISPATCHING) {
3373			if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) {
3374				processor->current_pri = thread->sched_pri;
3375				processor->current_thmode = thread->sched_mode;
3376				processor->deadline = UINT64_MAX;
3377			}
3378		} else if (	(processor->state == PROCESSOR_RUNNING		||
3379				 processor->state == PROCESSOR_SHUTDOWN)		&&
3380				(thread->sched_pri >= processor->current_pri	||
3381				processor->current_thmode == TH_MODE_FAIRSHARE)) {
3382			ipi_action = eInterruptRunning;
3383		}
3384	} else {
3385		/*
3386		 * New thread is not important enough to preempt what is running, but
3387		 * special processor states may need special handling
3388		 */
3389		if (processor->state == PROCESSOR_SHUTDOWN		&&
3390			thread->sched_pri >= processor->current_pri	) {
3391			ipi_action = eInterruptRunning;
3392		} else if (	processor->state == PROCESSOR_IDLE	&&
3393					processor != current_processor()	) {
3394			remqueue((queue_entry_t)processor);
3395			enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
3396			processor->next_thread = THREAD_NULL;
3397			processor->current_pri = thread->sched_pri;
3398			processor->current_thmode = thread->sched_mode;
3399			processor->deadline = UINT64_MAX;
3400			processor->state = PROCESSOR_DISPATCHING;
3401
3402			ipi_action = eExitIdle;
3403		}
3404	}
3405
3406	switch (ipi_action) {
3407		case eDoNothing:
3408			break;
3409		case eExitIdle:
3410			if (processor == current_processor()) {
3411				if (csw_check_locked(processor, pset) != AST_NONE)
3412					ast_on(preempt);
3413			} else {
3414				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3415					/* cleared on exit from main processor_idle() loop */
3416					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3417					machine_signal_idle(processor);
3418				}
3419			}
3420			break;
3421		case eInterruptRunning:
3422			if (processor == current_processor()) {
3423				if (csw_check_locked(processor, pset) != AST_NONE)
3424					ast_on(preempt);
3425			} else {
3426				if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
3427					/* cleared after IPI causes csw_check() to be called */
3428					pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
3429					cause_ast_check(processor);
3430				}
3431			}
3432			break;
3433	}
3434
3435	pset_unlock(pset);
3436}
3437
3438#if defined(CONFIG_SCHED_TRADITIONAL)
3439
3440static boolean_t
3441processor_queue_empty(processor_t		processor)
3442{
	return runq_for_processor(processor)->count == 0;
}
3446
3447static boolean_t
3448sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t		processor)
3449{
3450	processor_set_t pset = processor->processor_set;
3451	int count = runq_for_processor(processor)->count;
3452
3453	/*
3454	 * The pset runq contains the count of all runnable threads
3455	 * for all processors in the pset. However, for threads that
3456	 * are bound to another processor, the current "processor"
3457	 * is not eligible to execute the thread. So we only
	 * include bound threads whose bound processor is this
	 * "processor". This allows the processor to idle when the
3460	 * count of eligible threads drops to 0, even if there's
3461	 * a runnable thread bound to a different processor in the
3462	 * shared runq.
3463	 */
3464
3465	count -= pset->pset_runq_bound_count;
3466	count += processor->runq_bound_count;
3467
3468	return count == 0;
3469}
3470
3471static ast_t
3472processor_csw_check(processor_t processor)
3473{
3474	run_queue_t		runq;
3475	boolean_t		has_higher;
3476
3477	assert(processor->active_thread != NULL);
3478
3479	runq = runq_for_processor(processor);
3480	if (first_timeslice(processor)) {
3481		has_higher = (runq->highq > processor->current_pri);
3482	} else {
3483		has_higher = (runq->highq >= processor->current_pri);
3484	}
3485	if (has_higher) {
3486		if (runq->urgency > 0)
3487			return (AST_PREEMPT | AST_URGENT);
3488
3489		if (processor->active_thread && thread_eager_preemption(processor->active_thread))
3490			return (AST_PREEMPT | AST_URGENT);
3491
3492		return AST_PREEMPT;
3493	}
3494
3495	return AST_NONE;
3496}
3497
3498static boolean_t
3499processor_queue_has_priority(processor_t		processor,
3500							 int				priority,
3501							 boolean_t			gte)
3502{
3503	if (gte)
3504		return runq_for_processor(processor)->highq >= priority;
3505	else
3506		return runq_for_processor(processor)->highq > priority;
3507}
3508
3509static boolean_t
3510should_current_thread_rechoose_processor(processor_t			processor)
3511{
3512	return (processor->current_pri < BASEPRI_RTQUEUES
3513			&& processor->processor_meta != PROCESSOR_META_NULL
3514			&& processor->processor_meta->primary != processor);
3515}
3516
3517static int
3518sched_traditional_processor_runq_count(processor_t   processor)
3519{
3520	return runq_for_processor(processor)->count;
3521}
3522
3523
3524static uint64_t
3525sched_traditional_processor_runq_stats_count_sum(processor_t   processor)
3526{
3527	return runq_for_processor(processor)->runq_stats.count_sum;
3528}
3529
3530static uint64_t
3531sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor)
3532{
3533	if (processor->cpu_id == processor->processor_set->cpu_set_low)
3534		return runq_for_processor(processor)->runq_stats.count_sum;
3535	else
3536		return 0ULL;
3537}
3538
3539#endif /* CONFIG_SCHED_TRADITIONAL */
3540
#define next_pset(p)	(((p)->pset_list != PROCESSOR_SET_NULL) ? (p)->pset_list : (p)->node->psets)
3542
3543/*
3544 *	choose_next_pset:
3545 *
3546 *	Return the next sibling pset containing
3547 *	available processors.
3548 *
3549 *	Returns the original pset if none other is
3550 *	suitable.
3551 */
3552static processor_set_t
3553choose_next_pset(
3554	processor_set_t		pset)
3555{
3556	processor_set_t		nset = pset;
3557
3558	do {
3559		nset = next_pset(nset);
3560	} while (nset->online_processor_count < 1 && nset != pset);
3561
3562	return (nset);
3563}
3564
3565/*
3566 *	choose_processor:
3567 *
3568 *	Choose a processor for the thread, beginning at
3569 *	the pset.  Accepts an optional processor hint in
3570 *	the pset.
3571 *
3572 *	Returns a processor, possibly from a different pset.
3573 *
3574 *	The thread must be locked.  The pset must be locked,
3575 *	and the resulting pset is locked on return.
3576 */
3577processor_t
3578choose_processor(
3579	processor_set_t		pset,
3580	processor_t			processor,
3581	thread_t			thread)
3582{
3583	processor_set_t		nset, cset = pset;
3584	processor_meta_t	pmeta = PROCESSOR_META_NULL;
3585	processor_t             mprocessor;
3586
3587	/*
3588	 *	Prefer the hinted processor, when appropriate.
3589	 */
3590
3591	if (processor != PROCESSOR_NULL) {
3592		if (processor->processor_meta != PROCESSOR_META_NULL)
3593			processor = processor->processor_meta->primary;
3594	}
3595
3596	mprocessor = machine_choose_processor(pset, processor);
3597	if (mprocessor != PROCESSOR_NULL)
3598		processor = mprocessor;
3599
3600	if (processor != PROCESSOR_NULL) {
3601		if (processor->processor_set != pset ||
3602		    processor->state == PROCESSOR_INACTIVE ||
3603		    processor->state == PROCESSOR_SHUTDOWN ||
3604		    processor->state == PROCESSOR_OFF_LINE)
3605			processor = PROCESSOR_NULL;
3606		else
3607			if (processor->state == PROCESSOR_IDLE ||
3608			    ((thread->sched_pri >= BASEPRI_RTQUEUES) &&
3609			    (processor->current_pri < BASEPRI_RTQUEUES)))
3610				return (processor);
3611	}
3612
3613	/*
3614	 *	Iterate through the processor sets to locate
3615	 *	an appropriate processor.
3616	 */
3617	do {
3618		/*
3619		 *	Choose an idle processor.
3620		 */
3621		if (!queue_empty(&cset->idle_queue))
3622			return ((processor_t)queue_first(&cset->idle_queue));
3623
3624		if (thread->sched_pri >= BASEPRI_RTQUEUES) {
3625			integer_t lowest_priority = MAXPRI + 1;
3626			integer_t lowest_unpaired = MAXPRI + 1;
3627			uint64_t  furthest_deadline = 1;
3628			processor_t lp_processor = PROCESSOR_NULL;
3629			processor_t lp_unpaired = PROCESSOR_NULL;
3630			processor_t fd_processor = PROCESSOR_NULL;
3631
3632			lp_processor = cset->low_pri;
3633			/* Consider hinted processor */
3634			if (lp_processor != PROCESSOR_NULL &&
3635			    ((lp_processor->processor_meta == PROCESSOR_META_NULL) ||
3636			    ((lp_processor == lp_processor->processor_meta->primary) &&
3637			    !queue_empty(&lp_processor->processor_meta->idle_queue))) &&
3638			    lp_processor->state != PROCESSOR_INACTIVE &&
3639			    lp_processor->state != PROCESSOR_SHUTDOWN &&
3640			    lp_processor->state != PROCESSOR_OFF_LINE &&
3641			    (lp_processor->current_pri < thread->sched_pri))
3642				return lp_processor;
3643
3644			processor = (processor_t)queue_first(&cset->active_queue);
3645			while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
3646				/* Discover the processor executing the
3647				 * thread with the lowest priority within
3648				 * this pset, or the one with the furthest
3649				 * deadline
3650				 */
3651				integer_t cpri = processor->current_pri;
3652				if (cpri < lowest_priority) {
3653					lowest_priority = cpri;
3654					lp_processor = processor;
3655				}
3656
3657				if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) {
3658					furthest_deadline = processor->deadline;
3659					fd_processor = processor;
3660				}
3661
3662
3663				if (processor->processor_meta != PROCESSOR_META_NULL &&
3664				    !queue_empty(&processor->processor_meta->idle_queue)) {
3665					if (cpri < lowest_unpaired) {
3666						lowest_unpaired = cpri;
3667						lp_unpaired = processor;
3668						pmeta = processor->processor_meta;
3669					}
3670					else
3671						if (pmeta == PROCESSOR_META_NULL)
3672							pmeta = processor->processor_meta;
3673				}
3674				processor = (processor_t)queue_next((queue_entry_t)processor);
3675			}
3676
3677			if (thread->sched_pri > lowest_unpaired)
3678				return lp_unpaired;
3679
3680			if (pmeta != PROCESSOR_META_NULL)
3681				return ((processor_t)queue_first(&pmeta->idle_queue));
3682			if (thread->sched_pri > lowest_priority)
3683				return lp_processor;
3684			if (thread->realtime.deadline < furthest_deadline)
3685				return fd_processor;
3686
3687			processor = PROCESSOR_NULL;
3688		}
3689		else {
3690			/*
3691			 *	Check any hinted processors in the processor set if available.
3692			 */
3693			if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
3694					cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
3695						(processor == PROCESSOR_NULL ||
3696							(thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
3697				processor = cset->low_pri;
3698			}
3699			else
3700			if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
3701					cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
3702						(processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT &&
3703															SCHED(processor_runq_count)(cset->low_count) < SCHED(processor_runq_count)(processor)))) {
3704				processor = cset->low_count;
3705			}
3706
3707			/*
3708			 *	Otherwise, choose an available processor in the set.
3709			 */
3710			if (processor == PROCESSOR_NULL) {
3711				processor = (processor_t)dequeue_head(&cset->active_queue);
3712				if (processor != PROCESSOR_NULL)
3713					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
3714			}
3715
3716			if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) {
3717				if (processor->processor_meta != PROCESSOR_META_NULL &&
3718											!queue_empty(&processor->processor_meta->idle_queue))
3719					pmeta = processor->processor_meta;
3720			}
3721		}
3722
3723		/*
3724		 *	Move onto the next processor set.
3725		 */
3726		nset = next_pset(cset);
3727
3728		if (nset != pset) {
3729			pset_unlock(cset);
3730
3731			cset = nset;
3732			pset_lock(cset);
3733		}
3734	} while (nset != pset);
3735
3736	/*
3737	 *	Make sure that we pick a running processor,
3738	 *	and that the correct processor set is locked.
3739	 */
3740	do {
3741		if (pmeta != PROCESSOR_META_NULL) {
3742			if (cset != pmeta->primary->processor_set) {
3743				pset_unlock(cset);
3744
3745				cset = pmeta->primary->processor_set;
3746				pset_lock(cset);
3747			}
3748
3749			if (!queue_empty(&pmeta->idle_queue))
3750				return ((processor_t)queue_first(&pmeta->idle_queue));
3751
3752			pmeta = PROCESSOR_META_NULL;
3753		}
3754
3755		/*
3756		 *	If we haven't been able to choose a processor,
3757		 *	pick the boot processor and return it.
3758		 */
3759		if (processor == PROCESSOR_NULL) {
3760			processor = master_processor;
3761
3762			/*
3763			 *	Check that the correct processor set is
3764			 *	returned locked.
3765			 */
3766			if (cset != processor->processor_set) {
3767				pset_unlock(cset);
3768
3769				cset = processor->processor_set;
3770				pset_lock(cset);
3771			}
3772
3773			return (processor);
3774		}
3775
3776		/*
3777		 *	Check that the processor set for the chosen
3778		 *	processor is locked.
3779		 */
3780		if (cset != processor->processor_set) {
3781			pset_unlock(cset);
3782
3783			cset = processor->processor_set;
3784			pset_lock(cset);
3785		}
3786
3787		/*
3788		 *	We must verify that the chosen processor is still available.
3789		 */
3790		if (processor->state == PROCESSOR_INACTIVE ||
3791					processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
3792			processor = PROCESSOR_NULL;
3793	} while (processor == PROCESSOR_NULL);
3794
3795	return (processor);
3796}
3797
3798/*
3799 *	thread_setrun:
3800 *
3801 *	Dispatch thread for execution, onto an idle
3802 *	processor or run queue, and signal a preemption
3803 *	as appropriate.
3804 *
3805 *	Thread must be locked.
3806 */
3807void
3808thread_setrun(
3809	thread_t			thread,
3810	integer_t			options)
3811{
3812	processor_t			processor;
3813	processor_set_t		pset;
3814
3815#if DEBUG
3816	assert(thread_runnable(thread));
3817#endif
3818
3819	/*
3820	 *	Update priority if needed.
3821	 */
3822	if (SCHED(can_update_priority)(thread))
3823		SCHED(update_priority)(thread);
3824
3825	assert(thread->runq == PROCESSOR_NULL);
3826
3827	if (thread->bound_processor == PROCESSOR_NULL) {
3828		/*
3829		 *	Unbound case.
3830		 */
3831		if (thread->affinity_set != AFFINITY_SET_NULL) {
3832			/*
3833			 * Use affinity set policy hint.
3834			 */
3835			pset = thread->affinity_set->aset_pset;
3836			pset_lock(pset);
3837
3838			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
3839
3840			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3841									  (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
3842		}
3843		else
3844		if (thread->last_processor != PROCESSOR_NULL) {
3845			/*
3846			 *	Simple (last processor) affinity case.
3847			 */
3848			processor = thread->last_processor;
3849			pset = processor->processor_set;
3850			pset_lock(pset);
3851			processor = SCHED(choose_processor)(pset, processor, thread);
3852
3853			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3854								  (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0);
3855		}
3856		else {
3857			/*
3858			 *	No Affinity case:
3859			 *
3860			 *	Utilitize a per task hint to spread threads
3861			 *	among the available processor sets.
3862			 */
3863			task_t		task = thread->task;
3864
3865			pset = task->pset_hint;
3866			if (pset == PROCESSOR_SET_NULL)
3867				pset = current_processor()->processor_set;
3868
3869			pset = choose_next_pset(pset);
3870			pset_lock(pset);
3871
3872			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
3873			task->pset_hint = processor->processor_set;
3874
3875			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3876									  (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
3877		}
3878	}
3879	else {
3880		/*
3881		 *	Bound case:
3882		 *
3883		 *	Unconditionally dispatch on the processor.
3884		 */
3885		processor = thread->bound_processor;
3886		pset = processor->processor_set;
3887		pset_lock(pset);
3888
3889		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
3890							  (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0);
3891	}
3892
3893	/*
3894	 *	Dispatch the thread on the choosen processor.
3895	 */
3896	if (thread->sched_pri >= BASEPRI_RTQUEUES)
3897		realtime_setrun(processor, thread);
3898	else if (thread->sched_mode == TH_MODE_FAIRSHARE)
3899		fairshare_setrun(processor, thread);
3900	else
3901		processor_setrun(processor, thread, options);
3902}
3903
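/*
 *	task_choose_pset:
 *
 *	Choose a processor set for the task, advancing from
 *	the task's pset hint if one exists.  Returns
 *	PROCESSOR_SET_NULL when the task has no hint.
 */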
3904processor_set_t
3905task_choose_pset(
3906	task_t		task)
3907{
3908	processor_set_t		pset = task->pset_hint;
3909
3910	if (pset != PROCESSOR_SET_NULL)
3911		pset = choose_next_pset(pset);
3912
3913	return (pset);
3914}
3915
3916#if defined(CONFIG_SCHED_TRADITIONAL)
3917
3918/*
3919 *	processor_queue_shutdown:
3920 *
3921 *	Shutdown a processor run queue by
3922 *	re-dispatching non-bound threads.
3923 *
3924 *	Associated pset must be locked, and is
3925 *	returned unlocked.
3926 */
3927void
3928processor_queue_shutdown(
3929	processor_t			processor)
3930{
3931	processor_set_t		pset = processor->processor_set;
3932	run_queue_t			rq = runq_for_processor(processor);
3933	queue_t				queue = rq->queues + rq->highq;
3934	int					pri = rq->highq, count = rq->count;
3935	thread_t			next, thread;
3936	queue_head_t		tqueue;
3937
3938	queue_init(&tqueue);
3939
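	/*
	 *	First pass: with the pset locked, move every non-bound
	 *	thread from the run queue onto a local queue; bound
	 *	threads are left in place.
	 */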
3940	while (count > 0) {
3941		thread = (thread_t)queue_first(queue);
3942		while (!queue_end(queue, (queue_entry_t)thread)) {
3943			next = (thread_t)queue_next((queue_entry_t)thread);
3944
3945			if (thread->bound_processor == PROCESSOR_NULL) {
3946				remqueue((queue_entry_t)thread);
3947
3948				thread->runq = PROCESSOR_NULL;
3949				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
3950				runq_consider_decr_bound_count(processor, thread);
3951				rq->count--;
3952				if (SCHED(priority_is_urgent)(pri)) {
3953					rq->urgency--; assert(rq->urgency >= 0);
3954				}
3955				if (queue_empty(queue)) {
3956					if (pri != IDLEPRI)
3957						clrbit(MAXPRI - pri, rq->bitmap);
3958					rq->highq = MAXPRI - ffsbit(rq->bitmap);
3959				}
3960
3961				enqueue_tail(&tqueue, (queue_entry_t)thread);
3962			}
3963			count--;
3964
3965			thread = next;
3966		}
3967
3968		queue--; pri--;
3969	}
3970
3971	pset_unlock(pset);
3972
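	/*
	 *	Second pass: with the pset unlocked, re-dispatch each
	 *	collected thread onto another run queue.
	 */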
3973	while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
3974		thread_lock(thread);
3975
3976		thread_setrun(thread, SCHED_TAILQ);
3977
3978		thread_unlock(thread);
3979	}
3980}
3981
3982#endif /* CONFIG_SCHED_TRADITIONAL */
3983
3984/*
3985 *	Check for a preemption point in
3986 *	the current context.
3987 *
3988 *	Called at splsched.
3989 */
3990ast_t
3991csw_check(
3992	processor_t		processor)
3993{
3994	processor_set_t	pset = processor->processor_set;
3995	ast_t			result;
3996
3997	pset_lock(pset);
3998
3999	/* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */
4000	pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
4001
4002	result = csw_check_locked(processor, pset);
4003
4004	pset_unlock(pset);
4005
4006	return result;
4007}
4008
4009/*
4010 * Check for preemption at splsched with
4011 * pset locked
4012 */
4013ast_t
4014csw_check_locked(
4015	processor_t		processor,
4016	processor_set_t	pset __unused)
4017{
4018	ast_t			result = AST_NONE;
4019	thread_t		thread = processor->active_thread;
4020
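	/*
	 *	Pending real-time threads take precedence: within the
	 *	first quantum the preemption is always urgent; afterwards
	 *	it is urgent only if the running thread is below the
	 *	real-time priority band.
	 */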
4021	if (first_timeslice(processor)) {
4022		if (rt_runq.count > 0)
4023			return (AST_PREEMPT | AST_URGENT);
4024	}
4025	else {
4026		if (rt_runq.count > 0) {
4027			if (BASEPRI_RTQUEUES > processor->current_pri)
4028				return (AST_PREEMPT | AST_URGENT);
4029			else
4030				return (AST_PREEMPT);
4031		}
4032	}
4033
4034	result = SCHED(processor_csw_check)(processor);
4035	if (result != AST_NONE)
4036		return (result);
4037
4038	if (SCHED(should_current_thread_rechoose_processor)(processor))
4039		return (AST_PREEMPT);
4040
4041	if (machine_processor_is_inactive(processor))
4042		return (AST_PREEMPT);
4043
4044	if (thread->state & TH_SUSP)
4045		return (AST_PREEMPT);
4046
4047	return (AST_NONE);
4048}
4049
4050/*
4051 *	set_sched_pri:
4052 *
4053 *	Set the scheduled priority of the specified thread.
4054 *
4055 *	This may cause the thread to change queues.
4056 *
4057 *	Thread must be locked.
4058 */
4059void
4060set_sched_pri(
4061	thread_t		thread,
4062	int				priority)
4063{
4064	boolean_t		removed = thread_run_queue_remove(thread);
4065
4066	thread->sched_pri = priority;
4067	if (removed)
4068		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
4069	else
4070	if (thread->state & TH_RUN) {
4071		processor_t		processor = thread->last_processor;
4072
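		/*
		 *	The thread is running: if it is the current thread,
		 *	refresh the processor's cached priority and check for
		 *	preemption locally; otherwise poke the processor it
		 *	is running on so that it re-evaluates.
		 */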
4073		if (thread == current_thread()) {
4074			ast_t			preempt;
4075
4076			processor->current_pri = priority;
4077			processor->current_thmode = thread->sched_mode;
4078			if ((preempt = csw_check(processor)) != AST_NONE)
4079				ast_on(preempt);
4080		}
4081		else
4082		if (	processor != PROCESSOR_NULL						&&
4083				processor->active_thread == thread	)
4084			cause_ast_check(processor);
4085	}
4086}
4087
4088#if		0
4089
4090static void
4091run_queue_check(
4092	run_queue_t		rq,
4093	thread_t		thread)
4094{
4095	queue_t			q;
4096	queue_entry_t	qe;
4097
4098	if (rq != thread->runq)
4099		panic("run_queue_check: thread runq");
4100
4101	if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
4102		panic("run_queue_check: thread sched_pri");
4103
4104	q = &rq->queues[thread->sched_pri];
4105	qe = queue_first(q);
4106	while (!queue_end(q, qe)) {
4107		if (qe == (queue_entry_t)thread)
4108			return;
4109
4110		qe = queue_next(qe);
4111	}
4112
4113	panic("run_queue_check: end");
4114}
4115
#endif	/* 0 */
4117
4118#if defined(CONFIG_SCHED_TRADITIONAL)
4119
4120/* locks the runqueue itself */
4121
4122static boolean_t
4123processor_queue_remove(
	processor_t		processor,
	thread_t		thread)
4126{
4127	void *			rqlock;
4128	run_queue_t		rq;
4129
4130	rqlock = &processor->processor_set->sched_lock;
4131	rq = runq_for_processor(processor);
4132
4133	simple_lock(rqlock);
4134	if (processor == thread->runq) {
4135		/*
4136		 *	Thread is on a run queue and we have a lock on
4137		 *	that run queue.
4138		 */
4139		runq_consider_decr_bound_count(processor, thread);
4140		run_queue_remove(rq, thread);
4141	}
4142	else {
4143		/*
4144		 *	The thread left the run queue before we could
4145		 * 	lock the run queue.
4146		 */
4147		assert(thread->runq == PROCESSOR_NULL);
4148		processor = PROCESSOR_NULL;
4149	}
4150
4151	simple_unlock(rqlock);
4152
4153	return (processor != PROCESSOR_NULL);
4154}
4155
4156#endif /* CONFIG_SCHED_TRADITIONAL */
4157
4158/*
4159 *	thread_run_queue_remove:
4160 *
4161 *	Remove a thread from a current run queue and
4162 *	return TRUE if successful.
4163 *
4164 *	Thread must be locked.
4165 */
4166boolean_t
4167thread_run_queue_remove(
4168	thread_t		thread)
4169{
4170	processor_t		processor = thread->runq;
4171
4172	/*
4173	 *	If processor is PROCESSOR_NULL, the thread will stay out of the
4174	 *	run queues because the caller locked the thread.  Otherwise
4175	 *	the thread is on a run queue, but could be chosen for dispatch
4176	 *	and removed.
4177	 */
4178	if (processor != PROCESSOR_NULL) {
4179		queue_t			q;
4180
4181		/*
4182		 *	The processor run queues are locked by the
4183		 *	processor set.  Real-time priorities use a
4184		 *	global queue with a dedicated lock.
4185		 */
4186		if (thread->sched_mode == TH_MODE_FAIRSHARE) {
4187			return SCHED(fairshare_queue_remove)(thread);
4188		}
4189
4190		if (thread->sched_pri < BASEPRI_RTQUEUES) {
4191			return SCHED(processor_queue_remove)(processor, thread);
4192		}
4193
4194		simple_lock(&rt_lock);
4195		q = &rt_runq.queue;
4196
4197		if (processor == thread->runq) {
4198			/*
4199			 *	Thread is on a run queue and we have a lock on
4200			 *	that run queue.
4201			 */
4202			remqueue((queue_entry_t)thread);
4203			SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
4204			rt_runq.count--;
4205
4206			thread->runq = PROCESSOR_NULL;
4207		}
4208		else {
4209			/*
4210			 *	The thread left the run queue before we could
4211			 * 	lock the run queue.
4212			 */
4213			assert(thread->runq == PROCESSOR_NULL);
4214			processor = PROCESSOR_NULL;
4215		}
4216
4217		simple_unlock(&rt_lock);
4218	}
4219
4220	return (processor != PROCESSOR_NULL);
4221}
4222
4223#if defined(CONFIG_SCHED_TRADITIONAL)
4224
4225/*
4226 *	steal_processor_thread:
4227 *
4228 *	Locate a thread to steal from the processor and
4229 *	return it.
4230 *
4231 *	Associated pset must be locked.  Returns THREAD_NULL
4232 *	on failure.
4233 */
4234static thread_t
4235steal_processor_thread(
4236	processor_t		processor)
4237{
4238	run_queue_t		rq = runq_for_processor(processor);
4239	queue_t			queue = rq->queues + rq->highq;
4240	int				pri = rq->highq, count = rq->count;
4241	thread_t		thread;
4242
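	/*
	 *	Walk the run queue from the highest priority level down,
	 *	returning the first thread found that is not bound to
	 *	this processor.
	 */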
4243	while (count > 0) {
4244		thread = (thread_t)queue_first(queue);
4245		while (!queue_end(queue, (queue_entry_t)thread)) {
4246			if (thread->bound_processor == PROCESSOR_NULL) {
4247				remqueue((queue_entry_t)thread);
4248
4249				thread->runq = PROCESSOR_NULL;
4250				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
4251				runq_consider_decr_bound_count(processor, thread);
4252				rq->count--;
4253				if (SCHED(priority_is_urgent)(pri)) {
4254					rq->urgency--; assert(rq->urgency >= 0);
4255				}
4256				if (queue_empty(queue)) {
4257					if (pri != IDLEPRI)
4258						clrbit(MAXPRI - pri, rq->bitmap);
4259					rq->highq = MAXPRI - ffsbit(rq->bitmap);
4260				}
4261
4262				return (thread);
4263			}
4264			count--;
4265
4266			thread = (thread_t)queue_next((queue_entry_t)thread);
4267		}
4268
4269		queue--; pri--;
4270	}
4271
4272	return (THREAD_NULL);
4273}
4274
4275/*
4276 *	Locate and steal a thread, beginning
4277 *	at the pset.
4278 *
4279 *	The pset must be locked, and is returned
4280 *	unlocked.
4281 *
4282 *	Returns the stolen thread, or THREAD_NULL on
4283 *	failure.
4284 */
4285static thread_t
4286steal_thread(
4287	processor_set_t		pset)
4288{
4289	processor_set_t		nset, cset = pset;
4290	processor_t			processor;
4291	thread_t			thread;
4292
4293	do {
4294		processor = (processor_t)queue_first(&cset->active_queue);
4295		while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
4296			if (runq_for_processor(processor)->count > 0) {
4297				thread = steal_processor_thread(processor);
4298				if (thread != THREAD_NULL) {
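					/*
					 *	Rotate the processor to the tail of the
					 *	active queue so that subsequent steals are
					 *	spread across processors.
					 */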
4299					remqueue((queue_entry_t)processor);
4300					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
4301
4302					pset_unlock(cset);
4303
4304					return (thread);
4305				}
4306			}
4307
4308			processor = (processor_t)queue_next((queue_entry_t)processor);
4309		}
4310
4311		nset = next_pset(cset);
4312
4313		if (nset != pset) {
4314			pset_unlock(cset);
4315
4316			cset = nset;
4317			pset_lock(cset);
4318		}
4319	} while (nset != pset);
4320
4321	pset_unlock(cset);
4322
4323	return (THREAD_NULL);
4324}
4325
static thread_t
steal_thread_disabled(
	processor_set_t		pset)
4328{
4329	pset_unlock(pset);
4330
4331	return (THREAD_NULL);
4332}
4333
4334#endif /* CONFIG_SCHED_TRADITIONAL */
4335
4336
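/*
 *	Enable or disable the background (throttled) urgency
 *	classification used by thread_get_urgency().
 */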
4337void
4338sys_override_cpu_throttle(int flag)
4339{
4340	if (flag == CPU_THROTTLE_ENABLE)
4341		cpu_throttle_enabled = 1;
4342	if (flag == CPU_THROTTLE_DISABLE)
4343		cpu_throttle_enabled = 0;
4344}
4345
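/*
 *	Classify the urgency of a thread as none, real-time,
 *	background, or normal, returning two thread-specific values
 *	(priorities, or the real-time period and deadline) through
 *	arg1 and arg2.
 */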
4346int
4347thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
4348{
4349	if (thread == NULL || (thread->state & TH_IDLE)) {
4350		*arg1 = 0;
4351		*arg2 = 0;
4352
4353		return (THREAD_URGENCY_NONE);
4354	} else if (thread->sched_mode == TH_MODE_REALTIME) {
4355		*arg1 = thread->realtime.period;
4356		*arg2 = thread->realtime.deadline;
4357
4358		return (THREAD_URGENCY_REAL_TIME);
4359	} else if (cpu_throttle_enabled &&
4360		   ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE)))  {
4361		/*
		 * Background urgency is applied when the thread's priority is MAXPRI_THROTTLE or lower and the thread is not promoted.
4363		 */
4364		*arg1 = thread->sched_pri;
4365		*arg2 = thread->priority;
4366
4367		return (THREAD_URGENCY_BACKGROUND);
4368	} else {
4369		*arg1 = thread->sched_pri;
4370		*arg2 = thread->priority;
4371
4372		return (THREAD_URGENCY_NORMAL);
4373	}
4374}
4375
4376
4377/*
4378 *	This is the processor idle loop, which just looks for other threads
4379 *	to execute.  Processor idle threads invoke this without supplying a
4380 *	current thread to idle without an asserted wait state.
4381 *
4382 *	Returns a the next thread to execute if dispatched directly.
4383 */
4384
4385#if 0
4386#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__)
4387#else
4388#define IDLE_KERNEL_DEBUG_CONSTANT(...) do { } while(0)
4389#endif
4390
4391thread_t
4392processor_idle(
4393	thread_t			thread,
4394	processor_t			processor)
4395{
4396	processor_set_t		pset = processor->processor_set;
4397	thread_t			new_thread;
4398	int					state;
4399	(void)splsched();
4400
4401	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4402		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START,
4403		(uintptr_t)thread_tid(thread), 0, 0, 0, 0);
4404
4405	SCHED_STATS_CPU_IDLE_START(processor);
4406
4407	timer_switch(&PROCESSOR_DATA(processor, system_state),
4408									mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
4409	PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
4410
4411	while (1) {
4412
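		/*
		 *	Remain idle until there is work for this processor:
		 *	a dispatch or state change, a pending remote AST, a
		 *	runnable thread on the local or real-time run queues,
		 *	or (when idling in place) a wakeup of the supplied thread.
		 */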
4413		if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */
4414			break;
4415		if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id))
4416			break;
4417		if (!SCHED(processor_queue_empty)(processor))
4418			break;
4419		if (rt_runq.count)
4420			break;
4421#if CONFIG_SCHED_IDLE_IN_PLACE
4422		if (thread != THREAD_NULL) {
			/* Did the idle-in-place thread wake up? */
4424			if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active)
4425				break;
4426		}
4427#endif
4428
4429		IDLE_KERNEL_DEBUG_CONSTANT(
4430			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0);
4431
4432		machine_track_platform_idle(TRUE);
4433
4434		machine_idle();
4435
4436		machine_track_platform_idle(FALSE);
4437
4438		(void)splsched();
4439
4440		IDLE_KERNEL_DEBUG_CONSTANT(
4441			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0);
4442
4443		if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor))
4444			break;
4445	}
4446
4447	timer_switch(&PROCESSOR_DATA(processor, idle_state),
4448									mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
4449	PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
4450
4451	pset_lock(pset);
4452
4453	/* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */
4454	pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
4455
4456	state = processor->state;
4457	if (state == PROCESSOR_DISPATCHING) {
4458		/*
4459		 *	Commmon case -- cpu dispatched.
4460		 */
4461		new_thread = processor->next_thread;
4462		processor->next_thread = THREAD_NULL;
4463		processor->state = PROCESSOR_RUNNING;
4464
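		/*
		 *	If work that should run ahead of the handed-off thread
		 *	appeared on the local or real-time run queues while the
		 *	dispatch was pending, push the thread back onto a run
		 *	queue and return THREAD_NULL so the idle thread reschedules.
		 */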
4465		if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE)					||
4466											(rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri))	) {
4467			processor->current_pri = IDLEPRI;
4468			processor->current_thmode = TH_MODE_FIXED;
4469			processor->deadline = UINT64_MAX;
4470
4471			pset_unlock(pset);
4472
4473			thread_lock(new_thread);
4474			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0);
4475			thread_setrun(new_thread, SCHED_HEADQ);
4476			thread_unlock(new_thread);
4477
4478			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4479				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4480				(uintptr_t)thread_tid(thread), state, 0, 0, 0);
4481
4482			return (THREAD_NULL);
4483		}
4484
4485		pset_unlock(pset);
4486
4487		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4488			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4489			(uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
4490
4491		return (new_thread);
4492	}
4493	else
4494	if (state == PROCESSOR_IDLE) {
4495		remqueue((queue_entry_t)processor);
4496
4497		processor->state = PROCESSOR_RUNNING;
4498		processor->current_pri = IDLEPRI;
4499		processor->current_thmode = TH_MODE_FIXED;
4500		processor->deadline = UINT64_MAX;
4501		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
4502	}
4503	else
4504	if (state == PROCESSOR_INACTIVE) {
4505		processor->state = PROCESSOR_RUNNING;
4506		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
4507	}
4508	else
4509	if (state == PROCESSOR_SHUTDOWN) {
4510		/*
4511		 *	Going off-line.  Force a
4512		 *	reschedule.
4513		 */
4514		if ((new_thread = processor->next_thread) != THREAD_NULL) {
4515			processor->next_thread = THREAD_NULL;
4516			processor->current_pri = IDLEPRI;
4517			processor->current_thmode = TH_MODE_FIXED;
4518			processor->deadline = UINT64_MAX;
4519
4520			pset_unlock(pset);
4521
4522			thread_lock(new_thread);
4523			thread_setrun(new_thread, SCHED_HEADQ);
4524			thread_unlock(new_thread);
4525
4526			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4527				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4528				(uintptr_t)thread_tid(thread), state, 0, 0, 0);
4529
4530			return (THREAD_NULL);
4531		}
4532	}
4533
4534	pset_unlock(pset);
4535
4536	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4537		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END,
4538		(uintptr_t)thread_tid(thread), state, 0, 0, 0);
4539
4540	return (THREAD_NULL);
4541}
4542
4543/*
4544 *	Each processor has a dedicated thread which
4545 *	executes the idle loop when there is no suitable
4546 *	previous context.
4547 */
4548void
4549idle_thread(void)
4550{
4551	processor_t		processor = current_processor();
4552	thread_t		new_thread;
4553
4554	new_thread = processor_idle(THREAD_NULL, processor);
4555	if (new_thread != THREAD_NULL) {
4556		thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
4557		/*NOTREACHED*/
4558	}
4559
4560	thread_block((thread_continue_t)idle_thread);
4561	/*NOTREACHED*/
4562}
4563
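/*
 *	Create the dedicated idle thread for a processor, bound
 *	to that processor and marked TH_IDLE at IDLEPRI.
 */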
4564kern_return_t
4565idle_thread_create(
4566	processor_t		processor)
4567{
4568	kern_return_t	result;
4569	thread_t		thread;
4570	spl_t			s;
4571
4572	result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
4573	if (result != KERN_SUCCESS)
4574		return (result);
4575
4576	s = splsched();
4577	thread_lock(thread);
4578	thread->bound_processor = processor;
4579	processor->idle_thread = thread;
4580	thread->sched_pri = thread->priority = IDLEPRI;
4581	thread->state = (TH_RUN | TH_IDLE);
4582	thread->options |= TH_OPT_IDLE_THREAD;
4583	thread_unlock(thread);
4584	splx(s);
4585
4586	thread_deallocate(thread);
4587
4588	return (KERN_SUCCESS);
4589}
4590
4591/*
4592 * sched_startup:
4593 *
4594 * Kicks off scheduler services.
4595 *
4596 * Called at splsched.
4597 */
4598void
4599sched_startup(void)
4600{
4601	kern_return_t	result;
4602	thread_t		thread;
4603
4604	result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
4605	    (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread);
4606	if (result != KERN_SUCCESS)
4607		panic("sched_startup");
4608
4609	thread_deallocate(thread);
4610
4611	/*
4612	 * Yield to the sched_init_thread once, to
4613	 * initialize our own thread after being switched
4614	 * back to.
4615	 *
4616	 * The current thread is the only other thread
4617	 * active at this point.
4618	 */
4619	thread_block(THREAD_CONTINUE_NULL);
4620}
4621
4622#if defined(CONFIG_SCHED_TRADITIONAL)
4623
4624static volatile uint64_t 		sched_maintenance_deadline;
4625static uint64_t				sched_tick_last_abstime;
4626static uint64_t				sched_tick_delta;
4627uint64_t				sched_tick_max_delta;
4628/*
4629 *	sched_init_thread:
4630 *
4631 *	Perform periodic bookkeeping functions about ten
4632 *	times per second.
4633 */
4634static void
4635sched_traditional_maintenance_continue(void)
4636{
4637	uint64_t	sched_tick_ctime;
4638	sched_tick_ctime = mach_absolute_time();
4639
4640	if (__improbable(sched_tick_last_abstime == 0)) {
4641		sched_tick_last_abstime = sched_tick_ctime;
4642		sched_tick_delta = 1;
4643	} else {
4644		sched_tick_delta = ((sched_tick_ctime) - sched_tick_last_abstime) / sched_tick_interval;
		/* Ensure a delta of at least 1, since the elapsed interval
		 * could be slightly smaller than sched_tick_interval due to
		 * dispatch latencies.
		 */
4649		sched_tick_delta = MAX(sched_tick_delta, 1);
4650
		/* If interrupt latencies or platform idle events that
		 * advanced the timebase resulted in periods where no
		 * threads were dispatched, cap the "tick delta" at
		 * SCHED_TICK_MAX_DELTA iterations.
		 */
4657		sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA);
4658
4659		sched_tick_last_abstime = sched_tick_ctime;
4660		sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta);
4661	}
4662
	/* Add a number of pseudo-ticks corresponding to the elapsed interval.
	 * This could be greater than 1 if substantial intervals during which
	 * all processors were idle have passed, which rarely happens in practice.
	 */
4667
4668	sched_tick += sched_tick_delta;
4669
4670	/*
4671	 *  Compute various averages.
4672	 */
4673	compute_averages(sched_tick_delta);
4674
4675	/*
4676	 *  Scan the run queues for threads which
4677	 *  may need to be updated.
4678	 */
4679	thread_update_scan();
4680
4681	assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT);
4682	thread_block((thread_continue_t)sched_traditional_maintenance_continue);
4683	/*NOTREACHED*/
4684}
4685
4686static uint64_t sched_maintenance_wakeups;
4687
4688/*
4689 * Determine if the set of routines formerly driven by a maintenance timer
4690 * must be invoked, based on a deadline comparison. Signals the scheduler
4691 * maintenance thread on deadline expiration. Must be invoked at an interval
4692 * lower than the "sched_tick_interval", currently accomplished by
4693 * invocation via the quantum expiration timer and at context switch time.
4694 * Performance matters: this routine reuses a timestamp approximating the
4695 * current absolute time received from the caller, and should perform
4696 * no more than a comparison against the deadline in the common case.
4697 */
4698void
sched_traditional_consider_maintenance(uint64_t ctime)
{
4700	uint64_t ndeadline, deadline = sched_maintenance_deadline;
4701
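	/*
	 *	The compare-and-swap on the deadline arbitrates racing
	 *	callers so that only one advances it and issues the wakeup;
	 *	the maintenance thread itself never posts the wakeup.
	 */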
4702	if (__improbable(ctime >= deadline)) {
4703		if (__improbable(current_thread() == sched_maintenance_thread))
4704			return;
4705		OSMemoryBarrier();
4706
4707		ndeadline = ctime + sched_tick_interval;
4708
4709		if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) {
4710			thread_wakeup((event_t)sched_traditional_maintenance_continue);
4711			sched_maintenance_wakeups++;
4712		}
4713	}
4714}
4715
4716#endif /* CONFIG_SCHED_TRADITIONAL */
4717
4718void
4719sched_init_thread(void (*continuation)(void))
4720{
4721	thread_block(THREAD_CONTINUE_NULL);
4722
4723	sched_maintenance_thread = current_thread();
4724	continuation();
4725
4726	/*NOTREACHED*/
4727}
4728
4729#if defined(CONFIG_SCHED_TRADITIONAL)
4730
4731/*
4732 *	thread_update_scan / runq_scan:
4733 *
4734 *	Scan the run queues to account for timesharing threads
4735 *	which need to be updated.
4736 *
4737 *	Scanner runs in two passes.  Pass one squirrels likely
4738 *	threads away in an array, pass two does the update.
4739 *
4740 *	This is necessary because the run queue is locked for
4741 *	the candidate scan, but	the thread is locked for the update.
4742 *
4743 *	Array should be sized to make forward progress, without
4744 *	disabling preemption for long periods.
4745 */
4746
4747#define	THREAD_UPDATE_SIZE		128
4748
4749static thread_t		thread_update_array[THREAD_UPDATE_SIZE];
4750static int			thread_update_count = 0;
4751
4752/*
4753 *	Scan a runq for candidate threads.
4754 *
4755 *	Returns TRUE if retry is needed.
4756 */
4757static boolean_t
4758runq_scan(
4759	run_queue_t				runq)
4760{
4761	register int			count;
4762	register queue_t		q;
4763	register thread_t		thread;
4764
4765	if ((count = runq->count) > 0) {
4766	    q = runq->queues + runq->highq;
4767		while (count > 0) {
4768			queue_iterate(q, thread, thread_t, links) {
4769				if (		thread->sched_stamp != sched_tick		&&
4770						(thread->sched_mode == TH_MODE_TIMESHARE)	) {
4771					if (thread_update_count == THREAD_UPDATE_SIZE)
4772						return (TRUE);
4773
4774					thread_update_array[thread_update_count++] = thread;
4775					thread_reference_internal(thread);
4776				}
4777
4778				count--;
4779			}
4780
4781			q--;
4782		}
4783	}
4784
4785	return (FALSE);
4786}
4787
4788static void
4789thread_update_scan(void)
4790{
4791	boolean_t			restart_needed = FALSE;
4792	processor_t			processor = processor_list;
4793	processor_set_t		pset;
4794	thread_t			thread;
4795	spl_t				s;
4796
4797	do {
4798		do {
4799			pset = processor->processor_set;
4800
4801			s = splsched();
4802			pset_lock(pset);
4803
4804			restart_needed = runq_scan(runq_for_processor(processor));
4805
4806			pset_unlock(pset);
4807			splx(s);
4808
4809			if (restart_needed)
4810				break;
4811
4812			thread = processor->idle_thread;
4813			if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
4814				if (thread_update_count == THREAD_UPDATE_SIZE) {
4815					restart_needed = TRUE;
4816					break;
4817				}
4818
4819				thread_update_array[thread_update_count++] = thread;
4820				thread_reference_internal(thread);
4821			}
4822		} while ((processor = processor->processor_list) != NULL);
4823
4824	    /*
4825	     *	Ok, we now have a collection of candidates -- fix them.
4826	     */
4827	    while (thread_update_count > 0) {
4828			thread = thread_update_array[--thread_update_count];
4829			thread_update_array[thread_update_count] = THREAD_NULL;
4830
4831			s = splsched();
4832			thread_lock(thread);
4833			if (	!(thread->state & (TH_WAIT))	) {
4834				if (SCHED(can_update_priority)(thread))
4835					SCHED(update_priority)(thread);
4836			}
4837			thread_unlock(thread);
4838			splx(s);
4839
4840			thread_deallocate(thread);
4841	    }
4842	} while (restart_needed);
4843}
4844
4845#endif /* CONFIG_SCHED_TRADITIONAL */
4846
4847boolean_t
4848thread_eager_preemption(thread_t thread)
4849{
4850	return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0);
4851}
4852
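/*
 *	Mark a thread for eager preemption.  If the thread is
 *	currently running, force an immediate preemption check,
 *	either locally or via an AST on its processor.
 */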
4853void
4854thread_set_eager_preempt(thread_t thread)
4855{
4856	spl_t x;
4857	processor_t p;
4858	ast_t ast = AST_NONE;
4859
4860	x = splsched();
4861	p = current_processor();
4862
4863	thread_lock(thread);
4864	thread->sched_flags |= TH_SFLAG_EAGERPREEMPT;
4865
4866	if (thread == current_thread()) {
4867		thread_unlock(thread);
4868
4869		ast = csw_check(p);
4870		if (ast != AST_NONE) {
4871			(void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
4872		}
4873	} else {
4874		p = thread->last_processor;
4875
4876		if (p != PROCESSOR_NULL	&& p->state == PROCESSOR_RUNNING &&
4877			p->active_thread == thread) {
4878			cause_ast_check(p);
4879		}
4880
4881		thread_unlock(thread);
4882	}
4883
4884	splx(x);
4885}
4886
4887void
4888thread_clear_eager_preempt(thread_t thread)
4889{
4890	spl_t x;
4891
4892	x = splsched();
4893	thread_lock(thread);
4894
4895	thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT;
4896
4897	thread_unlock(thread);
4898	splx(x);
4899}
4900/*
4901 * Scheduling statistics
4902 */
4903void
4904sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri)
4905{
4906	struct processor_sched_statistics *stats;
4907	boolean_t to_realtime = FALSE;
4908
4909	stats = &processor->processor_data.sched_stats;
4910	stats->csw_count++;
4911
4912	if (otherpri >= BASEPRI_REALTIME) {
4913		stats->rt_sched_count++;
4914		to_realtime = TRUE;
4915	}
4916
4917	if ((reasons & AST_PREEMPT) != 0) {
4918		stats->preempt_count++;
4919
4920		if (selfpri >= BASEPRI_REALTIME) {
4921			stats->preempted_rt_count++;
4922		}
4923
4924		if (to_realtime) {
4925			stats->preempted_by_rt_count++;
4926		}
4927
4928	}
4929}
4930
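/*
 *	Maintain a time-weighted sum of run queue depth: the old
 *	count is credited for the interval since the last change.
 */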
4931void
4932sched_stats_handle_runq_change(struct runq_stats *stats, int old_count)
4933{
4934	uint64_t timestamp = mach_absolute_time();
4935
4936	stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count;
4937	stats->last_change_timestamp = timestamp;
4938}
4939
4940/*
4941 *     For calls from assembly code
4942 */
4943#undef thread_wakeup
4944void
4945thread_wakeup(
4946       event_t         x);
4947
4948void
4949thread_wakeup(
4950       event_t         x)
4951{
4952       thread_wakeup_with_result(x, THREAD_AWAKENED);
4953}
4954
4955boolean_t
4956preemption_enabled(void)
4957{
4958	return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
4959}
4960
4961#if	DEBUG
4962static boolean_t
4963thread_runnable(
4964	thread_t	thread)
4965{
4966	return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
4967}
4968#endif	/* DEBUG */
4969
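/*
 *	Convert the default timer deadline tracking bin boundaries
 *	from nanoseconds to absolute time units.
 */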
4970static void
sched_timer_deadline_tracking_init(void)
{
4972	nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
4973	nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
4974}
4975