1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	sched_prim.c
60 *	Author:	Avadis Tevanian, Jr.
61 *	Date:	1986
62 *
63 *	Scheduling primitives
64 *
65 */
66
67#include <debug.h>
68#include <mach_kdb.h>
69
70#include <ddb/db_output.h>
71
72#include <mach/mach_types.h>
73#include <mach/machine.h>
74#include <mach/policy.h>
75#include <mach/sync_policy.h>
76
77#include <machine/machine_routines.h>
78#include <machine/sched_param.h>
79#include <machine/machine_cpu.h>
80
81#include <kern/kern_types.h>
82#include <kern/clock.h>
83#include <kern/counters.h>
84#include <kern/cpu_number.h>
85#include <kern/cpu_data.h>
86#include <kern/debug.h>
87#include <kern/lock.h>
88#include <kern/macro_help.h>
89#include <kern/machine.h>
90#include <kern/misc_protos.h>
91#include <kern/processor.h>
92#include <kern/queue.h>
93#include <kern/sched.h>
94#include <kern/sched_prim.h>
95#include <kern/syscall_subr.h>
96#include <kern/task.h>
97#include <kern/thread.h>
98#include <kern/wait_queue.h>
99
100#include <vm/pmap.h>
101#include <vm/vm_kern.h>
102#include <vm/vm_map.h>
103
104#include <sys/kdebug.h>
105
106#include <kern/pms.h>
107
108struct run_queue	rt_runq;
109#define RT_RUNQ		((processor_t)-1)
110decl_simple_lock_data(static,rt_lock);
111
112#define		DEFAULT_PREEMPTION_RATE		100		/* (1/s) */
113int			default_preemption_rate = DEFAULT_PREEMPTION_RATE;
114
115#define		MAX_UNSAFE_QUANTA			800
116int			max_unsafe_quanta = MAX_UNSAFE_QUANTA;
117
118#define		MAX_POLL_QUANTA				2
119int			max_poll_quanta = MAX_POLL_QUANTA;
120
121#define		SCHED_POLL_YIELD_SHIFT		4		/* 1/16 */
122int			sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
123
124uint64_t	max_unsafe_computation;
125uint32_t	sched_safe_duration;
126uint64_t	max_poll_computation;
127
128uint32_t	std_quantum;
129uint32_t	min_std_quantum;
130
131uint32_t	std_quantum_us;
132
133uint32_t	max_rt_quantum;
134uint32_t	min_rt_quantum;
135
136uint32_t	sched_cswtime;
137
138unsigned	sched_tick;
139uint32_t	sched_tick_interval;
140
141uint32_t	sched_pri_shift = INT8_MAX;
142uint32_t	sched_fixed_shift;
143
144uint32_t	sched_run_count, sched_share_count;
145uint32_t	sched_load_average, sched_mach_factor;
146
147/* Forwards */
148void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
149
150static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
151static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
152
153static thread_t	run_queue_dequeue(
154					run_queue_t		runq,
155					integer_t		options);
156
157static thread_t	thread_select_idle(
158					thread_t			thread,
159					processor_t			processor);
160
161static thread_t	processor_idle(
162					thread_t			thread,
163					processor_t			processor);
164
165static thread_t	steal_thread(
166					processor_set_t		pset);
167
168static thread_t	steal_processor_thread(
169					processor_t			processor);
170
171static void		thread_update_scan(void);
172
173#if	DEBUG
174extern int debug_task;
175#define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
176#else
177#define TLOG(a, fmt, args...) do {} while (0)
178#endif
179
180#if	DEBUG
181static
182boolean_t	thread_runnable(
183				thread_t		thread);
184
185#endif	/*DEBUG*/
186
187/*
188 *	State machine
189 *
190 * states are combinations of:
191 *  R	running
192 *  W	waiting (or on wait queue)
193 *  N	non-interruptible
194 *  O	swapped out
195 *  I	being swapped in
196 *
197 * init	action
198 *	assert_wait thread_block    clear_wait 		swapout	swapin
199 *
200 * R	RW, RWN	    R;   setrun	    -	       		-
201 * RN	RWN	    RN;  setrun	    -	       		-
202 *
203 * RW		    W		    R	       		-
204 * RWN		    WN		    RN	       		-
205 *
206 * W				    R;   setrun		WO
207 * WN				    RN;  setrun		-
208 *
209 * RO				    -			-	R
210 *
211 */
212
213/*
214 *	Waiting protocols and implementation:
215 *
216 *	Each thread may be waiting for exactly one event; this event
217 *	is set using assert_wait().  That thread may be awakened either
218 *	by performing a thread_wakeup_prim() on its event,
219 *	or by directly waking that thread up with clear_wait().
220 *
221 *	The implementation of wait events uses a hash table.  Each
222 *	bucket is queue of threads having the same hash function
223 *	value; the chain for the queue (linked list) is the run queue
224 *	field.  [It is not possible to be waiting and runnable at the
225 *	same time.]
226 *
227 *	Locks on both the thread and on the hash buckets govern the
228 *	wait event field and the queue chain field.  Because wakeup
229 *	operations only have the event as an argument, the event hash
230 *	bucket must be locked before any thread.
231 *
232 *	Scheduling operations may also occur at interrupt level; therefore,
233 *	interrupts below splsched() must be prevented when holding
234 *	thread or hash bucket locks.
235 *
236 *	The wait event hash table declarations are as follows:
237 */
238
239#define NUMQUEUES	59
240
241struct wait_queue wait_queues[NUMQUEUES];
242
243#define wait_hash(event) \
244	((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES)
245
246int8_t		sched_load_shifts[NRQS];
247int			sched_preempt_pri[NRQBM];
248
249void
250sched_init(void)
251{
252	/*
253	 * Calculate the timeslicing quantum
254	 * in us.
255	 */
256	if (default_preemption_rate < 1)
257		default_preemption_rate = DEFAULT_PREEMPTION_RATE;
258	std_quantum_us = (1000 * 1000) / default_preemption_rate;
259
260	printf("standard timeslicing quantum is %d us\n", std_quantum_us);
261
262	sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
263											(1 << SCHED_TICK_SHIFT);
264
265	wait_queues_init();
266	load_shift_init();
267	preempt_pri_init();
268	simple_lock_init(&rt_lock, 0);
269	run_queue_init(&rt_runq);
270	sched_tick = 0;
271	ast_init();
272}
273
274void
275sched_timebase_init(void)
276{
277	uint64_t	abstime;
278	uint32_t	shift;
279
280	/* standard timeslicing quantum */
281	clock_interval_to_absolutetime_interval(
282							std_quantum_us, NSEC_PER_USEC, &abstime);
283	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
284	std_quantum = abstime;
285
286	/* smallest remaining quantum (250 us) */
287	clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
288	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
289	min_std_quantum = abstime;
290
291	/* smallest rt computaton (50 us) */
292	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
293	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
294	min_rt_quantum = abstime;
295
296	/* maximum rt computation (50 ms) */
297	clock_interval_to_absolutetime_interval(
298							50, 1000*NSEC_PER_USEC, &abstime);
299	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
300	max_rt_quantum = abstime;
301
302	/* scheduler tick interval */
303	clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
304													NSEC_PER_USEC, &abstime);
305	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
306	sched_tick_interval = abstime;
307
308	/*
309	 * Compute conversion factor from usage to
310	 * timesharing priorities with 5/8 ** n aging.
311	 */
312	abstime = (abstime * 5) / 3;
313	for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
314		abstime >>= 1;
315	sched_fixed_shift = shift;
316
317	max_unsafe_computation = max_unsafe_quanta * std_quantum;
318	max_poll_computation = max_poll_quanta * std_quantum;
319}
320
321void
322wait_queues_init(void)
323{
324	register int	i;
325
326	for (i = 0; i < NUMQUEUES; i++) {
327		wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
328	}
329}
330
331/*
332 * Set up values for timeshare
333 * loading factors.
334 */
335static void
336load_shift_init(void)
337{
338	int8_t		k, *p = sched_load_shifts;
339	uint32_t	i, j;
340
341	*p++ = INT8_MIN; *p++ = 0;
342
343	for (i = j = 2, k = 1; i < NRQS; ++k) {
344		for (j <<= 1; i < j; ++i)
345			*p++ = k;
346	}
347}
348
349static void
350preempt_pri_init(void)
351{
352	int		i, *p = sched_preempt_pri;
353
354	for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
355		setbit(i, p);
356
357	for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
358		setbit(i, p);
359}
360
361/*
362 *	Thread wait timer expiration.
363 */
364void
365thread_timer_expire(
366	void			*p0,
367	__unused void	*p1)
368{
369	thread_t		thread = p0;
370	spl_t			s;
371
372	s = splsched();
373	thread_lock(thread);
374	if (--thread->wait_timer_active == 0) {
375		if (thread->wait_timer_is_set) {
376			thread->wait_timer_is_set = FALSE;
377			clear_wait_internal(thread, THREAD_TIMED_OUT);
378		}
379	}
380	thread_unlock(thread);
381	splx(s);
382}
383
384/*
385 *	thread_set_timer:
386 *
387 *	Set a timer for the current thread, if the thread
388 *	is ready to wait.  Must be called between assert_wait()
389 *	and thread_block().
390 */
391void
392thread_set_timer(
393	uint32_t		interval,
394	uint32_t		scale_factor)
395{
396	thread_t		thread = current_thread();
397	uint64_t		deadline;
398	spl_t			s;
399
400	s = splsched();
401	thread_lock(thread);
402	if ((thread->state & TH_WAIT) != 0) {
403		clock_interval_to_deadline(interval, scale_factor, &deadline);
404		if (!timer_call_enter(&thread->wait_timer, deadline))
405			thread->wait_timer_active++;
406		thread->wait_timer_is_set = TRUE;
407	}
408	thread_unlock(thread);
409	splx(s);
410}
411
412void
413thread_set_timer_deadline(
414	uint64_t		deadline)
415{
416	thread_t		thread = current_thread();
417	spl_t			s;
418
419	s = splsched();
420	thread_lock(thread);
421	if ((thread->state & TH_WAIT) != 0) {
422		if (!timer_call_enter(&thread->wait_timer, deadline))
423			thread->wait_timer_active++;
424		thread->wait_timer_is_set = TRUE;
425	}
426	thread_unlock(thread);
427	splx(s);
428}
429
430void
431thread_cancel_timer(void)
432{
433	thread_t		thread = current_thread();
434	spl_t			s;
435
436	s = splsched();
437	thread_lock(thread);
438	if (thread->wait_timer_is_set) {
439		if (timer_call_cancel(&thread->wait_timer))
440			thread->wait_timer_active--;
441		thread->wait_timer_is_set = FALSE;
442	}
443	thread_unlock(thread);
444	splx(s);
445}
446
447/*
448 *	thread_unblock:
449 *
450 *	Unblock thread on wake up.
451 *
452 *	Returns TRUE if the thread is still running.
453 *
454 *	Thread must be locked.
455 */
456boolean_t
457thread_unblock(
458	thread_t		thread,
459	wait_result_t	wresult)
460{
461	boolean_t		result = FALSE;
462
463	/*
464	 *	Set wait_result.
465	 */
466	thread->wait_result = wresult;
467
468	/*
469	 *	Cancel pending wait timer.
470	 */
471	if (thread->wait_timer_is_set) {
472		if (timer_call_cancel(&thread->wait_timer))
473			thread->wait_timer_active--;
474		thread->wait_timer_is_set = FALSE;
475	}
476
477	/*
478	 *	Update scheduling state: not waiting,
479	 *	set running.
480	 */
481	thread->state &= ~(TH_WAIT|TH_UNINT);
482
483	if (!(thread->state & TH_RUN)) {
484		thread->state |= TH_RUN;
485
486		(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
487
488		/*
489		 *	Update run counts.
490		 */
491		sched_run_incr();
492		if (thread->sched_mode & TH_MODE_TIMESHARE)
493			sched_share_incr();
494	}
495	else {
496		/*
497		 *	Signal if idling on another processor.
498		 */
499		if (thread->state & TH_IDLE) {
500			processor_t		processor = thread->last_processor;
501
502			if (processor != current_processor())
503				machine_signal_idle(processor);
504		}
505
506		result = TRUE;
507	}
508
509	/*
510	 * Calculate deadline for real-time threads.
511	 */
512	if (thread->sched_mode & TH_MODE_REALTIME) {
513		thread->realtime.deadline = mach_absolute_time();
514		thread->realtime.deadline += thread->realtime.constraint;
515	}
516
517	/*
518	 * Clear old quantum, fail-safe computation, etc.
519	 */
520	thread->current_quantum = 0;
521	thread->computation_metered = 0;
522	thread->reason = AST_NONE;
523
524	KERNEL_DEBUG_CONSTANT(
525		MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
526					(int)thread, (int)thread->sched_pri, 0, 0, 0);
527
528	return (result);
529}
530
531/*
532 *	Routine:	thread_go
533 *	Purpose:
534 *		Unblock and dispatch thread.
535 *	Conditions:
536 *		thread lock held, IPC locks may be held.
537 *		thread must have been pulled from wait queue under same lock hold.
538 *  Returns:
539 *		KERN_SUCCESS - Thread was set running
540 *		KERN_NOT_WAITING - Thread was not waiting
541 */
542kern_return_t
543thread_go(
544	thread_t		thread,
545	wait_result_t	wresult)
546{
547	assert(thread->at_safe_point == FALSE);
548	assert(thread->wait_event == NO_EVENT64);
549	assert(thread->wait_queue == WAIT_QUEUE_NULL);
550
551	if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
552		if (!thread_unblock(thread, wresult))
553			thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
554
555		return (KERN_SUCCESS);
556	}
557
558	return (KERN_NOT_WAITING);
559}
560
561/*
562 *	Routine:	thread_mark_wait_locked
563 *	Purpose:
564 *		Mark a thread as waiting.  If, given the circumstances,
565 *		it doesn't want to wait (i.e. already aborted), then
566 *		indicate that in the return value.
567 *	Conditions:
568 *		at splsched() and thread is locked.
569 */
570__private_extern__
571wait_result_t
572thread_mark_wait_locked(
573	thread_t			thread,
574	wait_interrupt_t 	interruptible)
575{
576	boolean_t		at_safe_point;
577
578	/*
579	 *	The thread may have certain types of interrupts/aborts masked
580	 *	off.  Even if the wait location says these types of interrupts
581	 *	are OK, we have to honor mask settings (outer-scoped code may
582	 *	not be able to handle aborts at the moment).
583	 */
584	if (interruptible > (thread->options & TH_OPT_INTMASK))
585		interruptible = thread->options & TH_OPT_INTMASK;
586
587	at_safe_point = (interruptible == THREAD_ABORTSAFE);
588
589	if (	interruptible == THREAD_UNINT			||
590			!(thread->sched_mode & TH_MODE_ABORT)	||
591			(!at_safe_point &&
592				(thread->sched_mode & TH_MODE_ABORTSAFELY))) {
593		thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
594		thread->at_safe_point = at_safe_point;
595		return (thread->wait_result = THREAD_WAITING);
596	}
597	else
598	if (thread->sched_mode & TH_MODE_ABORTSAFELY)
599		thread->sched_mode &= ~TH_MODE_ISABORTED;
600
601	return (thread->wait_result = THREAD_INTERRUPTED);
602}
603
604/*
605 *	Routine:	thread_interrupt_level
606 *	Purpose:
607 *	        Set the maximum interruptible state for the
608 *		current thread.  The effective value of any
609 *		interruptible flag passed into assert_wait
610 *		will never exceed this.
611 *
612 *		Useful for code that must not be interrupted,
613 *		but which calls code that doesn't know that.
614 *	Returns:
615 *		The old interrupt level for the thread.
616 */
617__private_extern__
618wait_interrupt_t
619thread_interrupt_level(
620	wait_interrupt_t new_level)
621{
622	thread_t thread = current_thread();
623	wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
624
625	thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
626
627	return result;
628}
629
630/*
631 * Check to see if an assert wait is possible, without actually doing one.
632 * This is used by debug code in locks and elsewhere to verify that it is
633 * always OK to block when trying to take a blocking lock (since waiting
634 * for the actual assert_wait to catch the case may make it hard to detect
635 * this case.
636 */
637boolean_t
638assert_wait_possible(void)
639{
640
641	thread_t thread;
642
643#if	DEBUG
644	if(debug_mode) return TRUE;		/* Always succeed in debug mode */
645#endif
646
647	thread = current_thread();
648
649	return (thread == NULL || wait_queue_assert_possible(thread));
650}
651
652/*
653 *	assert_wait:
654 *
655 *	Assert that the current thread is about to go to
656 *	sleep until the specified event occurs.
657 */
658wait_result_t
659assert_wait(
660	event_t				event,
661	wait_interrupt_t	interruptible)
662{
663	register wait_queue_t	wq;
664	register int		index;
665
666	assert(event != NO_EVENT);
667
668	index = wait_hash(event);
669	wq = &wait_queues[index];
670	return wait_queue_assert_wait(wq, event, interruptible, 0);
671}
672
673wait_result_t
674assert_wait_timeout(
675	event_t				event,
676	wait_interrupt_t	interruptible,
677	uint32_t			interval,
678	uint32_t			scale_factor)
679{
680	thread_t			thread = current_thread();
681	wait_result_t		wresult;
682	wait_queue_t		wqueue;
683	uint64_t			deadline;
684	spl_t				s;
685
686	assert(event != NO_EVENT);
687	wqueue = &wait_queues[wait_hash(event)];
688
689	s = splsched();
690	wait_queue_lock(wqueue);
691	thread_lock(thread);
692
693	clock_interval_to_deadline(interval, scale_factor, &deadline);
694	wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
695													interruptible, deadline, thread);
696
697	thread_unlock(thread);
698	wait_queue_unlock(wqueue);
699	splx(s);
700
701	return (wresult);
702}
703
704wait_result_t
705assert_wait_deadline(
706	event_t				event,
707	wait_interrupt_t	interruptible,
708	uint64_t			deadline)
709{
710	thread_t			thread = current_thread();
711	wait_result_t		wresult;
712	wait_queue_t		wqueue;
713	spl_t				s;
714
715	assert(event != NO_EVENT);
716	wqueue = &wait_queues[wait_hash(event)];
717
718	s = splsched();
719	wait_queue_lock(wqueue);
720	thread_lock(thread);
721
722	wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event,
723													interruptible, deadline, thread);
724
725	thread_unlock(thread);
726	wait_queue_unlock(wqueue);
727	splx(s);
728
729	return (wresult);
730}
731
732/*
733 *	thread_sleep_fast_usimple_lock:
734 *
735 *	Cause the current thread to wait until the specified event
736 *	occurs.  The specified simple_lock is unlocked before releasing
737 *	the cpu and re-acquired as part of waking up.
738 *
739 *	This is the simple lock sleep interface for components that use a
740 *	faster version of simple_lock() than is provided by usimple_lock().
741 */
742__private_extern__ wait_result_t
743thread_sleep_fast_usimple_lock(
744	event_t			event,
745	simple_lock_t		lock,
746	wait_interrupt_t	interruptible)
747{
748	wait_result_t res;
749
750	res = assert_wait(event, interruptible);
751	if (res == THREAD_WAITING) {
752		simple_unlock(lock);
753		res = thread_block(THREAD_CONTINUE_NULL);
754		simple_lock(lock);
755	}
756	return res;
757}
758
759
760/*
761 *	thread_sleep_usimple_lock:
762 *
763 *	Cause the current thread to wait until the specified event
764 *	occurs.  The specified usimple_lock is unlocked before releasing
765 *	the cpu and re-acquired as part of waking up.
766 *
767 *	This is the simple lock sleep interface for components where
768 *	simple_lock() is defined in terms of usimple_lock().
769 */
770wait_result_t
771thread_sleep_usimple_lock(
772	event_t			event,
773	usimple_lock_t		lock,
774	wait_interrupt_t	interruptible)
775{
776	wait_result_t res;
777
778	res = assert_wait(event, interruptible);
779	if (res == THREAD_WAITING) {
780		usimple_unlock(lock);
781		res = thread_block(THREAD_CONTINUE_NULL);
782		usimple_lock(lock);
783	}
784	return res;
785}
786
787/*
788 *	thread_sleep_mutex:
789 *
790 *	Cause the current thread to wait until the specified event
791 *	occurs.  The specified mutex is unlocked before releasing
792 *	the cpu. The mutex will be re-acquired before returning.
793 *
794 *	JMM - Add hint to make sure mutex is available before rousting
795 */
796wait_result_t
797thread_sleep_mutex(
798	event_t			event,
799	mutex_t			*mutex,
800	wait_interrupt_t interruptible)
801{
802	wait_result_t	res;
803
804	res = assert_wait(event, interruptible);
805	if (res == THREAD_WAITING) {
806		mutex_unlock(mutex);
807		res = thread_block(THREAD_CONTINUE_NULL);
808		mutex_lock(mutex);
809	}
810	return res;
811}
812
813/*
814 *	thread_sleep_mutex_deadline:
815 *
816 *	Cause the current thread to wait until the specified event
817 *	(or deadline) occurs.  The specified mutex is unlocked before
818 *	releasing the cpu. The mutex will be re-acquired before returning.
819 */
820wait_result_t
821thread_sleep_mutex_deadline(
822	event_t			event,
823	mutex_t			*mutex,
824	uint64_t		deadline,
825	wait_interrupt_t interruptible)
826{
827	wait_result_t	res;
828
829	res = assert_wait_deadline(event, interruptible, deadline);
830	if (res == THREAD_WAITING) {
831		mutex_unlock(mutex);
832		res = thread_block(THREAD_CONTINUE_NULL);
833		mutex_lock(mutex);
834	}
835	return res;
836}
837
838/*
839 *	thread_sleep_lock_write:
840 *
841 *	Cause the current thread to wait until the specified event
842 *	occurs.  The specified (write) lock is unlocked before releasing
843 *	the cpu. The (write) lock will be re-acquired before returning.
844 */
845wait_result_t
846thread_sleep_lock_write(
847	event_t			event,
848	lock_t			*lock,
849	wait_interrupt_t interruptible)
850{
851	wait_result_t	res;
852
853	res = assert_wait(event, interruptible);
854	if (res == THREAD_WAITING) {
855		lock_write_done(lock);
856		res = thread_block(THREAD_CONTINUE_NULL);
857		lock_write(lock);
858	}
859	return res;
860}
861
862/*
863 * thread_stop:
864 *
865 * Force a preemption point for a thread and wait
866 * for it to stop running.  Arbitrates access among
867 * multiple stop requests. (released by unstop)
868 *
869 * The thread must enter a wait state and stop via a
870 * separate means.
871 *
872 * Returns FALSE if interrupted.
873 */
874boolean_t
875thread_stop(
876	thread_t		thread)
877{
878	wait_result_t	wresult;
879	spl_t			s = splsched();
880
881	wake_lock(thread);
882	thread_lock(thread);
883
884	while (thread->state & TH_SUSP) {
885		thread->wake_active = TRUE;
886		thread_unlock(thread);
887
888		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
889		wake_unlock(thread);
890		splx(s);
891
892		if (wresult == THREAD_WAITING)
893			wresult = thread_block(THREAD_CONTINUE_NULL);
894
895		if (wresult != THREAD_AWAKENED)
896			return (FALSE);
897
898		s = splsched();
899		wake_lock(thread);
900		thread_lock(thread);
901	}
902
903	thread->state |= TH_SUSP;
904
905	while (thread->state & TH_RUN) {
906		processor_t		processor = thread->last_processor;
907
908		if (processor != PROCESSOR_NULL && processor->active_thread == thread)
909			cause_ast_check(processor);
910
911		thread->wake_active = TRUE;
912		thread_unlock(thread);
913
914		wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
915		wake_unlock(thread);
916		splx(s);
917
918		if (wresult == THREAD_WAITING)
919			wresult = thread_block(THREAD_CONTINUE_NULL);
920
921		if (wresult != THREAD_AWAKENED) {
922			thread_unstop(thread);
923			return (FALSE);
924		}
925
926		s = splsched();
927		wake_lock(thread);
928		thread_lock(thread);
929	}
930
931	thread_unlock(thread);
932	wake_unlock(thread);
933	splx(s);
934
935	return (TRUE);
936}
937
938/*
939 * thread_unstop:
940 *
941 * Release a previous stop request and set
942 * the thread running if appropriate.
943 *
944 * Use only after a successful stop operation.
945 */
946void
947thread_unstop(
948	thread_t	thread)
949{
950	spl_t		s = splsched();
951
952	wake_lock(thread);
953	thread_lock(thread);
954
955	if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
956		thread->state &= ~TH_SUSP;
957		thread_unblock(thread, THREAD_AWAKENED);
958
959		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
960	}
961	else
962	if (thread->state & TH_SUSP) {
963		thread->state &= ~TH_SUSP;
964
965		if (thread->wake_active) {
966			thread->wake_active = FALSE;
967			thread_unlock(thread);
968
969			thread_wakeup(&thread->wake_active);
970			wake_unlock(thread);
971			splx(s);
972
973			return;
974		}
975	}
976
977	thread_unlock(thread);
978	wake_unlock(thread);
979	splx(s);
980}
981
982/*
983 * thread_wait:
984 *
985 * Wait for a thread to stop running. (non-interruptible)
986 *
987 */
988void
989thread_wait(
990	thread_t		thread)
991{
992	wait_result_t	wresult;
993	spl_t			s = splsched();
994
995	wake_lock(thread);
996	thread_lock(thread);
997
998	while (thread->state & TH_RUN) {
999		processor_t		processor = thread->last_processor;
1000
1001		if (processor != PROCESSOR_NULL && processor->active_thread == thread)
1002			cause_ast_check(processor);
1003
1004		thread->wake_active = TRUE;
1005		thread_unlock(thread);
1006
1007		wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
1008		wake_unlock(thread);
1009		splx(s);
1010
1011		if (wresult == THREAD_WAITING)
1012			thread_block(THREAD_CONTINUE_NULL);
1013
1014		s = splsched();
1015		wake_lock(thread);
1016		thread_lock(thread);
1017	}
1018
1019	thread_unlock(thread);
1020	wake_unlock(thread);
1021	splx(s);
1022}
1023
1024/*
1025 *	Routine: clear_wait_internal
1026 *
1027 *		Clear the wait condition for the specified thread.
1028 *		Start the thread executing if that is appropriate.
1029 *	Arguments:
1030 *		thread		thread to awaken
1031 *		result		Wakeup result the thread should see
1032 *	Conditions:
1033 *		At splsched
1034 *		the thread is locked.
1035 *	Returns:
1036 *		KERN_SUCCESS		thread was rousted out a wait
1037 *		KERN_FAILURE		thread was waiting but could not be rousted
1038 *		KERN_NOT_WAITING	thread was not waiting
1039 */
1040__private_extern__ kern_return_t
1041clear_wait_internal(
1042	thread_t		thread,
1043	wait_result_t	wresult)
1044{
1045	wait_queue_t	wq = thread->wait_queue;
1046	int				i = LockTimeOut;
1047
1048	do {
1049		if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
1050			return (KERN_FAILURE);
1051
1052		if (wq != WAIT_QUEUE_NULL) {
1053			if (wait_queue_lock_try(wq)) {
1054				wait_queue_pull_thread_locked(wq, thread, TRUE);
1055				/* wait queue unlocked, thread still locked */
1056			}
1057			else {
1058				thread_unlock(thread);
1059				delay(1);
1060
1061				thread_lock(thread);
1062				if (wq != thread->wait_queue)
1063					return (KERN_NOT_WAITING);
1064
1065				continue;
1066			}
1067		}
1068
1069		return (thread_go(thread, wresult));
1070	} while (--i > 0);
1071
1072	panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
1073		  thread, wq, cpu_number());
1074
1075	return (KERN_FAILURE);
1076}
1077
1078
1079/*
1080 *	clear_wait:
1081 *
1082 *	Clear the wait condition for the specified thread.  Start the thread
1083 *	executing if that is appropriate.
1084 *
1085 *	parameters:
1086 *	  thread		thread to awaken
1087 *	  result		Wakeup result the thread should see
1088 */
1089kern_return_t
1090clear_wait(
1091	thread_t		thread,
1092	wait_result_t	result)
1093{
1094	kern_return_t ret;
1095	spl_t		s;
1096
1097	s = splsched();
1098	thread_lock(thread);
1099	ret = clear_wait_internal(thread, result);
1100	thread_unlock(thread);
1101	splx(s);
1102	return ret;
1103}
1104
1105
1106/*
1107 *	thread_wakeup_prim:
1108 *
1109 *	Common routine for thread_wakeup, thread_wakeup_with_result,
1110 *	and thread_wakeup_one.
1111 *
1112 */
1113kern_return_t
1114thread_wakeup_prim(
1115	event_t			event,
1116	boolean_t		one_thread,
1117	wait_result_t	result)
1118{
1119	register wait_queue_t	wq;
1120	register int			index;
1121
1122	index = wait_hash(event);
1123	wq = &wait_queues[index];
1124	if (one_thread)
1125	    return (wait_queue_wakeup_one(wq, event, result));
1126	else
1127	    return (wait_queue_wakeup_all(wq, event, result));
1128}
1129
1130/*
1131 *	thread_bind:
1132 *
1133 *	Force the current thread to execute on the specified processor.
1134 *
1135 *	Returns the previous binding.  PROCESSOR_NULL means
1136 *	not bound.
1137 *
1138 *	XXX - DO NOT export this to users - XXX
1139 */
1140processor_t
1141thread_bind(
1142	processor_t		processor)
1143{
1144	thread_t		self = current_thread();
1145	processor_t		prev;
1146	spl_t			s;
1147
1148	s = splsched();
1149	thread_lock(self);
1150
1151	prev = self->bound_processor;
1152	self->bound_processor = processor;
1153
1154	thread_unlock(self);
1155	splx(s);
1156
1157	return (prev);
1158}
1159
1160/*
1161 *	thread_select:
1162 *
1163 *	Select a new thread for the current processor to execute.
1164 *
1165 *	May select the current thread, which must be locked.
1166 */
1167static thread_t
1168thread_select(
1169	thread_t			thread,
1170	processor_t			processor)
1171{
1172	processor_set_t		pset = processor->processor_set;
1173	thread_t			new_thread = THREAD_NULL;
1174	boolean_t			other_runnable, inactive_state;
1175
1176	do {
1177		/*
1178		 *	Update the priority.
1179		 */
1180		if (thread->sched_stamp != sched_tick)
1181			update_priority(thread);
1182
1183		processor->current_pri = thread->sched_pri;
1184
1185		pset_lock(pset);
1186
1187		inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_num);
1188
1189		simple_lock(&rt_lock);
1190
1191		/*
1192		 *	Check for other runnable threads.
1193		 */
1194		other_runnable = processor->runq.count > 0 || rt_runq.count > 0;
1195
1196		/*
1197		 *	Test to see if the current thread should continue
1198		 *	to run on this processor.  Must be runnable, and not
1199		 *	bound to a different processor, nor be in the wrong
1200		 *	processor set.
1201		 */
1202		if (	thread->state == TH_RUN							&&
1203				(thread->bound_processor == PROCESSOR_NULL	||
1204				 thread->bound_processor == processor)			&&
1205				(thread->affinity_set == AFFINITY_SET_NULL	||
1206				 thread->affinity_set->aset_pset == pset)			) {
1207			if (	thread->sched_pri >= BASEPRI_RTQUEUES	&&
1208						first_timeslice(processor)				) {
1209				if (rt_runq.highq >= BASEPRI_RTQUEUES) {
1210					register run_queue_t	runq = &rt_runq;
1211					register queue_t		q;
1212
1213					q = runq->queues + runq->highq;
1214					if (((thread_t)q->next)->realtime.deadline <
1215													processor->deadline) {
1216						thread = (thread_t)q->next;
1217						((queue_entry_t)thread)->next->prev = q;
1218						q->next = ((queue_entry_t)thread)->next;
1219						thread->runq = PROCESSOR_NULL;
1220						runq->count--; runq->urgency--;
1221						assert(runq->urgency >= 0);
1222						if (queue_empty(q)) {
1223							if (runq->highq != IDLEPRI)
1224								clrbit(MAXPRI - runq->highq, runq->bitmap);
1225							runq->highq = MAXPRI - ffsbit(runq->bitmap);
1226						}
1227					}
1228				}
1229
1230				simple_unlock(&rt_lock);
1231
1232				processor->deadline = thread->realtime.deadline;
1233
1234				pset_unlock(pset);
1235
1236				return (thread);
1237			}
1238
1239			if (!inactive_state &&
1240					(!other_runnable							||
1241					 (processor->runq.highq < thread->sched_pri		&&
1242					  rt_runq.highq < thread->sched_pri))				) {
1243
1244				simple_unlock(&rt_lock);
1245
1246				/* I am the highest priority runnable (non-idle) thread */
1247
1248				pset_pri_hint(pset, processor, processor->current_pri);
1249
1250				pset_count_hint(pset, processor, processor->runq.count);
1251
1252				processor->deadline = UINT64_MAX;
1253
1254				pset_unlock(pset);
1255
1256				return (thread);
1257			}
1258		}
1259
1260		if (other_runnable) {
1261			if (processor->runq.count > 0 && processor->runq.highq >= rt_runq.highq) {
1262				simple_unlock(&rt_lock);
1263
1264				thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ);
1265
1266				if (!inactive_state) {
1267					pset_pri_hint(pset, processor, thread->sched_pri);
1268
1269					pset_count_hint(pset, processor, processor->runq.count);
1270				}
1271
1272				processor->deadline = UINT64_MAX;
1273				pset_unlock(pset);
1274
1275				return (thread);
1276			}
1277
1278			thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
1279			simple_unlock(&rt_lock);
1280
1281			processor->deadline = thread->realtime.deadline;
1282			pset_unlock(pset);
1283
1284			return (thread);
1285		}
1286
1287		simple_unlock(&rt_lock);
1288
1289		processor->deadline = UINT64_MAX;
1290
1291		if (inactive_state) {
1292			if (processor->state == PROCESSOR_RUNNING)
1293				remqueue(&pset->active_queue, (queue_entry_t)processor);
1294			else
1295			if (processor->state == PROCESSOR_IDLE)
1296				remqueue(&pset->idle_queue, (queue_entry_t)processor);
1297
1298			processor->state = PROCESSOR_INACTIVE;
1299
1300			pset_unlock(pset);
1301
1302			return (processor->idle_thread);
1303		}
1304
1305		/*
1306		 *	No runnable threads, attempt to steal
1307		 *	from other processors.
1308		 */
1309		new_thread = steal_thread(pset);
1310		if (new_thread != THREAD_NULL)
1311			return (new_thread);
1312
1313		/*
1314		 *	If other threads have appeared, shortcut
1315		 *	around again.
1316		 */
1317		if (processor->runq.count > 0 || rt_runq.count > 0)
1318			continue;
1319
1320		pset_lock(pset);
1321
1322		/*
1323		 *	Nothing is runnable, so set this processor idle if it
1324		 *	was running.
1325		 */
1326		if (processor->state == PROCESSOR_RUNNING) {
1327			remqueue(&pset->active_queue, (queue_entry_t)processor);
1328			processor->state = PROCESSOR_IDLE;
1329
1330			enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
1331			pset->low_pri = pset->low_count = processor;
1332		}
1333
1334		pset_unlock(pset);
1335
1336		/*
1337		 *	Choose idle thread if fast idle is not possible.
1338		 */
1339		if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active)
1340			return (processor->idle_thread);
1341
1342		/*
1343		 *	Perform idling activities directly without a
1344		 *	context switch.  Return dispatched thread,
1345		 *	else check again for a runnable thread.
1346		 */
1347		new_thread = thread_select_idle(thread, processor);
1348
1349	} while (new_thread == THREAD_NULL);
1350
1351	return (new_thread);
1352}
1353
1354/*
1355 *	thread_select_idle:
1356 *
1357 *	Idle the processor using the current thread context.
1358 *
1359 *	Called with thread locked, then dropped and relocked.
1360 */
1361static thread_t
1362thread_select_idle(
1363	thread_t		thread,
1364	processor_t		processor)
1365{
1366	thread_t		new_thread;
1367
1368	if (thread->sched_mode & TH_MODE_TIMESHARE)
1369		sched_share_decr();
1370	sched_run_decr();
1371
1372	thread->state |= TH_IDLE;
1373	processor->current_pri = IDLEPRI;
1374
1375	thread_unlock(thread);
1376
1377	/*
1378	 *	Switch execution timing to processor idle thread.
1379	 */
1380	processor->last_dispatch = mach_absolute_time();
1381	thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
1382	PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
1383
1384	/*
1385	 *	Cancel the quantum timer while idling.
1386	 */
1387	timer_call_cancel(&processor->quantum_timer);
1388	processor->timeslice = 0;
1389
1390	(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1391
1392	/*
1393	 *	Enable interrupts and perform idling activities.  No
1394	 *	preemption due to TH_IDLE being set.
1395	 */
1396	spllo(); new_thread = processor_idle(thread, processor);
1397
1398	/*
1399	 *	Return at splsched.
1400	 */
1401	(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
1402
1403	thread_lock(thread);
1404
1405	/*
1406	 *	If awakened, switch to thread timer and start a new quantum.
1407	 *	Otherwise skip; we will context switch to another thread or return here.
1408	 */
1409	if (!(thread->state & TH_WAIT)) {
1410		processor->last_dispatch = mach_absolute_time();
1411		thread_timer_event(processor->last_dispatch, &thread->system_timer);
1412		PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1413
1414		thread_quantum_init(thread);
1415
1416		processor->quantum_end = processor->last_dispatch + thread->current_quantum;
1417		timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end);
1418		processor->timeslice = 1;
1419
1420		thread->computation_epoch = processor->last_dispatch;
1421	}
1422
1423	thread->state &= ~TH_IDLE;
1424
1425	sched_run_incr();
1426	if (thread->sched_mode & TH_MODE_TIMESHARE)
1427		sched_share_incr();
1428
1429	return (new_thread);
1430}
1431
1432/*
1433 *	Perform a context switch and start executing the new thread.
1434 *
1435 *	Returns FALSE on failure, and the thread is re-dispatched.
1436 *
1437 *	Called at splsched.
1438 */
1439
1440#define funnel_release_check(thread, debug)				\
1441MACRO_BEGIN												\
1442	if ((thread)->funnel_state & TH_FN_OWNED) {			\
1443		(thread)->funnel_state = TH_FN_REFUNNEL;		\
1444		KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,			\
1445			(thread)->funnel_lock, (debug), 0, 0, 0);	\
1446		funnel_unlock((thread)->funnel_lock);			\
1447	}													\
1448MACRO_END
1449
1450#define funnel_refunnel_check(thread, debug)				\
1451MACRO_BEGIN													\
1452	if ((thread)->funnel_state & TH_FN_REFUNNEL) {			\
1453		kern_return_t	result = (thread)->wait_result;		\
1454															\
1455		(thread)->funnel_state = 0;							\
1456		KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,				\
1457			(thread)->funnel_lock, (debug), 0, 0, 0);		\
1458		funnel_lock((thread)->funnel_lock);					\
1459		KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE,				\
1460			(thread)->funnel_lock, (debug), 0, 0, 0);		\
1461		(thread)->funnel_state = TH_FN_OWNED;				\
1462		(thread)->wait_result = result;						\
1463	}														\
1464MACRO_END
1465
1466static boolean_t
1467thread_invoke(
1468	register thread_t	self,
1469	register thread_t	thread,
1470	ast_t				reason)
1471{
1472	thread_continue_t	continuation = self->continuation;
1473	void				*parameter = self->parameter;
1474	processor_t			processor;
1475
1476	if (get_preemption_level() != 0)
1477		panic("thread_invoke: preemption_level %d\n",
1478				get_preemption_level());
1479
1480	assert(self == current_thread());
1481
1482	/*
1483	 * Mark thread interruptible.
1484	 */
1485	thread_lock(thread);
1486	thread->state &= ~TH_UNINT;
1487
1488#if DEBUG
1489	assert(thread_runnable(thread));
1490#endif
1491
1492	/*
1493	 * Allow time constraint threads to hang onto
1494	 * a stack.
1495	 */
1496	if ((self->sched_mode & TH_MODE_REALTIME) && !self->reserved_stack)
1497		self->reserved_stack = self->kernel_stack;
1498
1499	if (continuation != NULL) {
1500		if (!thread->kernel_stack) {
1501			/*
1502			 * If we are using a privileged stack,
1503			 * check to see whether we can exchange it with
1504			 * that of the other thread.
1505			 */
1506			if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
1507				goto need_stack;
1508
1509			/*
1510			 * Context switch by performing a stack handoff.
1511			 */
1512			continuation = thread->continuation;
1513			parameter = thread->parameter;
1514
1515			processor = current_processor();
1516			processor->active_thread = thread;
1517			processor->current_pri = thread->sched_pri;
1518			if (thread->last_processor != processor && thread->last_processor != NULL) {
1519				if (thread->last_processor->processor_set != processor->processor_set)
1520					thread->ps_switch++;
1521				thread->p_switch++;
1522			}
1523			thread->last_processor = processor;
1524			thread->c_switch++;
1525			ast_context(thread);
1526			thread_unlock(thread);
1527
1528			self->reason = reason;
1529
1530			processor->last_dispatch = mach_absolute_time();
1531			thread_timer_event(processor->last_dispatch, &thread->system_timer);
1532			PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1533
1534			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
1535										self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0);
1536
1537TLOG(1, "thread_invoke: calling machine_stack_handoff\n");
1538			machine_stack_handoff(self, thread);
1539
1540			thread_dispatch(self, thread);
1541
1542			thread->continuation = thread->parameter = NULL;
1543
1544			counter(c_thread_invoke_hits++);
1545
1546			funnel_refunnel_check(thread, 2);
1547			(void) spllo();
1548
1549			assert(continuation);
1550			call_continuation(continuation, parameter, thread->wait_result);
1551			/*NOTREACHED*/
1552		}
1553		else if (thread == self) {
1554			/* same thread but with continuation */
1555			ast_context(self);
1556			counter(++c_thread_invoke_same);
1557			thread_unlock(self);
1558
1559			self->continuation = self->parameter = NULL;
1560
1561			funnel_refunnel_check(self, 3);
1562			(void) spllo();
1563
1564			call_continuation(continuation, parameter, self->wait_result);
1565			/*NOTREACHED*/
1566		}
1567	}
1568	else {
1569		/*
1570		 * Check that the other thread has a stack
1571		 */
1572		if (!thread->kernel_stack) {
1573need_stack:
1574			if (!stack_alloc_try(thread)) {
1575				counter(c_thread_invoke_misses++);
1576				thread_unlock(thread);
1577				thread_stack_enqueue(thread);
1578				return (FALSE);
1579			}
1580		}
1581		else if (thread == self) {
1582			ast_context(self);
1583			counter(++c_thread_invoke_same);
1584			thread_unlock(self);
1585			return (TRUE);
1586		}
1587	}
1588
1589	/*
1590	 * Context switch by full context save.
1591	 */
1592	processor = current_processor();
1593	processor->active_thread = thread;
1594	processor->current_pri = thread->sched_pri;
1595	if (thread->last_processor != processor && thread->last_processor != NULL) {
1596		if (thread->last_processor->processor_set != processor->processor_set)
1597			thread->ps_switch++;
1598		thread->p_switch++;
1599	}
1600	thread->last_processor = processor;
1601	thread->c_switch++;
1602	ast_context(thread);
1603	thread_unlock(thread);
1604
1605	counter(c_thread_invoke_csw++);
1606
1607	assert(self->runq == PROCESSOR_NULL);
1608	self->reason = reason;
1609
1610	processor->last_dispatch = mach_absolute_time();
1611	thread_timer_event(processor->last_dispatch, &thread->system_timer);
1612	PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
1613
1614	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
1615							(int)self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0);
1616
1617	/*
1618	 * This is where we actually switch register context,
1619	 * and address space if required.  We will next run
1620	 * as a result of a subsequent context switch.
1621	 */
1622	thread = machine_switch_context(self, continuation, thread);
1623TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
1624
1625	/*
1626	 * We have been resumed and are set to run.
1627	 */
1628	thread_dispatch(thread, self);
1629
1630	if (continuation) {
1631		self->continuation = self->parameter = NULL;
1632
1633		funnel_refunnel_check(self, 3);
1634		(void) spllo();
1635
1636		call_continuation(continuation, parameter, self->wait_result);
1637		/*NOTREACHED*/
1638	}
1639
1640	return (TRUE);
1641}
1642
1643/*
1644 *	thread_dispatch:
1645 *
1646 *	Handle threads at context switch.  Re-dispatch other thread
1647 *	if still running, otherwise update run state and perform
1648 *	special actions.  Update quantum for other thread and begin
1649 *	the quantum for ourselves.
1650 *
1651 *	Called at splsched.
1652 */
1653void
1654thread_dispatch(
1655	thread_t		thread,
1656	thread_t		self)
1657{
1658	processor_t		processor = self->last_processor;
1659
1660	if (thread != THREAD_NULL) {
1661		/*
1662		 *	If blocked at a continuation, discard
1663		 *	the stack.
1664		 */
1665		if (thread->continuation != NULL && thread->kernel_stack != 0)
1666			stack_free(thread);
1667
1668		if (!(thread->state & TH_IDLE)) {
1669			wake_lock(thread);
1670			thread_lock(thread);
1671
1672			/*
1673			 *	Compute remainder of current quantum.
1674			 */
1675			if (	first_timeslice(processor)							&&
1676					processor->quantum_end > processor->last_dispatch		)
1677				thread->current_quantum = (processor->quantum_end - processor->last_dispatch);
1678			else
1679				thread->current_quantum = 0;
1680
1681			if (thread->sched_mode & TH_MODE_REALTIME) {
1682				/*
1683				 *	Cancel the deadline if the thread has
1684				 *	consumed the entire quantum.
1685				 */
1686				if (thread->current_quantum == 0) {
1687					thread->realtime.deadline = UINT64_MAX;
1688					thread->reason |= AST_QUANTUM;
1689				}
1690			}
1691			else {
1692				/*
1693				 *	For non-realtime threads treat a tiny
1694				 *	remaining quantum as an expired quantum
1695				 *	but include what's left next time.
1696				 */
1697				if (thread->current_quantum < min_std_quantum) {
1698					thread->reason |= AST_QUANTUM;
1699					thread->current_quantum += std_quantum;
1700				}
1701			}
1702
1703			/*
1704			 *	If we are doing a direct handoff then
1705			 *	take the remainder of the quantum.
1706			 */
1707			if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
1708				self->current_quantum = thread->current_quantum;
1709				thread->reason |= AST_QUANTUM;
1710				thread->current_quantum = 0;
1711			}
1712
1713			thread->last_switch = processor->last_dispatch;
1714
1715			thread->computation_metered += (thread->last_switch - thread->computation_epoch);
1716
1717			if (!(thread->state & TH_WAIT)) {
1718				/*
1719				 *	Still running.
1720				 */
1721				if (thread->reason & AST_QUANTUM)
1722					thread_setrun(thread, SCHED_TAILQ);
1723				else
1724				if (thread->reason & AST_PREEMPT)
1725					thread_setrun(thread, SCHED_HEADQ);
1726				else
1727					thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1728
1729				thread->reason = AST_NONE;
1730
1731				thread_unlock(thread);
1732				wake_unlock(thread);
1733			}
1734			else {
1735				/*
1736				 *	Waiting.
1737				 */
1738				thread->state &= ~TH_RUN;
1739
1740				if (thread->sched_mode & TH_MODE_TIMESHARE)
1741					sched_share_decr();
1742				sched_run_decr();
1743
1744				if (thread->wake_active) {
1745					thread->wake_active = FALSE;
1746					thread_unlock(thread);
1747
1748					thread_wakeup(&thread->wake_active);
1749				}
1750				else
1751					thread_unlock(thread);
1752
1753				wake_unlock(thread);
1754
1755				(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
1756
1757				if (thread->state & TH_TERMINATE)
1758					thread_terminate_enqueue(thread);
1759			}
1760		}
1761	}
1762
1763	if (!(self->state & TH_IDLE)) {
1764		/*
1765		 *	Get a new quantum if none remaining.
1766		 */
1767		if (self->current_quantum == 0)
1768			thread_quantum_init(self);
1769
1770		/*
1771		 *	Set up quantum timer and timeslice.
1772		 */
1773		processor->quantum_end = (processor->last_dispatch + self->current_quantum);
1774		timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end);
1775
1776		processor->timeslice = 1;
1777
1778		self->last_switch = processor->last_dispatch;
1779
1780		self->computation_epoch = self->last_switch;
1781	}
1782	else {
1783		timer_call_cancel(&processor->quantum_timer);
1784		processor->timeslice = 0;
1785	}
1786}
1787
1788/*
1789 *	thread_block_reason:
1790 *
1791 *	Forces a reschedule, blocking the caller if a wait
1792 *	has been asserted.
1793 *
1794 *	If a continuation is specified, then thread_invoke will
1795 *	attempt to discard the thread's kernel stack.  When the
1796 *	thread resumes, it will execute the continuation function
1797 *	on a new kernel stack.
1798 */
1799counter(mach_counter_t  c_thread_block_calls = 0;)
1800
1801wait_result_t
1802thread_block_reason(
1803	thread_continue_t	continuation,
1804	void				*parameter,
1805	ast_t				reason)
1806{
1807	register thread_t		self = current_thread();
1808	register processor_t	processor;
1809	register thread_t		new_thread;
1810	spl_t					s;
1811
1812	counter(++c_thread_block_calls);
1813
1814	s = splsched();
1815
1816	if (!(reason & AST_PREEMPT))
1817		funnel_release_check(self, 2);
1818
1819	processor = current_processor();
1820
1821	/* If we're explicitly yielding, force a subsequent quantum */
1822	if (reason & AST_YIELD)
1823		processor->timeslice = 0;
1824
1825	/* We're handling all scheduling AST's */
1826	ast_off(AST_SCHEDULING);
1827
1828	self->continuation = continuation;
1829	self->parameter = parameter;
1830
1831	do {
1832		thread_lock(self);
1833		new_thread = thread_select(self, processor);
1834		thread_unlock(self);
1835	} while (!thread_invoke(self, new_thread, reason));
1836
1837	funnel_refunnel_check(self, 5);
1838	splx(s);
1839
1840	return (self->wait_result);
1841}
1842
1843/*
1844 *	thread_block:
1845 *
1846 *	Block the current thread if a wait has been asserted.
1847 */
1848wait_result_t
1849thread_block(
1850	thread_continue_t	continuation)
1851{
1852	return thread_block_reason(continuation, NULL, AST_NONE);
1853}
1854
1855wait_result_t
1856thread_block_parameter(
1857	thread_continue_t	continuation,
1858	void				*parameter)
1859{
1860	return thread_block_reason(continuation, parameter, AST_NONE);
1861}
1862
1863/*
1864 *	thread_run:
1865 *
1866 *	Switch directly from the current thread to the
1867 *	new thread, handing off our quantum if appropriate.
1868 *
1869 *	New thread must be runnable, and not on a run queue.
1870 *
1871 *	Called at splsched.
1872 */
1873int
1874thread_run(
1875	thread_t			self,
1876	thread_continue_t	continuation,
1877	void				*parameter,
1878	thread_t			new_thread)
1879{
1880	ast_t		handoff = AST_HANDOFF;
1881
1882	funnel_release_check(self, 3);
1883
1884	self->continuation = continuation;
1885	self->parameter = parameter;
1886
1887	while (!thread_invoke(self, new_thread, handoff)) {
1888		processor_t		processor = current_processor();
1889
1890		thread_lock(self);
1891		new_thread = thread_select(self, processor);
1892		thread_unlock(self);
1893		handoff = AST_NONE;
1894	}
1895
1896	funnel_refunnel_check(self, 6);
1897
1898	return (self->wait_result);
1899}
1900
1901/*
1902 *	thread_continue:
1903 *
1904 *	Called at splsched when a thread first receives
1905 *	a new stack after a continuation.
1906 */
1907void
1908thread_continue(
1909	register thread_t	thread)
1910{
1911	register thread_t			self = current_thread();
1912	register thread_continue_t	continuation;
1913	register void				*parameter;
1914
1915	continuation = self->continuation;
1916	parameter = self->parameter;
1917
1918	thread_dispatch(thread, self);
1919
1920	self->continuation = self->parameter = NULL;
1921
1922	funnel_refunnel_check(self, 4);
1923
1924	if (thread != THREAD_NULL)
1925		(void)spllo();
1926
1927 TLOG(1, "thread_continue: calling call_continuation \n");
1928	call_continuation(continuation, parameter, self->wait_result);
1929	/*NOTREACHED*/
1930}
1931
1932/*
1933 *	run_queue_init:
1934 *
1935 *	Initialize a run queue before first use.
1936 */
1937void
1938run_queue_init(
1939	run_queue_t		rq)
1940{
1941	int				i;
1942
1943	rq->highq = IDLEPRI;
1944	for (i = 0; i < NRQBM; i++)
1945		rq->bitmap[i] = 0;
1946	setbit(MAXPRI - IDLEPRI, rq->bitmap);
1947	rq->urgency = rq->count = 0;
1948	for (i = 0; i < NRQS; i++)
1949		queue_init(&rq->queues[i]);
1950}
1951
1952/*
1953 *	run_queue_dequeue:
1954 *
1955 *	Perform a dequeue operation on a run queue,
1956 *	and return the resulting thread.
1957 *
1958 *	The run queue must be locked (see run_queue_remove()
1959 *	for more info), and not empty.
1960 */
1961static thread_t
1962run_queue_dequeue(
1963	run_queue_t		rq,
1964	integer_t		options)
1965{
1966	thread_t		thread;
1967	queue_t			queue = rq->queues + rq->highq;
1968
1969	if (options & SCHED_HEADQ) {
1970		thread = (thread_t)queue->next;
1971		((queue_entry_t)thread)->next->prev = queue;
1972		queue->next = ((queue_entry_t)thread)->next;
1973	}
1974	else {
1975		thread = (thread_t)queue->prev;
1976		((queue_entry_t)thread)->prev->next = queue;
1977		queue->prev = ((queue_entry_t)thread)->prev;
1978	}
1979
1980	thread->runq = PROCESSOR_NULL;
1981	rq->count--;
1982	if (testbit(rq->highq, sched_preempt_pri)) {
1983		rq->urgency--; assert(rq->urgency >= 0);
1984	}
1985	if (queue_empty(queue)) {
1986		if (rq->highq != IDLEPRI)
1987			clrbit(MAXPRI - rq->highq, rq->bitmap);
1988		rq->highq = MAXPRI - ffsbit(rq->bitmap);
1989	}
1990
1991	return (thread);
1992}
1993
1994/*
1995 *	realtime_queue_insert:
1996 *
1997 *	Enqueue a thread for realtime execution.
1998 */
1999static boolean_t
2000realtime_queue_insert(
2001	thread_t			thread)
2002{
2003	run_queue_t			rq = &rt_runq;
2004	queue_t				queue = rq->queues + thread->sched_pri;
2005	uint64_t			deadline = thread->realtime.deadline;
2006	boolean_t			preempt = FALSE;
2007
2008	simple_lock(&rt_lock);
2009
2010	if (queue_empty(queue)) {
2011		enqueue_tail(queue, (queue_entry_t)thread);
2012
2013		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
2014		if (thread->sched_pri > rq->highq)
2015			rq->highq = thread->sched_pri;
2016		preempt = TRUE;
2017	}
2018	else {
2019		register thread_t	entry = (thread_t)queue_first(queue);
2020
2021		while (TRUE) {
2022			if (	queue_end(queue, (queue_entry_t)entry)	||
2023						deadline < entry->realtime.deadline		) {
2024				entry = (thread_t)queue_prev((queue_entry_t)entry);
2025				break;
2026			}
2027
2028			entry = (thread_t)queue_next((queue_entry_t)entry);
2029		}
2030
2031		if ((queue_entry_t)entry == queue)
2032			preempt = TRUE;
2033
2034		insque((queue_entry_t)thread, (queue_entry_t)entry);
2035	}
2036
2037	thread->runq = RT_RUNQ;
2038	rq->count++; rq->urgency++;
2039
2040	simple_unlock(&rt_lock);
2041
2042	return (preempt);
2043}
2044
2045/*
2046 *	realtime_setrun:
2047 *
2048 *	Dispatch a thread for realtime execution.
2049 *
2050 *	Thread must be locked.  Associated pset must
2051 *	be locked, and is returned unlocked.
2052 */
2053static void
2054realtime_setrun(
2055	processor_t			processor,
2056	thread_t			thread)
2057{
2058	processor_set_t		pset = processor->processor_set;
2059
2060	/*
2061	 *	Dispatch directly onto idle processor.
2062	 */
2063	if (processor->state == PROCESSOR_IDLE) {
2064		remqueue(&pset->idle_queue, (queue_entry_t)processor);
2065		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2066
2067		processor->next_thread = thread;
2068		processor->deadline = thread->realtime.deadline;
2069		processor->state = PROCESSOR_DISPATCHING;
2070		pset_unlock(pset);
2071
2072		if (processor != current_processor())
2073			machine_signal_idle(processor);
2074		return;
2075	}
2076
2077	if (realtime_queue_insert(thread)) {
2078		if (processor == current_processor())
2079			ast_on(AST_PREEMPT | AST_URGENT);
2080		else
2081			cause_ast_check(processor);
2082	}
2083
2084	pset_unlock(pset);
2085}
2086
2087/*
2088 *	processor_enqueue:
2089 *
2090 *	Enqueue thread on a processor run queue.  Thread must be locked,
2091 *	and not already be on a run queue.
2092 *
2093 *	Returns TRUE if a preemption is indicated based on the state
2094 *	of the run queue.
2095 *
2096 *	The run queue must be locked (see run_queue_remove()
2097 *	for more info).
2098 */
2099static boolean_t
2100processor_enqueue(
2101	processor_t		processor,
2102	thread_t		thread,
2103	integer_t		options)
2104{
2105	run_queue_t		rq = &processor->runq;
2106	queue_t			queue = rq->queues + thread->sched_pri;
2107	boolean_t		result = FALSE;
2108
2109	if (queue_empty(queue)) {
2110		enqueue_tail(queue, (queue_entry_t)thread);
2111
2112		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
2113		if (thread->sched_pri > rq->highq) {
2114			rq->highq = thread->sched_pri;
2115			result = TRUE;
2116		}
2117	}
2118	else
2119	if (options & SCHED_TAILQ)
2120		enqueue_tail(queue, (queue_entry_t)thread);
2121	else
2122		enqueue_head(queue, (queue_entry_t)thread);
2123
2124	thread->runq = processor;
2125	if (testbit(thread->sched_pri, sched_preempt_pri))
2126		rq->urgency++;
2127	rq->count++;
2128
2129	return (result);
2130}
2131
2132/*
2133 *	processor_setrun:
2134 *
2135 *	Dispatch a thread for execution on a
2136 *	processor.
2137 *
2138 *	Thread must be locked.  Associated pset must
2139 *	be locked, and is returned unlocked.
2140 */
2141static void
2142processor_setrun(
2143	processor_t			processor,
2144	thread_t			thread,
2145	integer_t			options)
2146{
2147	processor_set_t		pset = processor->processor_set;
2148	ast_t				preempt;
2149
2150	/*
2151	 *	Dispatch directly onto idle processor.
2152	 */
2153	if (processor->state == PROCESSOR_IDLE) {
2154		remqueue(&pset->idle_queue, (queue_entry_t)processor);
2155		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2156
2157		processor->next_thread = thread;
2158		processor->deadline = UINT64_MAX;
2159		processor->state = PROCESSOR_DISPATCHING;
2160		pset_unlock(pset);
2161
2162		if (processor != current_processor())
2163			machine_signal_idle(processor);
2164		return;
2165	}
2166
2167	/*
2168	 *	Set preemption mode.
2169	 */
2170	if (testbit(thread->sched_pri, sched_preempt_pri))
2171		preempt = (AST_PREEMPT | AST_URGENT);
2172	else
2173	if (thread->sched_mode & TH_MODE_TIMESHARE && thread->sched_pri < thread->priority)
2174		preempt = AST_NONE;
2175	else
2176		preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
2177
2178	if (!processor_enqueue(processor, thread, options))
2179		preempt = AST_NONE;
2180
2181	if (preempt != AST_NONE) {
2182		if (processor == current_processor()) {
2183			if (csw_check(processor) != AST_NONE)
2184				ast_on(preempt);
2185		}
2186		else
2187		if (	(processor->state == PROCESSOR_RUNNING		||
2188				 processor->state == PROCESSOR_SHUTDOWN)		&&
2189				thread->sched_pri >= processor->current_pri		) {
2190			cause_ast_check(processor);
2191		}
2192	}
2193	else
2194	if (	processor->state == PROCESSOR_SHUTDOWN		&&
2195			thread->sched_pri >= processor->current_pri	) {
2196		cause_ast_check(processor);
2197	}
2198
2199	pset_unlock(pset);
2200}
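
/*
 *	Note on the preemption policy above: threads at or above a
 *	preemption-urgent priority (sched_preempt_pri) select
 *	AST_PREEMPT | AST_URGENT; timeshare threads currently running below
 *	their base priority never preempt; everything else preempts only
 *	when the caller passes SCHED_PREEMPT.  The chosen mode is dropped to
 *	AST_NONE if the enqueue did not actually raise the run queue's
 *	highest priority.
 */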
2201
2202#define next_pset(p)	(((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
2203
2204/*
2205 *	choose_next_pset:
2206 *
2207 *	Return the next sibling pset containing
2208 *	available processors.
2209 *
2210 *	Returns the original pset if none other is
2211 *	suitable.
2212 */
2213static processor_set_t
2214choose_next_pset(
2215	processor_set_t		pset)
2216{
2217	processor_set_t		nset = pset;
2218
2219	do {
2220		nset = next_pset(nset);
2221	} while (nset->processor_count < 1 && nset != pset);
2222
2223	return (nset);
2224}
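
/*
 *	Note: next_pset() follows the node's pset list and wraps from the
 *	last pset back to node->psets, so the loop above terminates either
 *	at a sibling pset with at least one processor or back at the
 *	original pset.
 */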
2225
2226/*
2227 *	choose_processor:
2228 *
2229 *	Choose a processor for the thread, beginning at
2230 *	the pset.
2231 *
2232 *	Returns a processor, possibly from a different pset.
2233 *
2234 *	The thread must be locked.  The pset must be locked,
2235 *	and the resulting pset is locked on return.
2236 */
2237static processor_t
2238choose_processor(
2239	processor_set_t		pset,
2240	thread_t			thread)
2241{
2242	processor_set_t		nset, cset = pset;
2243	processor_t			processor = thread->last_processor;
2244
2245	/*
2246	 *	Prefer the last processor, when appropriate.
2247	 */
2248	if (processor != PROCESSOR_NULL) {
2249		if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
2250				processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
2251			processor = PROCESSOR_NULL;
2252		else
2253		if (processor->state == PROCESSOR_IDLE || ( thread->sched_pri > BASEPRI_DEFAULT && processor->current_pri < thread->sched_pri))
2254			return (processor);
2255	}
2256
2257	/*
2258	 *	Iterate through the processor sets to locate
2259	 *	an appropriate processor.
2260	 */
2261	do {
2262		/*
2263		 *	Choose an idle processor.
2264		 */
2265		if (!queue_empty(&cset->idle_queue))
2266			return ((processor_t)queue_first(&cset->idle_queue));
2267
2268		if (thread->sched_pri >= BASEPRI_RTQUEUES) {
2269			/*
2270			 *	For an RT thread, iterate through active processors, first fit.
2271			 */
2272			processor = (processor_t)queue_first(&cset->active_queue);
2273			while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
2274				if (thread->sched_pri > processor->current_pri ||
2275						thread->realtime.deadline < processor->deadline)
2276					return (processor);
2277
2278				processor = (processor_t)queue_next((queue_entry_t)processor);
2279			}
2280
2281			processor = PROCESSOR_NULL;
2282		}
2283		else {
2284			/*
2285			 *	Check any hinted processors in the processor set if available.
2286			 */
2287			if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
2288					cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
2289						(processor == PROCESSOR_NULL ||
2290							(thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
2291				processor = cset->low_pri;
2292			}
2293			else
2294			if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
2295					cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
2296						(processor == PROCESSOR_NULL ||
2297						 ( thread->sched_pri <= BASEPRI_DEFAULT && cset->low_count->runq.count < processor->runq.count))) {
2298				processor = cset->low_count;
2299			}
2300
2301			/*
2302			 *	Otherwise, choose an available processor in the set.
2303			 */
2304			if (processor == PROCESSOR_NULL) {
2305				processor = (processor_t)dequeue_head(&cset->active_queue);
2306				if (processor != PROCESSOR_NULL)
2307					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
2308			}
2309		}
2310
2311		/*
2312		 *	Move onto the next processor set.
2313		 */
2314		nset = next_pset(cset);
2315
2316		if (nset != pset) {
2317			pset_unlock(cset);
2318
2319			cset = nset;
2320			pset_lock(cset);
2321		}
2322	} while (nset != pset);
2323
2324	/*
2325	 *	Make sure that we pick a running processor,
2326	 *	and that the correct processor set is locked.
2327	 */
2328	do {
2329		/*
2330		 *	If we haven't been able to choose a processor,
2331		 *	pick the boot processor and return it.
2332		 */
2333		if (processor == PROCESSOR_NULL) {
2334			processor = master_processor;
2335
2336			/*
2337			 *	Check that the correct processor set is
2338			 *	returned locked.
2339			 */
2340			if (cset != processor->processor_set) {
2341				pset_unlock(cset);
2342
2343				cset = processor->processor_set;
2344				pset_lock(cset);
2345			}
2346
2347			return (processor);
2348		}
2349
2350		/*
2351		 *	Check that the processor set for the chosen
2352		 *	processor is locked.
2353		 */
2354		if (cset != processor->processor_set) {
2355			pset_unlock(cset);
2356
2357			cset = processor->processor_set;
2358			pset_lock(cset);
2359		}
2360
2361		/*
2362		 *	We must verify that the chosen processor is still available.
2363		 */
2364		if (processor->state == PROCESSOR_INACTIVE ||
2365					processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
2366			processor = PROCESSOR_NULL;
2367	} while (processor == PROCESSOR_NULL);
2368
2369	return (processor);
2370}
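
/*
 *	Note: the selection policy above prefers, in order: (1) the thread's
 *	last processor, when it is usable in the same pset and either idle
 *	or, for threads above BASEPRI_DEFAULT, running lower-priority work;
 *	(2) any idle processor in the current pset; (3) for realtime
 *	threads, the first active processor preemptible on priority or
 *	deadline; (4) the pset's low-priority / low-count hint processors;
 *	(5) a round-robin pick from the active queue.  Steps (2)-(5) repeat
 *	across sibling psets before falling back to (6) the master (boot)
 *	processor.
 */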
2371
2372/*
2373 *	thread_setrun:
2374 *
2375 *	Dispatch thread for execution, onto an idle
2376 *	processor or run queue, and signal a preemption
2377 *	as appropriate.
2378 *
2379 *	Thread must be locked.
2380 */
2381void
2382thread_setrun(
2383	thread_t			thread,
2384	integer_t			options)
2385{
2386	processor_t			processor;
2387	processor_set_t		pset;
2388
2389#if DEBUG
2390	assert(thread_runnable(thread));
2391#endif
2392
2393	/*
2394	 *	Update priority if needed.
2395	 */
2396	if (thread->sched_stamp != sched_tick)
2397		update_priority(thread);
2398
2399	assert(thread->runq == PROCESSOR_NULL);
2400
2401	if (thread->bound_processor == PROCESSOR_NULL) {
2402		/*
2403		 *	Unbound case.
2404		 */
2405		if (thread->affinity_set != AFFINITY_SET_NULL) {
2406			/*
2407			 * Use affinity set policy hint.
2408			 */
2409			pset = thread->affinity_set->aset_pset;
2410			pset_lock(pset);
2411
2412			processor = choose_processor(pset, thread);
2413		}
2414		else
2415		if (thread->last_processor != PROCESSOR_NULL) {
2416			/*
2417			 *	Simple (last processor) affinity case.
2418			 */
2419			processor = thread->last_processor;
2420			pset = processor->processor_set;
2421			pset_lock(pset);
2422
2423			/*
2424			 *	Choose a different processor in certain cases.
2425			 */
2426			if (thread->sched_pri >= BASEPRI_RTQUEUES) {
2427				/*
2428				 *	If the processor is executing an RT thread with
2429				 *	an earlier deadline, choose another.
2430				 */
2431				if (thread->sched_pri <= processor->current_pri ||
2432						thread->realtime.deadline >= processor->deadline)
2433					processor = choose_processor(pset, thread);
2434			}
2435			else
2436				processor = choose_processor(pset, thread);
2437		}
2438		else {
2439			/*
2440			 *	No Affinity case:
2441			 *
2442		 *	Utilize a per-task hint to spread threads
2443			 *	among the available processor sets.
2444			 */
2445			task_t		task = thread->task;
2446
2447			pset = task->pset_hint;
2448			if (pset == PROCESSOR_SET_NULL)
2449				pset = current_processor()->processor_set;
2450
2451			pset = choose_next_pset(pset);
2452			pset_lock(pset);
2453
2454			processor = choose_processor(pset, thread);
2455			task->pset_hint = processor->processor_set;
2456		}
2457	}
2458	else {
2459		/*
2460		 *	Bound case:
2461		 *
2462		 *	Unconditionally dispatch on the processor.
2463		 */
2464		processor = thread->bound_processor;
2465		pset = processor->processor_set;
2466		pset_lock(pset);
2467	}
2468
2469	/*
2470	 *	Dispatch the thread on the chosen processor.
2471	 */
2472	if (thread->sched_pri >= BASEPRI_RTQUEUES)
2473		realtime_setrun(processor, thread);
2474	else
2475		processor_setrun(processor, thread, options);
2476}
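
/*
 *	Illustrative caller sketch, not a new interface: it mirrors the
 *	thread_lock()/thread_setrun() sequence used by
 *	processor_queue_shutdown() below, with the splsched()/splx()
 *	bracketing seen in thread_update_scan().  The thread must be
 *	runnable and not already on a run queue:
 *
 *		spl_t	s;
 *
 *		s = splsched();
 *		thread_lock(thread);
 *		thread_setrun(thread, SCHED_TAILQ);
 *		thread_unlock(thread);
 *		splx(s);
 */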
2477
2478/*
2479 *	processor_queue_shutdown:
2480 *
2481 *	Shutdown a processor run queue by
2482 *	re-dispatching non-bound threads.
2483 *
2484 *	Associated pset must be locked, and is
2485 *	returned unlocked.
2486 */
2487void
2488processor_queue_shutdown(
2489	processor_t			processor)
2490{
2491	processor_set_t		pset = processor->processor_set;
2492	run_queue_t			rq = &processor->runq;
2493	queue_t				queue = rq->queues + rq->highq;
2494	int					pri = rq->highq, count = rq->count;
2495	thread_t			next, thread;
2496	queue_head_t		tqueue;
2497
2498	queue_init(&tqueue);
2499
2500	while (count > 0) {
2501		thread = (thread_t)queue_first(queue);
2502		while (!queue_end(queue, (queue_entry_t)thread)) {
2503			next = (thread_t)queue_next((queue_entry_t)thread);
2504
2505			if (thread->bound_processor != processor) {
2506				remqueue(queue, (queue_entry_t)thread);
2507
2508				thread->runq = PROCESSOR_NULL;
2509				rq->count--;
2510				if (testbit(pri, sched_preempt_pri)) {
2511					rq->urgency--; assert(rq->urgency >= 0);
2512				}
2513				if (queue_empty(queue)) {
2514					if (pri != IDLEPRI)
2515						clrbit(MAXPRI - pri, rq->bitmap);
2516					rq->highq = MAXPRI - ffsbit(rq->bitmap);
2517				}
2518
2519				enqueue_tail(&tqueue, (queue_entry_t)thread);
2520			}
2521			count--;
2522
2523			thread = next;
2524		}
2525
2526		queue--; pri--;
2527	}
2528
2529	pset_unlock(pset);
2530
2531	while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
2532		thread_lock(thread);
2533
2534		thread_setrun(thread, SCHED_TAILQ);
2535
2536		thread_unlock(thread);
2537	}
2538}
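
/*
 *	Note: only threads that are not bound to the departing processor are
 *	re-dispatched above; bound threads are left on its run queue and are
 *	merely counted down by the scan.
 */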
2539
2540/*
2541 *	Check for a preemption point in
2542 *	the current context.
2543 *
2544 *	Called at splsched.
2545 */
2546ast_t
2547csw_check(
2548	processor_t		processor)
2549{
2550	ast_t			result = AST_NONE;
2551	run_queue_t		runq;
2552
2553	if (first_timeslice(processor)) {
2554		runq = &rt_runq;
2555		if (runq->highq >= BASEPRI_RTQUEUES)
2556			return (AST_PREEMPT | AST_URGENT);
2557
2558		if (runq->highq > processor->current_pri) {
2559			if (runq->urgency > 0)
2560				return (AST_PREEMPT | AST_URGENT);
2561
2562			result |= AST_PREEMPT;
2563		}
2564
2565		runq = &processor->runq;
2566		if (runq->highq > processor->current_pri) {
2567			if (runq->urgency > 0)
2568				return (AST_PREEMPT | AST_URGENT);
2569
2570			result |= AST_PREEMPT;
2571		}
2572	}
2573	else {
2574		runq = &rt_runq;
2575		if (runq->highq >= processor->current_pri) {
2576			if (runq->urgency > 0)
2577				return (AST_PREEMPT | AST_URGENT);
2578
2579			result |= AST_PREEMPT;
2580		}
2581
2582		runq = &processor->runq;
2583		if (runq->highq >= processor->current_pri) {
2584			if (runq->urgency > 0)
2585				return (AST_PREEMPT | AST_URGENT);
2586
2587			result |= AST_PREEMPT;
2588		}
2589	}
2590
2591	if (result != AST_NONE)
2592		return (result);
2593
2594	if (machine_cpu_is_inactive(processor->cpu_num))
2595		return (AST_PREEMPT);
2596
2597	if (processor->active_thread->state & TH_SUSP)
2598		return (AST_PREEMPT);
2599
2600	return (AST_NONE);
2601}
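
/*
 *	Note on the checks above: while the running thread still has quantum
 *	remaining (first_timeslice()), any queued realtime work is treated
 *	as urgent and otherwise only strictly higher-priority work triggers
 *	a preemption; once the quantum has expired the comparisons relax to
 *	'>=', so equal-priority threads can also force a reschedule.  In
 *	both cases the AST is made urgent when the queued work is itself
 *	preemption-urgent (runq->urgency > 0).
 */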
2602
2603/*
2604 *	set_sched_pri:
2605 *
2606 *	Set the scheduled priority of the specified thread.
2607 *
2608 *	This may cause the thread to change queues.
2609 *
2610 *	Thread must be locked.
2611 */
2612void
2613set_sched_pri(
2614	thread_t		thread,
2615	int				priority)
2616{
2617	boolean_t		removed = run_queue_remove(thread);
2618
2619	thread->sched_pri = priority;
2620	if (removed)
2621		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
2622	else
2623	if (thread->state & TH_RUN) {
2624		processor_t		processor = thread->last_processor;
2625
2626		if (thread == current_thread()) {
2627			ast_t			preempt;
2628
2629			processor->current_pri = priority;
2630			if ((preempt = csw_check(processor)) != AST_NONE)
2631				ast_on(preempt);
2632		}
2633		else
2634		if (	processor != PROCESSOR_NULL						&&
2635				processor->active_thread == thread	)
2636			cause_ast_check(processor);
2637	}
2638}
2639
2640#if		0
2641
2642static void
2643run_queue_check(
2644	run_queue_t		rq,
2645	thread_t		thread)
2646{
2647	queue_t			q;
2648	queue_entry_t	qe;
2649
2650	if (rq != thread->runq)
2651		panic("run_queue_check: thread runq");
2652
2653	if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
2654		panic("run_queue_check: thread sched_pri");
2655
2656	q = &rq->queues[thread->sched_pri];
2657	qe = queue_first(q);
2658	while (!queue_end(q, qe)) {
2659		if (qe == (queue_entry_t)thread)
2660			return;
2661
2662		qe = queue_next(qe);
2663	}
2664
2665	panic("run_queue_check: end");
2666}
2667
2668#endif	/* 0 */
2669
2670/*
2671 *	run_queue_remove:
2672 *
2673 *	Remove a thread from a current run queue and
2674 *	return TRUE if successful.
2675 *
2676 *	Thread must be locked.
2677 */
2678boolean_t
2679run_queue_remove(
2680	thread_t		thread)
2681{
2682	processor_t		processor = thread->runq;
2683
2684	/*
2685	 *	If processor is PROCESSOR_NULL, the thread will stay out of the
2686	 *	run queues because the caller locked the thread.  Otherwise
2687	 *	the thread is on a run queue, but could be chosen for dispatch
2688	 *	and removed.
2689	 */
2690	if (processor != PROCESSOR_NULL) {
2691		void *			rqlock;
2692		run_queue_t		rq;
2693
2694		/*
2695		 *	The processor run queues are locked by the
2696		 *	processor set.  Real-time priorities use a
2697		 *	global queue with a dedicated lock.
2698		 */
2699		if (thread->sched_pri < BASEPRI_RTQUEUES) {
2700			rqlock = &processor->processor_set->sched_lock;
2701			rq = &processor->runq;
2702		}
2703		else {
2704			rqlock = &rt_lock; rq = &rt_runq;
2705		}
2706
2707		simple_lock(rqlock);
2708
2709		if (processor == thread->runq) {
2710			/*
2711			 *	Thread is on a run queue and we have a lock on
2712			 *	that run queue.
2713			 */
2714			remqueue(&rq->queues[0], (queue_entry_t)thread);
2715			rq->count--;
2716			if (testbit(thread->sched_pri, sched_preempt_pri)) {
2717				rq->urgency--; assert(rq->urgency >= 0);
2718			}
2719
2720			if (queue_empty(rq->queues + thread->sched_pri)) {
2721				/* update run queue status */
2722				if (thread->sched_pri != IDLEPRI)
2723					clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
2724				rq->highq = MAXPRI - ffsbit(rq->bitmap);
2725			}
2726
2727			thread->runq = PROCESSOR_NULL;
2728		}
2729		else {
2730			/*
2731			 *	The thread left the run queue before we could
2732			 * 	lock the run queue.
2733			 */
2734			assert(thread->runq == PROCESSOR_NULL);
2735			processor = PROCESSOR_NULL;
2736		}
2737
2738		simple_unlock(rqlock);
2739	}
2740
2741	return (processor != PROCESSOR_NULL);
2742}
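
/*
 *	Note: thread->runq is re-checked after the run queue lock is taken
 *	because the thread may be dequeued for dispatch in the window
 *	between reading thread->runq and acquiring the lock; in that case
 *	the routine reports failure and the thread is already (or about to
 *	be) running.
 */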
2743
2744/*
2745 *	steal_processor_thread:
2746 *
2747 *	Locate a thread to steal from the processor and
2748 *	return it.
2749 *
2750 *	Associated pset must be locked.  Returns THREAD_NULL
2751 *	on failure.
2752 */
2753static thread_t
2754steal_processor_thread(
2755	processor_t		processor)
2756{
2757	run_queue_t		rq = &processor->runq;
2758	queue_t			queue = rq->queues + rq->highq;
2759	int				pri = rq->highq, count = rq->count;
2760	thread_t		thread;
2761
2762	while (count > 0) {
2763		thread = (thread_t)queue_first(queue);
2764		while (!queue_end(queue, (queue_entry_t)thread)) {
2765			if (thread->bound_processor != processor) {
2766				remqueue(queue, (queue_entry_t)thread);
2767
2768				thread->runq = PROCESSOR_NULL;
2769				rq->count--;
2770				if (testbit(pri, sched_preempt_pri)) {
2771					rq->urgency--; assert(rq->urgency >= 0);
2772				}
2773				if (queue_empty(queue)) {
2774					if (pri != IDLEPRI)
2775						clrbit(MAXPRI - pri, rq->bitmap);
2776					rq->highq = MAXPRI - ffsbit(rq->bitmap);
2777				}
2778
2779				return (thread);
2780			}
2781			count--;
2782
2783			thread = (thread_t)queue_next((queue_entry_t)thread);
2784		}
2785
2786		queue--; pri--;
2787	}
2788
2789	return (THREAD_NULL);
2790}
2791
2792/*
2793 *	Locate and steal a thread, beginning
2794 *	at the pset.
2795 *
2796 *	The pset must be locked, and is returned
2797 *	unlocked.
2798 *
2799 *	Returns the stolen thread, or THREAD_NULL on
2800 *	failure.
2801 */
2802static thread_t
2803steal_thread(
2804	processor_set_t		pset)
2805{
2806	processor_set_t		nset, cset = pset;
2807	processor_t			processor;
2808	thread_t			thread;
2809
2810	do {
2811		processor = (processor_t)queue_first(&cset->active_queue);
2812		while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
2813			if (processor->runq.count > 0) {
2814				thread = steal_processor_thread(processor);
2815				if (thread != THREAD_NULL) {
2816					remqueue(&cset->active_queue, (queue_entry_t)processor);
2817					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
2818
2819					pset_unlock(cset);
2820
2821					return (thread);
2822				}
2823			}
2824
2825			processor = (processor_t)queue_next((queue_entry_t)processor);
2826		}
2827
2828		nset = next_pset(cset);
2829
2830		if (nset != pset) {
2831			pset_unlock(cset);
2832
2833			cset = nset;
2834			pset_lock(cset);
2835		}
2836	} while (nset != pset);
2837
2838	pset_unlock(cset);
2839
2840	return (THREAD_NULL);
2841}
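
/*
 *	Note: on a successful steal the victim processor is rotated to the
 *	tail of its pset's active queue, so repeated steals spread across
 *	processors instead of repeatedly raiding the same run queue.
 */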
2842
2843/*
2844 *	This is the processor idle loop, which just looks for other threads
2845 *	to execute.  Processor idle threads invoke this without supplying a
2846 *	current thread, to idle without an asserted wait state.
2847 *
2848 *	Returns the next thread to execute if one was dispatched directly.
2849 */
2850static thread_t
2851processor_idle(
2852	thread_t			thread,
2853	processor_t			processor)
2854{
2855	processor_set_t		pset = processor->processor_set;
2856	thread_t			new_thread;
2857	int					state;
2858
2859	(void)splsched();
2860
2861#ifdef __ppc__
2862	pmsDown();					/* Step power down */
2863#endif
2864
2865	KERNEL_DEBUG_CONSTANT(
2866		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (int)thread, 0, 0, 0, 0);
2867
2868	timer_switch(&PROCESSOR_DATA(processor, system_state),
2869									mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
2870	PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
2871
2872	while (processor->next_thread == THREAD_NULL && processor->runq.count == 0 && rt_runq.count == 0 &&
2873				(thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) {
2874		machine_idle();
2875
2876		(void)splsched();
2877
2878		if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_num))
2879			break;
2880	}
2881
2882	timer_switch(&PROCESSOR_DATA(processor, idle_state),
2883									mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
2884	PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
2885
2886	pset_lock(pset);
2887
2888#ifdef __ppc__
2889	pmsStep(0);					/* Step up out of idle power */
2890#endif
2891
2892	state = processor->state;
2893	if (state == PROCESSOR_DISPATCHING) {
2894		/*
2895		 *	Common case -- cpu dispatched.
2896		 */
2897		new_thread = processor->next_thread;
2898		processor->next_thread = THREAD_NULL;
2899		processor->state = PROCESSOR_RUNNING;
2900
2901		if (	processor->runq.highq > new_thread->sched_pri					||
2902				(rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri)	) {
2903			processor->deadline = UINT64_MAX;
2904
2905			pset_unlock(pset);
2906
2907			thread_lock(new_thread);
2908			thread_setrun(new_thread, SCHED_HEADQ);
2909			thread_unlock(new_thread);
2910
2911			KERNEL_DEBUG_CONSTANT(
2912				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
2913
2914			return (THREAD_NULL);
2915		}
2916
2917		pset_unlock(pset);
2918
2919		KERNEL_DEBUG_CONSTANT(
2920			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, (int)new_thread, 0, 0);
2921
2922		return (new_thread);
2923	}
2924	else
2925	if (state == PROCESSOR_IDLE) {
2926		remqueue(&pset->idle_queue, (queue_entry_t)processor);
2927
2928		processor->state = PROCESSOR_RUNNING;
2929		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2930	}
2931	else
2932	if (state == PROCESSOR_INACTIVE) {
2933		processor->state = PROCESSOR_RUNNING;
2934		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
2935	}
2936	else
2937	if (state == PROCESSOR_SHUTDOWN) {
2938		/*
2939		 *	Going off-line.  Force a
2940		 *	reschedule.
2941		 */
2942		if ((new_thread = processor->next_thread) != THREAD_NULL) {
2943			processor->next_thread = THREAD_NULL;
2944			processor->deadline = UINT64_MAX;
2945
2946			pset_unlock(pset);
2947
2948			thread_lock(new_thread);
2949			thread_setrun(new_thread, SCHED_HEADQ);
2950			thread_unlock(new_thread);
2951
2952			KERNEL_DEBUG_CONSTANT(
2953				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
2954
2955			return (THREAD_NULL);
2956		}
2957	}
2958
2959	pset_unlock(pset);
2960
2961	KERNEL_DEBUG_CONSTANT(
2962		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0);
2963
2964	return (THREAD_NULL);
2965}
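
/*
 *	Note: in the PROCESSOR_DISPATCHING case the thread handed off via
 *	processor->next_thread is normally returned for immediate execution;
 *	it is pushed back onto a run queue (and THREAD_NULL returned) only
 *	if strictly higher-priority local work, or realtime work of at least
 *	equal priority, arrived while the processor was waking up.
 */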
2966
2967/*
2968 *	Each processor has a dedicated thread which
2969 *	executes the idle loop when there is no suitable
2970 *	previous context.
2971 */
2972void
2973idle_thread(void)
2974{
2975	processor_t		processor = current_processor();
2976	thread_t		new_thread;
2977
2978	new_thread = processor_idle(THREAD_NULL, processor);
2979	if (new_thread != THREAD_NULL) {
2980		thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
2981		/*NOTREACHED*/
2982	}
2983
2984	thread_block((thread_continue_t)idle_thread);
2985	/*NOTREACHED*/
2986}
2987
2988kern_return_t
2989idle_thread_create(
2990	processor_t		processor)
2991{
2992	kern_return_t	result;
2993	thread_t		thread;
2994	spl_t			s;
2995
2996	result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
2997	if (result != KERN_SUCCESS)
2998		return (result);
2999
3000	s = splsched();
3001	thread_lock(thread);
3002	thread->bound_processor = processor;
3003	processor->idle_thread = thread;
3004	thread->sched_pri = thread->priority = IDLEPRI;
3005	thread->state = (TH_RUN | TH_IDLE);
3006	thread_unlock(thread);
3007	splx(s);
3008
3009	thread_deallocate(thread);
3010
3011	return (KERN_SUCCESS);
3012}
3013
3014static uint64_t		sched_tick_deadline;
3015
3016/*
3017 * sched_startup:
3018 *
3019 * Kicks off scheduler services.
3020 *
3021 * Called at splsched.
3022 */
3023void
3024sched_startup(void)
3025{
3026	kern_return_t	result;
3027	thread_t		thread;
3028
3029	result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
3030	if (result != KERN_SUCCESS)
3031		panic("sched_startup");
3032
3033	thread_deallocate(thread);
3034
3035	/*
3036	 * Yield to the sched_tick_thread while it times
3037	 * a series of context switches back.  It stores
3038	 * the baseline value in sched_cswtime.
3039	 *
3040	 * The current thread is the only other thread
3041	 * active at this point.
3042	 */
3043	while (sched_cswtime == 0)
3044		thread_block(THREAD_CONTINUE_NULL);
3045
3046	thread_daemon_init();
3047
3048	thread_call_initialize();
3049}
3050
3051/*
3052 *	sched_tick_continue:
3053 *
3054 *	Continuation routine for sched_tick_thread; performs periodic
3055 *	bookkeeping functions about ten times per second.
3056 */
3057static void
3058sched_tick_continue(void)
3059{
3060	uint64_t			abstime = mach_absolute_time();
3061
3062	sched_tick++;
3063
3064	/*
3065	 *  Compute various averages.
3066	 */
3067	compute_averages();
3068
3069	/*
3070	 *  Scan the run queues for threads which
3071	 *  may need to be updated.
3072	 */
3073	thread_update_scan();
3074
3075	clock_deadline_for_periodic_event(sched_tick_interval, abstime,
3076														&sched_tick_deadline);
3077
3078	assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
3079	thread_block((thread_continue_t)sched_tick_continue);
3080	/*NOTREACHED*/
3081}
3082
3083/*
3084 * Time a series of context switches to determine
3085 * a baseline.  Toss the high and low and return
3086 * the one-way value.
3087 */
3088static uint32_t
3089time_cswitch(void)
3090{
3091	uint32_t	new, hi, low, accum;
3092	uint64_t	abstime;
3093	int			i, tries = 7;
3094
3095	accum = hi = low = 0;
3096	for (i = 0; i < tries; ++i) {
3097		abstime = mach_absolute_time();
3098		thread_block(THREAD_CONTINUE_NULL);
3099
3100		new = mach_absolute_time() - abstime;
3101
3102		if (i == 0)
3103			accum = hi = low = new;
3104		else {
3105			if (new < low)
3106				low = new;
3107			else
3108			if (new > hi)
3109				hi = new;
3110			accum += new;
3111		}
3112	}
3113
3114	return ((accum - hi - low) / (2 * (tries - 2)));
3115}
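
/*
 *	Note: with tries == 7, seven round-trip block/unblock times are
 *	accumulated; discarding the highest and lowest leaves five samples,
 *	and dividing by 2 * (tries - 2) == 10 converts the remaining sum
 *	into an average one-way context switch time.
 */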
3116
3117void
3118sched_tick_thread(void)
3119{
3120	sched_cswtime = time_cswitch();
3121
3122	sched_tick_deadline = mach_absolute_time();
3123
3124	sched_tick_continue();
3125	/*NOTREACHED*/
3126}
3127
3128/*
3129 *	thread_update_scan / runq_scan:
3130 *
3131 *	Scan the run queues to account for timesharing threads
3132 *	which need to be updated.
3133 *
3134 *	Scanner runs in two passes.  Pass one squirrels likely
3135 *	threads away in an array, pass two does the update.
3136 *
3137 *	This is necessary because the run queue is locked for
3138 *	the candidate scan, but	the thread is locked for the update.
3139 *
3140 *	Array should be sized to make forward progress, without
3141 *	disabling preemption for long periods.
3142 */
3143
3144#define	THREAD_UPDATE_SIZE		128
3145
3146static thread_t		thread_update_array[THREAD_UPDATE_SIZE];
3147static int			thread_update_count = 0;
3148
3149/*
3150 *	Scan a runq for candidate threads.
3151 *
3152 *	Returns TRUE if retry is needed.
3153 */
3154static boolean_t
3155runq_scan(
3156	run_queue_t				runq)
3157{
3158	register int			count;
3159	register queue_t		q;
3160	register thread_t		thread;
3161
3162	if ((count = runq->count) > 0) {
3163	    q = runq->queues + runq->highq;
3164		while (count > 0) {
3165			queue_iterate(q, thread, thread_t, links) {
3166				if (		thread->sched_stamp != sched_tick		&&
3167						(thread->sched_mode & TH_MODE_TIMESHARE)	) {
3168					if (thread_update_count == THREAD_UPDATE_SIZE)
3169						return (TRUE);
3170
3171					thread_update_array[thread_update_count++] = thread;
3172					thread_reference_internal(thread);
3173				}
3174
3175				count--;
3176			}
3177
3178			q--;
3179		}
3180	}
3181
3182	return (FALSE);
3183}
3184
3185static void
3186thread_update_scan(void)
3187{
3188	boolean_t			restart_needed = FALSE;
3189	processor_t			processor = processor_list;
3190	processor_set_t		pset;
3191	thread_t			thread;
3192	spl_t				s;
3193
3194	do {
3195		do {
3196			pset = processor->processor_set;
3197
3198			s = splsched();
3199			pset_lock(pset);
3200
3201			restart_needed = runq_scan(&processor->runq);
3202
3203			pset_unlock(pset);
3204			splx(s);
3205
3206			if (restart_needed)
3207				break;
3208
3209			thread = processor->idle_thread;
3210			if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
3211				if (thread_update_count == THREAD_UPDATE_SIZE) {
3212					restart_needed = TRUE;
3213					break;
3214				}
3215
3216				thread_update_array[thread_update_count++] = thread;
3217				thread_reference_internal(thread);
3218			}
3219		} while ((processor = processor->processor_list) != NULL);
3220
3221	    /*
3222	     *	Ok, we now have a collection of candidates -- fix them.
3223	     */
3224	    while (thread_update_count > 0) {
3225			thread = thread_update_array[--thread_update_count];
3226			thread_update_array[thread_update_count] = THREAD_NULL;
3227
3228			s = splsched();
3229			thread_lock(thread);
3230			if (	!(thread->state & (TH_WAIT|TH_SUSP))	&&
3231						thread->sched_stamp != sched_tick	)
3232				update_priority(thread);
3233			thread_unlock(thread);
3234			splx(s);
3235
3236			thread_deallocate(thread);
3237	    }
3238	} while (restart_needed);
3239}
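
/*
 *	Note: pass one (runq_scan) takes an internal reference on each
 *	candidate so it cannot be destroyed between the run queue scan and
 *	the update; pass two drops that reference with thread_deallocate()
 *	after update_priority() has run (or been skipped for waiting or
 *	suspended threads).
 */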
3240
3241/*
3242 *	Just in case someone doesn't use the macro
3243 */
3244#undef	thread_wakeup
3245void
3246thread_wakeup(
3247	event_t		x);
3248
3249void
3250thread_wakeup(
3251	event_t		x)
3252{
3253	thread_wakeup_with_result(x, THREAD_AWAKENED);
3254}
3255
3256boolean_t
3257preemption_enabled(void)
3258{
3259	return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
3260}
3261
3262#if	DEBUG
3263static boolean_t
3264thread_runnable(
3265	thread_t	thread)
3266{
3267	return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
3268}
3269#endif	/* DEBUG */
3270
3271#if	MACH_KDB
3272#include <ddb/db_output.h>
3273#define	printf		kdbprintf
3274void			db_sched(void);
3275
3276void
3277db_sched(void)
3278{
3279	iprintf("Scheduling Statistics:\n");
3280	db_indent += 2;
3281	iprintf("Thread invocations:  csw %d same %d\n",
3282		c_thread_invoke_csw, c_thread_invoke_same);
3283#if	MACH_COUNTERS
3284	iprintf("Thread block:  calls %d\n",
3285		c_thread_block_calls);
3286	iprintf("Idle thread:\n\thandoff %d block %d\n",
3287		c_idle_thread_handoff,
3288		c_idle_thread_block);
3289	iprintf("Sched thread blocks:  %d\n", c_sched_thread_block);
3290#endif	/* MACH_COUNTERS */
3291	db_indent -= 2;
3292}
3293
3294#include <ddb/db_output.h>
3295void		db_show_thread_log(void);
3296
3297void
3298db_show_thread_log(void)
3299{
3300}
3301#endif	/* MACH_KDB */
3302