/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 108338 2002-12-28 01:23:07Z julian $
 */

/***

Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, because all the available KSEs are actively running or because
there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued that pointer must be
removed from it. Since we know there were no more KSEs available,
(M was 1 and is now 0) and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.

*/

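/*
 * Illustrative sketch, never compiled: a minimal version of the
 * 'steal' rule described above, using a hypothetical helper name.
 * When a newly runnable thread sorts earlier (better priority) than
 * the last assigned thread, it commandeers that thread's KSE and the
 * last_assigned pointer is backed up.  Compare setrunqueue() below.
 */
#if 0
static void
example_maybe_steal_kse(struct ksegrp *kg, struct thread *td)
{
	struct thread *tda = kg->kg_last_assigned;
	struct kse *ke;

	if (tda != NULL && tda->td_priority > td->td_priority) {
		/* td is 'earlier' on the list; take the KSE from tda. */
		ke = tda->td_kse;
		tda->td_kse = NULL;
		ke->ke_thread = NULL;
		/* Back the pointer up to the previous queued thread. */
		kg->kg_last_assigned = TAILQ_PREV(tda, threadqueue, td_runq);
		td->td_kse = ke;
		ke->ke_thread = td;
	}
}
#endif
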
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <machine/critical.h>

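/* The status bitmap must provide exactly one bit per run queue. */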
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

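/* Debugger hook used by the (disabled) sanity-check code below. */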
void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

retry:
	if ((ke = sched_choose())) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (TD_IS_UNBOUND(td)) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * Only allow non-system threads to run in panic
	 * if they are the one we are tracing.  (I think.. [JRE])
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0))
		goto retry;

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a KSE (now surplus or at least loanable), either assign a new
 * runnable thread to it (and put it in the run queue) or put it in
 * the ksegrp's idle KSE list.
 * Or maybe give it back to its owner if it's been loaned.
 * Assumes that the original thread is either not runnable or
 * already on the run queue.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *owner;
	struct thread *original;
	int loaned;

	KASSERT((ke->ke_owner), ("reassigning KSE with no owner"));
	KASSERT((ke->ke_thread && TD_IS_INHIBITED(ke->ke_thread)),
	    ("reassigning KSE with no or runnable thread"));
	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	owner = ke->ke_owner;
	loaned = TD_LENDER(owner);
	original = ke->ke_thread;

	if (TD_CAN_UNBIND(original) && (original->td_standin)) {
		KASSERT((owner == original),
		    ("Early thread borrowing?"));
		/*
		 * The outgoing thread is "threaded" and has never
		 * scheduled an upcall.
		 * Decide whether this is a short or long term event
		 * and thus whether or not to schedule an upcall.
		 * If it is a short term event, just suspend it in
		 * a way that takes its KSE with it.
		 * Select the events for which we want to schedule upcalls.
		 * For now it's just sleep.
		 * Other threads that still have not fired an upcall
		 * are held to their KSE using the temporary binding.
		 */
		if (TD_ON_SLEEPQ(original)) {
			/*
			 * A bound thread that can still unbind itself
			 * has been scheduled out.
			 * If it is sleeping, then we need to schedule an
			 * upcall.
			 * XXXKSE eventually almost any inhibition could do.
			 */
			original->td_flags &= ~TDF_CAN_UNBIND;
			original->td_flags |= TDF_UNBOUND;
			thread_schedule_upcall(original, ke);
			owner = ke->ke_owner;
			loaned = 1;
		}
	}

	/*
	 * If the current thread was borrowing, then make things consistent
	 * by giving it back to the owner for the moment. The original thread
	 * must be unbound and have already used its chance for
	 * firing off an upcall. Threads that have not yet made an upcall
	 * cannot borrow KSEs.
	 */
	if (loaned) {
		KASSERT((original->td_standin == NULL),
		    ("kse_reassign: borrower still has standin thread"));
		TD_CLR_LOAN(owner);
		ke->ke_thread = owner;
		original->td_kse = NULL; /* give it amnesia */
		/*
		 * Upcalling threads have lower priority than all
		 * in-kernel threads, however threads that have loaned out
		 * their KSE and are NOT upcalling have the priority that
		 * they have. In other words, only look for other work if
		 * the owner is not runnable, OR is upcalling.
		 */
		if (TD_CAN_RUN(owner) &&
		    ((owner->td_flags & TDF_UPCALLING) == 0)) {
			setrunnable(owner);
			CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
			    ke, owner);
			return;
		}
	}

	/*
	 * Either the owner is not runnable, or is an upcall.
	 * Find the first unassigned thread.
	 * If there is a 'last assigned' then see what's next;
	 * otherwise look at what is first.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the kse; otherwise idle the kse.
	 */
	if (td) {
		/*
		 * Assign the new thread to the KSE
		 * and make the KSE runnable again.
		 */
		if (TD_IS_BOUND(owner)) {
			/*
			 * If there is a reason to keep the previous
			 * owner, do so.
			 */
			TD_SET_LOAN(owner);
		} else {
			/* Otherwise, cut it free. */
			ke->ke_owner = td;
			owner->td_kse = NULL;
		}
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		sched_add(ke);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		return;
	}

	/*
	 * Now handle any waiting upcall,
	 * since we didn't make them runnable before.
	 */
	if (TD_CAN_RUN(owner)) {
		setrunnable(owner);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
		    ke, owner);
		return;
	}

	/*
	 * It is possible that this is the last thread in the group
	 * because the KSE is being shut down or the process
	 * is exiting.
	 */
	if (TD_IS_EXITING(owner) || (ke->ke_flags & KEF_EXIT)) {
		ke->ke_thread = NULL;
		owner->td_kse = NULL;
		kse_unlink(ke);
		return;
	}

	/*
	 * At this stage all we know is that the owner
	 * is the same as the 'active' thread in the KSE
	 * and that it is presently NOT loaned out.
	 * Put it on the loanable queue. Make it FIFO
	 * so that long term sleepers donate their KSEs first.
	 */
	KASSERT((TD_IS_BOUND(owner)), ("kse_reassign: UNBOUND lender"));
	ke->ke_state = KES_THREAD;
	ke->ke_flags |= KEF_ONLOANQ;
	TAILQ_INSERT_TAIL(&kg->kg_lq, ke, ke_kgrlist);
	kg->kg_loan_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on loan queue", ke);
	return;
}

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if (TD_IS_BOUND(td)) {
		/* Bring its kse with it, leave the thread attached. */
		sched_rem(ke);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(ke);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	if (TD_IS_BOUND(td)) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(ke);
			sched_add(ke);
		}
		return;
	}
	/*
	 * An unbound thread. This is not optimised yet.
	 */
	kg = td->td_ksegrp;
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(ke);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	td->td_priority = newpri;
	setrunqueue(td);
}

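/*
 * Add a thread to the system run queue, first giving it a KSE
 * (its own, a borrowed one, or one commandeered from a lower
 * priority thread) as described in the comment at the top of
 * this file.
 */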
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_proc->p_flag & P_KSES) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td->td_kse);
		return;
	}
	/*
	 * If the process is threaded but the thread is bound then
	 * there is still a little extra to do re. KSE loaning.
	 */
	if (TD_IS_BOUND(td)) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		ke = td->td_kse;
		KASSERT((ke->ke_owner == ke->ke_thread),
		    ("setrunqueue: Hey KSE loaned out"));
		if (ke->ke_flags & KEF_ONLOANQ) {
			ke->ke_flags &= ~KEF_ONLOANQ;
			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
			kg->kg_loan_kses--;
		}
		sched_add(td->td_kse);
		return;
	}

	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one..
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one..
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_loan_kses) {
			/*
			 * Failing that, see if we can borrow one.
			 */
			ke = TAILQ_FIRST(&kg->kg_lq);
			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
			ke->ke_flags &= ~KEF_ONLOANQ;
			ke->ke_state = KES_THREAD;
			TD_SET_LOAN(ke->ke_owner);
			ke->ke_thread = NULL;
			kg->kg_loan_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			sched_rem(ke);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 * If the owner wasn't lending before, then it is now..
		 */
		if (!TD_LENDER(ke->ke_owner)) {
			TD_SET_LOAN(ke->ke_owner);
		}
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke);
	}
}

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}

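/*
 * Illustrative sketch, never compiled: critical sections nest, so
 * only the outermost enter/exit pair reaches the hardware through
 * cpu_critical_enter()/cpu_critical_exit().
 */
#if 0
static void
example_critical_usage(void)
{
	critical_enter();	/* outermost: calls cpu_critical_enter() */
	critical_enter();	/* nested: only bumps td_critnest */
	/* ... code that must not be preempted ... */
	critical_exit();	/* nested: only drops td_critnest */
	critical_exit();	/* outermost: calls cpu_critical_exit() */
}
#endif
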

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

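/*
 * Worked example, assuming 32-bit status words (RQB_BPW == 32,
 * RQB_L2BPW == 5): queue index 40 lives in word 40 >> 5 == 1 at bit
 * 40 & 31 == 8, so RQB_WORD(40) == 1 and RQB_BIT(40) == 1 << 8.
 * runq_findbit() reverses this: bit 8 set in word 1 yields queue
 * index (1 << 5) + 8 == 40.
 */
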
/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority KSE on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

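/*
 * Illustrative sketch, never compiled: the run queue API as used by
 * a scheduler.  'my_runq' and 'my_kse' are hypothetical names, not
 * part of this file.
 */
#if 0
static struct runq my_runq;

static void
example_runq_usage(struct kse *my_kse)
{
	struct kse *ke;

	runq_init(&my_runq);		/* empty queues, clear status bits */
	runq_add(&my_runq, my_kse);	/* queue at my_kse's priority */
	if (runq_check(&my_runq)) {	/* anything runnable at all? */
		ke = runq_choose(&my_runq);	/* peek highest priority KSE */
		runq_remove(&my_runq, ke);	/* dequeue it */
		ke->ke_state = KES_THREAD;	/* caller must set ke_state */
	}
}
#endif
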
#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	Debugger(string2);
}

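/*
 * Check the ksegrp run queue / KSE assignment invariants described
 * at the top of this file, calling panc() on the first violation.
 */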
void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int	saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;


	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif