kern_switch.c revision 131481
/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (N-X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available; we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not, i.e. is it 'earlier'
on the list than that thread or later? If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
***/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 131481 2004-07-02 20:21:44Z jhb $");

#include "opt_full_preemption.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#include <machine/critical.h>

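/*
 * Compile-time sanity check: the run queue status bitmap (RQB_LEN words
 * of RQB_BPW bits each) must provide exactly one bit per run queue.
 */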
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on APs */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_SA) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus KSE, either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes that the original thread is not runnable.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *original;

	mtx_assert(&sched_lock, MA_OWNED);
	original = ke->ke_thread;
	KASSERT(original == NULL || TD_IS_INHIBITED(original),
	    ("reassigning KSE with runnable thread"));
	kg = ke->ke_ksegrp;
	if (original)
		original->td_kse = NULL;

	/*
	 * Find the first unassigned thread
	 */
	if ((td = kg->kg_last_assigned) != NULL)
		td = TAILQ_NEXT(td, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		sched_add(td);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		return;
	}

	ke->ke_state = KES_IDLE;
	ke->ke_thread = NULL;
	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
	kg->kg_idle_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
	return;
}

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* Bring its kse with it, leave the thread attached */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	td->td_priority = newpri;
	setrunqueue(td);
}

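/*
 * Make a thread runnable.  For a non-threaded (non-P_SA) process the
 * thread is simply handed to the scheduler.  For a threaded process the
 * thread is inserted into the ksegrp run queue in priority order, a KSE
 * is found for it (an idle one, one commandeered from a lower priority
 * assigned thread, or its own), and kg_last_assigned is updated as
 * described in the comment at the top of this file.
 */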
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td);
		return;
	}

	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking.
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			sched_rem(tda);
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in (maybe us).
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke->ke_thread);
	}
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
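/*
 * Enter a critical section.  Nesting is tracked in td_critnest; only the
 * outermost entry also calls cpu_critical_enter() for the
 * machine-dependent part.
 */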
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

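/*
 * Exit a critical section.  On the outermost exit, perform any preemption
 * that was deferred while the section was held (PREEMPTION kernels only)
 * and call cpu_critical_exit() for the machine-dependent part.
 */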
void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	if (td->td_critnest == 1) {
#ifdef PREEMPTION
		if (td->td_flags & TDF_OWEPREEMPT) {
			mtx_lock_spin(&sched_lock);
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
#endif
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}

/*
 * This function is called when a thread is about to be put on run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines if the new thread should be immediately preempted to.  If so,
 * it switches to it and eventually returns true.  If not, it returns false
 * so that the caller may place the thread on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
	struct thread *ctd;
	int cpri, pri;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The current thread has a higher (numerically lower) priority.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - If the new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
	    td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
	    !(cpri >= PRI_MIN_IDLE))
		return (0);
#endif
	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_flags |= TDF_OWEPREEMPT;
		return (0);
	}

	/*
	 * Our thread state says that we are already on a run queue, so
	 * update our state as if we had been dequeued by choosethread().
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL, td);
	return (1);
#else
	return (0);
#endif
}

#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
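/*
 * A run queue is an array of RQ_NQS tail queues, one per group of RQ_PPQ
 * priority levels, plus a status bitmap (rq_status) recording which of
 * those queues are currently non-empty.
 */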
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

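/*
 * Debugging aids, normally compiled out.  thread_sanity_check() walks a
 * ksegrp's run queue and verifies the kg_last_assigned invariants
 * described in the comment at the top of this file.
 */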
#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	Debugger(string2);
}

void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int	saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_SA) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
#if 0
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#endif
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif