/* kern_switch.c revision 134591 */
/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg).  If there are X of these KSEs actually running at the
moment in question, then there are at most M (N-X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running.  The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned.  If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue).  If it is also the last assigned we know M was 1 and must
now be 0.  Since the thread is no longer queued that pointer must be
removed from it.  Since we know there were no more KSEs available,
(M was 1 and is now 0) and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can
prove that the next thread in the ksegrp list will not have a KSE to assign
to it, and we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not..  i.e. is it 'earlier'
on the list than that thread or later..  If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP.  If this situation changes, the KSEs are
reassigned to keep this true.
8620425Ssteve***/ 8720425Ssteve 881556Srgrimes#include <sys/cdefs.h> 891556Srgrimes__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 134591 2004-09-01 06:42:02Z julian $"); 901556Srgrimes 911556Srgrimes#include "opt_full_preemption.h" 921556Srgrimes#include "opt_sched.h" 931556Srgrimes 941556Srgrimes#include <sys/param.h> 951556Srgrimes#include <sys/systm.h> 96201053Sjilles#include <sys/kdb.h> 9712043Speter#include <sys/kernel.h> 981556Srgrimes#include <sys/ktr.h> 991556Srgrimes#include <sys/lock.h> 100230118Sjilles#include <sys/mutex.h> 101213760Sobrien#include <sys/proc.h> 1021556Srgrimes#include <sys/queue.h> 1031556Srgrimes#include <sys/sched.h> 1041556Srgrimes#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 1051556Srgrimes#include <sys/smp.h> 1061556Srgrimes#endif 107213811Sobrien#include <machine/critical.h> 108213811Sobrien#if defined(SMP) && defined(SCHED_4BSD) 109229220Sjilles#include <sys/sysctl.h> 1101556Srgrimes#endif 1111556Srgrimes 1121556Srgrimes 1131556SrgrimesCTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 1141556Srgrimes 115201053Sjillesvoid panc(char *string1, char *string2); 116201053Sjilles 1171556Srgrimes#if 0 1181556Srgrimesstatic void runq_readjust(struct runq *rq, struct kse *ke); 1191556Srgrimes#endif 1201556Srgrimes/************************************************************************ 1211556Srgrimes * Functions that manipulate runnability from a thread perspective. * 122194406Sjilles ************************************************************************/ 123218306Sjilles/* 1241556Srgrimes * Select the KSE that will be run next. From that find the thread, and 1251556Srgrimes * remove it from the KSEGRP's run queue. If there is thread clustering, 1261556Srgrimes * this will be what does it. 
1271556Srgrimes */ 1281556Srgrimesstruct thread * 1291556Srgrimeschoosethread(void) 1301556Srgrimes{ 1311556Srgrimes struct kse *ke; 1321556Srgrimes struct thread *td; 13390111Simp struct ksegrp *kg; 13417987Speter 13525225Ssteve#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 1361556Srgrimes if (smp_active == 0 && PCPU_GET(cpuid) != 0) { 1371556Srgrimes /* Shutting down, run idlethread on AP's */ 1381556Srgrimes td = PCPU_GET(idlethread); 1391556Srgrimes ke = td->td_kse; 1401556Srgrimes CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 1411556Srgrimes ke->ke_flags |= KEF_DIDRUN; 1421556Srgrimes TD_SET_RUNNING(td); 1431556Srgrimes return (td); 1441556Srgrimes } 1451556Srgrimes#endif 1461556Srgrimes 1471556Srgrimesretry: 1481556Srgrimes ke = sched_choose(); 1491556Srgrimes if (ke) { 1501556Srgrimes td = ke->ke_thread; 1511556Srgrimes KASSERT((td->td_kse == ke), ("kse/thread mismatch")); 1521556Srgrimes kg = ke->ke_ksegrp; 1531556Srgrimes if (td->td_proc->p_flag & P_SA) { 1541556Srgrimes if (kg->kg_last_assigned == td) { 1551556Srgrimes kg->kg_last_assigned = TAILQ_PREV(td, 1561556Srgrimes threadqueue, td_runq); 1571556Srgrimes } 1581556Srgrimes TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 1591556Srgrimes kg->kg_runnable--; 1601556Srgrimes } 1611556Srgrimes CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", 16290111Simp td, td->td_priority); 16320425Ssteve } else { 1641556Srgrimes /* Simulate runq_choose() having returned the idle thread */ 1651556Srgrimes td = PCPU_GET(idlethread); 1661556Srgrimes ke = td->td_kse; 16720425Ssteve CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 168213811Sobrien } 16990111Simp ke->ke_flags |= KEF_DIDRUN; 17012043Speter 17112043Speter /* 17220425Ssteve * If we are in panic, only allow system threads, 1731556Srgrimes * plus the one we are running in, to be run. 
174100588Stjr */ 175100588Stjr if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && 176100588Stjr (td->td_flags & TDF_INPANIC) == 0)) { 177100588Stjr /* note that it is no longer on the run queue */ 178100588Stjr TD_SET_CAN_RUN(td); 179100588Stjr goto retry; 18012043Speter } 18125225Ssteve 18212043Speter TD_SET_RUNNING(td); 183158143Sstefanf return (td); 184158143Sstefanf} 18512043Speter 18612043Speter/* 187158143Sstefanf * Given a surplus KSE, either assign a new runable thread to it 188158143Sstefanf * (and put it in the run queue) or put it in the ksegrp's idle KSE list. 189238377Spfg * Assumes that the original thread is not runnable. 19012043Speter */ 191158143Sstefanfvoid 192230118Sjilleskse_reassign(struct kse *ke) 193230118Sjilles{ 194158143Sstefanf struct ksegrp *kg; 195158143Sstefanf struct thread *td; 196158143Sstefanf struct thread *original; 197158143Sstefanf 198158143Sstefanf mtx_assert(&sched_lock, MA_OWNED); 199158143Sstefanf original = ke->ke_thread; 20012043Speter KASSERT(original == NULL || TD_IS_INHIBITED(original), 20125225Ssteve ("reassigning KSE with runnable thread")); 20225225Ssteve kg = ke->ke_ksegrp; 203230118Sjilles if (original) 20412043Speter original->td_kse = NULL; 20512043Speter 20612043Speter /* 20712043Speter * Find the first unassigned thread 20812043Speter */ 20912043Speter if ((td = kg->kg_last_assigned) != NULL) 21012043Speter td = TAILQ_NEXT(td, td_runq); 21112043Speter else 21212043Speter td = TAILQ_FIRST(&kg->kg_runq); 21312043Speter 214199629Sjilles /* 21512043Speter * If we found one, assign it the kse, otherwise idle the kse. 
21612043Speter */ 21712043Speter if (td) { 21812043Speter kg->kg_last_assigned = td; 21912043Speter td->td_kse = ke; 22020425Ssteve ke->ke_thread = td; 22112043Speter CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td); 22212043Speter sched_add(td, SRQ_BORING); 22312043Speter return; 22412043Speter } 2251556Srgrimes 2261556Srgrimes ke->ke_state = KES_IDLE; 2271556Srgrimes ke->ke_thread = NULL; 2281556Srgrimes TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist); 2291556Srgrimes kg->kg_idle_kses++; 2301556Srgrimes CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke); 23120425Ssteve return; 23220425Ssteve} 2331556Srgrimes 2341556Srgrimes#if 0 2351556Srgrimes/* 23690111Simp * Remove a thread from its KSEGRP's run queue. 23720425Ssteve * This in turn may remove it from a KSE if it was already assigned 23812043Speter * to one, possibly causing a new thread to be assigned to the KSE 23912043Speter * and the KSE getting a new priority. 24012043Speter */ 24112043Speterstatic void 2421556Srgrimesremrunqueue(struct thread *td) 2431556Srgrimes{ 2441556Srgrimes struct thread *td2, *td3; 2451556Srgrimes struct ksegrp *kg; 2461556Srgrimes struct kse *ke; 2471556Srgrimes 2481556Srgrimes mtx_assert(&sched_lock, MA_OWNED); 2491556Srgrimes KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue")); 2501556Srgrimes kg = td->td_ksegrp; 2511556Srgrimes ke = td->td_kse; 2521556Srgrimes CTR1(KTR_RUNQ, "remrunqueue: td%p", td); 25312043Speter TD_SET_CAN_RUN(td); 25412043Speter /* 25525225Ssteve * If it is not a threaded process, take the shortcut. 
25612043Speter */ 25712043Speter if ((td->td_proc->p_flag & P_SA) == 0) { 2581556Srgrimes /* Bring its kse with it, leave the thread attached */ 2591556Srgrimes sched_rem(td); 2601556Srgrimes ke->ke_state = KES_THREAD; 26112043Speter return; 26212043Speter } 2631556Srgrimes td3 = TAILQ_PREV(td, threadqueue, td_runq); 2641556Srgrimes TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 26512043Speter kg->kg_runnable--; 26612043Speter if (ke) { 26712043Speter /* 26812043Speter * This thread has been assigned to a KSE. 26912043Speter * We need to dissociate it and try assign the 27012043Speter * KSE to the next available thread. Then, we should 27112043Speter * see if we need to move the KSE in the run queues. 27212043Speter */ 2731556Srgrimes sched_rem(td); 27412043Speter ke->ke_state = KES_THREAD; 27512043Speter td2 = kg->kg_last_assigned; 27612043Speter KASSERT((td2 != NULL), ("last assigned has wrong value")); 27712043Speter if (td2 == td) 27812043Speter kg->kg_last_assigned = td3; 27912043Speter kse_reassign(ke); 28012043Speter } 2811556Srgrimes} 28212043Speter#endif 2831556Srgrimes 28412043Speter/* 28512043Speter * Change the priority of a thread that is on the run queue. 28612043Speter */ 28712043Spetervoid 28812043Speteradjustrunqueue( struct thread *td, int newpri) 28912043Speter{ 29012043Speter struct ksegrp *kg; 29112043Speter struct kse *ke; 29212043Speter 29312043Speter mtx_assert(&sched_lock, MA_OWNED); 2941556Srgrimes KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue")); 29512043Speter 29612043Speter ke = td->td_kse; 2971556Srgrimes CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); 2981556Srgrimes /* 29918018Speter * If it is not a threaded process, take the shortcut. 3001556Srgrimes */ 30184261Sobrien if ((td->td_proc->p_flag & P_SA) == 0) { 3021556Srgrimes /* We only care about the kse in the run queue. 
*/ 30384261Sobrien td->td_priority = newpri; 30484261Sobrien if (ke->ke_rqindex != (newpri / RQ_PPQ)) { 3051556Srgrimes sched_rem(td); 3061556Srgrimes sched_add(td, SRQ_BORING); 30718018Speter } 30812043Speter return; 3091556Srgrimes } 31012043Speter 3111556Srgrimes /* It is a threaded process */ 3121556Srgrimes kg = td->td_ksegrp; 31312043Speter TD_SET_CAN_RUN(td); 31412043Speter if (ke) { 31512043Speter if (kg->kg_last_assigned == td) { 3161556Srgrimes kg->kg_last_assigned = 3171556Srgrimes TAILQ_PREV(td, threadqueue, td_runq); 3181556Srgrimes } 3191556Srgrimes sched_rem(td); 320194128Sjilles } 321194128Sjilles TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 322194128Sjilles kg->kg_runnable--; 323194128Sjilles td->td_priority = newpri; 324194128Sjilles setrunqueue(td, SRQ_BORING); 325194128Sjilles} 326194128Sjilles 327194128Sjillesvoid 328194128Sjillessetrunqueue(struct thread *td, int flags) 329194128Sjilles{ 330194128Sjilles struct kse *ke; 331194128Sjilles struct ksegrp *kg; 332194128Sjilles struct thread *td2; 333194128Sjilles struct thread *tda; 334194128Sjilles int count; 335194128Sjilles 336194128Sjilles CTR4(KTR_RUNQ, "setrunqueue: td:%p ke:%p kg:%p pid:%d", 3371556Srgrimes td, td->td_kse, td->td_ksegrp, td->td_proc->p_pid); 3381556Srgrimes mtx_assert(&sched_lock, MA_OWNED); 3391556Srgrimes KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), 3401556Srgrimes ("setrunqueue: bad thread state")); 3411556Srgrimes TD_SET_RUNQ(td); 34290111Simp kg = td->td_ksegrp; 34390111Simp if ((td->td_proc->p_flag & P_SA) == 0) { 3441556Srgrimes /* 3451556Srgrimes * Common path optimisation: Only one of everything 3461556Srgrimes * and the KSE is always already attached. 3471556Srgrimes * Totally ignore the ksegrp run queue. 
3481556Srgrimes */ 3491556Srgrimes sched_add(td, flags); 3501556Srgrimes return; 3511556Srgrimes } 3521556Srgrimes 353242895Sjilles tda = kg->kg_last_assigned; 35490111Simp if ((ke = td->td_kse) == NULL) { 3551556Srgrimes if (kg->kg_idle_kses) { 3561556Srgrimes /* 3571556Srgrimes * There is a free one so it's ours for the asking.. 358199629Sjilles */ 3591556Srgrimes ke = TAILQ_FIRST(&kg->kg_iq); 3601556Srgrimes CTR2(KTR_RUNQ, "setrunqueue: kg:%p: Use free ke:%p", 3611556Srgrimes kg, ke); 3621556Srgrimes TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); 3631556Srgrimes ke->ke_state = KES_THREAD; 3641556Srgrimes kg->kg_idle_kses--; 3651556Srgrimes } else if (tda && (tda->td_priority > td->td_priority)) { 3661556Srgrimes /* 36712043Speter * None free, but there is one we can commandeer. 368242895Sjilles */ 3691556Srgrimes ke = tda->td_kse; 370242895Sjilles CTR3(KTR_RUNQ, 3711556Srgrimes "setrunqueue: kg:%p: take ke:%p from td: %p", 3721556Srgrimes kg, ke, tda); 3731556Srgrimes sched_rem(tda); 3741556Srgrimes tda->td_kse = NULL; 3751556Srgrimes ke->ke_thread = NULL; 376229220Sjilles tda = kg->kg_last_assigned = 37790111Simp TAILQ_PREV(tda, threadqueue, td_runq); 3781556Srgrimes } 3791556Srgrimes } else { 3801556Srgrimes /* 3811556Srgrimes * Temporarily disassociate so it looks like the other cases. 3821556Srgrimes */ 3831556Srgrimes ke->ke_thread = NULL; 38412043Speter td->td_kse = NULL; 385199629Sjilles } 3861556Srgrimes 3871556Srgrimes /* 3881556Srgrimes * Add the thread to the ksegrp's run queue at 3891556Srgrimes * the appropriate place. 
3901556Srgrimes */ 3911556Srgrimes count = 0; 3921556Srgrimes TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { 3931556Srgrimes if (td2->td_priority > td->td_priority) { 3941556Srgrimes kg->kg_runnable++; 3951556Srgrimes TAILQ_INSERT_BEFORE(td2, td, td_runq); 3961556Srgrimes break; 3971556Srgrimes } 3981556Srgrimes /* XXX Debugging hack */ 3991556Srgrimes if (++count > 10000) { 400200956Sjilles printf("setrunqueue(): corrupt kq_runq, td= %p\n", td); 40117987Speter panic("deadlock in setrunqueue"); 4021556Srgrimes } 4031556Srgrimes } 4041556Srgrimes if (td2 == NULL) { 4051556Srgrimes /* We ran off the end of the TAILQ or it was empty. */ 4061556Srgrimes kg->kg_runnable++; 407222684Sjilles TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); 4081556Srgrimes } 409124780Sdes 4101556Srgrimes /* 4111556Srgrimes * If we have a ke to use, then put it on the run queue and 4121556Srgrimes * If needed, readjust the last_assigned pointer. 4131556Srgrimes */ 4141556Srgrimes if (ke) { 4151556Srgrimes if (tda == NULL) { 4161556Srgrimes /* 4171556Srgrimes * No pre-existing last assigned so whoever is first 4181556Srgrimes * gets the KSE we brought in.. (maybe us) 4191556Srgrimes */ 4201556Srgrimes td2 = TAILQ_FIRST(&kg->kg_runq); 4211556Srgrimes KASSERT((td2->td_kse == NULL), 4221556Srgrimes ("unexpected ke present")); 4231556Srgrimes td2->td_kse = ke; 4241556Srgrimes ke->ke_thread = td2; 4251556Srgrimes kg->kg_last_assigned = td2; 42690111Simp } else if (tda->td_priority > td->td_priority) { 42717987Speter /* 42825225Ssteve * It's ours, grab it, but last_assigned is past us 4291556Srgrimes * so don't change it. 4301556Srgrimes */ 431230118Sjilles td->td_kse = ke; 4321556Srgrimes ke->ke_thread = td; 4331556Srgrimes } else { 4341556Srgrimes /* 4351556Srgrimes * We are past last_assigned, so 4361556Srgrimes * put the new kse on whatever is next, 437230118Sjilles * which may or may not be us. 
43812043Speter */ 4391556Srgrimes td2 = TAILQ_NEXT(tda, td_runq); 4401556Srgrimes kg->kg_last_assigned = td2; 4411556Srgrimes td2->td_kse = ke; 4421556Srgrimes ke->ke_thread = td2; 4431556Srgrimes } 4441556Srgrimes sched_add(ke->ke_thread, flags); 4451556Srgrimes } else { 4461556Srgrimes CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d", 4471556Srgrimes td, td->td_ksegrp, td->td_proc->p_pid); 44890111Simp } 44990111Simp} 4501556Srgrimes 4511556Srgrimes/* 4521556Srgrimes * Kernel thread preemption implementation. Critical sections mark 4531556Srgrimes * regions of code in which preemptions are not allowed. 45412043Speter */ 4551556Srgrimesvoid 4561556Srgrimescritical_enter(void) 4571556Srgrimes{ 4581556Srgrimes struct thread *td; 4591556Srgrimes 4601556Srgrimes td = curthread; 4611556Srgrimes if (td->td_critnest == 0) 4621556Srgrimes cpu_critical_enter(td); 4631556Srgrimes td->td_critnest++; 4641556Srgrimes} 4651556Srgrimes 4661556Srgrimesvoid 467213811Sobriencritical_exit(void) 46890111Simp{ 46990111Simp struct thread *td; 4701556Srgrimes 4711556Srgrimes td = curthread; 4721556Srgrimes KASSERT(td->td_critnest != 0, 47312043Speter ("critical_exit: td_critnest == 0")); 4741556Srgrimes if (td->td_critnest == 1) { 4751556Srgrimes#ifdef PREEMPTION 4761556Srgrimes mtx_assert(&sched_lock, MA_NOTOWNED); 4771556Srgrimes if (td->td_pflags & TDP_OWEPREEMPT) { 4781556Srgrimes mtx_lock_spin(&sched_lock); 4791556Srgrimes mi_switch(SW_INVOL, NULL); 4801556Srgrimes mtx_unlock_spin(&sched_lock); 4811556Srgrimes } 4821556Srgrimes#endif 4831556Srgrimes td->td_critnest = 0; 4841556Srgrimes cpu_critical_exit(td); 4851556Srgrimes } else { 48690111Simp td->td_critnest--; 48790111Simp } 4881556Srgrimes} 4891556Srgrimes 4901556Srgrimes/* 4911556Srgrimes * This function is called when a thread is about to be put on run queue 4921556Srgrimes * because it has been made runnable or its priority has been adjusted. 
It 4931556Srgrimes * determines if the new thread should be immediately preempted to. If so, 4941556Srgrimes * it switches to it and eventually returns true. If not, it returns false 4951556Srgrimes * so that the caller may place the thread on an appropriate run queue. 4961556Srgrimes */ 4971556Srgrimesint 4981556Srgrimesmaybe_preempt(struct thread *td) 4991556Srgrimes{ 50012043Speter#ifdef PREEMPTION 5011556Srgrimes struct thread *ctd; 5021556Srgrimes int cpri, pri; 5031556Srgrimes#endif 5041556Srgrimes 5051556Srgrimes mtx_assert(&sched_lock, MA_OWNED); 5061556Srgrimes#ifdef PREEMPTION 5071556Srgrimes /* 508199647Sjilles * The new thread should not preempt the current thread if any of the 509199647Sjilles * following conditions are true: 510199647Sjilles * 511199647Sjilles * - The current thread has a higher (numerically lower) or 512199647Sjilles * equivalent priority. Note that this prevents curthread from 513199647Sjilles * trying to preempt to itself. 514199647Sjilles * - It is too early in the boot for context switches (cold is set). 515199647Sjilles * - The current thread has an inhibitor set or is in the process of 516199647Sjilles * exiting. In this case, the current thread is about to switch 517199647Sjilles * out anyways, so there's no point in preempting. If we did, 518199647Sjilles * the current thread would not be properly resumed as well, so 519199647Sjilles * just avoid that whole landmine. 520199647Sjilles * - If the new thread's priority is not a realtime priority and 521199647Sjilles * the current thread's priority is not an idle priority and 522199647Sjilles * FULL_PREEMPTION is disabled. 523199647Sjilles * 524199647Sjilles * If all of these conditions are false, but the current thread is in 525199647Sjilles * a nested critical section, then we have to defer the preemption 526199647Sjilles * until we exit the critical section. Otherwise, switch immediately 527199647Sjilles * to the new thread. 
528199647Sjilles */ 529199647Sjilles ctd = curthread; 530199647Sjilles if (ctd->td_kse == NULL || ctd->td_kse->ke_thread != ctd) 531199647Sjilles return (0); 532199647Sjilles pri = td->td_priority; 533199647Sjilles cpri = ctd->td_priority; 5341556Srgrimes if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) || 5351556Srgrimes td->td_kse->ke_state != KES_THREAD) 5361556Srgrimes return (0); 5371556Srgrimes#ifndef FULL_PREEMPTION 53890111Simp if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) && 53990111Simp !(cpri >= PRI_MIN_IDLE)) 5401556Srgrimes return (0); 5411556Srgrimes#endif 5421556Srgrimes if (ctd->td_critnest > 1) { 5431556Srgrimes CTR1(KTR_PROC, "maybe_preempt: in critical section %d", 5441556Srgrimes ctd->td_critnest); 5451556Srgrimes ctd->td_pflags |= TDP_OWEPREEMPT; 5461556Srgrimes return (0); 5471556Srgrimes } 5481556Srgrimes 5491556Srgrimes /* 5501556Srgrimes * Our thread state says that we are already on a run queue, so 5511556Srgrimes * update our state as if we had been dequeued by choosethread(). 55290111Simp */ 55390111Simp MPASS(TD_ON_RUNQ(td)); 5541556Srgrimes TD_SET_RUNNING(td); 5551556Srgrimes CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 5561556Srgrimes td->td_proc->p_pid, td->td_proc->p_comm); 5571556Srgrimes mi_switch(SW_INVOL, td); 5581556Srgrimes return (1); 5591556Srgrimes#else 560 return (0); 561#endif 562} 563 564#if 0 565#ifndef PREEMPTION 566/* XXX: There should be a non-static version of this. 
*/ 567static void 568printf_caddr_t(void *data) 569{ 570 printf("%s", (char *)data); 571} 572static char preempt_warning[] = 573 "WARNING: Kernel preemption is disabled, expect reduced performance.\n"; 574SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t, 575 preempt_warning) 576#endif 577#endif 578 579/************************************************************************ 580 * SYSTEM RUN QUEUE manipulations and tests * 581 ************************************************************************/ 582/* 583 * Initialize a run structure. 584 */ 585void 586runq_init(struct runq *rq) 587{ 588 int i; 589 590 bzero(rq, sizeof *rq); 591 for (i = 0; i < RQ_NQS; i++) 592 TAILQ_INIT(&rq->rq_queues[i]); 593} 594 595/* 596 * Clear the status bit of the queue corresponding to priority level pri, 597 * indicating that it is empty. 598 */ 599static __inline void 600runq_clrbit(struct runq *rq, int pri) 601{ 602 struct rqbits *rqb; 603 604 rqb = &rq->rq_status; 605 CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", 606 rqb->rqb_bits[RQB_WORD(pri)], 607 rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), 608 RQB_BIT(pri), RQB_WORD(pri)); 609 rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); 610} 611 612/* 613 * Find the index of the first non-empty run queue. This is done by 614 * scanning the status bits, a set bit indicates a non-empty queue. 615 */ 616static __inline int 617runq_findbit(struct runq *rq) 618{ 619 struct rqbits *rqb; 620 int pri; 621 int i; 622 623 rqb = &rq->rq_status; 624 for (i = 0; i < RQB_LEN; i++) 625 if (rqb->rqb_bits[i]) { 626 pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); 627 CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", 628 rqb->rqb_bits[i], i, pri); 629 return (pri); 630 } 631 632 return (-1); 633} 634 635/* 636 * Set the status bit of the queue corresponding to priority level pri, 637 * indicating that it is non-empty. 
638 */ 639static __inline void 640runq_setbit(struct runq *rq, int pri) 641{ 642 struct rqbits *rqb; 643 644 rqb = &rq->rq_status; 645 CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", 646 rqb->rqb_bits[RQB_WORD(pri)], 647 rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), 648 RQB_BIT(pri), RQB_WORD(pri)); 649 rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); 650} 651 652/* 653 * Add the KSE to the queue specified by its priority, and set the 654 * corresponding status bit. 655 */ 656void 657runq_add(struct runq *rq, struct kse *ke) 658{ 659 struct rqhead *rqh; 660 int pri; 661 662 pri = ke->ke_thread->td_priority / RQ_PPQ; 663 ke->ke_rqindex = pri; 664 runq_setbit(rq, pri); 665 rqh = &rq->rq_queues[pri]; 666 CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p", 667 ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); 668 TAILQ_INSERT_TAIL(rqh, ke, ke_procq); 669} 670 671/* 672 * Return true if there are runnable processes of any priority on the run 673 * queue, false otherwise. Has no side effects, does not modify the run 674 * queue structure. 675 */ 676int 677runq_check(struct runq *rq) 678{ 679 struct rqbits *rqb; 680 int i; 681 682 rqb = &rq->rq_status; 683 for (i = 0; i < RQB_LEN; i++) 684 if (rqb->rqb_bits[i]) { 685 CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", 686 rqb->rqb_bits[i], i); 687 return (1); 688 } 689 CTR0(KTR_RUNQ, "runq_check: empty"); 690 691 return (0); 692} 693 694#if defined(SMP) && defined(SCHED_4BSD) 695int runq_fuzz = 1; 696SYSCTL_DECL(_kern_sched); 697SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, ""); 698#endif 699 700/* 701 * Find the highest priority process on the run queue. 702 */ 703struct kse * 704runq_choose(struct runq *rq) 705{ 706 struct rqhead *rqh; 707 struct kse *ke; 708 int pri; 709 710 mtx_assert(&sched_lock, MA_OWNED); 711 while ((pri = runq_findbit(rq)) != -1) { 712 rqh = &rq->rq_queues[pri]; 713#if defined(SMP) && defined(SCHED_4BSD) 714 /* fuzz == 1 is normal.. 
0 or less are ignored */ 715 if (runq_fuzz > 1) { 716 /* 717 * In the first couple of entries, check if 718 * there is one for our CPU as a preference. 719 */ 720 int count = runq_fuzz; 721 int cpu = PCPU_GET(cpuid); 722 struct kse *ke2; 723 ke2 = ke = TAILQ_FIRST(rqh); 724 725 while (count-- && ke2) { 726 if (ke->ke_thread->td_lastcpu == cpu) { 727 ke = ke2; 728 break; 729 } 730 ke2 = TAILQ_NEXT(ke2, ke_procq); 731 } 732 } else 733#endif 734 ke = TAILQ_FIRST(rqh); 735 KASSERT(ke != NULL, ("runq_choose: no proc on busy queue")); 736 CTR3(KTR_RUNQ, 737 "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh); 738 return (ke); 739 } 740 CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri); 741 742 return (NULL); 743} 744 745/* 746 * Remove the KSE from the queue specified by its priority, and clear the 747 * corresponding status bit if the queue becomes empty. 748 * Caller must set ke->ke_state afterwards. 749 */ 750void 751runq_remove(struct runq *rq, struct kse *ke) 752{ 753 struct rqhead *rqh; 754 int pri; 755 756 KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 757 ("runq_remove: process swapped out")); 758 pri = ke->ke_rqindex; 759 rqh = &rq->rq_queues[pri]; 760 CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p", 761 ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); 762 KASSERT(ke != NULL, ("runq_remove: no proc on busy queue")); 763 TAILQ_REMOVE(rqh, ke, ke_procq); 764 if (TAILQ_EMPTY(rqh)) { 765 CTR0(KTR_RUNQ, "runq_remove: empty"); 766 runq_clrbit(rq, pri); 767 } 768} 769 770#if 0 771void 772panc(char *string1, char *string2) 773{ 774 printf("%s", string1); 775 kdb_enter(string2); 776} 777 778void 779thread_sanity_check(struct thread *td, char *string) 780{ 781 struct proc *p; 782 struct ksegrp *kg; 783 struct kse *ke; 784 struct thread *td2 = NULL; 785 unsigned int prevpri; 786 int saw_lastassigned = 0; 787 int unassigned = 0; 788 int assigned = 0; 789 790 p = td->td_proc; 791 kg = td->td_ksegrp; 792 ke = td->td_kse; 793 794 795 if (ke) { 796 if (p != 
ke->ke_proc) { 797 panc(string, "wrong proc"); 798 } 799 if (ke->ke_thread != td) { 800 panc(string, "wrong thread"); 801 } 802 } 803 804 if ((p->p_flag & P_SA) == 0) { 805 if (ke == NULL) { 806 panc(string, "non KSE thread lost kse"); 807 } 808 } else { 809 prevpri = 0; 810 saw_lastassigned = 0; 811 unassigned = 0; 812 assigned = 0; 813 TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { 814 if (td2->td_priority < prevpri) { 815 panc(string, "thread runqueue unosorted"); 816 } 817 if ((td2->td_state == TDS_RUNQ) && 818 td2->td_kse && 819 (td2->td_kse->ke_state != KES_ONRUNQ)) { 820 panc(string, "KSE wrong state"); 821 } 822 prevpri = td2->td_priority; 823 if (td2->td_kse) { 824 assigned++; 825 if (unassigned) { 826 panc(string, "unassigned before assigned"); 827 } 828 if (kg->kg_last_assigned == NULL) { 829 panc(string, "lastassigned corrupt"); 830 } 831 if (saw_lastassigned) { 832 panc(string, "last assigned not last"); 833 } 834 if (td2->td_kse->ke_thread != td2) { 835 panc(string, "mismatched kse/thread"); 836 } 837 } else { 838 unassigned++; 839 } 840 if (td2 == kg->kg_last_assigned) { 841 saw_lastassigned = 1; 842 if (td2->td_kse == NULL) { 843 panc(string, "last assigned not assigned"); 844 } 845 } 846 } 847 if (kg->kg_last_assigned && (saw_lastassigned == 0)) { 848 panc(string, "where on earth does lastassigned point?"); 849 } 850#if 0 851 FOREACH_THREAD_IN_GROUP(kg, td2) { 852 if (((td2->td_flags & TDF_UNBOUND) == 0) && 853 (TD_ON_RUNQ(td2))) { 854 assigned++; 855 if (td2->td_kse == NULL) { 856 panc(string, "BOUND thread with no KSE"); 857 } 858 } 859 } 860#endif 861#if 0 862 if ((unassigned + assigned) != kg->kg_runnable) { 863 panc(string, "wrong number in runnable"); 864 } 865#endif 866 } 867 if (assigned == 12345) { 868 printf("%p %p %p %p %p %d, %d", 869 td, td2, ke, kg, p, assigned, saw_lastassigned); 870 } 871} 872#endif 873 874