kern_switch.c revision 131481
150027Speter/* 272376Sjake * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> 372376Sjake * All rights reserved. 450027Speter * 550027Speter * Redistribution and use in source and binary forms, with or without 650027Speter * modification, are permitted provided that the following conditions 750027Speter * are met: 850027Speter * 1. Redistributions of source code must retain the above copyright 950027Speter * notice, this list of conditions and the following disclaimer. 1050027Speter * 2. Redistributions in binary form must reproduce the above copyright 1150027Speter * notice, this list of conditions and the following disclaimer in the 1250027Speter * documentation and/or other materials provided with the distribution. 1350027Speter * 1450027Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1550027Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1650027Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1750027Speter * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1850027Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1950027Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2050027Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2150027Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2250027Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2350027Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2450027Speter * SUCH DAMAGE. 2550027Speter */ 2650027Speter 2799072Sjulian/*** 2899072SjulianHere is the logic.. 2999072Sjulian 3099072SjulianIf there are N processors, then there are at most N KSEs (kernel 3199072Sjulianschedulable entities) working to process threads that belong to a 32123499SrwatsonKSEGROUP (kg). If there are X of these KSEs actually running at the 3399072Sjulianmoment in question, then there are at most M (N-X) of these KSEs on 3499072Sjulianthe run queue, as running KSEs are not on the queue. 3599072Sjulian 3699072SjulianRunnable threads are queued off the KSEGROUP in priority order. 3799072SjulianIf there are M or more threads runnable, the top M threads 3899072Sjulian(by priority) are 'preassigned' to the M KSEs not running. The KSEs take 3999072Sjuliantheir priority from those threads and are put on the run queue. 4099072Sjulian 4199072SjulianThe last thread that had a priority high enough to have a KSE associated 4299072Sjulianwith it, AND IS ON THE RUN QUEUE is pointed to by 4399072Sjuliankg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs 4499072Sjulianassigned as all the available KSEs are activly running, or because there 4599072Sjulianare no threads queued, that pointer is NULL. 4699072Sjulian 4799072SjulianWhen a KSE is removed from the run queue to become runnable, we know 4899072Sjulianit was associated with the highest priority thread in the queue (at the head 4999072Sjulianof the queue). If it is also the last assigned we know M was 1 and must 5099072Sjuliannow be 0. Since the thread is no longer queued that pointer must be 5199072Sjulianremoved from it. Since we know there were no more KSEs available, 5299072Sjulian(M was 1 and is now 0) and since we are not FREEING our KSE 5399072Sjulianbut using it, we know there are STILL no more KSEs available, we can prove 5499072Sjulianthat the next thread in the ksegrp list will not have a KSE to assign to 5599072Sjulianit, so we can show that the pointer must be made 'invalid' (NULL). 5699072Sjulian 5799072SjulianThe pointer exists so that when a new thread is made runnable, it can 5899072Sjulianhave its priority compared with the last assigned thread to see if 5999072Sjulianit should 'steal' its KSE or not.. i.e. is it 'earlier' 6099072Sjulianon the list than that thread or later.. If it's earlier, then the KSE is 6199072Sjulianremoved from the last assigned (which is now not assigned a KSE) 6299072Sjulianand reassigned to the new thread, which is placed earlier in the list. 6399072SjulianThe pointer is then backed up to the previous thread (which may or may not 6499072Sjulianbe the new thread). 6599072Sjulian 6699072SjulianWhen a thread sleeps or is removed, the KSE becomes available and if there 6799072Sjulianare queued threads that are not assigned KSEs, the highest priority one of 6899072Sjulianthem is assigned the KSE, which is then placed back on the run queue at 6999072Sjulianthe approipriate place, and the kg->kg_last_assigned pointer is adjusted down 7099072Sjulianto point to it. 7199072Sjulian 7299072SjulianThe following diagram shows 2 KSEs and 3 threads from a single process. 7399072Sjulian 7499072Sjulian RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads) 7599072Sjulian \ \____ 7699072Sjulian \ \ 7799072Sjulian KSEGROUP---thread--thread--thread (queued in priority order) 7899072Sjulian \ / 7999072Sjulian \_______________/ 8099072Sjulian (last_assigned) 8199072Sjulian 8299072SjulianThe result of this scheme is that the M available KSEs are always 8399072Sjulianqueued at the priorities they have inherrited from the M highest priority 8499072Sjulianthreads for that KSEGROUP. If this situation changes, the KSEs are 8599072Sjulianreassigned to keep this true. 86116182Sobrien***/ 8799072Sjulian 88116182Sobrien#include <sys/cdefs.h> 89116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 131481 2004-07-02 20:21:44Z jhb $"); 90116182Sobrien 91131481Sjhb#include "opt_full_preemption.h" 92131481Sjhb 9350027Speter#include <sys/param.h> 9450027Speter#include <sys/systm.h> 9550027Speter#include <sys/kernel.h> 9665557Sjasone#include <sys/ktr.h> 9774914Sjhb#include <sys/lock.h> 9867365Sjhb#include <sys/mutex.h> 9950027Speter#include <sys/proc.h> 10050027Speter#include <sys/queue.h> 101104964Sjeff#include <sys/sched.h> 102122849Speter#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 103112993Speter#include <sys/smp.h> 104112993Speter#endif 10593607Sdillon#include <machine/critical.h> 10650027Speter 10797261SjakeCTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 10897261Sjake 109104695Sjulianvoid panc(char *string1, char *string2); 110104695Sjulian 111104695Sjulian#if 0 11299072Sjulianstatic void runq_readjust(struct runq *rq, struct kse *ke); 113104695Sjulian#endif 11499072Sjulian/************************************************************************ 11599072Sjulian * Functions that manipulate runnability from a thread perspective. * 11699072Sjulian ************************************************************************/ 11750027Speter/* 118111028Sjeff * Select the KSE that will be run next. From that find the thread, and 11999072Sjulian * remove it from the KSEGRP's run queue. If there is thread clustering, 12099072Sjulian * this will be what does it. 12150027Speter */ 12283366Sjulianstruct thread * 12383366Sjulianchoosethread(void) 12450027Speter{ 12599072Sjulian struct kse *ke; 12699072Sjulian struct thread *td; 12799072Sjulian struct ksegrp *kg; 12899072Sjulian 129122849Speter#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 130112993Speter if (smp_active == 0 && PCPU_GET(cpuid) != 0) { 131112993Speter /* Shutting down, run idlethread on AP's */ 132112993Speter td = PCPU_GET(idlethread); 133112993Speter ke = td->td_kse; 134112993Speter CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 135112993Speter ke->ke_flags |= KEF_DIDRUN; 136112993Speter TD_SET_RUNNING(td); 137112993Speter return (td); 138112993Speter } 139112993Speter#endif 140112993Speter 141100209Sgallatinretry: 142112993Speter ke = sched_choose(); 143112993Speter if (ke) { 14499072Sjulian td = ke->ke_thread; 14599072Sjulian KASSERT((td->td_kse == ke), ("kse/thread mismatch")); 14699072Sjulian kg = ke->ke_ksegrp; 147116361Sdavidxu if (td->td_proc->p_flag & P_SA) { 148103832Sjulian if (kg->kg_last_assigned == td) { 14999072Sjulian kg->kg_last_assigned = TAILQ_PREV(td, 15099072Sjulian threadqueue, td_runq); 151103832Sjulian } 152112021Sdavidxu TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 15399072Sjulian } 15499072Sjulian kg->kg_runnable--; 15599072Sjulian CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", 15699072Sjulian td, td->td_priority); 15799072Sjulian } else { 15899889Sjulian /* Simulate runq_choose() having returned the idle thread */ 15999072Sjulian td = PCPU_GET(idlethread); 160102592Sjulian ke = td->td_kse; 16199072Sjulian CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 16299072Sjulian } 163102592Sjulian ke->ke_flags |= KEF_DIDRUN; 164108338Sjulian 165108338Sjulian /* 166115215Sjulian * If we are in panic, only allow system threads, 167115215Sjulian * plus the one we are running in, to be run. 168108338Sjulian */ 169100209Sgallatin if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && 170115215Sjulian (td->td_flags & TDF_INPANIC) == 0)) { 171115215Sjulian /* note that it is no longer on the run queue */ 172115215Sjulian TD_SET_CAN_RUN(td); 173100209Sgallatin goto retry; 174115215Sjulian } 175108338Sjulian 176103216Sjulian TD_SET_RUNNING(td); 17799072Sjulian return (td); 17872376Sjake} 17950027Speter 18099072Sjulian/* 181111028Sjeff * Given a surplus KSE, either assign a new runable thread to it 182111028Sjeff * (and put it in the run queue) or put it in the ksegrp's idle KSE list. 183111128Sdavidxu * Assumes that the original thread is not runnable. 18499072Sjulian */ 18599072Sjulianvoid 18699072Sjuliankse_reassign(struct kse *ke) 18799072Sjulian{ 18899072Sjulian struct ksegrp *kg; 18999072Sjulian struct thread *td; 190104695Sjulian struct thread *original; 19199072Sjulian 192103832Sjulian mtx_assert(&sched_lock, MA_OWNED); 193111028Sjeff original = ke->ke_thread; 194111028Sjeff KASSERT(original == NULL || TD_IS_INHIBITED(original), 195111028Sjeff ("reassigning KSE with runnable thread")); 196110190Sjulian kg = ke->ke_ksegrp; 197112397Sdavidxu if (original) 198111028Sjeff original->td_kse = NULL; 199108338Sjulian 20099072Sjulian /* 20199072Sjulian * Find the first unassigned thread 20299072Sjulian */ 203111028Sjeff if ((td = kg->kg_last_assigned) != NULL) 20499072Sjulian td = TAILQ_NEXT(td, td_runq); 205111028Sjeff else 20699072Sjulian td = TAILQ_FIRST(&kg->kg_runq); 20799072Sjulian 20899072Sjulian /* 209111028Sjeff * If we found one, assign it the kse, otherwise idle the kse. 21099072Sjulian */ 21199072Sjulian if (td) { 21299072Sjulian kg->kg_last_assigned = td; 21399072Sjulian td->td_kse = ke; 21499072Sjulian ke->ke_thread = td; 215121127Sjeff sched_add(td); 21699072Sjulian CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td); 217104695Sjulian return; 218104695Sjulian } 219104695Sjulian 220111028Sjeff ke->ke_state = KES_IDLE; 221111028Sjeff ke->ke_thread = NULL; 222111028Sjeff TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist); 223111028Sjeff kg->kg_idle_kses++; 224111028Sjeff CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke); 225108338Sjulian return; 22699072Sjulian} 22799072Sjulian 228105127Sjulian#if 0 22999072Sjulian/* 23099072Sjulian * Remove a thread from its KSEGRP's run queue. 23199072Sjulian * This in turn may remove it from a KSE if it was already assigned 23299072Sjulian * to one, possibly causing a new thread to be assigned to the KSE 233111028Sjeff * and the KSE getting a new priority. 23499072Sjulian */ 235105127Sjulianstatic void 23683366Sjulianremrunqueue(struct thread *td) 23772376Sjake{ 238104695Sjulian struct thread *td2, *td3; 23999072Sjulian struct ksegrp *kg; 24099072Sjulian struct kse *ke; 24199072Sjulian 24299072Sjulian mtx_assert(&sched_lock, MA_OWNED); 243111028Sjeff KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue")); 24499072Sjulian kg = td->td_ksegrp; 24599072Sjulian ke = td->td_kse; 24699072Sjulian CTR1(KTR_RUNQ, "remrunqueue: td%p", td); 24799072Sjulian kg->kg_runnable--; 248103216Sjulian TD_SET_CAN_RUN(td); 249111028Sjeff /* 250111028Sjeff * If it is not a threaded process, take the shortcut. 251111028Sjeff */ 252116361Sdavidxu if ((td->td_proc->p_flag & P_SA) == 0) { 25399072Sjulian /* Bring its kse with it, leave the thread attached */ 254121127Sjeff sched_rem(td); 25599942Sjulian ke->ke_state = KES_THREAD; 25699072Sjulian return; 25799072Sjulian } 258104695Sjulian td3 = TAILQ_PREV(td, threadqueue, td_runq); 259104695Sjulian TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 26099072Sjulian if (ke) { 26199072Sjulian /* 26299072Sjulian * This thread has been assigned to a KSE. 26399072Sjulian * We need to dissociate it and try assign the 26499072Sjulian * KSE to the next available thread. Then, we should 26599072Sjulian * see if we need to move the KSE in the run queues. 26699072Sjulian */ 267121127Sjeff sched_rem(td); 268108338Sjulian ke->ke_state = KES_THREAD; 26999072Sjulian td2 = kg->kg_last_assigned; 270111028Sjeff KASSERT((td2 != NULL), ("last assigned has wrong value")); 271104695Sjulian if (td2 == td) 272104695Sjulian kg->kg_last_assigned = td3; 273104695Sjulian kse_reassign(ke); 27499072Sjulian } 27572376Sjake} 276105127Sjulian#endif 27772376Sjake 278105127Sjulian/* 279105127Sjulian * Change the priority of a thread that is on the run queue. 280105127Sjulian */ 28172376Sjakevoid 282105127Sjulianadjustrunqueue( struct thread *td, int newpri) 283105127Sjulian{ 284105127Sjulian struct ksegrp *kg; 285105127Sjulian struct kse *ke; 286105127Sjulian 287105127Sjulian mtx_assert(&sched_lock, MA_OWNED); 288111028Sjeff KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue")); 289111028Sjeff 290111028Sjeff ke = td->td_kse; 291111028Sjeff CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); 292110190Sjulian /* 293111028Sjeff * If it is not a threaded process, take the shortcut. 294110190Sjulian */ 295116361Sdavidxu if ((td->td_proc->p_flag & P_SA) == 0) { 296105127Sjulian /* We only care about the kse in the run queue. */ 297105129Sjulian td->td_priority = newpri; 298105127Sjulian if (ke->ke_rqindex != (newpri / RQ_PPQ)) { 299121127Sjeff sched_rem(td); 300121127Sjeff sched_add(td); 301105127Sjulian } 302105127Sjulian return; 303105127Sjulian } 304111028Sjeff 305111028Sjeff /* It is a threaded process */ 306105127Sjulian kg = td->td_ksegrp; 307105127Sjulian kg->kg_runnable--; 308105127Sjulian TD_SET_CAN_RUN(td); 309105127Sjulian if (ke) { 310105127Sjulian if (kg->kg_last_assigned == td) { 311105127Sjulian kg->kg_last_assigned = 312105127Sjulian TAILQ_PREV(td, threadqueue, td_runq); 313105127Sjulian } 314121127Sjeff sched_rem(td); 315105127Sjulian } 316105127Sjulian TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 317105127Sjulian td->td_priority = newpri; 318105127Sjulian setrunqueue(td); 319105127Sjulian} 320105127Sjulian 321105127Sjulianvoid 32283366Sjuliansetrunqueue(struct thread *td) 32350027Speter{ 32499072Sjulian struct kse *ke; 32599072Sjulian struct ksegrp *kg; 32699072Sjulian struct thread *td2; 32799072Sjulian struct thread *tda; 32899072Sjulian 32999072Sjulian CTR1(KTR_RUNQ, "setrunqueue: td%p", td); 33099072Sjulian mtx_assert(&sched_lock, MA_OWNED); 331103216Sjulian KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), 332103216Sjulian ("setrunqueue: bad thread state")); 333103216Sjulian TD_SET_RUNQ(td); 33499072Sjulian kg = td->td_ksegrp; 33599072Sjulian kg->kg_runnable++; 336116361Sdavidxu if ((td->td_proc->p_flag & P_SA) == 0) { 337104695Sjulian /* 338104695Sjulian * Common path optimisation: Only one of everything 339104695Sjulian * and the KSE is always already attached. 340104695Sjulian * Totally ignore the ksegrp run queue. 341104695Sjulian */ 342121127Sjeff sched_add(td); 343104695Sjulian return; 344104695Sjulian } 345104695Sjulian 34699072Sjulian tda = kg->kg_last_assigned; 34799072Sjulian if ((ke = td->td_kse) == NULL) { 348111028Sjeff if (kg->kg_idle_kses) { 34999072Sjulian /* 350111028Sjeff * There is a free one so it's ours for the asking.. 351104695Sjulian */ 352111028Sjeff ke = TAILQ_FIRST(&kg->kg_iq); 353111028Sjeff TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); 354104157Sjulian ke->ke_state = KES_THREAD; 355111028Sjeff kg->kg_idle_kses--; 35699072Sjulian } else if (tda && (tda->td_priority > td->td_priority)) { 35799072Sjulian /* 35899072Sjulian * None free, but there is one we can commandeer. 35999072Sjulian */ 36099072Sjulian ke = tda->td_kse; 361121171Sjeff sched_rem(tda); 36299072Sjulian tda->td_kse = NULL; 36399072Sjulian ke->ke_thread = NULL; 36499072Sjulian tda = kg->kg_last_assigned = 36599072Sjulian TAILQ_PREV(tda, threadqueue, td_runq); 36699072Sjulian } 36799072Sjulian } else { 36899942Sjulian /* 36999942Sjulian * Temporarily disassociate so it looks like the other cases. 37099942Sjulian */ 37199072Sjulian ke->ke_thread = NULL; 37299072Sjulian td->td_kse = NULL; 37399072Sjulian } 37499072Sjulian 37599072Sjulian /* 37699072Sjulian * Add the thread to the ksegrp's run queue at 37799072Sjulian * the appropriate place. 37899072Sjulian */ 37999072Sjulian TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { 38099072Sjulian if (td2->td_priority > td->td_priority) { 38199072Sjulian TAILQ_INSERT_BEFORE(td2, td, td_runq); 38299072Sjulian break; 38399072Sjulian } 38499072Sjulian } 38599072Sjulian if (td2 == NULL) { 38699072Sjulian /* We ran off the end of the TAILQ or it was empty. */ 38799072Sjulian TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); 38899072Sjulian } 38999072Sjulian 39099072Sjulian /* 39199072Sjulian * If we have a ke to use, then put it on the run queue and 39299072Sjulian * If needed, readjust the last_assigned pointer. 39399072Sjulian */ 39499072Sjulian if (ke) { 39599072Sjulian if (tda == NULL) { 39699072Sjulian /* 39799072Sjulian * No pre-existing last assigned so whoever is first 39899942Sjulian * gets the KSE we brought in.. (maybe us) 39999072Sjulian */ 40099072Sjulian td2 = TAILQ_FIRST(&kg->kg_runq); 40199072Sjulian KASSERT((td2->td_kse == NULL), 40299072Sjulian ("unexpected ke present")); 40399072Sjulian td2->td_kse = ke; 40499072Sjulian ke->ke_thread = td2; 40599072Sjulian kg->kg_last_assigned = td2; 40699072Sjulian } else if (tda->td_priority > td->td_priority) { 40799072Sjulian /* 40899072Sjulian * It's ours, grab it, but last_assigned is past us 40999072Sjulian * so don't change it. 41099072Sjulian */ 41199072Sjulian td->td_kse = ke; 41299072Sjulian ke->ke_thread = td; 41399072Sjulian } else { 41499072Sjulian /* 41599072Sjulian * We are past last_assigned, so 41699072Sjulian * put the new kse on whatever is next, 41799072Sjulian * which may or may not be us. 41899072Sjulian */ 41999072Sjulian td2 = TAILQ_NEXT(tda, td_runq); 42099072Sjulian kg->kg_last_assigned = td2; 42199072Sjulian td2->td_kse = ke; 42299072Sjulian ke->ke_thread = td2; 42399072Sjulian } 424121127Sjeff sched_add(ke->ke_thread); 42599072Sjulian } 42672376Sjake} 42750027Speter 428131481Sjhb/* 429131481Sjhb * Kernel thread preemption implementation. Critical sections mark 430131481Sjhb * regions of code in which preemptions are not allowed. 431131481Sjhb */ 43288088Sjhbvoid 43388088Sjhbcritical_enter(void) 43488088Sjhb{ 43588088Sjhb struct thread *td; 43688088Sjhb 43788088Sjhb td = curthread; 43888088Sjhb if (td->td_critnest == 0) 43993264Sdillon cpu_critical_enter(); 44088088Sjhb td->td_critnest++; 44188088Sjhb} 44288088Sjhb 44388088Sjhbvoid 44488088Sjhbcritical_exit(void) 44588088Sjhb{ 44688088Sjhb struct thread *td; 44788088Sjhb 44888088Sjhb td = curthread; 449125315Sjeff KASSERT(td->td_critnest != 0, 450125315Sjeff ("critical_exit: td_critnest == 0")); 45188088Sjhb if (td->td_critnest == 1) { 452131481Sjhb#ifdef PREEMPTION 453131481Sjhb if (td->td_flags & TDF_OWEPREEMPT) { 454131481Sjhb mtx_lock_spin(&sched_lock); 455131481Sjhb mi_switch(SW_INVOL, NULL); 456131481Sjhb mtx_unlock_spin(&sched_lock); 457131481Sjhb } 458131481Sjhb#endif 45988088Sjhb td->td_critnest = 0; 46093264Sdillon cpu_critical_exit(); 46193264Sdillon } else { 46288088Sjhb td->td_critnest--; 46393264Sdillon } 46488088Sjhb} 46588088Sjhb 466131481Sjhb/* 467131481Sjhb * This function is called when a thread is about to be put on run queue 468131481Sjhb * because it has been made runnable or its priority has been adjusted. It 469131481Sjhb * determines if the new thread should be immediately preempted to. If so, 470131481Sjhb * it switches to it and eventually returns true. If not, it returns false 471131481Sjhb * so that the caller may place the thread on an appropriate run queue. 472131481Sjhb */ 473131481Sjhbint 474131481Sjhbmaybe_preempt(struct thread *td) 475131481Sjhb{ 476131481Sjhb struct thread *ctd; 477131481Sjhb int cpri, pri; 47899072Sjulian 479131481Sjhb mtx_assert(&sched_lock, MA_OWNED); 480131481Sjhb#ifdef PREEMPTION 481131481Sjhb /* 482131481Sjhb * The new thread should not preempt the current thread if any of the 483131481Sjhb * following conditions are true: 484131481Sjhb * 485131481Sjhb * - The current thread has a higher (numerically lower) priority. 486131481Sjhb * - It is too early in the boot for context switches (cold is set). 487131481Sjhb * - The current thread has an inhibitor set or is in the process of 488131481Sjhb * exiting. In this case, the current thread is about to switch 489131481Sjhb * out anyways, so there's no point in preempting. If we did, 490131481Sjhb * the current thread would not be properly resumed as well, so 491131481Sjhb * just avoid that whole landmine. 492131481Sjhb * - If the new thread's priority is not a realtime priority and 493131481Sjhb * the current thread's priority is not an idle priority and 494131481Sjhb * FULL_PREEMPTION is disabled. 495131481Sjhb * 496131481Sjhb * If all of these conditions are false, but the current thread is in 497131481Sjhb * a nested critical section, then we have to defer the preemption 498131481Sjhb * until we exit the critical section. Otherwise, switch immediately 499131481Sjhb * to the new thread. 500131481Sjhb */ 501131481Sjhb ctd = curthread; 502131481Sjhb pri = td->td_priority; 503131481Sjhb cpri = ctd->td_priority; 504131481Sjhb if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) || 505131481Sjhb td->td_kse->ke_state != KES_THREAD) 506131481Sjhb return (0); 507131481Sjhb#ifndef FULL_PREEMPTION 508131481Sjhb if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) && 509131481Sjhb !(cpri >= PRI_MIN_IDLE)) 510131481Sjhb return (0); 511131481Sjhb#endif 512131481Sjhb if (ctd->td_critnest > 1) { 513131481Sjhb CTR1(KTR_PROC, "maybe_preempt: in critical section %d", 514131481Sjhb ctd->td_critnest); 515131481Sjhb ctd->td_flags |= TDF_OWEPREEMPT; 516131481Sjhb return (0); 517131481Sjhb } 518131481Sjhb 519131481Sjhb /* 520131481Sjhb * Our thread state says that we are already on a run queue, so 521131481Sjhb * update our state as if we had been dequeued by choosethread(). 522131481Sjhb */ 523131481Sjhb MPASS(TD_ON_RUNQ(td)); 524131481Sjhb TD_SET_RUNNING(td); 525131481Sjhb CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 526131481Sjhb td->td_proc->p_pid, td->td_proc->p_comm); 527131481Sjhb mi_switch(SW_INVOL, td); 528131481Sjhb return (1); 529131481Sjhb#else 530131481Sjhb return (0); 531131481Sjhb#endif 532131481Sjhb} 533131481Sjhb 534131481Sjhb#ifndef PREEMPTION 535131481Sjhb/* XXX: There should be a non-static version of this. */ 536131481Sjhbstatic void 537131481Sjhbprintf_caddr_t(void *data) 538131481Sjhb{ 539131481Sjhb printf("%s", (char *)data); 540131481Sjhb} 541131481Sjhbstatic char preempt_warning[] = 542131481Sjhb "WARNING: Kernel preemption is disabled, expect reduced performance.\n"; 543131481SjhbSYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t, 544131481Sjhb preempt_warning) 545131481Sjhb#endif 546131481Sjhb 54799072Sjulian/************************************************************************ 54899072Sjulian * SYSTEM RUN QUEUE manipulations and tests * 54999072Sjulian ************************************************************************/ 55072376Sjake/* 55199072Sjulian * Initialize a run structure. 55299072Sjulian */ 55399072Sjulianvoid 55499072Sjulianrunq_init(struct runq *rq) 55599072Sjulian{ 55699072Sjulian int i; 55799072Sjulian 55899072Sjulian bzero(rq, sizeof *rq); 55999072Sjulian for (i = 0; i < RQ_NQS; i++) 56099072Sjulian TAILQ_INIT(&rq->rq_queues[i]); 56199072Sjulian} 56299072Sjulian 56399072Sjulian/* 56472376Sjake * Clear the status bit of the queue corresponding to priority level pri, 56572376Sjake * indicating that it is empty. 56672376Sjake */ 56772376Sjakestatic __inline void 56872376Sjakerunq_clrbit(struct runq *rq, int pri) 56972376Sjake{ 57072376Sjake struct rqbits *rqb; 57165557Sjasone 57272376Sjake rqb = &rq->rq_status; 57372376Sjake CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", 57472376Sjake rqb->rqb_bits[RQB_WORD(pri)], 57572376Sjake rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), 57672376Sjake RQB_BIT(pri), RQB_WORD(pri)); 57772376Sjake rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); 57850027Speter} 57950027Speter 58050027Speter/* 58172376Sjake * Find the index of the first non-empty run queue. This is done by 58272376Sjake * scanning the status bits, a set bit indicates a non-empty queue. 58350027Speter */ 58472376Sjakestatic __inline int 58572376Sjakerunq_findbit(struct runq *rq) 58672376Sjake{ 58772376Sjake struct rqbits *rqb; 58872376Sjake int pri; 58972376Sjake int i; 59072376Sjake 59172376Sjake rqb = &rq->rq_status; 59272376Sjake for (i = 0; i < RQB_LEN; i++) 59372376Sjake if (rqb->rqb_bits[i]) { 59498469Speter pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); 59572376Sjake CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", 59672376Sjake rqb->rqb_bits[i], i, pri); 59772376Sjake return (pri); 59872376Sjake } 59972376Sjake 60072376Sjake return (-1); 60172376Sjake} 60272376Sjake 60372376Sjake/* 60472376Sjake * Set the status bit of the queue corresponding to priority level pri, 60572376Sjake * indicating that it is non-empty. 60672376Sjake */ 60772376Sjakestatic __inline void 60872376Sjakerunq_setbit(struct runq *rq, int pri) 60972376Sjake{ 61072376Sjake struct rqbits *rqb; 61172376Sjake 61272376Sjake rqb = &rq->rq_status; 61372376Sjake CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", 61472376Sjake rqb->rqb_bits[RQB_WORD(pri)], 61572376Sjake rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), 61672376Sjake RQB_BIT(pri), RQB_WORD(pri)); 61772376Sjake rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); 61872376Sjake} 61972376Sjake 62072376Sjake/* 62199072Sjulian * Add the KSE to the queue specified by its priority, and set the 62272376Sjake * corresponding status bit. 62372376Sjake */ 62450027Spetervoid 62583366Sjulianrunq_add(struct runq *rq, struct kse *ke) 62650027Speter{ 62772376Sjake struct rqhead *rqh; 62872376Sjake int pri; 62950027Speter 63090538Sjulian pri = ke->ke_thread->td_priority / RQ_PPQ; 63183366Sjulian ke->ke_rqindex = pri; 63272376Sjake runq_setbit(rq, pri); 63372376Sjake rqh = &rq->rq_queues[pri]; 63472376Sjake CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p", 63590538Sjulian ke->ke_proc, ke->ke_thread->td_priority, pri, rqh); 63683366Sjulian TAILQ_INSERT_TAIL(rqh, ke, ke_procq); 63750027Speter} 63850027Speter 63950027Speter/* 64072376Sjake * Return true if there are runnable processes of any priority on the run 64172376Sjake * queue, false otherwise. Has no side effects, does not modify the run 64272376Sjake * queue structure. 64350027Speter */ 64472376Sjakeint 64572376Sjakerunq_check(struct runq *rq) 64650027Speter{ 64772376Sjake struct rqbits *rqb; 64872376Sjake int i; 64972376Sjake 65072376Sjake rqb = &rq->rq_status; 65172376Sjake for (i = 0; i < RQB_LEN; i++) 65272376Sjake if (rqb->rqb_bits[i]) { 65372376Sjake CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", 65472376Sjake rqb->rqb_bits[i], i); 65572376Sjake return (1); 65672376Sjake } 65772376Sjake CTR0(KTR_RUNQ, "runq_check: empty"); 65872376Sjake 65972376Sjake return (0); 66050027Speter} 66150027Speter 66250027Speter/* 663104964Sjeff * Find the highest priority process on the run queue. 66450027Speter */ 66583366Sjulianstruct kse * 66672376Sjakerunq_choose(struct runq *rq) 66750027Speter{ 66872376Sjake struct rqhead *rqh; 66983366Sjulian struct kse *ke; 67072376Sjake int pri; 67150027Speter 67265557Sjasone mtx_assert(&sched_lock, MA_OWNED); 67399072Sjulian while ((pri = runq_findbit(rq)) != -1) { 67472376Sjake rqh = &rq->rq_queues[pri]; 67583366Sjulian ke = TAILQ_FIRST(rqh); 67683366Sjulian KASSERT(ke != NULL, ("runq_choose: no proc on busy queue")); 67799072Sjulian CTR3(KTR_RUNQ, 67899072Sjulian "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh); 67983366Sjulian return (ke); 68050027Speter } 68172376Sjake CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri); 68272376Sjake 68399072Sjulian return (NULL); 68450027Speter} 68572376Sjake 68672376Sjake/* 68799072Sjulian * Remove the KSE from the queue specified by its priority, and clear the 68872376Sjake * corresponding status bit if the queue becomes empty. 68999072Sjulian * Caller must set ke->ke_state afterwards. 69072376Sjake */ 69172376Sjakevoid 69283366Sjulianrunq_remove(struct runq *rq, struct kse *ke) 69372376Sjake{ 69472376Sjake struct rqhead *rqh; 69572376Sjake int pri; 69672376Sjake 697100913Stanimura KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 698100913Stanimura ("runq_remove: process swapped out")); 69983366Sjulian pri = ke->ke_rqindex; 70072376Sjake rqh = &rq->rq_queues[pri]; 70172376Sjake CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p", 70290538Sjulian ke, ke->ke_thread->td_priority, pri, rqh); 70383366Sjulian KASSERT(ke != NULL, ("runq_remove: no proc on busy queue")); 70483366Sjulian TAILQ_REMOVE(rqh, ke, ke_procq); 70572376Sjake if (TAILQ_EMPTY(rqh)) { 70672376Sjake CTR0(KTR_RUNQ, "runq_remove: empty"); 70772376Sjake runq_clrbit(rq, pri); 70872376Sjake } 70972376Sjake} 71099072Sjulian 711104695Sjulian#if 0 71299072Sjulianvoid 713104695Sjulianpanc(char *string1, char *string2) 71499072Sjulian{ 715104695Sjulian printf("%s", string1); 716104695Sjulian Debugger(string2); 717104695Sjulian} 718104695Sjulian 719104695Sjulianvoid 720104695Sjulianthread_sanity_check(struct thread *td, char *string) 721104695Sjulian{ 72299072Sjulian struct proc *p; 72399072Sjulian struct ksegrp *kg; 72499072Sjulian struct kse *ke; 725104695Sjulian struct thread *td2 = NULL; 72699072Sjulian unsigned int prevpri; 727104695Sjulian int saw_lastassigned = 0; 728104695Sjulian int unassigned = 0; 729104695Sjulian int assigned = 0; 73099072Sjulian 73199072Sjulian p = td->td_proc; 73299072Sjulian kg = td->td_ksegrp; 73399072Sjulian ke = td->td_kse; 73499072Sjulian 73599072Sjulian 73699072Sjulian if (ke) { 737103367Sjulian if (p != ke->ke_proc) { 738104695Sjulian panc(string, "wrong proc"); 73999072Sjulian } 74099072Sjulian if (ke->ke_thread != td) { 741104695Sjulian panc(string, "wrong thread"); 74299072Sjulian } 74399072Sjulian } 74499072Sjulian 745116361Sdavidxu if ((p->p_flag & P_SA) == 0) { 74699072Sjulian if (ke == NULL) { 747104695Sjulian panc(string, "non KSE thread lost kse"); 74899072Sjulian } 74999072Sjulian } else { 75099072Sjulian prevpri = 0; 75199072Sjulian saw_lastassigned = 0; 75299072Sjulian unassigned = 0; 75399072Sjulian assigned = 0; 75499072Sjulian TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { 75599072Sjulian if (td2->td_priority < prevpri) { 756104695Sjulian panc(string, "thread runqueue unosorted"); 75799072Sjulian } 758104695Sjulian if ((td2->td_state == TDS_RUNQ) && 759104695Sjulian td2->td_kse && 760104695Sjulian (td2->td_kse->ke_state != KES_ONRUNQ)) { 761104695Sjulian panc(string, "KSE wrong state"); 762104695Sjulian } 76399072Sjulian prevpri = td2->td_priority; 76499072Sjulian if (td2->td_kse) { 76599072Sjulian assigned++; 76699072Sjulian if (unassigned) { 767104695Sjulian panc(string, "unassigned before assigned"); 76899072Sjulian } 76999072Sjulian if (kg->kg_last_assigned == NULL) { 770104695Sjulian panc(string, "lastassigned corrupt"); 77199072Sjulian } 77299072Sjulian if (saw_lastassigned) { 773104695Sjulian panc(string, "last assigned not last"); 77499072Sjulian } 77599072Sjulian if (td2->td_kse->ke_thread != td2) { 776104695Sjulian panc(string, "mismatched kse/thread"); 77799072Sjulian } 77899072Sjulian } else { 77999072Sjulian unassigned++; 78099072Sjulian } 78199072Sjulian if (td2 == kg->kg_last_assigned) { 78299072Sjulian saw_lastassigned = 1; 78399072Sjulian if (td2->td_kse == NULL) { 784104695Sjulian panc(string, "last assigned not assigned"); 78599072Sjulian } 78699072Sjulian } 78799072Sjulian } 78899072Sjulian if (kg->kg_last_assigned && (saw_lastassigned == 0)) { 789104695Sjulian panc(string, "where on earth does lastassigned point?"); 79099072Sjulian } 791111028Sjeff#if 0 79299072Sjulian FOREACH_THREAD_IN_GROUP(kg, td2) { 79399072Sjulian if (((td2->td_flags & TDF_UNBOUND) == 0) && 794103216Sjulian (TD_ON_RUNQ(td2))) { 79599072Sjulian assigned++; 79699072Sjulian if (td2->td_kse == NULL) { 797104695Sjulian panc(string, "BOUND thread with no KSE"); 79899072Sjulian } 79999072Sjulian } 80099072Sjulian } 801111028Sjeff#endif 80299072Sjulian#if 0 80399072Sjulian if ((unassigned + assigned) != kg->kg_runnable) { 804104695Sjulian panc(string, "wrong number in runnable"); 80599072Sjulian } 80699072Sjulian#endif 80799072Sjulian } 808104695Sjulian if (assigned == 12345) { 809104695Sjulian printf("%p %p %p %p %p %d, %d", 810104695Sjulian td, td2, ke, kg, p, assigned, saw_lastassigned); 811104695Sjulian } 81299072Sjulian} 81399834Sjulian#endif 81499072Sjulian 815