kern_switch.c revision 136167
150276Speter/* 2176187Srafan * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> 350276Speter * All rights reserved. 450276Speter * 550276Speter * Redistribution and use in source and binary forms, with or without 650276Speter * modification, are permitted provided that the following conditions 750276Speter * are met: 850276Speter * 1. Redistributions of source code must retain the above copyright 950276Speter * notice, this list of conditions and the following disclaimer. 1050276Speter * 2. Redistributions in binary form must reproduce the above copyright 1150276Speter * notice, this list of conditions and the following disclaimer in the 1250276Speter * documentation and/or other materials provided with the distribution. 1350276Speter * 1450276Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1550276Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1650276Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1750276Speter * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1850276Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1950276Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2050276Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2150276Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2250276Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2350276Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2450276Speter * SUCH DAMAGE. 2550276Speter */ 2650276Speter 2750276Speter/*** 2850276SpeterHere is the logic.. 2950276Speter 3050276SpeterIf there are N processors, then there are at most N KSEs (kernel 3150276Speterschedulable entities) working to process threads that belong to a 32166124SrafanKSEGROUP (kg). If there are X of these KSEs actually running at the 3350276Spetermoment in question, then there are at most M (N-X) of these KSEs on 3450276Speterthe run queue, as running KSEs are not on the queue. 35184989Srafan 3650276SpeterRunnable threads are queued off the KSEGROUP in priority order. 3750276SpeterIf there are M or more threads runnable, the top M threads 3850276Speter(by priority) are 'preassigned' to the M KSEs not running. The KSEs take 3950276Spetertheir priority from those threads and are put on the run queue. 4050276Speter 4150276SpeterThe last thread that had a priority high enough to have a KSE associated 4250276Speterwith it, AND IS ON THE RUN QUEUE is pointed to by 4350276Speterkg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs 44166124Srafanassigned as all the available KSEs are activly running, or because there 4550276Speterare no threads queued, that pointer is NULL. 4650276Speter 4750276SpeterWhen a KSE is removed from the run queue to become runnable, we know 4850276Speterit was associated with the highest priority thread in the queue (at the head 4950276Speterof the queue). If it is also the last assigned we know M was 1 and must 5050276Speternow be 0. Since the thread is no longer queued that pointer must be 5150276Speterremoved from it. Since we know there were no more KSEs available, 5250276Speter(M was 1 and is now 0) and since we are not FREEING our KSE 5350276Speterbut using it, we know there are STILL no more KSEs available, we can prove 5450276Speterthat the next thread in the ksegrp list will not have a KSE to assign to 5550276Speterit, so we can show that the pointer must be made 'invalid' (NULL). 5650276Speter 5750276SpeterThe pointer exists so that when a new thread is made runnable, it can 58166124Srafanhave its priority compared with the last assigned thread to see if 59166124Srafanit should 'steal' its KSE or not.. i.e. is it 'earlier' 60166124Srafanon the list than that thread or later.. If it's earlier, then the KSE is 61166124Srafanremoved from the last assigned (which is now not assigned a KSE) 62166124Srafanand reassigned to the new thread, which is placed earlier in the list. 63166124SrafanThe pointer is then backed up to the previous thread (which may or may not 64166124Srafanbe the new thread). 65166124Srafan 6676726SpeterWhen a thread sleeps or is removed, the KSE becomes available and if there 6776726Speterare queued threads that are not assigned KSEs, the highest priority one of 68166124Srafanthem is assigned the KSE, which is then placed back on the run queue at 69166124Srafanthe approipriate place, and the kg->kg_last_assigned pointer is adjusted down 70166124Srafanto point to it. 71166124Srafan 72166124SrafanThe following diagram shows 2 KSEs and 3 threads from a single process. 73166124Srafan 74166124Srafan RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads) 75166124Srafan \ \____ 76166124Srafan \ \ 77166124Srafan KSEGROUP---thread--thread--thread (queued in priority order) 78166124Srafan \ / 7950276Speter \_______________/ 80166124Srafan (last_assigned) 8150276Speter 8250276SpeterThe result of this scheme is that the M available KSEs are always 83166124Srafanqueued at the priorities they have inherrited from the M highest priority 8450276Speterthreads for that KSEGROUP. If this situation changes, the KSEs are 8550276Speterreassigned to keep this true. 8650276Speter***/ 8750276Speter 8850276Speter#include <sys/cdefs.h> 89166124Srafan__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 136167 2004-10-05 21:10:44Z julian $"); 90166124Srafan 91166124Srafan#include "opt_sched.h" 92166124Srafan 9350276Speter#ifndef KERN_SWITCH_INCLUDE 9450276Speter#include <sys/param.h> 9550276Speter#include <sys/systm.h> 96166124Srafan#include <sys/kdb.h> 97166124Srafan#include <sys/kernel.h> 98166124Srafan#include <sys/ktr.h> 99166124Srafan#include <sys/lock.h> 100166124Srafan#include <sys/mutex.h> 101166124Srafan#include <sys/proc.h> 10262449Speter#include <sys/queue.h> 10362449Speter#include <sys/sched.h> 10462449Speter#else /* KERN_SWITCH_INCLUDE */ 105166124Srafan#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 106174993Srafan#include <sys/smp.h> 107174993Srafan#endif 108174993Srafan#include <machine/critical.h> 109174993Srafan#if defined(SMP) && defined(SCHED_4BSD) 110174993Srafan#include <sys/sysctl.h> 111174993Srafan#endif 112174993Srafan 113166124Srafan#ifdef FULL_PREEMPTION 114166124Srafan#ifndef PREEMPTION 11562449Speter#error "The FULL_PREEMPTION option requires the PREEMPTION option" 116174993Srafan#endif 11762449Speter#endif 118166124Srafan 119166124SrafanCTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 120166124Srafan 121166124Srafan#define td_kse td_sched 122166124Srafan 123166124Srafan/************************************************************************ 124166124Srafan * Functions that manipulate runnability from a thread perspective. * 125166124Srafan ************************************************************************/ 126166124Srafan/* 127166124Srafan * Select the KSE that will be run next. From that find the thread, and 128166124Srafan * remove it from the KSEGRP's run queue. If there is thread clustering, 12997049Speter * this will be what does it. 13097049Speter */ 13197049Speterstruct thread * 132166124Srafanchoosethread(void) 133166124Srafan{ 134166124Srafan struct kse *ke; 135166124Srafan struct thread *td; 13650276Speter struct ksegrp *kg; 137166124Srafan 138166124Srafan#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 13950276Speter if (smp_active == 0 && PCPU_GET(cpuid) != 0) { 14050276Speter /* Shutting down, run idlethread on AP's */ 14150276Speter td = PCPU_GET(idlethread); 14250276Speter ke = td->td_kse; 14350276Speter CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 14450276Speter ke->ke_flags |= KEF_DIDRUN; 14550276Speter TD_SET_RUNNING(td); 14650276Speter return (td); 147174993Srafan } 14850276Speter#endif 14950276Speter 15097049Speterretry: 15197049Speter ke = sched_choose(); 15297049Speter if (ke) { 15397049Speter td = ke->ke_thread; 15497049Speter KASSERT((td->td_kse == ke), ("kse/thread mismatch")); 15597049Speter kg = ke->ke_ksegrp; 15697049Speter if (td->td_proc->p_flag & P_HADTHREADS) { 15750276Speter if (kg->kg_last_assigned == td) { 15850276Speter kg->kg_last_assigned = TAILQ_PREV(td, 15950276Speter threadqueue, td_runq); 16050276Speter } 16150276Speter TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 16250276Speter kg->kg_runnable--; 16350276Speter } 16450276Speter CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", 16597049Speter td, td->td_priority); 16697049Speter } else { 167166124Srafan /* Simulate runq_choose() having returned the idle thread */ 16897049Speter td = PCPU_GET(idlethread); 169166124Srafan ke = td->td_kse; 170166124Srafan CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 171166124Srafan } 172166124Srafan ke->ke_flags |= KEF_DIDRUN; 173166124Srafan 174166124Srafan /* 17597049Speter * If we are in panic, only allow system threads, 176166124Srafan * plus the one we are running in, to be run. 177166124Srafan */ 178166124Srafan if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && 179166124Srafan (td->td_flags & TDF_INPANIC) == 0)) { 18050276Speter /* note that it is no longer on the run queue */ 18197049Speter TD_SET_CAN_RUN(td); 18250276Speter goto retry; 18350276Speter } 18497049Speter 18597049Speter TD_SET_RUNNING(td); 18650276Speter return (td); 18750276Speter} 188166124Srafan 189166124Srafan/* 190166124Srafan * Given a surplus system slot, try assign a new runnable thread to it. 19150276Speter * Called from: 19250276Speter * sched_thread_exit() (local) 19350276Speter * sched_switch() (local) 194174993Srafan * sched_thread_exit() (local) 19550276Speter * remrunqueue() (local) (not at the moment) 19650276Speter */ 19750276Speterstatic void 19850276Speterslot_fill(struct ksegrp *kg) 19950276Speter{ 20050276Speter struct thread *td; 20150276Speter 20250276Speter mtx_assert(&sched_lock, MA_OWNED); 20350276Speter while (kg->kg_avail_opennings > 0) { 20450276Speter /* 20550276Speter * Find the first unassigned thread 20650276Speter */ 20750276Speter if ((td = kg->kg_last_assigned) != NULL) 20850276Speter td = TAILQ_NEXT(td, td_runq); 20950276Speter else 21050276Speter td = TAILQ_FIRST(&kg->kg_runq); 21150276Speter 21250276Speter /* 21350276Speter * If we found one, send it to the system scheduler. 21450276Speter */ 21550276Speter if (td) { 21650276Speter kg->kg_last_assigned = td; 21750276Speter sched_add(td, SRQ_BORING); 21850276Speter CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg); 21950276Speter } else { 22050276Speter /* no threads to use up the slots. quit now */ 22150276Speter break; 22250276Speter } 22350276Speter } 22450276Speter} 22550276Speter 22650276Speter#ifdef SCHED_4BSD 227174993Srafan/* 228174993Srafan * Remove a thread from its KSEGRP's run queue. 22997049Speter * This in turn may remove it from a KSE if it was already assigned 23097049Speter * to one, possibly causing a new thread to be assigned to the KSE 23176726Speter * and the KSE getting a new priority. 23297049Speter */ 23350276Speterstatic void 234166124Srafanremrunqueue(struct thread *td) 235166124Srafan{ 23650276Speter struct thread *td2, *td3; 237166124Srafan struct ksegrp *kg; 238166124Srafan struct kse *ke; 239166124Srafan 240166124Srafan mtx_assert(&sched_lock, MA_OWNED); 241166124Srafan KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue")); 242166124Srafan kg = td->td_ksegrp; 243166124Srafan ke = td->td_kse; 244166124Srafan CTR1(KTR_RUNQ, "remrunqueue: td%p", td); 245166124Srafan TD_SET_CAN_RUN(td); 246166124Srafan /* 247166124Srafan * If it is not a threaded process, take the shortcut. 248166124Srafan */ 249166124Srafan if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { 250166124Srafan /* remve from sys run queue and free up a slot */ 251166124Srafan sched_rem(td); 252166124Srafan ke->ke_state = KES_THREAD; 253166124Srafan return; 254166124Srafan } 25550276Speter td3 = TAILQ_PREV(td, threadqueue, td_runq); 256166124Srafan TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 257166124Srafan kg->kg_runnable--; 258166124Srafan if (ke->ke_state == KES_ONRUNQ) { 259166124Srafan /* 260166124Srafan * This thread has been assigned to the system run queue. 261166124Srafan * We need to dissociate it and try assign the 262166124Srafan * KSE to the next available thread. Then, we should 26350276Speter * see if we need to move the KSE in the run queues. 26450276Speter */ 26550276Speter sched_rem(td); 26650276Speter ke->ke_state = KES_THREAD; 26750276Speter td2 = kg->kg_last_assigned; 268166124Srafan KASSERT((td2 != NULL), ("last assigned has wrong value")); 269166124Srafan if (td2 == td) 270166124Srafan kg->kg_last_assigned = td3; 271166124Srafan /* slot_fill(kg); */ /* will replace it with another */ 272166124Srafan } 273166124Srafan} 274166124Srafan#endif 27550276Speter 27650276Speter/* 27750276Speter * Change the priority of a thread that is on the run queue. 27850276Speter */ 27950276Spetervoid 28050276Speteradjustrunqueue( struct thread *td, int newpri) 28150276Speter{ 28250276Speter struct ksegrp *kg; 28350276Speter struct kse *ke; 28450276Speter 28550276Speter mtx_assert(&sched_lock, MA_OWNED); 28650276Speter KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue")); 28750276Speter 28850276Speter ke = td->td_kse; 28950276Speter CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); 29050276Speter /* 29150276Speter * If it is not a threaded process, take the shortcut. 29250276Speter */ 29350276Speter if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { 29497049Speter /* We only care about the kse in the run queue. */ 29550276Speter td->td_priority = newpri; 29650276Speter if (ke->ke_rqindex != (newpri / RQ_PPQ)) { 29797049Speter sched_rem(td); 29850276Speter sched_add(td, SRQ_BORING); 29950276Speter } 30050276Speter return; 30150276Speter } 30250276Speter 30350276Speter /* It is a threaded process */ 30450276Speter kg = td->td_ksegrp; 30550276Speter if (ke->ke_state == KES_ONRUNQ) { 30650276Speter if (kg->kg_last_assigned == td) { 30750276Speter kg->kg_last_assigned = 30850276Speter TAILQ_PREV(td, threadqueue, td_runq); 30950276Speter } 31050276Speter sched_rem(td); 31150276Speter } 31250276Speter TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 31350276Speter kg->kg_runnable--; 31450276Speter TD_SET_CAN_RUN(td); 31550276Speter td->td_priority = newpri; 31650276Speter setrunqueue(td, SRQ_BORING); 31750276Speter} 31850276Speterint limitcount; 31950276Spetervoid 32050276Spetersetrunqueue(struct thread *td, int flags) 32150276Speter{ 32250276Speter struct ksegrp *kg; 32350276Speter struct thread *td2; 32450276Speter struct thread *tda; 32550276Speter 32650276Speter CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d", 32797049Speter td, td->td_ksegrp, td->td_proc->p_pid); 32897049Speter mtx_assert(&sched_lock, MA_OWNED); 32997049Speter KASSERT((td->td_inhibitors == 0), 33097049Speter ("setrunqueue: trying to run inhibitted thread")); 33197049Speter KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), 33297049Speter ("setrunqueue: bad thread state")); 33397049Speter TD_SET_RUNQ(td); 33497049Speter kg = td->td_ksegrp; 335166124Srafan if ((td->td_proc->p_flag & P_HADTHREADS) == 0) { 33697049Speter /* 33797049Speter * Common path optimisation: Only one of everything 33897049Speter * and the KSE is always already attached. 339166124Srafan * Totally ignore the ksegrp run queue. 340166124Srafan */ 341166124Srafan if (kg->kg_avail_opennings != 1) { 34250276Speter if (limitcount < 1) { 343166124Srafan limitcount++; 344166124Srafan printf("pid %d: corrected slot count (%d->1)\n", 345166124Srafan td->td_proc->p_pid, kg->kg_avail_opennings); 346166124Srafan 34750276Speter } 34850276Speter kg->kg_avail_opennings = 1; 34950276Speter } 35050276Speter sched_add(td, flags); 35150276Speter return; 352166124Srafan } 353174993Srafan 354174993Srafan /* 355166124Srafan * If the concurrency has reduced, and we would go in the 356166124Srafan * assigned section, then keep removing entries from the 35750276Speter * system run queue, until we are not in that section 35850276Speter * or there is room for us to be put in that section. 35997049Speter * What we MUST avoid is the case where there are threads of less 36050276Speter * priority than the new one scheduled, but it can not 36150276Speter * be scheduled itself. That would lead to a non contiguous set 362174993Srafan * of scheduled threads, and everything would break. 36397049Speter */ 36450276Speter tda = kg->kg_last_assigned; 36550276Speter while ((kg->kg_avail_opennings <= 0) && 36650276Speter (tda && (tda->td_priority > td->td_priority))) { 36762449Speter /* 36850276Speter * None free, but there is one we can commandeer. 36950276Speter */ 37062449Speter CTR2(KTR_RUNQ, 37162449Speter "setrunqueue: kg:%p: take slot from td: %p", kg, tda); 37250276Speter sched_rem(tda); 37350276Speter tda = kg->kg_last_assigned = 37450276Speter TAILQ_PREV(tda, threadqueue, td_runq); 37550276Speter SLOT_RELEASE(kg); 37650276Speter } 37750276Speter 37850276Speter /* 37950276Speter * Add the thread to the ksegrp's run queue at 38050276Speter * the appropriate place. 38197049Speter */ 38297049Speter TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { 38397049Speter if (td2->td_priority > td->td_priority) { 38497049Speter kg->kg_runnable++; 38597049Speter TAILQ_INSERT_BEFORE(td2, td, td_runq); 38650276Speter break; 38750276Speter } 38897049Speter } 38950276Speter if (td2 == NULL) { 39050276Speter /* We ran off the end of the TAILQ or it was empty. */ 39150276Speter kg->kg_runnable++; 39250276Speter TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); 39350276Speter } 39462449Speter 39562449Speter /* 39650276Speter * If we have a slot to use, then put the thread on the system 39750276Speter * run queue and if needed, readjust the last_assigned pointer. 39850276Speter * it may be that we need to schedule something anyhow 39950276Speter * even if the availabel slots are -ve so that 40050276Speter * all the items < last_assigned are scheduled. 40150276Speter */ 40250276Speter if (kg->kg_avail_opennings > 0) { 40350276Speter if (tda == NULL) { 40450276Speter /* 40562449Speter * No pre-existing last assigned so whoever is first 40662449Speter * gets the slot.. (maybe us) 40762449Speter */ 40850276Speter td2 = TAILQ_FIRST(&kg->kg_runq); 40950276Speter kg->kg_last_assigned = td2; 41062449Speter } else if (tda->td_priority > td->td_priority) { 41197049Speter td2 = td; 41297049Speter } else { 41397049Speter /* 414166124Srafan * We are past last_assigned, so 415166124Srafan * give the next slot to whatever is next, 41697049Speter * which may or may not be us. 417166124Srafan */ 41850276Speter td2 = TAILQ_NEXT(tda, td_runq); 419174993Srafan kg->kg_last_assigned = td2; 42050276Speter } 42150276Speter sched_add(td2, flags); 422166124Srafan } else { 423166124Srafan CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d", 424166124Srafan td, td->td_ksegrp, td->td_proc->p_pid); 425166124Srafan } 426184989Srafan} 427184989Srafan 428166124Srafan/* 429166124Srafan * Kernel thread preemption implementation. Critical sections mark 430166124Srafan * regions of code in which preemptions are not allowed. 431166124Srafan */ 432166124Srafanvoid 433166124Srafancritical_enter(void) 434166124Srafan{ 435166124Srafan struct thread *td; 436166124Srafan 437166124Srafan td = curthread; 438166124Srafan if (td->td_critnest == 0) 439166124Srafan cpu_critical_enter(td); 440166124Srafan td->td_critnest++; 441166124Srafan} 442166124Srafan 443166124Srafanvoid 444166124Srafancritical_exit(void) 445166124Srafan{ 446166124Srafan struct thread *td; 447166124Srafan 448166124Srafan td = curthread; 449166124Srafan KASSERT(td->td_critnest != 0, 450166124Srafan ("critical_exit: td_critnest == 0")); 451166124Srafan if (td->td_critnest == 1) { 452166124Srafan#ifdef PREEMPTION 453166124Srafan mtx_assert(&sched_lock, MA_NOTOWNED); 454166124Srafan if (td->td_pflags & TDP_OWEPREEMPT) { 455166124Srafan mtx_lock_spin(&sched_lock); 456166124Srafan mi_switch(SW_INVOL, NULL); 457166124Srafan mtx_unlock_spin(&sched_lock); 458166124Srafan } 459166124Srafan#endif 460166124Srafan td->td_critnest = 0; 461166124Srafan cpu_critical_exit(td); 462166124Srafan } else { 463166124Srafan td->td_critnest--; 464166124Srafan } 465166124Srafan} 466166124Srafan 467166124Srafan/* 468166124Srafan * This function is called when a thread is about to be put on run queue 469166124Srafan * because it has been made runnable or its priority has been adjusted. It 470166124Srafan * determines if the new thread should be immediately preempted to. If so, 47150276Speter * it switches to it and eventually returns true. If not, it returns false 47250276Speter * so that the caller may place the thread on an appropriate run queue. 47350276Speter */ 474166124Srafanint 475166124Srafanmaybe_preempt(struct thread *td) 47650276Speter{ 477166124Srafan#ifdef PREEMPTION 47850276Speter struct thread *ctd; 47950276Speter int cpri, pri; 48050276Speter#endif 48150276Speter 48250276Speter mtx_assert(&sched_lock, MA_OWNED); 483166124Srafan#ifdef PREEMPTION 484166124Srafan /* 485166124Srafan * The new thread should not preempt the current thread if any of the 486166124Srafan * following conditions are true: 487166124Srafan * 48850276Speter * - The current thread has a higher (numerically lower) or 48997049Speter * equivalent priority. Note that this prevents curthread from 49097049Speter * trying to preempt to itself. 49150276Speter * - It is too early in the boot for context switches (cold is set). 49250276Speter * - The current thread has an inhibitor set or is in the process of 49350276Speter * exiting. In this case, the current thread is about to switch 49450276Speter * out anyways, so there's no point in preempting. If we did, 49597049Speter * the current thread would not be properly resumed as well, so 49650276Speter * just avoid that whole landmine. 49797049Speter * - If the new thread's priority is not a realtime priority and 49897049Speter * the current thread's priority is not an idle priority and 49950276Speter * FULL_PREEMPTION is disabled. 50050276Speter * 50150276Speter * If all of these conditions are false, but the current thread is in 50250276Speter * a nested critical section, then we have to defer the preemption 50397049Speter * until we exit the critical section. Otherwise, switch immediately 50450276Speter * to the new thread. 50550276Speter */ 50650276Speter ctd = curthread; 50750276Speter KASSERT ((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd), 50850276Speter ("thread has no (or wrong) sched-private part.")); 50950276Speter KASSERT((td->td_inhibitors == 0), 51050276Speter ("maybe_preempt: trying to run inhibitted thread")); 51150276Speter pri = td->td_priority; 51250276Speter cpri = ctd->td_priority; 51350276Speter if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) || 514174993Srafan td->td_kse->ke_state != KES_THREAD) 51550276Speter return (0); 51650276Speter#ifndef FULL_PREEMPTION 51750276Speter if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) && 51850276Speter !(cpri >= PRI_MIN_IDLE)) 51950276Speter return (0); 52076726Speter#endif 52176726Speter if (ctd->td_critnest > 1) { 52276726Speter CTR1(KTR_PROC, "maybe_preempt: in critical section %d", 52376726Speter ctd->td_critnest); 52476726Speter ctd->td_pflags |= TDP_OWEPREEMPT; 52576726Speter return (0); 52676726Speter } 52776726Speter 52876726Speter /* 52976726Speter * Our thread state says that we are already on a run queue, so 53076726Speter * update our state as if we had been dequeued by choosethread(). 53176726Speter * However we must not actually be on the system run queue yet. 53276726Speter */ 53376726Speter MPASS(TD_ON_RUNQ(td)); 53476726Speter MPASS(td->td_sched->ke_state != KES_ONRUNQ); 53576726Speter if (td->td_proc->p_flag & P_HADTHREADS) { 53676726Speter /* 53776726Speter * If this is a threaded process we actually ARE on the 53876726Speter * ksegrp run queue so take it off that first. 53976726Speter * Also undo any damage done to the last_assigned pointer. 54076726Speter * XXX Fix setrunqueue so this isn't needed 54176726Speter */ 54276726Speter struct ksegrp *kg; 54376726Speter 54476726Speter kg = td->td_ksegrp; 54576726Speter if (kg->kg_last_assigned == td) 54676726Speter kg->kg_last_assigned = 54776726Speter TAILQ_PREV(td, threadqueue, td_runq); 54876726Speter TAILQ_REMOVE(&kg->kg_runq, td, td_runq); 54976726Speter } 55076726Speter 55176726Speter TD_SET_RUNNING(td); 55276726Speter CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 55376726Speter td->td_proc->p_pid, td->td_proc->p_comm); 55476726Speter mi_switch(SW_INVOL, td); 55576726Speter return (1); 55676726Speter#else 55776726Speter return (0); 55876726Speter#endif 55976726Speter} 56076726Speter 56176726Speter#if 0 56276726Speter#ifndef PREEMPTION 56376726Speter/* XXX: There should be a non-static version of this. */ 56476726Speterstatic void 56576726Speterprintf_caddr_t(void *data) 56676726Speter{ 56776726Speter printf("%s", (char *)data); 56876726Speter} 56976726Speterstatic char preempt_warning[] = 57076726Speter "WARNING: Kernel preemption is disabled, expect reduced performance.\n"; 57176726SpeterSYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t, 57276726Speter preempt_warning) 57376726Speter#endif 57476726Speter#endif 57576726Speter 57676726Speter/************************************************************************ 57776726Speter * SYSTEM RUN QUEUE manipulations and tests * 57876726Speter ************************************************************************/ 57976726Speter/* 58076726Speter * Initialize a run structure. 58176726Speter */ 58276726Spetervoid 58376726Speterrunq_init(struct runq *rq) 58476726Speter{ 58576726Speter int i; 58676726Speter 58776726Speter bzero(rq, sizeof *rq); 58876726Speter for (i = 0; i < RQ_NQS; i++) 58976726Speter TAILQ_INIT(&rq->rq_queues[i]); 59076726Speter} 59176726Speter 59276726Speter/* 59376726Speter * Clear the status bit of the queue corresponding to priority level pri, 59476726Speter * indicating that it is empty. 59576726Speter */ 59676726Speterstatic __inline void 59776726Speterrunq_clrbit(struct runq *rq, int pri) 59876726Speter{ 59976726Speter struct rqbits *rqb; 60076726Speter 60176726Speter rqb = &rq->rq_status; 60276726Speter CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", 60376726Speter rqb->rqb_bits[RQB_WORD(pri)], 60476726Speter rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), 60576726Speter RQB_BIT(pri), RQB_WORD(pri)); 60676726Speter rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); 60776726Speter} 60876726Speter 60976726Speter/* 61076726Speter * Find the index of the first non-empty run queue. This is done by 61176726Speter * scanning the status bits, a set bit indicates a non-empty queue. 61276726Speter */ 61376726Speterstatic __inline int 61476726Speterrunq_findbit(struct runq *rq) 61576726Speter{ 61676726Speter struct rqbits *rqb; 61776726Speter int pri; 61876726Speter int i; 61976726Speter 62076726Speter rqb = &rq->rq_status; 62176726Speter for (i = 0; i < RQB_LEN; i++) 62276726Speter if (rqb->rqb_bits[i]) { 62376726Speter pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); 62476726Speter CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", 62576726Speter rqb->rqb_bits[i], i, pri); 626166124Srafan return (pri); 62750276Speter } 62876726Speter 62950276Speter return (-1); 63076726Speter} 63176726Speter 63276726Speter/* 63376726Speter * Set the status bit of the queue corresponding to priority level pri, 63476726Speter * indicating that it is non-empty. 63576726Speter */ 63676726Speterstatic __inline void 63776726Speterrunq_setbit(struct runq *rq, int pri) 63876726Speter{ 63976726Speter struct rqbits *rqb; 64076726Speter 64176726Speter rqb = &rq->rq_status; 64276726Speter CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", 64376726Speter rqb->rqb_bits[RQB_WORD(pri)], 64476726Speter rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), 64576726Speter RQB_BIT(pri), RQB_WORD(pri)); 64676726Speter rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); 64776726Speter} 64876726Speter 64976726Speter/* 65076726Speter * Add the KSE to the queue specified by its priority, and set the 651166124Srafan * corresponding status bit. 65250276Speter */ 65376726Spetervoid 65450276Speterrunq_add(struct runq *rq, struct kse *ke) 65576726Speter{ 65676726Speter struct rqhead *rqh; 65776726Speter int pri; 65876726Speter 65976726Speter pri = ke->ke_thread->td_priority / RQ_PPQ; 66076726Speter ke->ke_rqindex = pri; 66176726Speter runq_setbit(rq, pri); 66276726Speter rqh = &rq->rq_queues[pri]; 66376726Speter CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p", 66476726Speter ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); 66576726Speter TAILQ_INSERT_TAIL(rqh, ke, ke_procq); 66676726Speter} 66776726Speter 66876726Speter/* 66976726Speter * Return true if there are runnable processes of any priority on the run 67076726Speter * queue, false otherwise. Has no side effects, does not modify the run 67176726Speter * queue structure. 67276726Speter */ 67376726Speterint 67476726Speterrunq_check(struct runq *rq) 675166124Srafan{ 67650276Speter struct rqbits *rqb; 67776726Speter int i; 67876726Speter 67976726Speter rqb = &rq->rq_status; 68076726Speter for (i = 0; i < RQB_LEN; i++) 68176726Speter if (rqb->rqb_bits[i]) { 68276726Speter CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", 68376726Speter rqb->rqb_bits[i], i); 68476726Speter return (1); 68597049Speter } 68676726Speter CTR0(KTR_RUNQ, "runq_check: empty"); 68776726Speter 68850276Speter return (0); 68976726Speter} 69076726Speter 69176726Speter#if defined(SMP) && defined(SCHED_4BSD) 69276726Speterint runq_fuzz = 1; 69376726SpeterSYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, ""); 69476726Speter#endif 69576726Speter 69676726Speter/* 69776726Speter * Find the highest priority process on the run queue. 69876726Speter */ 69976726Speterstruct kse * 70076726Speterrunq_choose(struct runq *rq) 70176726Speter{ 70276726Speter struct rqhead *rqh; 70376726Speter struct kse *ke; 70476726Speter int pri; 70576726Speter 70676726Speter mtx_assert(&sched_lock, MA_OWNED); 70776726Speter while ((pri = runq_findbit(rq)) != -1) { 70876726Speter rqh = &rq->rq_queues[pri]; 70976726Speter#if defined(SMP) && defined(SCHED_4BSD) 71076726Speter /* fuzz == 1 is normal.. 0 or less are ignored */ 71176726Speter if (runq_fuzz > 1) { 71276726Speter /* 71376726Speter * In the first couple of entries, check if 71476726Speter * there is one for our CPU as a preference. 71576726Speter */ 71676726Speter int count = runq_fuzz; 71776726Speter int cpu = PCPU_GET(cpuid); 718174993Srafan struct kse *ke2; 71976726Speter ke2 = ke = TAILQ_FIRST(rqh); 72076726Speter 72176726Speter while (count-- && ke2) { 72276726Speter if (ke->ke_thread->td_lastcpu == cpu) { 72376726Speter ke = ke2; 72476726Speter break; 72576726Speter } 72676726Speter ke2 = TAILQ_NEXT(ke2, ke_procq); 72776726Speter } 72876726Speter } else 72976726Speter#endif 73076726Speter ke = TAILQ_FIRST(rqh); 73176726Speter KASSERT(ke != NULL, ("runq_choose: no proc on busy queue")); 732166124Srafan CTR3(KTR_RUNQ, 733166124Srafan "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh); 73476726Speter return (ke); 73576726Speter } 73676726Speter CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri); 737166124Srafan 73876726Speter return (NULL); 739166124Srafan} 74076726Speter 74176726Speter/* 74276726Speter * Remove the KSE from the queue specified by its priority, and clear the 74376726Speter * corresponding status bit if the queue becomes empty. 74476726Speter * Caller must set ke->ke_state afterwards. 745166124Srafan */ 746166124Srafanvoid 747166124Srafanrunq_remove(struct runq *rq, struct kse *ke) 748166124Srafan{ 74976726Speter struct rqhead *rqh; 75076726Speter int pri; 75176726Speter 75276726Speter KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 75376726Speter ("runq_remove: process swapped out")); 75476726Speter pri = ke->ke_rqindex; 75576726Speter rqh = &rq->rq_queues[pri]; 75676726Speter CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p", 75776726Speter ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); 75876726Speter KASSERT(ke != NULL, ("runq_remove: no proc on busy queue")); 75976726Speter TAILQ_REMOVE(rqh, ke, ke_procq); 76076726Speter if (TAILQ_EMPTY(rqh)) { 76176726Speter CTR0(KTR_RUNQ, "runq_remove: empty"); 76276726Speter runq_clrbit(rq, pri); 76376726Speter } 76476726Speter} 76576726Speter 76676726Speter/****** functions that are temporarily here ***********/ 76776726Speter#include <vm/uma.h> 76876726Speter#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) 76976726Speterextern struct mtx kse_zombie_lock; 77076726Speter 77176726Speter/* 77276726Speter * Allocate scheduler specific per-process resources. 77376726Speter * The thread and ksegrp have already been linked in. 77476726Speter * In this case just set the default concurrency value. 77576726Speter * 77676726Speter * Called from: 777166124Srafan * proc_init() (UMA init method) 77850276Speter */ 77976726Spetervoid 78076726Spetersched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td) 78176726Speter{ 78250276Speter 78376726Speter /* This can go in sched_fork */ 78476726Speter sched_init_concurrency(kg); 78576726Speter} 78676726Speter 78776726Speter#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) 78876726Speter/* 78976726Speter * thread is being either created or recycled. 79076726Speter * Fix up the per-scheduler resources associated with it. 79176726Speter * Called from: 79250276Speter * sched_fork_thread() 79397049Speter * thread_dtor() (*may go away) 794166124Srafan * thread_init() (*may go away) 79597049Speter */ 796166124Srafanvoid 797166124Srafansched_newthread(struct thread *td) 798166124Srafan{ 799166124Srafan struct td_sched *ke; 800166124Srafan 801166124Srafan ke = (struct td_sched *) (td + 1); 802174993Srafan bzero(ke, sizeof(*ke)); 803166124Srafan td->td_sched = ke; 804174993Srafan ke->ke_thread = td; 805174993Srafan ke->ke_oncpu = NOCPU; 806166124Srafan ke->ke_state = KES_THREAD; 807166124Srafan} 808166124Srafan 809166124Srafan/* 810166124Srafan * Set up an initial concurrency of 1 811174993Srafan * and set the given thread (if given) to be using that 812166124Srafan * concurrency slot. 813166124Srafan * May be used "offline"..before the ksegrp is attached to the world 814166124Srafan * and thus wouldn't need schedlock in that case. 815166124Srafan * Called from: 816166124Srafan * thr_create() 817166124Srafan * proc_init() (UMA) via sched_newproc() 818166124Srafan */ 819166124Srafanvoid 820166124Srafansched_init_concurrency(struct ksegrp *kg) 821166124Srafan{ 822174993Srafan 823166124Srafan CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg); 82497049Speter kg->kg_concurrency = 1; 82597049Speter kg->kg_avail_opennings = 1; 82697049Speter} 82750276Speter 828174993Srafan/* 829174993Srafan * Change the concurrency of an existing ksegrp to N 830174993Srafan * Called from: 831174993Srafan * kse_create() 832174993Srafan * kse_exit() 833174993Srafan * thread_exit() 834178866Srafan * thread_single() 835178866Srafan */ 836174993Srafanvoid 837174993Srafansched_set_concurrency(struct ksegrp *kg, int concurrency) 838174993Srafan{ 839174993Srafan 840174993Srafan CTR4(KTR_RUNQ,"kg %p set concurrency to %d, slots %d -> %d", 841174993Srafan kg, 842174993Srafan concurrency, 843174993Srafan kg->kg_avail_opennings, 844174993Srafan kg->kg_avail_opennings + (concurrency - kg->kg_concurrency)); 845176187Srafan kg->kg_avail_opennings += (concurrency - kg->kg_concurrency); 846176187Srafan kg->kg_concurrency = concurrency; 847174993Srafan} 848174993Srafan 849174993Srafan/* 850178866Srafan * Called from thread_exit() for all exiting thread 851178866Srafan * 852174993Srafan * Not to be confused with sched_exit_thread() 853174993Srafan * that is only called from thread_exit() for threads exiting 854174993Srafan * without the rest of the process exiting because it is also called from 855174993Srafan * sched_exit() and we wouldn't want to call it twice. 856174993Srafan * XXX This can probably be fixed. 857174993Srafan */ 858174993Srafanvoid 859174993Srafansched_thread_exit(struct thread *td) 860174993Srafan{ 861174993Srafan 862174993Srafan SLOT_RELEASE(td->td_ksegrp); 863174993Srafan slot_fill(td->td_ksegrp); 864174993Srafan} 865174993Srafan 866174993Srafan#endif /* KERN_SWITCH_INCLUDE */ 867174993Srafan